27 Commits

Author SHA1 Message Date
48fe8d1bc9 Bump version. 2025-01-24 15:31:31 -05:00
a4026bfe00 Prep for release. 2025-01-24 15:30:59 -05:00
1e945cb750 Add LICENSE files to example install list. 2025-01-24 14:44:44 -05:00
4cb4ceaadd Update docos with fixed codedoc. 2025-01-24 14:42:41 -05:00
cca7383c73 Fix support for UTF-16 string values in dictionaries (Issue #92)
Specifically to support Unicode Title and Author values.
2025-01-24 10:43:41 -05:00
6c68b9fa5a Add URLs and copyrights for Code 128 font and ProPhoto ICC profile (Issue #91) 2025-01-24 09:56:51 -05:00
dd7ed67ec1 Update makesrcdist to validate CHANGES.md. 2025-01-23 15:34:43 -05:00
9e2f3aba10 Fix reading of compressed object streams (Issue #92) 2025-01-23 15:27:22 -05:00
fca4dbd395 Make sure we have license files for the example fonts (Issue #91) 2025-01-23 13:03:23 -05:00
41ac7a0b4b Changelog. 2025-01-18 09:45:29 -05:00
5fc571b711 Merge pull request #89 from vlasovsoft1979/master
Fix undefined behavior in _pdfioFileSeek
2025-01-18 09:42:58 -05:00
acf27d29c6 Fix undefined behavior 2025-01-18 13:56:25 +03:00
026f653e07 Fix loading of last 1024 bytes for small PDF files (Issue #87) 2025-01-17 16:58:33 -05:00
3bc041e6d3 Delay loading of the Info object and clean up the pdfioinfo example (Issue #87) 2025-01-17 16:50:30 -05:00
fbd61d1fe9 Bump copyright and version, changelog for example makefile fix. 2025-01-10 14:54:11 -05:00
ee2794199c Merge pull request #86 from tlaronde/master
examples/Makefile: libm is not added by default by all
2025-01-10 14:50:44 -05:00
31c3400f23 examples/Makefile: libm is not added by default by all
-lm has to be added for system/compilers that don't add the lib by
default (the case on NetBSD).
2025-01-10 20:18:06 +01:00
6d65a609e5 Update documentation and examples makefile. 2024-12-26 15:12:56 -05:00
e96f9bfa6b Fix compiler warning and update Xcode project. 2024-12-23 15:07:32 -05:00
10c15fc281 Bump NuGet package versions. 2024-12-22 21:33:35 -05:00
fd8427d68a Add pdf2text example docos, install examples to doc directory. 2024-12-22 21:29:32 -05:00
ed1421287f Move pdfiototext to examples. 2024-12-22 19:00:17 -05:00
aa91b141a8 Finalize md2pdf example docos. 2024-12-22 12:09:03 -05:00
5dc68f3285 Save work on docos. 2024-12-21 23:20:36 -05:00
52b508bdd2 Block quote rendering changes. 2024-12-21 14:15:48 -05:00
41ebe39f3b Save work. 2024-12-21 14:04:27 -05:00
62df5f5c78 Add CODE_PADDING and use it for code blocks. 2024-12-21 12:16:36 -05:00
28 changed files with 3181 additions and 321 deletions

2
.gitignore vendored
View File

@ -14,6 +14,7 @@
/examples/code128
/examples/image2pdf
/examples/md2pdf
/examples/pdf2text
/examples/pdfioinfo
/Makefile
/packages
@ -21,7 +22,6 @@
/pdfio.xcodeproj/xcshareddata
/pdfio-*.tar.gz*
/pdfio-*.zip*
/pdfiototext
/testpdfio
/testpdfio-*.pdf
/testttf

View File

@ -1,8 +1,19 @@
Changes in PDFio
================
v1.4.1 - 2025-01-24
-------------------
v1.4.0 - YYYY-MM-DD
- Added license files for the example fonts now bundled with PDFio (Issue #91)
- Fixed the link libraries for the example source code (Issue #86)
- Fixed handling of the Info object (Issue #87)
- Fixed opening of PDF files less than 1024 bytes in length (Issue #87)
- Fixed potential `NULL` dereference when reading (Issue #89)
- Fixed reading of compressed object streams (Issue #92)
- Fixed reading of UTF-16 string values (Issue #92)
v1.4.0 - 2024-12-26
-------------------
- Added new `pdfioDictGetKey` and `pdfioDictGetNumPairs` APIs (Issue #63)

View File

@ -1,7 +1,7 @@
#
# Makefile for PDFio.
#
# Copyright © 2021-2024 by Michael R Sweet.
# Copyright © 2021-2025 by Michael R Sweet.
#
# Licensed under Apache License v2.0. See the file "LICENSE" for more
# information.
@ -103,15 +103,35 @@ LIBOBJS = \
ttf.o
OBJS = \
$(LIBOBJS) \
pdfiototext.o \
testpdfio.o \
testttf.o
TARGETS = \
$(LIBPDFIO) \
$(LIBPDFIO_STATIC) \
pdfiototext \
testpdfio \
testttf
DOCFILES = \
doc/pdfio.html \
doc/pdfio-512.png \
LICENSE \
NOTICE
EXAMPLES = \
examples/Makefile \
examples/Roboto-LICENSE.txt \
examples/Roboto-Bold.ttf \
examples/Roboto-Italic.ttf \
examples/Roboto-Regular.ttf \
examples/RobotoMono-Regular.ttf \
examples/code128.c \
examples/code128.ttf \
examples/code128-LICENSE.txt \
examples/image2pdf.c \
examples/md2pdf.c \
examples/md2pdf.md \
examples/mmd.c \
examples/mmd.h \
examples/pdf2text.c \
examples/pdfioinfo.c
# Make everything
@ -152,8 +172,13 @@ install: $(TARGETS)
$(INSTALL) -c -m 644 pdfio.pc $(BUILDROOT)$(libdir)/pkgconfig
echo Installing documentation to $(BUILDROOT)$(datadir)/doc/pdfio...
$(INSTALL) -d -m 755 $(BUILDROOT)$(datadir)/doc/pdfio
for file in doc/pdfio.html doc/pdfio-512.png LICENSE NOTICE; do \
$(INSTALL) -c -m 644 $$file $(BUILDROOT)$(datadir)/doc/pdfio; \
for file in $(DOCFILES); do \
$(INSTALL) -c -m 644 $$file $(BUILDROOT)$(datadir)/doc/pdfio; \
done
echo Installing examples to $(BUILDROOT)$(datadir)/doc/pdfio/examples...
$(INSTALL) -d -m 755 $(BUILDROOT)$(datadir)/doc/pdfio/examples
for file in $(EXAMPLES); do \
$(INSTALL) -c -m 644 $$file $(BUILDROOT)$(datadir)/doc/pdfio/examples; \
done
echo Installing man page to $(BUILDROOT)$(mandir)/man3...
$(INSTALL) -d -m 755 $(BUILDROOT)$(mandir)/man3
@ -201,12 +226,6 @@ pdfio1.def: $(LIBOBJS) Makefile
grep -v '^_ttf' | sed -e '1,$$s/^_//' | sort >>$@
# pdfio text extraction (demo, doesn't handle a lot of things yet)
pdfiototext: pdfiototext.o libpdfio.a
echo Linking $@...
$(CC) $(LDFLAGS) -o $@ pdfiototext.o libpdfio.a $(LIBS)
# pdfio test program
testpdfio: testpdfio.o libpdfio.a
echo Linking $@...
@ -229,7 +248,7 @@ ttf.o: ttf.h
# Make documentation using Codedoc <https://www.msweet.org/codedoc>
DOCFLAGS = \
--author "Michael R Sweet" \
--copyright "Copyright (c) 2021-2024 by Michael R Sweet" \
--copyright "Copyright (c) 2021-2025 by Michael R Sweet" \
--docversion $(PDFIO_VERSION)
.PHONY: doc

2
NOTICE
View File

@ -1,6 +1,6 @@
PDFio - PDF Read/Write Library
Copyright © 2021-2024 by Michael R Sweet.
Copyright © 2021-2025 by Michael R Sweet.
(Optional) Exceptions to the Apache 2.0 License:
================================================

View File

@ -89,7 +89,7 @@ generates a static library that will be installed under "/usr/local" with:
Legal Stuff
-----------
PDFio is Copyright © 2021-2024 by Michael R Sweet.
PDFio is Copyright © 2021-2025 by Michael R Sweet.
This software is licensed under the Apache License Version 2.0 with an
(optional) exception to allow linking against GPL2/LGPL2 software. See the

24
configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for pdfio 1.4.0.
# Generated by GNU Autoconf 2.71 for pdfio 1.4.1.
#
# Report bugs to <https://github.com/michaelrsweet/pdfio/issues>.
#
@ -610,8 +610,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='pdfio'
PACKAGE_TARNAME='pdfio'
PACKAGE_VERSION='1.4.0'
PACKAGE_STRING='pdfio 1.4.0'
PACKAGE_VERSION='1.4.1'
PACKAGE_STRING='pdfio 1.4.1'
PACKAGE_BUGREPORT='https://github.com/michaelrsweet/pdfio/issues'
PACKAGE_URL='https://www.msweet.org/pdfio'
@ -1293,7 +1293,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures pdfio 1.4.0 to adapt to many kinds of systems.
\`configure' configures pdfio 1.4.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1359,7 +1359,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of pdfio 1.4.0:";;
short | recursive ) echo "Configuration of pdfio 1.4.1:";;
esac
cat <<\_ACEOF
@ -1456,7 +1456,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
pdfio configure 1.4.0
pdfio configure 1.4.1
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@ -1612,7 +1612,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by pdfio $as_me 1.4.0, which was
It was created by pdfio $as_me 1.4.1, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@ -2368,9 +2368,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
PDFIO_VERSION="1.4.0"
PDFIO_VERSION_MAJOR="`echo 1.4.0 | awk -F. '{print $1}'`"
PDFIO_VERSION_MINOR="`echo 1.4.0 | awk -F. '{printf("%d\n",$2);}'`"
PDFIO_VERSION="1.4.1"
PDFIO_VERSION_MAJOR="`echo 1.4.1 | awk -F. '{print $1}'`"
PDFIO_VERSION_MINOR="`echo 1.4.1 | awk -F. '{printf("%d\n",$2);}'`"
@ -4935,7 +4935,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by pdfio $as_me 1.4.0, which was
This file was extended by pdfio $as_me 1.4.1, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -4991,7 +4991,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
pdfio config.status 1.4.0
pdfio config.status 1.4.1
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@ -21,7 +21,7 @@ AC_PREREQ([2.70])
dnl Package name and version...
AC_INIT([pdfio], [1.4.0], [https://github.com/michaelrsweet/pdfio/issues], [pdfio], [https://www.msweet.org/pdfio])
AC_INIT([pdfio], [1.4.1], [https://github.com/michaelrsweet/pdfio/issues], [pdfio], [https://www.msweet.org/pdfio])
PDFIO_VERSION="AC_PACKAGE_VERSION"
PDFIO_VERSION_MAJOR="`echo AC_PACKAGE_VERSION | awk -F. '{print $1}'`"

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +1,13 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>PDFio Programming Manual v1.4.0</title>
<title>PDFio Programming Manual v1.4.1</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="generator" content="codedoc v3.8">
<meta name="author" content="Michael R Sweet">
<meta name="language" content="en-US">
<meta name="copyright" content="Copyright © 2021-2024 by Michael R Sweet">
<meta name="version" content="1.4.0">
<meta name="copyright" content="Copyright © 2021-2025 by Michael R Sweet">
<meta name="version" content="1.4.1">
<style type="text/css"><!--
body {
background: white;
@ -251,9 +251,9 @@ span.string {
<body>
<div class="header">
<p><img class="title" src="pdfio-512.png"></p>
<h1 class="title">PDFio Programming Manual v1.4.0</h1>
<h1 class="title">PDFio Programming Manual v1.4.1</h1>
<p>Michael R Sweet</p>
<p>Copyright © 2021-2024 by Michael R Sweet</p>
<p>Copyright © 2021-2025 by Michael R Sweet</p>
</div>
<div class="contents">
<h2 class="title">Contents</h2>
@ -276,7 +276,8 @@ span.string {
</ul></li>
<li><a href="#examples">Examples</a><ul class="subcontents">
<li><a href="#read-pdf-metadata">Read PDF Metadata</a></li>
<li><a href="#create-pdf-file-with-text-and-image">Create PDF File With Text and Image</a></li>
<li><a href="#extract-text-from-pdf-file">Extract Text from PDF File</a></li>
<li><a href="#create-a-pdf-file-with-text-and-an-image">Create a PDF File With Text and an Image</a></li>
<li><a href="#generate-a-code-128-barcode">Generate a Code 128 Barcode</a></li>
<li><a href="#convert-markdown-to-pdf">Convert Markdown to PDF</a></li>
</ul></li>
@ -505,7 +506,7 @@ span.string {
</div>
<div class="body">
<h2 class="title" id="introduction">Introduction</h2>
<p>PDFio is a simple C library for reading and writing PDF files. The primary goals of pdfio are:</p>
<p>PDFio is a simple C library for reading and writing PDF files. The primary goals of PDFio are:</p>
<ul>
<li><p>Read and write any version of PDF file</p>
</li>
@ -709,8 +710,8 @@ startxref % startxref keyword
<h3 class="title" id="reading-pdf-files">Reading PDF Files</h3>
<p>You open an existing PDF file using the <a href="#pdfioFileOpen"><code>pdfioFileOpen</code></a> function:</p>
<pre><code class="language-c">pdfio_file_t *pdf =
pdfioFileOpen(<span class="string">&quot;myinputfile.pdf&quot;</span>, password_cb, password_data,
error_cb, error_data);
pdfioFileOpen(<span class="string">&quot;myinputfile.pdf&quot;</span>, password_cb, password_data, error_cb,
error_data);
</code></pre>
<p>where the five arguments to the function are the filename (&quot;myinputfile.pdf&quot;), an optional password callback function (<code>password_cb</code>) and data pointer value (<code>password_data</code>), and an optional error callback function (<code>error_cb</code>) and data pointer value (<code>error_data</code>). The password callback is called for encrypted PDF files that are not using the default password, for example:</p>
<pre><code class="language-c"><span class="reserved">const</span> <span class="reserved">char</span> *
@ -817,8 +818,7 @@ pdfio_array_t *crop_box; <span class="comment">// CropBox array</span>
<pre><code class="language-c">pdfio_rect_t media_box = { <span class="number">0.0</span>, <span class="number">0.0</span>, <span class="number">612.0</span>, <span class="number">792.0</span> }; <span class="comment">// US Letter</span>
pdfio_rect_t crop_box = { <span class="number">36.0</span>, <span class="number">36.0</span>, <span class="number">576.0</span>, <span class="number">756.0</span> }; <span class="comment">// w/0.5&quot; margins</span>
pdfio_file_t *pdf = pdfioFileCreate(<span class="string">&quot;myoutputfile.pdf&quot;</span>, <span class="string">&quot;2.0&quot;</span>,
&amp;media_box, &amp;crop_box,
pdfio_file_t *pdf = pdfioFileCreate(<span class="string">&quot;myoutputfile.pdf&quot;</span>, <span class="string">&quot;2.0&quot;</span>, &amp;media_box, &amp;crop_box,
error_cb, error_data);
</code></pre>
<p>where the six arguments to the function are the filename (&quot;myoutputfile.pdf&quot;), PDF version (&quot;2.0&quot;), media box (<code>media_box</code>), crop box (<code>crop_box</code>), an optional error callback function (<code>error_cb</code>), and an optional pointer value for the error callback function (<code>error_data</code>). The units for the media and crop boxes are points (1/72nd of an inch).</p>
@ -826,9 +826,8 @@ pdfio_file_t *pdf = pdfioFileCreate(<span class="string">&quot;myoutputfile.pdf&
<pre><code class="language-c">pdfio_rect_t media_box = { <span class="number">0.0</span>, <span class="number">0.0</span>, <span class="number">612.0</span>, <span class="number">792.0</span> }; <span class="comment">// US Letter</span>
pdfio_rect_t crop_box = { <span class="number">36.0</span>, <span class="number">36.0</span>, <span class="number">576.0</span>, <span class="number">756.0</span> }; <span class="comment">// w/0.5&quot; margins</span>
pdfio_file_t *pdf = pdfioFileCreateOutput(output_cb, output_ctx, <span class="string">&quot;2.0&quot;</span>,
&amp;media_box, &amp;crop_box,
error_cb, error_data);
pdfio_file_t *pdf = pdfioFileCreateOutput(output_cb, output_ctx, <span class="string">&quot;2.0&quot;</span>, &amp;media_box,
&amp;crop_box, error_cb, error_data);
</code></pre>
<p>Once the file is created, use the <a href="#pdfioFileCreateObj"><code>pdfioFileCreateObj</code></a>, <a href="#pdfioFileCreatePage"><code>pdfioFileCreatePage</code></a>, and <a href="#pdfioPageCopy"><code>pdfioPageCopy</code></a> functions to create objects and pages in the file.</p>
<p>Finally, the <a href="#pdfioFileClose"><code>pdfioFileClose</code></a> function writes the PDF cross-reference and &quot;trailer&quot; information, closes the file, and frees all memory that was used for it.</p>
@ -998,10 +997,9 @@ pdfio_obj_t *arial =
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
<span class="reserved">unsigned</span> <span class="reserved">char</span> data[<span class="number">1024</span> * <span class="number">1024</span> * <span class="number">4</span>]; <span class="comment">// 1024x1024 RGBA image data</span>
pdfio_obj_t *img =
pdfioFileCreateImageObjFromData(pdf, data, <span class="comment">/*width*/</span><span class="number">1024</span>,
<span class="comment">/*height*/</span><span class="number">1024</span>, <span class="comment">/*num_colors*/</span><span class="number">3</span>,
<span class="comment">/*color_data*/</span>NULL, <span class="comment">/*alpha*/</span><span class="reserved">true</span>,
<span class="comment">/*interpolate*/</span><span class="reserved">false</span>);
pdfioFileCreateImageObjFromData(pdf, data, <span class="comment">/*width*/</span><span class="number">1024</span>, <span class="comment">/*height*/</span><span class="number">1024</span>,
<span class="comment">/*num_colors*/</span><span class="number">3</span>, <span class="comment">/*color_data*/</span>NULL,
<span class="comment">/*alpha*/</span><span class="reserved">true</span>, <span class="comment">/*interpolate*/</span><span class="reserved">false</span>);
</code></pre>
<p>will create an object for a 1024x1024 RGBA image in memory, using the default color space for 3 colors (&quot;DeviceRGB&quot;). We can use one of the <a href="#color-space-functions">color space functions</a> to use a specific color space for this image, for example:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
@ -1009,24 +1007,20 @@ pdfio_obj_t *img =
<span class="comment">// Create an AdobeRGB color array</span>
pdfio_array_t *adobe_rgb =
pdfioArrayCreateColorFromMatrix(pdf, <span class="number">3</span>, pdfioAdobeRGBGamma,
pdfioAdobeRGBMatrix,
pdfioAdobeRGBWhitePoint);
pdfioAdobeRGBMatrix, pdfioAdobeRGBWhitePoint);
<span class="comment">// Create a 1024x1024 RGBA image using AdobeRGB</span>
<span class="reserved">unsigned</span> <span class="reserved">char</span> data[<span class="number">1024</span> * <span class="number">1024</span> * <span class="number">4</span>]; <span class="comment">// 1024x1024 RGBA image data</span>
pdfio_obj_t *img =
pdfioFileCreateImageObjFromData(pdf, data, <span class="comment">/*width*/</span><span class="number">1024</span>,
<span class="comment">/*height*/</span><span class="number">1024</span>, <span class="comment">/*num_colors*/</span><span class="number">3</span>,
<span class="comment">/*color_data*/</span>adobe_rgb,
<span class="comment">/*alpha*/</span><span class="reserved">true</span>,
<span class="comment">/*interpolate*/</span><span class="reserved">false</span>);
pdfioFileCreateImageObjFromData(pdf, data, <span class="comment">/*width*/</span><span class="number">1024</span>, <span class="comment">/*height*/</span><span class="number">1024</span>,
<span class="comment">/*num_colors*/</span><span class="number">3</span>, <span class="comment">/*color_data*/</span>adobe_rgb,
<span class="comment">/*alpha*/</span><span class="reserved">true</span>, <span class="comment">/*interpolate*/</span><span class="reserved">false</span>);
</code></pre>
<p>The &quot;interpolate&quot; argument specifies whether the colors in the image should be smoothed/interpolated when scaling. This is most useful for photographs but should be <code>false</code> for screenshot and barcode images.</p>
<p>If you have a JPEG or PNG file, use the <a href="#pdfioFileCreateImageObjFromFile"><code>pdfioFileCreateImageObjFromFile</code></a> function to copy the image into a PDF image object, for example:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *img =
pdfioFileCreateImageObjFromFile(pdf, <span class="string">&quot;myphoto.jpg&quot;</span>,
<span class="comment">/*interpolate*/</span><span class="reserved">true</span>);
pdfioFileCreateImageObjFromFile(pdf, <span class="string">&quot;myphoto.jpg&quot;</span>, <span class="comment">/*interpolate*/</span><span class="reserved">true</span>);
</code></pre>
<blockquote>
<p>Note: Currently <code>pdfioFileCreateImageObjFromFile</code> does not support 12 bit JPEG files or PNG files with an alpha channel.</p>
@ -1154,6 +1148,7 @@ pdfio_obj_t *img =
</li>
</ul>
<h2 class="title" id="examples">Examples</h2>
<p>PDFio includes several example programs that are typically installed to the <code>/usr/share/doc/pdfio/examples</code> or <code>/usr/local/share/doc/pdfio/examples</code> directories. A makefile is included to build them.</p>
<h3 class="title" id="read-pdf-metadata">Read PDF Metadata</h3>
<p>The <code>pdfioinfo.c</code> example program opens a PDF file and prints the title, author, creation date, and number of pages:</p>
<pre><code class="language-c"><span class="directive">#include &lt;pdfio.h&gt;</span>
@ -1166,9 +1161,11 @@ main(<span class="reserved">int</span> argc, <span clas
{
<span class="reserved">const</span> <span class="reserved">char</span> *filename; <span class="comment">// PDF filename</span>
pdfio_file_t *pdf; <span class="comment">// PDF file</span>
<span class="reserved">const</span> <span class="reserved">char</span> *author; <span class="comment">// Author name</span>
time_t creation_date; <span class="comment">// Creation date</span>
<span class="reserved">struct</span> tm *creation_tm; <span class="comment">// Creation date/time information</span>
<span class="reserved">char</span> creation_text[<span class="number">256</span>]; <span class="comment">// Creation date/time as a string</span>
<span class="reserved">const</span> <span class="reserved">char</span> *title; <span class="comment">// Title</span>
<span class="comment">// Get the filename from the command-line...</span>
@ -1181,21 +1178,30 @@ main(<span class="reserved">int</span> argc, <span clas
filename = argv[<span class="number">1</span>];
<span class="comment">// Open the PDF file with the default callbacks...</span>
pdf = pdfioFileOpen(filename, <span class="comment">/*password_cb*/</span>NULL,
<span class="comment">/*password_cbdata*/</span>NULL, <span class="comment">/*error_cb*/</span>NULL,
<span class="comment">/*error_cbdata*/</span>NULL);
pdf = pdfioFileOpen(filename, <span class="comment">/*password_cb*/</span>NULL, <span class="comment">/*password_cbdata*/</span>NULL,
<span class="comment">/*error_cb*/</span>NULL, <span class="comment">/*error_cbdata*/</span>NULL);
<span class="reserved">if</span> (pdf == NULL)
<span class="reserved">return</span> (<span class="number">1</span>);
<span class="comment">// Get the title and author...</span>
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
<span class="comment">// Get the creation date and convert to a string...</span>
creation_date = pdfioFileGetCreationDate(pdf);
creation_tm = localtime(&amp;creation_date);
strftime(creation_text, <span class="reserved">sizeof</span>(creation_text), <span class="string">&quot;%c&quot;</span>, creation_tm);
<span class="reserved">if</span> ((creation_date = pdfioFileGetCreationDate(pdf)) &gt; <span class="number">0</span>)
{
creation_tm = localtime(&amp;creation_date);
strftime(creation_text, <span class="reserved">sizeof</span>(creation_text), <span class="string">&quot;%c&quot;</span>, creation_tm);
}
<span class="reserved">else</span>
{
snprintf(creation_text, <span class="reserved">sizeof</span>(creation_text), <span class="string">&quot;-- not set --&quot;</span>);
}
<span class="comment">// Print file information to stdout...</span>
printf(<span class="string">&quot;%s:\n&quot;</span>, filename);
printf(<span class="string">&quot; Title: %s\n&quot;</span>, pdfioFileGetTitle(pdf));
printf(<span class="string">&quot; Author: %s\n&quot;</span>, pdfioFileGetAuthor(pdf));
printf(<span class="string">&quot; Title: %s\n&quot;</span>, title ? title : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Author: %s\n&quot;</span>, author ? author : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Created On: %s\n&quot;</span>, creation_text);
printf(<span class="string">&quot; Number Pages: %u\n&quot;</span>, (<span class="reserved">unsigned</span>)pdfioFileGetNumPages(pdf));
@ -1205,7 +1211,39 @@ main(<span class="reserved">int</span> argc, <span clas
<span class="reserved">return</span> (<span class="number">0</span>);
}
</code></pre>
<h3 class="title" id="create-pdf-file-with-text-and-image">Create PDF File With Text and Image</h3>
<h3 class="title" id="extract-text-from-pdf-file">Extract Text from PDF File</h3>
<p>The <code>pdf2text.c</code> example code extracts non-Unicode text from a PDF file by scanning each page for strings and text drawing commands. Since it doesn't look at the font encoding or support Unicode text, it is really only useful to extract plain ASCII text from a PDF file. And since it writes text in the order it appears in the page stream, it may not come out in the same order as it appears on the page.</p>
<p>The <a href="#pdfioStreamGetToken"><code>pdfioStreamGetToken</code></a> function is used to read individual tokens from the page streams. Tokens starting with the open parenthesis are text strings, while PDF operators are left as-is. We use some simple logic to make sure that we include spaces between text strings and add newlines for the text operators that start a new line in a text block:</p>
<pre><code class="language-c">pdfio_stream_t *st; <span class="comment">// Page stream</span>
<span class="reserved">bool</span> first = <span class="reserved">true</span>; <span class="comment">// First string on line?</span>
<span class="reserved">char</span> buffer[<span class="number">1024</span>]; <span class="comment">// Token buffer</span>
<span class="comment">// Read PDF tokens from the page stream...</span>
<span class="reserved">while</span> (pdfioStreamGetToken(st, buffer, <span class="reserved">sizeof</span>(buffer)))
{
<span class="reserved">if</span> (buffer[<span class="number">0</span>] == <span class="string">'('</span>)
{
<span class="comment">// Text string using an 8-bit encoding</span>
<span class="reserved">if</span> (first)
first = <span class="reserved">false</span>;
<span class="reserved">else</span> <span class="reserved">if</span> (buffer[<span class="number">1</span>] != <span class="string">' '</span>)
putchar(<span class="string">' '</span>);
fputs(buffer + <span class="number">1</span>, stdout);
}
<span class="reserved">else</span> <span class="reserved">if</span> (!strcmp(buffer, <span class="string">&quot;Td&quot;</span>) || !strcmp(buffer, <span class="string">&quot;TD&quot;</span>) || !strcmp(buffer, <span class="string">&quot;T*&quot;</span>) ||
!strcmp(buffer, <span class="string">&quot;\'&quot;</span>) || !strcmp(buffer, <span class="string">&quot;\&quot;&quot;</span>))
{
<span class="comment">// Text operators that advance to the next line in the block</span>
putchar(<span class="string">'\n'</span>);
first = <span class="reserved">true</span>;
}
}
<span class="reserved">if</span> (!first)
putchar(<span class="string">'\n'</span>);
</code></pre>
<h3 class="title" id="create-a-pdf-file-with-text-and-an-image">Create a PDF File With Text and an Image</h3>
<p>The <code>image2pdf.c</code> example code creates a PDF file containing a JPEG or PNG image file and optional caption on a single page. The <code>create_pdf_image_file</code> function creates the PDF file, embeds a base font and the named JPEG or PNG image file, and then creates a page with the image centered on the page with any text centered below:</p>
<pre><code class="language-c"><span class="directive">#include &lt;pdfio.h&gt;</span>
<span class="directive">#include &lt;pdfio-content.h&gt;</span>
@ -1229,9 +1267,8 @@ create_pdf_image_file(
<span class="comment">// Create the PDF file...</span>
pdf = pdfioFileCreate(pdfname, <span class="comment">/*version*/</span>NULL, <span class="comment">/*media_box*/</span>NULL,
<span class="comment">/*crop_box*/</span>NULL, <span class="comment">/*error_cb*/</span>NULL,
<span class="comment">/*error_cbdata*/</span>NULL);
pdf = pdfioFileCreate(pdfname, <span class="comment">/*version*/</span>NULL, <span class="comment">/*media_box*/</span>NULL, <span class="comment">/*crop_box*/</span>NULL,
<span class="comment">/*error_cb*/</span>NULL, <span class="comment">/*error_cbdata*/</span>NULL);
<span class="reserved">if</span> (!pdf)
<span class="reserved">return</span> (<span class="reserved">false</span>);
@ -1265,9 +1302,9 @@ create_pdf_image_file(
width = pdfioImageGetWidth(image);
height = pdfioImageGetHeight(image);
<span class="comment">// Default media_box is &quot;universal&quot; 595.28x792 points (8.27x11in or</span>
<span class="comment">// 210x279mm). Use margins of 36 points (0.5in or 12.7mm) with another</span>
<span class="comment">// 36 points for the caption underneath...</span>
<span class="comment">// Default media_box is &quot;universal&quot; 595.28x792 points (8.27x11in or 210x279mm).</span>
<span class="comment">// Use margins of 36 points (0.5in or 12.7mm) with another 36 points for the</span>
<span class="comment">// caption underneath...</span>
swidth = <span class="number">595.28</span> - <span class="number">72.0</span>;
sheight = swidth * height / width;
<span class="reserved">if</span> (sheight &gt; (<span class="number">792.0</span> - <span class="number">36.0</span> - <span class="number">72.0</span>))
@ -1284,8 +1321,8 @@ create_pdf_image_file(
<span class="comment">// Draw the caption in black...</span>
pdfioContentSetFillColorDeviceGray(page, <span class="number">0.0</span>);
<span class="comment">// Compute the starting point for the text - Courier is monospaced</span>
<span class="comment">// with a nominal width of 0.6 times the text height...</span>
<span class="comment">// Compute the starting point for the text - Courier is monospaced with a</span>
<span class="comment">// nominal width of 0.6 times the text height...</span>
tx = <span class="number">0.5</span> * (<span class="number">595.28</span> - <span class="number">18.0</span> * <span class="number">0.6</span> * strlen(caption));
<span class="comment">// Position and draw the caption underneath...</span>
@ -1353,8 +1390,7 @@ make_code128(<span class="reserved">char</span> *dst, <span clas
}
</code></pre>
<p>The <code>main</code> function does the rest of the work. The barcode font is imported using the <a href="#pdfioFileCreateFontObjFromFile"><code>pdfioFileCreateFontObjFromFile</code></a> function. We pass <code>false</code> for the &quot;unicode&quot; argument since we just want the (default) ASCII encoding:</p>
<pre><code class="language-c">barcode_font = pdfioFileCreateFontObjFromFile(pdf, <span class="string">&quot;code128.ttf&quot;</span>,
<span class="comment">/*unicode*/</span><span class="reserved">false</span>);
<pre><code class="language-c">barcode_font = pdfioFileCreateFontObjFromFile(pdf, <span class="string">&quot;code128.ttf&quot;</span>, <span class="comment">/*unicode*/</span><span class="reserved">false</span>);
</code></pre>
<p>Since barcodes usually have the number or text represented by the barcode printed underneath it, we also need a regular text font, for which we can choose one of the standard 14 PostScript base fonts using the <a href="#pdfioFileCreateFontObjFromBase"><code>pdfioFileCreateFontObjFromBase</code></a> function:</p>
<pre><code class="language-c">text_font = pdfioFileCreateFontObjFromBase(pdf, <span class="string">&quot;Helvetica&quot;</span>);
@ -1378,8 +1414,7 @@ make_code128(<span class="reserved">char</span> *dst, <span clas
<span class="reserved">if</span> (text &amp;&amp; text_font)
{
text_height = <span class="number">9.0</span>;
text_width = pdfioContentTextMeasure(text_font, text,
text_height);
text_width = pdfioContentTextMeasure(text_font, text, text_height);
}
<span class="comment">// Compute the size of the PDF page...</span>
@ -1387,8 +1422,7 @@ pdfio_rect_t media_box;
media_box.x1 = <span class="number">0.0</span>;
media_box.y1 = <span class="number">0.0</span>;
media_box.x2 = (barcode_width &gt; text_width ?
barcode_width : text_width) + <span class="number">18.0</span>;
media_box.x2 = (barcode_width &gt; text_width ? barcode_width : text_width) + <span class="number">18.0</span>;
media_box.y2 = barcode_height + text_height + <span class="number">18.0</span>;
</code></pre>
<p>Finally, we just need to create a page of the specified size that references the two fonts:</p>
@ -1429,8 +1463,650 @@ pdfioStreamClose(page_st);
<h3 class="title" id="convert-markdown-to-pdf">Convert Markdown to PDF</h3>
<p>Markdown is a simple plain text format that supports things like headings, links, character styles, tables, and embedded images. The <code>md2pdf.c</code> example code uses the <a href="https://www.msweet.org/mmd/">mmd</a> library to convert markdown to a PDF file that can be distributed.</p>
<blockquote>
<p>Note: The md2pdf example is by far the most complex example code included with PDFio and shows how to layout text, add headers and footers, add links, embed images, and format tables.</p>
<p>Note: The md2pdf example is by far the most complex example code included with PDFio and shows how to lay out text, add headers and footers, add links, embed images, format tables, and add an outline (table of contents) for navigation.</p>
</blockquote>
<h4 id="managing-document-state">Managing Document State</h4>
<p>The <code>md2pdf</code> program needs to maintain three sets of state - one for the markdown document which is represented by nodes of type <code>mmd_t</code> and the others for the PDF document and current PDF page which are contained in the <code>docdata_t</code> structure:</p>
<pre><code class="language-c"><span class="reserved">typedef</span> <span class="reserved">struct</span> docdata_s <span class="comment">// Document formatting data</span>
{
<span class="comment">// State for the whole document</span>
pdfio_file_t *pdf; <span class="comment">// PDF file</span>
pdfio_rect_t media_box; <span class="comment">// Media (page) box</span>
pdfio_rect_t crop_box; <span class="comment">// Crop box (for margins)</span>
pdfio_rect_t art_box; <span class="comment">// Art box (for markdown content)</span>
pdfio_obj_t *fonts[DOCFONT_MAX]; <span class="comment">// Embedded fonts</span>
<span class="reserved">double</span> font_space; <span class="comment">// Unit width of a space</span>
size_t num_images; <span class="comment">// Number of embedded images</span>
docimage_t images[DOCIMAGE_MAX]; <span class="comment">// Embedded images</span>
<span class="reserved">const</span> <span class="reserved">char</span> *title; <span class="comment">// Document title</span>
<span class="reserved">char</span> *heading; <span class="comment">// Current document heading</span>
size_t num_actions; <span class="comment">// Number of actions for this document</span>
docaction_t actions[DOCACTION_MAX]; <span class="comment">// Actions for this document</span>
size_t num_targets; <span class="comment">// Number of targets for this document</span>
doctarget_t targets[DOCTARGET_MAX]; <span class="comment">// Targets for this document</span>
size_t num_toc; <span class="comment">// Number of table-of-contents entries</span>
doctoc_t toc[DOCTOC_MAX]; <span class="comment">// Table-of-contents entries</span>
<span class="comment">// State for the current page</span>
pdfio_stream_t *st; <span class="comment">// Current page stream</span>
<span class="reserved">double</span> y; <span class="comment">// Current position on page</span>
docfont_t font; <span class="comment">// Current font</span>
<span class="reserved">double</span> fsize; <span class="comment">// Current font size</span>
doccolor_t color; <span class="comment">// Current color</span>
pdfio_array_t *annots_array; <span class="comment">// Annotations array (for links)</span>
pdfio_obj_t *annots_obj; <span class="comment">// Annotations object (for links)</span>
size_t num_links; <span class="comment">// Number of links for this page</span>
doclink_t links[DOCLINK_MAX]; <span class="comment">// Links for this page</span>
} docdata_t;
</code></pre>
<h5 id="document-state">Document State</h5>
<p>The output is fixed to the &quot;universal&quot; media size (the intersection of US Letter and ISO A4) with 1/2 inch margins - the <code>PAGE_</code> constants can be changed to select a different size or margins. The <code>media_box</code> member contains the &quot;MediaBox&quot; rectangle for the PDF pages, while the <code>crop_box</code> and <code>art_box</code> members contain the &quot;CropBox&quot; and &quot;ArtBox&quot; values, respectively.</p>
<p>Four embedded fonts are used:</p>
<ul>
<li><p><code>DOCFONT_REGULAR</code>: the default font used for text,</p>
</li>
<li><p><code>DOCFONT_BOLD</code>: a boldface font used for headings and strong text,</p>
</li>
<li><p><code>DOCFONT_ITALIC</code>: an italic/oblique font used for emphasized text, and</p>
</li>
<li><p><code>DOCFONT_MONOSPACE</code>: a fixed-width font used for code.</p>
</li>
</ul>
<p>By default the code uses the base PostScript fonts Helvetica, Helvetica-Bold, Helvetica-Oblique, and Courier. The <code>USE_TRUETYPE</code> define can be used to replace these with the Roboto TrueType fonts.</p>
<p>Embedded JPEG and PNG images are copied into the PDF document, with the <code>images</code> array containing the list of the images and their objects.</p>
<p>The <code>title</code> member contains the document title, while the <code>heading</code> member contains the current heading text.</p>
<p>The <code>actions</code> array contains a list of action dictionaries for interior document links that need to be resolved, while the <code>targets</code> array keeps track of the location of the headings in the PDF document.</p>
<p>The <code>toc</code> array contains a list of headings and is used to construct the PDF outlines dictionaries/objects, which provide a table of contents for navigation in most PDF readers.</p>
<h5 id="page-state">Page State</h5>
<p>The <code>st</code> member provides the stream for the current page content. The <code>color</code>, <code>font</code>, <code>fsize</code>, and <code>y</code> members provide the current graphics state on the page.</p>
<p>The <code>annots_array</code>, <code>annots_obj</code>, <code>num_links</code>, and <code>links</code> members contain a list of hyperlinks on the current page.</p>
<h4 id="creating-pages">Creating Pages</h4>
<p>The <code>new_page</code> function is used to start a new page. Aside from creating the new page object and stream, it adds a standard header and footer to the page. It starts by closing the current page if it is open:</p>
<pre><code class="language-c"><span class="comment">// Close the current page...</span>
<span class="reserved">if</span> (dd-&gt;st)
{
pdfioStreamClose(dd-&gt;st);
add_links(dd);
}
</code></pre>
<p>The new page needs a dictionary containing any link annotations, the media and art boxes, the four fonts, and any images:</p>
<pre><code class="language-c"><span class="comment">// Prep the new page...</span>
page_dict = pdfioDictCreate(dd-&gt;pdf);
dd-&gt;annots_array = pdfioArrayCreate(dd-&gt;pdf);
dd-&gt;annots_obj = pdfioFileCreateArrayObj(dd-&gt;pdf, dd-&gt;annots_array);
pdfioDictSetObj(page_dict, <span class="string">&quot;Annots&quot;</span>, dd-&gt;annots_obj);
pdfioDictSetRect(page_dict, <span class="string">&quot;MediaBox&quot;</span>, &amp;dd-&gt;media_box);
pdfioDictSetRect(page_dict, <span class="string">&quot;ArtBox&quot;</span>, &amp;dd-&gt;art_box);
<span class="reserved">for</span> (fontface = DOCFONT_REGULAR; fontface &lt; DOCFONT_MAX; fontface ++)
pdfioPageDictAddFont(page_dict, docfont_names[fontface], dd-&gt;fonts[fontface]);
<span class="reserved">for</span> (i = <span class="number">0</span>; i &lt; dd-&gt;num_images; i ++)
pdfioPageDictAddImage(page_dict, pdfioStringCreatef(dd-&gt;pdf, <span class="string">&quot;I%u&quot;</span>, (<span class="reserved">unsigned</span>)i),
dd-&gt;images[i].obj);
</code></pre>
<p>Once the page dictionary is initialized, we create a new page and initialize the current graphics state:</p>
<pre><code class="language-c">dd-&gt;st = pdfioFileCreatePage(dd-&gt;pdf, page_dict);
dd-&gt;color = DOCCOLOR_BLACK;
dd-&gt;font = DOCFONT_MAX;
dd-&gt;fsize = <span class="number">0.0</span>;
dd-&gt;y = dd-&gt;art_box.y2;
</code></pre>
<p>The header consists of a dark gray separating line and the document title. We don't show the header on the first page:</p>
<pre><code class="language-c"><span class="comment">// Add header/footer text</span>
set_color(dd, DOCCOLOR_GRAY);
set_font(dd, DOCFONT_REGULAR, SIZE_HEADFOOT);
<span class="reserved">if</span> (pdfioFileGetNumPages(dd-&gt;pdf) &gt; <span class="number">1</span> &amp;&amp; dd-&gt;title)
{
<span class="comment">// Show title in header...</span>
width = pdfioContentTextMeasure(dd-&gt;fonts[DOCFONT_REGULAR], dd-&gt;title,
SIZE_HEADFOOT);
pdfioContentTextBegin(dd-&gt;st);
pdfioContentTextMoveTo(dd-&gt;st,
dd-&gt;crop_box.x1 + <span class="number">0.5</span> * (dd-&gt;crop_box.x2 -
dd-&gt;crop_box.x1 - width),
dd-&gt;crop_box.y2 - SIZE_HEADFOOT);
pdfioContentTextShow(dd-&gt;st, UNICODE_VALUE, dd-&gt;title);
pdfioContentTextEnd(dd-&gt;st);
pdfioContentPathMoveTo(dd-&gt;st, dd-&gt;crop_box.x1,
dd-&gt;crop_box.y2 - <span class="number">2</span> * SIZE_HEADFOOT * LINE_HEIGHT +
SIZE_HEADFOOT);
pdfioContentPathLineTo(dd-&gt;st, dd-&gt;crop_box.x2,
dd-&gt;crop_box.y2 - <span class="number">2</span> * SIZE_HEADFOOT * LINE_HEIGHT +
SIZE_HEADFOOT);
pdfioContentStroke(dd-&gt;st);
}
</code></pre>
<p>The footer contains the same dark gray separating line with the current heading and page number on opposite sides. The page number is always positioned on the outer edge for a two-sided print - right justified on odd numbered pages and left justified on even numbered pages:</p>
<pre><code class="language-c"><span class="comment">// Show page number and current heading...</span>
pdfioContentPathMoveTo(dd-&gt;st, dd-&gt;crop_box.x1,
dd-&gt;crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT);
pdfioContentPathLineTo(dd-&gt;st, dd-&gt;crop_box.x2,
dd-&gt;crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT);
pdfioContentStroke(dd-&gt;st);
pdfioContentTextBegin(dd-&gt;st);
snprintf(temp, <span class="reserved">sizeof</span>(temp), <span class="string">&quot;%u&quot;</span>, (<span class="reserved">unsigned</span>)pdfioFileGetNumPages(dd-&gt;pdf));
<span class="reserved">if</span> (pdfioFileGetNumPages(dd-&gt;pdf) &amp; <span class="number">1</span>)
{
<span class="comment">// Page number on right...</span>
width = pdfioContentTextMeasure(dd-&gt;fonts[DOCFONT_REGULAR], temp, SIZE_HEADFOOT);
pdfioContentTextMoveTo(dd-&gt;st, dd-&gt;crop_box.x2 - width, dd-&gt;crop_box.y1);
}
<span class="reserved">else</span>
{
<span class="comment">// Page number on left...</span>
pdfioContentTextMoveTo(dd-&gt;st, dd-&gt;crop_box.x1, dd-&gt;crop_box.y1);
}
pdfioContentTextShow(dd-&gt;st, UNICODE_VALUE, temp);
pdfioContentTextEnd(dd-&gt;st);
<span class="reserved">if</span> (dd-&gt;heading)
{
pdfioContentTextBegin(dd-&gt;st);
<span class="reserved">if</span> (pdfioFileGetNumPages(dd-&gt;pdf) &amp; <span class="number">1</span>)
{
<span class="comment">// Current heading on left...</span>
pdfioContentTextMoveTo(dd-&gt;st, dd-&gt;crop_box.x1, dd-&gt;crop_box.y1);
}
<span class="reserved">else</span>
{
width = pdfioContentTextMeasure(dd-&gt;fonts[DOCFONT_REGULAR], dd-&gt;heading,
SIZE_HEADFOOT);
pdfioContentTextMoveTo(dd-&gt;st, dd-&gt;crop_box.x2 - width, dd-&gt;crop_box.y1);
}
pdfioContentTextShow(dd-&gt;st, UNICODE_VALUE, dd-&gt;heading);
pdfioContentTextEnd(dd-&gt;st);
}
</code></pre>
<h4 id="formatting-the-markdown-document">Formatting the Markdown Document</h4>
<p>Four functions handle the formatting of the markdown document:</p>
<ul>
<li><p><code>format_block</code>: formats a single paragraph, heading, or table cell,</p>
</li>
<li><p><code>format_code</code>: formats a block of code,</p>
</li>
<li><p><code>format_doc</code>: formats the document as a whole, and</p>
</li>
<li><p><code>format_table</code>: formats a table.</p>
</li>
</ul>
<p>Formatted content is organized into arrays of <code>linefrag_t</code> and <code>tablerow_t</code> structures for a line of content or row of table cells, respectively.</p>
<h5 id="high-level-formatting">High-Level Formatting</h5>
<p>The <code>format_doc</code> function iterates over the block nodes in the markdown document. We map a &quot;thematic break&quot; (horizontal rule) to a page break, which is implemented by moving the current vertical position to the bottom of the page:</p>
<pre><code class="language-c"><span class="reserved">case</span> MMD_TYPE_THEMATIC_BREAK :
<span class="comment">// Force a page break</span>
dd-&gt;y = dd-&gt;art_box.y1;
<span class="reserved">break</span>;
</code></pre>
<p>A block quote is indented and uses the italic font by default:</p>
<pre><code class="language-c"><span class="reserved">case</span> MMD_TYPE_BLOCK_QUOTE :
format_doc(dd, current, DOCFONT_ITALIC, left + BQ_PADDING, right - BQ_PADDING);
<span class="reserved">break</span>;
</code></pre>
<p>Lists have a leading blank line and are indented:</p>
<pre><code class="language-c"><span class="reserved">case</span> MMD_TYPE_ORDERED_LIST :
<span class="reserved">case</span> MMD_TYPE_UNORDERED_LIST :
<span class="reserved">if</span> (dd-&gt;st)
dd-&gt;y -= SIZE_BODY * LINE_HEIGHT;
format_doc(dd, current, deffont, left + LIST_PADDING, right);
<span class="reserved">break</span>;
</code></pre>
<p>List items do not have a leading blank line and make use of leader text that is shown in front of the list text. The leader text is either the current item number or a bullet, which then is directly formatted using the <code>format_block</code> function:</p>
<pre><code class="language-c"><span class="reserved">case</span> MMD_TYPE_LIST_ITEM :
<span class="reserved">if</span> (doctype == MMD_TYPE_ORDERED_LIST)
{
snprintf(leader, <span class="reserved">sizeof</span>(leader), <span class="string">&quot;%d. &quot;</span>, i);
format_block(dd, current, deffont, SIZE_BODY, left, right, leader);
}
<span class="reserved">else</span>
{
format_block(dd, current, deffont, SIZE_BODY, left, right, <span class="comment">/*leader*/</span><span class="string">&quot;&quot;</span>);
}
<span class="reserved">break</span>;
</code></pre>
<p>Paragraphs have a leading blank line and are likewise directly formatted:</p>
<pre><code class="language-c"><span class="reserved">case</span> MMD_TYPE_PARAGRAPH :
<span class="comment">// Add a blank line before the paragraph...</span>
dd-&gt;y -= SIZE_BODY * LINE_HEIGHT;
<span class="comment">// Format the paragraph...</span>
format_block(dd, current, deffont, SIZE_BODY, left, right, <span class="comment">/*leader*/</span>NULL);
<span class="reserved">break</span>;
</code></pre>
<p>Tables have a leading blank line and are formatted using the <code>format_table</code> function:</p>
<pre><code class="language-c"><span class="reserved">case</span> MMD_TYPE_TABLE :
<span class="comment">// Add a blank line before the table...</span>
dd-&gt;y -= SIZE_BODY * LINE_HEIGHT;
<span class="comment">// Format the table...</span>
format_table(dd, current, left, right);
<span class="reserved">break</span>;
</code></pre>
<p>Code blocks have a leading blank line, are indented slightly (to account for the padded background), and are formatted using the <code>format_code</code> function:</p>
<pre><code class="language-c"><span class="reserved">case</span> MMD_TYPE_CODE_BLOCK :
<span class="comment">// Add a blank line before the code block...</span>
dd-&gt;y -= SIZE_BODY * LINE_HEIGHT;
<span class="comment">// Format the code block...</span>
format_code(dd, current, left + CODE_PADDING, right - CODE_PADDING);
<span class="reserved">break</span>;
</code></pre>
<p>Headings get some extra processing. First, the current heading is remembered in the <code>docdata_t</code> structure so it can be used in the page footer:</p>
<pre><code class="language-c"><span class="reserved">case</span> MMD_TYPE_HEADING_1 :
<span class="reserved">case</span> MMD_TYPE_HEADING_2 :
<span class="reserved">case</span> MMD_TYPE_HEADING_3 :
<span class="reserved">case</span> MMD_TYPE_HEADING_4 :
<span class="reserved">case</span> MMD_TYPE_HEADING_5 :
<span class="reserved">case</span> MMD_TYPE_HEADING_6 :
<span class="comment">// Update the current heading</span>
free(dd-&gt;heading);
dd-&gt;heading = mmdCopyAllText(current);
</code></pre>
<p>Then we add a blank line and format the heading with the boldface font at a larger size using the <code>format_block</code> function:</p>
<pre><code class="language-c"> <span class="comment">// Add a blank line before the heading...</span>
dd-&gt;y -= heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
<span class="comment">// Format the heading...</span>
format_block(dd, current, DOCFONT_BOLD,
heading_sizes[curtype - MMD_TYPE_HEADING_1], left, right,
<span class="comment">/*leader*/</span>NULL);
</code></pre>
<p>Once the heading is formatted, we record it in the <code>toc</code> array as a PDF outline item object/dictionary:</p>
<pre><code class="language-c"> <span class="comment">// Add the heading to the table-of-contents...</span>
<span class="reserved">if</span> (dd-&gt;num_toc &lt; DOCTOC_MAX)
{
doctoc_t *t = dd-&gt;toc + dd-&gt;num_toc;
<span class="comment">// New TOC</span>
pdfio_array_t *dest; <span class="comment">// Destination array</span>
t-&gt;level = curtype - MMD_TYPE_HEADING_1;
t-&gt;dict = pdfioDictCreate(dd-&gt;pdf);
t-&gt;obj = pdfioFileCreateObj(dd-&gt;pdf, t-&gt;dict);
dest = pdfioArrayCreate(dd-&gt;pdf);
pdfioArrayAppendObj(dest,
pdfioFileGetPage(dd-&gt;pdf, pdfioFileGetNumPages(dd-&gt;pdf) - <span class="number">1</span>));
pdfioArrayAppendName(dest, <span class="string">&quot;XYZ&quot;</span>);
pdfioArrayAppendNumber(dest, PAGE_LEFT);
pdfioArrayAppendNumber(dest,
dd-&gt;y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT);
pdfioArrayAppendNumber(dest, <span class="number">0.0</span>);
pdfioDictSetArray(t-&gt;dict, <span class="string">&quot;Dest&quot;</span>, dest);
pdfioDictSetString(t-&gt;dict, <span class="string">&quot;Title&quot;</span>, pdfioStringCreate(dd-&gt;pdf, dd-&gt;heading));
dd-&gt;num_toc ++;
}
</code></pre>
<p>Finally, we also save the heading's target name and its location in the <code>targets</code> array to allow interior links to work:</p>
<pre><code class="language-c"> <span class="comment">// Add the heading to the list of link targets...</span>
<span class="reserved">if</span> (dd-&gt;num_targets &lt; DOCTARGET_MAX)
{
doctarget_t *t = dd-&gt;targets + dd-&gt;num_targets;
<span class="comment">// New target</span>
make_target_name(t-&gt;name, dd-&gt;heading, <span class="reserved">sizeof</span>(t-&gt;name));
t-&gt;page = pdfioFileGetNumPages(dd-&gt;pdf) - <span class="number">1</span>;
t-&gt;y = dd-&gt;y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
dd-&gt;num_targets ++;
}
<span class="reserved">break</span>;
</code></pre>
<h5 id="formatting-paragraphs-headings-list-items-and-table-cells">Formatting Paragraphs, Headings, List Items, and Table Cells</h5>
<p>Paragraphs, headings, list items, and table cells all use the same basic formatting algorithm. Text, checkboxes, and images are collected until the nodes in the current block are used up or the content reaches the right margin.</p>
<p>In order to keep adjacent blocks of text together, the formatting algorithm makes sure that at least 3 lines of text can fit before the bottom edge of the page:</p>
<pre><code class="language-c"><span class="reserved">if</span> (mmdGetNextSibling(block))
need_bottom = <span class="number">3.0</span> * SIZE_BODY * LINE_HEIGHT;
<span class="reserved">else</span>
need_bottom = <span class="number">0.0</span>;
</code></pre>
<p>Leader text (used for list items) is right justified to the left margin and becomes the first fragment on the line when present:</p>
<pre><code class="language-c"><span class="reserved">if</span> (leader)
{
<span class="comment">// Add leader text on first line...</span>
frags[<span class="number">0</span>].type = MMD_TYPE_NORMAL_TEXT;
frags[<span class="number">0</span>].width = pdfioContentTextMeasure(dd-&gt;fonts[deffont], leader, fsize);
frags[<span class="number">0</span>].height = fsize;
frags[<span class="number">0</span>].x = left - frags[<span class="number">0</span>].width;
frags[<span class="number">0</span>].imagenum = <span class="number">0</span>;
frags[<span class="number">0</span>].text = leader;
frags[<span class="number">0</span>].url = NULL;
frags[<span class="number">0</span>].ws = <span class="reserved">false</span>;
frags[<span class="number">0</span>].font = deffont;
frags[<span class="number">0</span>].color = DOCCOLOR_BLACK;
num_frags = <span class="number">1</span>;
lineheight = fsize * LINE_HEIGHT;
}
<span class="reserved">else</span>
{
<span class="comment">// No leader text...</span>
num_frags = <span class="number">0</span>;
lineheight = <span class="number">0.0</span>;
}
frag = frags + num_frags;
</code></pre>
<p>If the current content fragment won't fit, we call <code>render_line</code> to draw what we have, adjusting the left margin as needed for table cells:</p>
<pre><code class="language-c"> <span class="comment">// See if this node will fit on the current line...</span>
<span class="reserved">if</span> ((num_frags &gt; <span class="number">0</span> &amp;&amp; (x + width + wswidth) &gt;= right) || num_frags == LINEFRAG_MAX)
{
<span class="comment">// No, render this line and start over...</span>
<span class="reserved">if</span> (blocktype == MMD_TYPE_TABLE_HEADER_CELL ||
blocktype == MMD_TYPE_TABLE_BODY_CELL_CENTER)
margin_left = <span class="number">0.5</span> * (right - x);
<span class="reserved">else</span> <span class="reserved">if</span> (blocktype == MMD_TYPE_TABLE_BODY_CELL_RIGHT)
margin_left = right - x;
<span class="reserved">else</span>
margin_left = <span class="number">0.0</span>;
render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
num_frags = <span class="number">0</span>;
frag = frags;
x = left;
lineheight = <span class="number">0.0</span>;
need_bottom = <span class="number">0.0</span>;
</code></pre>
<p>Block quotes (blocks that use a default font of italic) have an orange bar to the left of the block:</p>
<pre><code class="language-c"> <span class="reserved">if</span> (deffont == DOCFONT_ITALIC)
{
<span class="comment">// Add an orange bar to the left of block quotes...</span>
set_color(dd, DOCCOLOR_ORANGE);
pdfioContentSave(dd-&gt;st);
pdfioContentSetLineWidth(dd-&gt;st, <span class="number">3.0</span>);
pdfioContentPathMoveTo(dd-&gt;st, left - <span class="number">6.0</span>, dd-&gt;y - (LINE_HEIGHT - <span class="number">1.0</span>) * fsize);
pdfioContentPathLineTo(dd-&gt;st, left - <span class="number">6.0</span>, dd-&gt;y + fsize);
pdfioContentStroke(dd-&gt;st);
pdfioContentRestore(dd-&gt;st);
}
</code></pre>
<p>Finally, we add the current content fragment to the array:</p>
<pre><code class="language-c"> <span class="comment">// Add the current node to the fragment list</span>
<span class="reserved">if</span> (num_frags == <span class="number">0</span>)
{
<span class="comment">// No leading whitespace at the start of the line</span>
ws = <span class="reserved">false</span>;
wswidth = <span class="number">0.0</span>;
}
frag-&gt;type = type;
frag-&gt;x = x;
frag-&gt;width = width + wswidth;
frag-&gt;height = text ? fsize : height;
frag-&gt;imagenum = imagenum;
frag-&gt;text = text;
frag-&gt;url = url;
frag-&gt;ws = ws;
frag-&gt;font = font;
frag-&gt;color = color;
num_frags ++;
frag ++;
x += width + wswidth;
<span class="reserved">if</span> (height &gt; lineheight)
lineheight = height;
</code></pre>
<h5 id="formatting-code-blocks">Formatting Code Blocks</h5>
<p>Code blocks consist of one or more lines of plain monospaced text. We draw a light gray background behind each line with a small bit of padding at the top and bottom:</p>
<pre><code class="language-c"><span class="comment">// Draw the top padding...</span>
set_color(dd, DOCCOLOR_LTGRAY);
pdfioContentPathRect(dd-&gt;st, left - CODE_PADDING, dd-&gt;y + SIZE_CODEBLOCK,
right - left + <span class="number">2.0</span> * CODE_PADDING, CODE_PADDING);
pdfioContentFillAndStroke(dd-&gt;st, <span class="reserved">false</span>);
<span class="comment">// Start a code text block...</span>
set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
pdfioContentTextBegin(dd-&gt;st);
pdfioContentTextMoveTo(dd-&gt;st, left, dd-&gt;y);
<span class="reserved">for</span> (code = mmdGetFirstChild(block); code; code = mmdGetNextSibling(code))
{
set_color(dd, DOCCOLOR_LTGRAY);
pdfioContentPathRect(dd-&gt;st, left - CODE_PADDING,
dd-&gt;y - (LINE_HEIGHT - <span class="number">1.0</span>) * SIZE_CODEBLOCK,
right - left + <span class="number">2.0</span> * CODE_PADDING, lineheight);
pdfioContentFillAndStroke(dd-&gt;st, <span class="reserved">false</span>);
set_color(dd, DOCCOLOR_RED);
pdfioContentTextShow(dd-&gt;st, UNICODE_VALUE, mmdGetText(code));
dd-&gt;y -= lineheight;
<span class="reserved">if</span> (dd-&gt;y &lt; dd-&gt;art_box.y1)
{
<span class="comment">// End the current text block...</span>
pdfioContentTextEnd(dd-&gt;st);
<span class="comment">// Start a new page...</span>
new_page(dd);
set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
dd-&gt;y -= lineheight;
pdfioContentTextBegin(dd-&gt;st);
pdfioContentTextMoveTo(dd-&gt;st, left, dd-&gt;y);
}
}
<span class="comment">// End the current text block...</span>
pdfioContentTextEnd(dd-&gt;st);
dd-&gt;y += lineheight;
<span class="comment">// Draw the bottom padding...</span>
set_color(dd, DOCCOLOR_LTGRAY);
pdfioContentPathRect(dd-&gt;st, left - CODE_PADDING,
dd-&gt;y - CODE_PADDING - (LINE_HEIGHT - <span class="number">1.0</span>) * SIZE_CODEBLOCK,
right - left + <span class="number">2.0</span> * CODE_PADDING, CODE_PADDING);
pdfioContentFillAndStroke(dd-&gt;st, <span class="reserved">false</span>);
</code></pre>
<h5 id="formatting-tables">Formatting Tables</h5>
<p>Tables are the most difficult to format. We start by scanning the entire table and measuring every cell with the <code>measure_cell</code> function:</p>
<pre><code class="language-c"><span class="reserved">for</span> (num_cols = <span class="number">0</span>, num_rows = <span class="number">0</span>, rowptr = rows, current = mmdGetFirstChild(table);
current &amp;&amp; num_rows &lt; TABLEROW_MAX;
current = next)
{
next = mmd_walk_next(table, current);
type = mmdGetType(current);
<span class="reserved">if</span> (type == MMD_TYPE_TABLE_ROW)
{
<span class="comment">// Parse row...</span>
<span class="reserved">for</span> (col = <span class="number">0</span>, current = mmdGetFirstChild(current);
current &amp;&amp; num_cols &lt; TABLECOL_MAX;
current = mmdGetNextSibling(current), col ++)
{
rowptr-&gt;cells[col] = current;
measure_cell(dd, current, cols + col);
<span class="reserved">if</span> (col &gt;= num_cols)
num_cols = col + <span class="number">1</span>;
}
rowptr ++;
num_rows ++;
}
}
</code></pre>
<p>The <code>measure_cell</code> function also updates the minimum and maximum width needed for each column. To this we add the cell padding to compute the total table width:</p>
<pre><code class="language-c"><span class="comment">// Figure out the width of each column...</span>
<span class="reserved">for</span> (col = <span class="number">0</span>, table_width = <span class="number">0.0</span>; col &lt; num_cols; col ++)
{
cols[col].max_width += <span class="number">2.0</span> * TABLE_PADDING;
table_width += cols[col].max_width;
cols[col].width = cols[col].max_width;
}
</code></pre>
<p>If the calculated width is more than the available width, we need to adjust the width of the columns. The algorithm used here breaks the available width into N equal-width columns - any columns wider than this will be scaled proportionately. This works out as two steps - one to calculate the base width of &quot;narrow&quot; columns and a second to distribute the remaining width amongst the wider columns:</p>
<pre><code class="language-c">format_width = right - left - <span class="number">2.0</span> * TABLE_PADDING * num_cols;
<span class="reserved">if</span> (table_width &gt; format_width)
{
<span class="comment">// Content too wide, try scaling the widths...</span>
<span class="reserved">double</span> avg_width, <span class="comment">// Average column width</span>
base_width, <span class="comment">// Base width</span>
remaining_width, <span class="comment">// Remaining width</span>
scale_width; <span class="comment">// Width for scaling</span>
size_t num_remaining_cols = <span class="number">0</span>; <span class="comment">// Number of remaining columns</span>
<span class="comment">// First mark any columns that are narrower than the average width...</span>
avg_width = format_width / num_cols;
<span class="reserved">for</span> (col = <span class="number">0</span>, base_width = <span class="number">0.0</span>, remaining_width = <span class="number">0.0</span>; col &lt; num_cols; col ++)
{
<span class="reserved">if</span> (cols[col].width &gt; avg_width)
{
remaining_width += cols[col].width;
num_remaining_cols ++;
}
<span class="reserved">else</span>
{
base_width += cols[col].width;
}
}
<span class="comment">// Then proportionately distribute the remaining width to the other columns...</span>
format_width -= base_width;
<span class="reserved">for</span> (col = <span class="number">0</span>, table_width = <span class="number">0.0</span>; col &lt; num_cols; col ++)
{
<span class="reserved">if</span> (cols[col].width &gt; avg_width)
cols[col].width = cols[col].width * format_width / remaining_width;
table_width += cols[col].width;
}
}
</code></pre>
<p>Now that we have the widths of the columns, we can calculate the left and right margins of each column for formatting the cell text:</p>
<pre><code class="language-c"><span class="comment">// Calculate the margins of each column in preparation for formatting</span>
<span class="reserved">for</span> (col = <span class="number">0</span>, x = left + TABLE_PADDING; col &lt; num_cols; col ++)
{
cols[col].left = x;
cols[col].right = x + cols[col].width;
x += cols[col].width + <span class="number">2.0</span> * TABLE_PADDING;
}
</code></pre>
<p>Then we re-measure the cells using the final column widths to determine the height of each cell and row:</p>
<pre><code class="language-c"><span class="comment">// Calculate the height of each row and cell in preparation for formatting</span>
<span class="reserved">for</span> (row = <span class="number">0</span>, rowptr = rows; row &lt; num_rows; row ++, rowptr ++)
{
<span class="reserved">for</span> (col = <span class="number">0</span>; col &lt; num_cols; col ++)
{
height = measure_cell(dd, rowptr-&gt;cells[col], cols + col) + <span class="number">2.0</span> * TABLE_PADDING;
<span class="reserved">if</span> (height &gt; rowptr-&gt;height)
rowptr-&gt;height = height;
}
}
</code></pre>
<p>Finally, we render each row in the table:</p>
<pre><code class="language-c"><span class="comment">// Render each table row...</span>
<span class="reserved">for</span> (row = <span class="number">0</span>, rowptr = rows; row &lt; num_rows; row ++, rowptr ++)
render_row(dd, num_cols, cols, rowptr);
</code></pre>
<h4 id="rendering-the-markdown-document">Rendering the Markdown Document</h4>
<p>The formatted content in arrays of <code>linefrag_t</code> and <code>tablerow_t</code> structures is passed to the <code>render_line</code> and <code>render_row</code> functions respectively to produce content in the PDF document.</p>
<h5 id="rendering-a-line-in-a-paragraph-heading-or-table-cell">Rendering a Line in a Paragraph, Heading, or Table Cell</h5>
<p>The <code>render_line</code> function adds content from the <code>linefrag_t</code> array to a PDF page. It starts by determining whether a new page is needed:</p>
<pre><code class="language-c"><span class="reserved">if</span> (!dd-&gt;st)
{
new_page(dd);
margin_top = <span class="number">0.0</span>;
}
dd-&gt;y -= margin_top + lineheight;
<span class="reserved">if</span> ((dd-&gt;y - need_bottom) &lt; dd-&gt;art_box.y1)
{
new_page(dd);
dd-&gt;y -= lineheight;
}
</code></pre>
<p>We then loop through the fragments for the current line, drawing checkboxes, images, and text as needed. When a hyperlink is present, we add the link to the <code>links</code> array in the <code>docdata_t</code> structure, mapping &quot;@&quot; and &quot;@@&quot; to an internal link corresponding to the linked text:</p>
<pre><code class="language-c"><span class="reserved">if</span> (frag-&gt;url &amp;&amp; dd-&gt;num_links &lt; DOCLINK_MAX)
{
doclink_t *l = dd-&gt;links + dd-&gt;num_links;
<span class="comment">// Pointer to this link record</span>
<span class="reserved">if</span> (!strcmp(frag-&gt;url, <span class="string">&quot;@&quot;</span>))
{
<span class="comment">// Use mapped text as link target...</span>
<span class="reserved">char</span> targetlink[<span class="number">129</span>]; <span class="comment">// Targeted link</span>
targetlink[<span class="number">0</span>] = <span class="string">'#'</span>;
make_target_name(targetlink + <span class="number">1</span>, frag-&gt;text, <span class="reserved">sizeof</span>(targetlink) - <span class="number">1</span>);
l-&gt;url = pdfioStringCreate(dd-&gt;pdf, targetlink);
}
<span class="reserved">else</span> <span class="reserved">if</span> (!strcmp(frag-&gt;url, <span class="string">&quot;@@&quot;</span>))
{
<span class="comment">// Use literal text as anchor...</span>
l-&gt;url = pdfioStringCreatef(dd-&gt;pdf, <span class="string">&quot;#%s&quot;</span>, frag-&gt;text);
}
<span class="reserved">else</span>
{
<span class="comment">// Use URL as-is...</span>
l-&gt;url = frag-&gt;url;
}
l-&gt;box.x1 = frag-&gt;x;
l-&gt;box.y1 = dd-&gt;y;
l-&gt;box.x2 = frag-&gt;x + frag-&gt;width;
l-&gt;box.y2 = dd-&gt;y + frag-&gt;height;
dd-&gt;num_links ++;
}
</code></pre>
<p>These are later written as annotations in the <code>add_links</code> function.</p>
<h5 id="rendering-a-table-row">Rendering a Table Row</h5>
<p>The <code>render_row</code> function takes a row of cells and the corresponding column definitions. It starts by drawing the border boxes around body cells:</p>
<pre><code class="language-c"><span class="reserved">if</span> (mmdGetType(row-&gt;cells[<span class="number">0</span>]) == MMD_TYPE_TABLE_HEADER_CELL)
{
<span class="comment">// Header row, no border...</span>
deffont = DOCFONT_BOLD;
}
<span class="reserved">else</span>
{
<span class="comment">// Regular body row, add borders...</span>
deffont = DOCFONT_REGULAR;
set_color(dd, DOCCOLOR_GRAY);
pdfioContentPathRect(dd-&gt;st, cols[<span class="number">0</span>].left - TABLE_PADDING, dd-&gt;y - row-&gt;height,
cols[num_cols - <span class="number">1</span>].right - cols[<span class="number">0</span>].left +
<span class="number">2.0</span> * TABLE_PADDING, row-&gt;height);
<span class="reserved">for</span> (col = <span class="number">1</span>; col &lt; num_cols; col ++)
{
pdfioContentPathMoveTo(dd-&gt;st, cols[col].left - TABLE_PADDING, dd-&gt;y);
pdfioContentPathLineTo(dd-&gt;st, cols[col].left - TABLE_PADDING, dd-&gt;y - row-&gt;height);
}
pdfioContentStroke(dd-&gt;st);
}
</code></pre>
<p>Then it formats each cell using the <code>format_block</code> function described previously. The page <code>y</code> value is reset before formatting each cell:</p>
<pre><code class="language-c">row_y = dd-&gt;y;
<span class="reserved">for</span> (col = <span class="number">0</span>; col &lt; num_cols; col ++)
{
dd-&gt;y = row_y;
format_block(dd, row-&gt;cells[col], deffont, SIZE_TABLE, cols[col].left,
cols[col].right, <span class="comment">/*leader*/</span>NULL);
}
dd-&gt;y = row_y - row-&gt;height;
</code></pre>
<h2 class="title"><a id="FUNCTIONS">Functions</a></h2>
<h3 class="function"><a id="pdfioArrayAppendArray">pdfioArrayAppendArray</a></h3>
<p class="description">Add an array value to an array.</p>

View File

@ -316,8 +316,8 @@ You open an existing PDF file using the [`pdfioFileOpen`](@@) function:
```c
pdfio_file_t *pdf =
pdfioFileOpen("myinputfile.pdf", password_cb, password_data,
error_cb, error_data);
pdfioFileOpen("myinputfile.pdf", password_cb, password_data, error_cb,
error_data);
```
@ -466,8 +466,7 @@ You create a new PDF file using the [`pdfioFileCreate`](@@) function:
pdfio_rect_t media_box = { 0.0, 0.0, 612.0, 792.0 }; // US Letter
pdfio_rect_t crop_box = { 36.0, 36.0, 576.0, 756.0 }; // w/0.5" margins
pdfio_file_t *pdf = pdfioFileCreate("myoutputfile.pdf", "2.0",
&media_box, &crop_box,
pdfio_file_t *pdf = pdfioFileCreate("myoutputfile.pdf", "2.0", &media_box, &crop_box,
error_cb, error_data);
```
@ -484,9 +483,8 @@ function:
pdfio_rect_t media_box = { 0.0, 0.0, 612.0, 792.0 }; // US Letter
pdfio_rect_t crop_box = { 36.0, 36.0, 576.0, 756.0 }; // w/0.5" margins
pdfio_file_t *pdf = pdfioFileCreateOutput(output_cb, output_ctx, "2.0",
&media_box, &crop_box,
error_cb, error_data);
pdfio_file_t *pdf = pdfioFileCreateOutput(output_cb, output_ctx, "2.0", &media_box,
&crop_box, error_cb, error_data);
```
Once the file is created, use the [`pdfioFileCreateObj`](@@),
@ -619,11 +617,16 @@ PDF color spaces are specified using well-known names like "DeviceCMYK",
color spaces. PDFio provides several functions for embedding ICC profiles and
creating color space arrays:
- [`pdfioArrayCreateColorFromICCObj`](@@) creates a color array for an ICC color profile object
- [`pdfioArrayCreateColorFromMatrix`](@@) creates a color array using a CIE XYZ color transform matrix, a gamma value, and a CIE XYZ white point
- [`pdfioArrayCreateColorFromPalette`](@@) creates an indexed color array from an array of sRGB values
- [`pdfioArrayCreateColorFromPrimaries`](@@) creates a color array using CIE XYZ primaries and a gamma value
- [`pdfioArrayCreateColorFromStandard`](@@) creates a color array for a standard color space
- [`pdfioArrayCreateColorFromICCObj`](@@) creates a color array for an ICC color
profile object
- [`pdfioArrayCreateColorFromMatrix`](@@) creates a color array using a CIE XYZ
color transform matrix, a gamma value, and a CIE XYZ white point
- [`pdfioArrayCreateColorFromPalette`](@@) creates an indexed color array from
an array of sRGB values
- [`pdfioArrayCreateColorFromPrimaries`](@@) creates a color array using CIE XYZ
primaries and a gamma value
- [`pdfioArrayCreateColorFromStandard`](@@) creates a color array for a standard
color space
You can embed an ICC color profile using the
[`pdfioFileCreateICCObjFromFile`](@@) function:
@ -720,10 +723,9 @@ in memory, for example:
pdfio_file_t *pdf = pdfioFileCreate(...);
unsigned char data[1024 * 1024 * 4]; // 1024x1024 RGBA image data
pdfio_obj_t *img =
pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024,
/*height*/1024, /*num_colors*/3,
/*color_data*/NULL, /*alpha*/true,
/*interpolate*/false);
pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024, /*height*/1024,
/*num_colors*/3, /*color_data*/NULL,
/*alpha*/true, /*interpolate*/false);
```
will create an object for a 1024x1024 RGBA image in memory, using the default
@ -737,17 +739,14 @@ pdfio_file_t *pdf = pdfioFileCreate(...);
// Create an AdobeRGB color array
pdfio_array_t *adobe_rgb =
pdfioArrayCreateColorFromMatrix(pdf, 3, pdfioAdobeRGBGamma,
pdfioAdobeRGBMatrix,
pdfioAdobeRGBWhitePoint);
pdfioAdobeRGBMatrix, pdfioAdobeRGBWhitePoint);
// Create a 1024x1024 RGBA image using AdobeRGB
unsigned char data[1024 * 1024 * 4]; // 1024x1024 RGBA image data
pdfio_obj_t *img =
pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024,
/*height*/1024, /*num_colors*/3,
/*color_data*/adobe_rgb,
/*alpha*/true,
/*interpolate*/false);
pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024, /*height*/1024,
/*num_colors*/3, /*color_data*/adobe_rgb,
/*alpha*/true, /*interpolate*/false);
```
The "interpolate" argument specifies whether the colors in the image should be
@ -760,8 +759,7 @@ function to copy the image into a PDF image object, for example:
```c
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *img =
pdfioFileCreateImageObjFromFile(pdf, "myphoto.jpg",
/*interpolate*/true);
pdfioFileCreateImageObjFromFile(pdf, "myphoto.jpg", /*interpolate*/true);
```
> Note: Currently `pdfioFileCreateImageObjFromFile` does not support 12 bit JPEG
@ -866,6 +864,10 @@ escaping, as needed:
Examples
========
PDFio includes several example programs that are typically installed to the
`/usr/share/doc/pdfio/examples` or `/usr/local/share/doc/pdfio/examples`
directories. A makefile is included to build them.
Read PDF Metadata
-----------------
@ -884,9 +886,11 @@ main(int argc, // I - Number of command-line arguments
{
const char *filename; // PDF filename
pdfio_file_t *pdf; // PDF file
const char *author; // Author name
time_t creation_date; // Creation date
struct tm *creation_tm; // Creation date/time information
char creation_text[256]; // Creation date/time as a string
const char *title; // Title
// Get the filename from the command-line...
@ -899,21 +903,30 @@ main(int argc, // I - Number of command-line arguments
filename = argv[1];
// Open the PDF file with the default callbacks...
pdf = pdfioFileOpen(filename, /*password_cb*/NULL,
/*password_cbdata*/NULL, /*error_cb*/NULL,
/*error_cbdata*/NULL);
pdf = pdfioFileOpen(filename, /*password_cb*/NULL, /*password_cbdata*/NULL,
/*error_cb*/NULL, /*error_cbdata*/NULL);
if (pdf == NULL)
return (1);
// Get the title and author...
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
// Get the creation date and convert to a string...
creation_date = pdfioFileGetCreationDate(pdf);
creation_tm = localtime(&creation_date);
strftime(creation_text, sizeof(creation_text), "%c", creation_tm);
if ((creation_date = pdfioFileGetCreationDate(pdf)) > 0)
{
creation_tm = localtime(&creation_date);
strftime(creation_text, sizeof(creation_text), "%c", creation_tm);
}
else
{
snprintf(creation_text, sizeof(creation_text), "-- not set --");
}
// Print file information to stdout...
printf("%s:\n", filename);
printf(" Title: %s\n", pdfioFileGetTitle(pdf));
printf(" Author: %s\n", pdfioFileGetAuthor(pdf));
printf(" Title: %s\n", title ? title : "-- not set --");
printf(" Author: %s\n", author ? author : "-- not set --");
printf(" Created On: %s\n", creation_text);
printf(" Number Pages: %u\n", (unsigned)pdfioFileGetNumPages(pdf));
@ -925,8 +938,56 @@ main(int argc, // I - Number of command-line arguments
```
Create PDF File With Text and Image
-----------------------------------
Extract Text from PDF File
--------------------------
The `pdf2text.c` example code extracts non-Unicode text from a PDF file by
scanning each page for strings and text drawing commands. Since it doesn't
look at the font encoding or support Unicode text, it is really only useful to
extract plain ASCII text from a PDF file. And since it writes text in the order
it appears in the page stream, it may not come out in the same order as it
appears on the page.
The [`pdfioStreamGetToken`](@@) function is used to read individual tokens from
the page streams. Tokens starting with the open parenthesis are text strings,
while PDF operators are left as-is. We use some simple logic to make sure that
we include spaces between text strings and add newlines for the text operators
that start a new line in a text block:
```c
pdfio_stream_t *st; // Page stream
bool first = true; // First string on line?
char buffer[1024]; // Token buffer
// Read PDF tokens from the page stream...
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
{
if (buffer[0] == '(')
{
// Text string using an 8-bit encoding
if (first)
first = false;
else if (buffer[1] != ' ')
putchar(' ');
fputs(buffer + 1, stdout);
}
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") ||
!strcmp(buffer, "\'") || !strcmp(buffer, "\""))
{
// Text operators that advance to the next line in the block
putchar('\n');
first = true;
}
}
if (!first)
putchar('\n');
```
Create a PDF File With Text and an Image
----------------------------------------
The `image2pdf.c` example code creates a PDF file containing a JPEG or PNG
image file and optional caption on a single page. The `create_pdf_image_file`
@ -957,9 +1018,8 @@ create_pdf_image_file(
// Create the PDF file...
pdf = pdfioFileCreate(pdfname, /*version*/NULL, /*media_box*/NULL,
/*crop_box*/NULL, /*error_cb*/NULL,
/*error_cbdata*/NULL);
pdf = pdfioFileCreate(pdfname, /*version*/NULL, /*media_box*/NULL, /*crop_box*/NULL,
/*error_cb*/NULL, /*error_cbdata*/NULL);
if (!pdf)
return (false);
@ -993,9 +1053,9 @@ create_pdf_image_file(
width = pdfioImageGetWidth(image);
height = pdfioImageGetHeight(image);
// Default media_box is "universal" 595.28x792 points (8.27x11in or
// 210x279mm). Use margins of 36 points (0.5in or 12.7mm) with another
// 36 points for the caption underneath...
// Default media_box is "universal" 595.28x792 points (8.27x11in or 210x279mm).
// Use margins of 36 points (0.5in or 12.7mm) with another 36 points for the
// caption underneath...
swidth = 595.28 - 72.0;
sheight = swidth * height / width;
if (sheight > (792.0 - 36.0 - 72.0))
@ -1012,8 +1072,8 @@ create_pdf_image_file(
// Draw the caption in black...
pdfioContentSetFillColorDeviceGray(page, 0.0);
// Compute the starting point for the text - Courier is monospaced
// with a nominal width of 0.6 times the text height...
// Compute the starting point for the text - Courier is monospaced with a
// nominal width of 0.6 times the text height...
tx = 0.5 * (595.28 - 18.0 * 0.6 * strlen(caption));
// Position and draw the caption underneath...
@ -1099,8 +1159,7 @@ using the [`pdfioFileCreateFontObjFromFile`](@@) function. We pass `false`
for the "unicode" argument since we just want the (default) ASCII encoding:
```c
barcode_font = pdfioFileCreateFontObjFromFile(pdf, "code128.ttf",
/*unicode*/false);
barcode_font = pdfioFileCreateFontObjFromFile(pdf, "code128.ttf", /*unicode*/false);
```
Since barcodes usually have the number or text represented by the barcode
@ -1135,8 +1194,7 @@ double text_width = 0.0;
if (text && text_font)
{
text_height = 9.0;
text_width = pdfioContentTextMeasure(text_font, text,
text_height);
text_width = pdfioContentTextMeasure(text_font, text, text_height);
}
// Compute the size of the PDF page...
@ -1144,8 +1202,7 @@ pdfio_rect_t media_box;
media_box.x1 = 0.0;
media_box.y1 = 0.0;
media_box.x2 = (barcode_width > text_width ?
barcode_width : text_width) + 18.0;
media_box.x2 = (barcode_width > text_width ? barcode_width : text_width) + 18.0;
media_box.y2 = barcode_height + text_height + 18.0;
```
@ -1221,6 +1278,7 @@ typedef struct docdata_s // Document formatting data
pdfio_rect_t crop_box; // Crop box (for margins)
pdfio_rect_t art_box; // Art box (for markdown content)
pdfio_obj_t *fonts[DOCFONT_MAX]; // Embedded fonts
double font_space; // Unit width of a space
size_t num_images; // Number of embedded images
docimage_t images[DOCIMAGE_MAX]; // Embedded images
const char *title; // Document title
@ -1319,13 +1377,10 @@ pdfioDictSetRect(page_dict, "MediaBox", &dd->media_box);
pdfioDictSetRect(page_dict, "ArtBox", &dd->art_box);
for (fontface = DOCFONT_REGULAR; fontface < DOCFONT_MAX; fontface ++)
pdfioPageDictAddFont(page_dict, docfont_names[fontface],
dd->fonts[fontface]);
pdfioPageDictAddFont(page_dict, docfont_names[fontface], dd->fonts[fontface]);
for (i = 0; i < dd->num_images; i ++)
pdfioPageDictAddImage(page_dict,
pdfioStringCreatef(dd->pdf, "I%u",
(unsigned)i),
pdfioPageDictAddImage(page_dict, pdfioStringCreatef(dd->pdf, "I%u", (unsigned)i),
dd->images[i].obj);
```
@ -1351,8 +1406,8 @@ set_font(dd, DOCFONT_REGULAR, SIZE_HEADFOOT);
if (pdfioFileGetNumPages(dd->pdf) > 1 && dd->title)
{
// Show title in header...
width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR],
dd->title, SIZE_HEADFOOT);
width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], dd->title,
SIZE_HEADFOOT);
pdfioContentTextBegin(dd->st);
pdfioContentTextMoveTo(dd->st,
@ -1363,12 +1418,10 @@ if (pdfioFileGetNumPages(dd->pdf) > 1 && dd->title)
pdfioContentTextEnd(dd->st);
pdfioContentPathMoveTo(dd->st, dd->crop_box.x1,
dd->crop_box.y2 -
2 * SIZE_HEADFOOT * LINE_HEIGHT +
dd->crop_box.y2 - 2 * SIZE_HEADFOOT * LINE_HEIGHT +
SIZE_HEADFOOT);
pdfioContentPathLineTo(dd->st, dd->crop_box.x2,
dd->crop_box.y2 -
2 * SIZE_HEADFOOT * LINE_HEIGHT +
dd->crop_box.y2 - 2 * SIZE_HEADFOOT * LINE_HEIGHT +
SIZE_HEADFOOT);
pdfioContentStroke(dd->st);
}
@ -1388,15 +1441,12 @@ pdfioContentPathLineTo(dd->st, dd->crop_box.x2,
pdfioContentStroke(dd->st);
pdfioContentTextBegin(dd->st);
snprintf(temp, sizeof(temp), "%u",
(unsigned)pdfioFileGetNumPages(dd->pdf));
snprintf(temp, sizeof(temp), "%u", (unsigned)pdfioFileGetNumPages(dd->pdf));
if (pdfioFileGetNumPages(dd->pdf) & 1)
{
// Page number on right...
width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], temp,
SIZE_HEADFOOT);
pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width,
dd->crop_box.y1);
width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], temp, SIZE_HEADFOOT);
pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width, dd->crop_box.y1);
}
else
{
@ -1418,13 +1468,629 @@ if (dd->heading)
}
else
{
width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR],
dd->heading, SIZE_HEADFOOT);
pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width,
dd->crop_box.y1);
width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], dd->heading,
SIZE_HEADFOOT);
pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width, dd->crop_box.y1);
}
pdfioContentTextShow(dd->st, UNICODE_VALUE, dd->heading);
pdfioContentTextEnd(dd->st);
}
```
### Formatting the Markdown Document
Four functions handle the formatting of the markdown document:
- `format_block`: formats a single paragraph, heading, or table cell,
- `format_code`: formats a block of code,
- `format_doc`: formats the document as a whole, and
- `format_table`: formats a table.
Formatted content is organized into arrays of `linefrag_t` and `tablerow_t`
structures for a line of content or row of table cells, respectively.
#### High-Level Formatting
The `format_doc` function iterates over the block nodes in the markdown
document. We map a "thematic break" (horizontal rule) to a page break, which
is implemented by moving the current vertical position to the bottom of the
page:
```c
case MMD_TYPE_THEMATIC_BREAK :
// Force a page break
dd->y = dd->art_box.y1;
break;
```
A block quote is indented and uses the italic font by default:
```c
case MMD_TYPE_BLOCK_QUOTE :
format_doc(dd, current, DOCFONT_ITALIC, left + BQ_PADDING, right - BQ_PADDING);
break;
```
Lists have a leading blank line and are indented:
```c
case MMD_TYPE_ORDERED_LIST :
case MMD_TYPE_UNORDERED_LIST :
if (dd->st)
dd->y -= SIZE_BODY * LINE_HEIGHT;
format_doc(dd, current, deffont, left + LIST_PADDING, right);
break;
```
List items do not have a leading blank line and make use of leader text that is
shown in front of the list text. The leader text is either the current item
number or a bullet, which then is directly formatted using the `format_block`
function:
```c
case MMD_TYPE_LIST_ITEM :
if (doctype == MMD_TYPE_ORDERED_LIST)
{
snprintf(leader, sizeof(leader), "%d. ", i);
format_block(dd, current, deffont, SIZE_BODY, left, right, leader);
}
else
{
format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/"• ");
}
break;
```
Paragraphs have a leading blank line and are likewise directly formatted:
```c
case MMD_TYPE_PARAGRAPH :
// Add a blank line before the paragraph...
dd->y -= SIZE_BODY * LINE_HEIGHT;
// Format the paragraph...
format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/NULL);
break;
```
Tables have a leading blank line and are formatted using the `format_table`
function:
```c
case MMD_TYPE_TABLE :
// Add a blank line before the table...
dd->y -= SIZE_BODY * LINE_HEIGHT;
// Format the table...
format_table(dd, current, left, right);
break;
```
Code blocks have a leading blank line, are indented slightly (to account for the
padded background), and are formatted using the `format_code` function:
```c
case MMD_TYPE_CODE_BLOCK :
// Add a blank line before the code block...
dd->y -= SIZE_BODY * LINE_HEIGHT;
// Format the code block...
format_code(dd, current, left + CODE_PADDING, right - CODE_PADDING);
break;
```
Headings get some extra processing. First, the current heading is remembered in
the `docdata_t` structure so it can be used in the page footer:
```c
case MMD_TYPE_HEADING_1 :
case MMD_TYPE_HEADING_2 :
case MMD_TYPE_HEADING_3 :
case MMD_TYPE_HEADING_4 :
case MMD_TYPE_HEADING_5 :
case MMD_TYPE_HEADING_6 :
// Update the current heading
free(dd->heading);
dd->heading = mmdCopyAllText(current);
```
Then we add a blank line and format the heading with the boldface font at a
larger size using the `format_block` function:
```c
// Add a blank line before the heading...
dd->y -= heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
// Format the heading...
format_block(dd, current, DOCFONT_BOLD,
heading_sizes[curtype - MMD_TYPE_HEADING_1], left, right,
/*leader*/NULL);
```
Once the heading is formatted, we record it in the `toc` array as a PDF outline
item object/dictionary:
```c
// Add the heading to the table-of-contents...
if (dd->num_toc < DOCTOC_MAX)
{
doctoc_t *t = dd->toc + dd->num_toc;
// New TOC
pdfio_array_t *dest; // Destination array
t->level = curtype - MMD_TYPE_HEADING_1;
t->dict = pdfioDictCreate(dd->pdf);
t->obj = pdfioFileCreateObj(dd->pdf, t->dict);
dest = pdfioArrayCreate(dd->pdf);
pdfioArrayAppendObj(dest,
pdfioFileGetPage(dd->pdf, pdfioFileGetNumPages(dd->pdf) - 1));
pdfioArrayAppendName(dest, "XYZ");
pdfioArrayAppendNumber(dest, PAGE_LEFT);
pdfioArrayAppendNumber(dest,
dd->y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT);
pdfioArrayAppendNumber(dest, 0.0);
pdfioDictSetArray(t->dict, "Dest", dest);
pdfioDictSetString(t->dict, "Title", pdfioStringCreate(dd->pdf, dd->heading));
dd->num_toc ++;
}
```
Finally, we also save the heading's target name and its location in the
`targets` array to allow interior links to work:
```c
// Add the heading to the list of link targets...
if (dd->num_targets < DOCTARGET_MAX)
{
doctarget_t *t = dd->targets + dd->num_targets;
// New target
make_target_name(t->name, dd->heading, sizeof(t->name));
t->page = pdfioFileGetNumPages(dd->pdf) - 1;
t->y = dd->y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
dd->num_targets ++;
}
break;
```
#### Formatting Paragraphs, Headings, List Items, and Table Cells
Paragraphs, headings, list items, and table cells all use the same basic
formatting algorithm. Text, checkboxes, and images are collected until the
nodes in the current block are used up or the content reaches the right margin.
In order to keep adjacent blocks of text together, the formatting algorithm
makes sure that at least 3 lines of text can fit before the bottom edge of the
page:
```c
if (mmdGetNextSibling(block))
need_bottom = 3.0 * SIZE_BODY * LINE_HEIGHT;
else
need_bottom = 0.0;
```
Leader text (used for list items) is right justified to the left margin and
becomes the first fragment on the line when present:
```c
if (leader)
{
// Add leader text on first line...
frags[0].type = MMD_TYPE_NORMAL_TEXT;
frags[0].width = pdfioContentTextMeasure(dd->fonts[deffont], leader, fsize);
frags[0].height = fsize;
frags[0].x = left - frags[0].width;
frags[0].imagenum = 0;
frags[0].text = leader;
frags[0].url = NULL;
frags[0].ws = false;
frags[0].font = deffont;
frags[0].color = DOCCOLOR_BLACK;
num_frags = 1;
lineheight = fsize * LINE_HEIGHT;
}
else
{
// No leader text...
num_frags = 0;
lineheight = 0.0;
}
frag = frags + num_frags;
```
If the current content fragment won't fit, we call `render_line` to draw what we
have, adjusting the left margin as needed for table cells:
```c
// See if this node will fit on the current line...
if ((num_frags > 0 && (x + width + wswidth) >= right) || num_frags == LINEFRAG_MAX)
{
// No, render this line and start over...
if (blocktype == MMD_TYPE_TABLE_HEADER_CELL ||
blocktype == MMD_TYPE_TABLE_BODY_CELL_CENTER)
margin_left = 0.5 * (right - x);
else if (blocktype == MMD_TYPE_TABLE_BODY_CELL_RIGHT)
margin_left = right - x;
else
margin_left = 0.0;
render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
num_frags = 0;
frag = frags;
x = left;
lineheight = 0.0;
need_bottom = 0.0;
```
Block quotes (identified by their italic default font) have an orange bar to the
left of the block:
```c
if (deffont == DOCFONT_ITALIC)
{
// Add an orange bar to the left of block quotes...
set_color(dd, DOCCOLOR_ORANGE);
pdfioContentSave(dd->st);
pdfioContentSetLineWidth(dd->st, 3.0);
pdfioContentPathMoveTo(dd->st, left - 6.0, dd->y - (LINE_HEIGHT - 1.0) * fsize);
pdfioContentPathLineTo(dd->st, left - 6.0, dd->y + fsize);
pdfioContentStroke(dd->st);
pdfioContentRestore(dd->st);
}
```
Finally, we add the current content fragment to the array:
```c
// Add the current node to the fragment list
if (num_frags == 0)
{
// No leading whitespace at the start of the line
ws = false;
wswidth = 0.0;
}
frag->type = type;
frag->x = x;
frag->width = width + wswidth;
frag->height = text ? fsize : height;
frag->imagenum = imagenum;
frag->text = text;
frag->url = url;
frag->ws = ws;
frag->font = font;
frag->color = color;
num_frags ++;
frag ++;
x += width + wswidth;
if (height > lineheight)
lineheight = height;
```
#### Formatting Code Blocks
Code blocks consist of one or more lines of plain monospaced text. We draw a
light gray background behind each line with a small bit of padding at the top
and bottom:
```c
// Draw the top padding...
set_color(dd, DOCCOLOR_LTGRAY);
pdfioContentPathRect(dd->st, left - CODE_PADDING, dd->y + SIZE_CODEBLOCK,
right - left + 2.0 * CODE_PADDING, CODE_PADDING);
pdfioContentFillAndStroke(dd->st, false);
// Start a code text block...
set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
pdfioContentTextBegin(dd->st);
pdfioContentTextMoveTo(dd->st, left, dd->y);
for (code = mmdGetFirstChild(block); code; code = mmdGetNextSibling(code))
{
set_color(dd, DOCCOLOR_LTGRAY);
pdfioContentPathRect(dd->st, left - CODE_PADDING,
dd->y - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK,
right - left + 2.0 * CODE_PADDING, lineheight);
pdfioContentFillAndStroke(dd->st, false);
set_color(dd, DOCCOLOR_RED);
pdfioContentTextShow(dd->st, UNICODE_VALUE, mmdGetText(code));
dd->y -= lineheight;
if (dd->y < dd->art_box.y1)
{
// End the current text block...
pdfioContentTextEnd(dd->st);
// Start a new page...
new_page(dd);
set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
dd->y -= lineheight;
pdfioContentTextBegin(dd->st);
pdfioContentTextMoveTo(dd->st, left, dd->y);
}
}
// End the current text block...
pdfioContentTextEnd(dd->st);
dd->y += lineheight;
// Draw the bottom padding...
set_color(dd, DOCCOLOR_LTGRAY);
pdfioContentPathRect(dd->st, left - CODE_PADDING,
dd->y - CODE_PADDING - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK,
right - left + 2.0 * CODE_PADDING, CODE_PADDING);
pdfioContentFillAndStroke(dd->st, false);
```
#### Formatting Tables
Tables are the most difficult to format. We start by scanning the entire table
and measuring every cell with the `measure_cell` function:
```c
for (num_cols = 0, num_rows = 0, rowptr = rows, current = mmdGetFirstChild(table);
current && num_rows < TABLEROW_MAX;
current = next)
{
next = mmd_walk_next(table, current);
type = mmdGetType(current);
if (type == MMD_TYPE_TABLE_ROW)
{
// Parse row...
for (col = 0, current = mmdGetFirstChild(current);
current && num_cols < TABLECOL_MAX;
current = mmdGetNextSibling(current), col ++)
{
rowptr->cells[col] = current;
measure_cell(dd, current, cols + col);
if (col >= num_cols)
num_cols = col + 1;
}
rowptr ++;
num_rows ++;
}
}
```
The `measure_cell` function also updates the minimum and maximum width needed
for each column. To this we add the cell padding to compute the total table
width:
```c
// Figure out the width of each column...
for (col = 0, table_width = 0.0; col < num_cols; col ++)
{
cols[col].max_width += 2.0 * TABLE_PADDING;
table_width += cols[col].max_width;
cols[col].width = cols[col].max_width;
}
```
If the calculated width is more than the available width, we need to adjust the
width of the columns. The algorithm used here breaks the available width into
N equal-width columns - any columns wider than this will be scaled
proportionately. This works out as two steps - one to calculate the base
width of "narrow" columns and a second to distribute the remaining width amongst
the wider columns:
```c
format_width = right - left - 2.0 * TABLE_PADDING * num_cols;
if (table_width > format_width)
{
// Content too wide, try scaling the widths...
double avg_width, // Average column width
base_width, // Base width
remaining_width, // Remaining width
scale_width; // Width for scaling
size_t num_remaining_cols = 0; // Number of remaining columns
// First mark any columns that are narrower than the average width...
avg_width = format_width / num_cols;
for (col = 0, base_width = 0.0, remaining_width = 0.0; col < num_cols; col ++)
{
if (cols[col].width > avg_width)
{
remaining_width += cols[col].width;
num_remaining_cols ++;
}
else
{
base_width += cols[col].width;
}
}
// Then proportionately distribute the remaining width to the other columns...
format_width -= base_width;
for (col = 0, table_width = 0.0; col < num_cols; col ++)
{
if (cols[col].width > avg_width)
cols[col].width = cols[col].width * format_width / remaining_width;
table_width += cols[col].width;
}
}
```
Now that we have the widths of the columns, we can calculate the left and right
margins of each column for formatting the cell text:
```c
// Calculate the margins of each column in preparation for formatting
for (col = 0, x = left + TABLE_PADDING; col < num_cols; col ++)
{
cols[col].left = x;
cols[col].right = x + cols[col].width;
x += cols[col].width + 2.0 * TABLE_PADDING;
}
```
Then we re-measure the cells using the final column widths to determine the
height of each cell and row:
```c
// Calculate the height of each row and cell in preparation for formatting
for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
{
for (col = 0; col < num_cols; col ++)
{
height = measure_cell(dd, rowptr->cells[col], cols + col) + 2.0 * TABLE_PADDING;
if (height > rowptr->height)
rowptr->height = height;
}
}
```
Finally, we render each row in the table:
```c
// Render each table row...
for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
render_row(dd, num_cols, cols, rowptr);
```
### Rendering the Markdown Document
The formatted content in arrays of `linefrag_t` and `tablerow_t` structures
is passed to the `render_line` and `render_row` functions respectively to
produce content in the PDF document.
#### Rendering a Line in a Paragraph, Heading, or Table Cell
The `render_line` function adds content from the `linefrag_t` array to a PDF
page. It starts by determining whether a new page is needed:
```c
if (!dd->st)
{
new_page(dd);
margin_top = 0.0;
}
dd->y -= margin_top + lineheight;
if ((dd->y - need_bottom) < dd->art_box.y1)
{
new_page(dd);
dd->y -= lineheight;
}
```
We then loop through the fragments for the current line, drawing checkboxes,
images, and text as needed. When a hyperlink is present, we add the link to the
`links` array in the `docdata_t` structure, mapping "@" and "@@" to an internal
link corresponding to the linked text:
```c
if (frag->url && dd->num_links < DOCLINK_MAX)
{
doclink_t *l = dd->links + dd->num_links;
// Pointer to this link record
if (!strcmp(frag->url, "@"))
{
// Use mapped text as link target...
char targetlink[129]; // Targeted link
targetlink[0] = '#';
make_target_name(targetlink + 1, frag->text, sizeof(targetlink) - 1);
l->url = pdfioStringCreate(dd->pdf, targetlink);
}
else if (!strcmp(frag->url, "@@"))
{
// Use literal text as anchor...
l->url = pdfioStringCreatef(dd->pdf, "#%s", frag->text);
}
else
{
// Use URL as-is...
l->url = frag->url;
}
l->box.x1 = frag->x;
l->box.y1 = dd->y;
l->box.x2 = frag->x + frag->width;
l->box.y2 = dd->y + frag->height;
dd->num_links ++;
}
```
These are later written as annotations in the `add_links` function.
#### Rendering a Table Row
The `render_row` function takes a row of cells and the corresponding column
definitions. It starts by drawing the border boxes around body cells:
```c
if (mmdGetType(row->cells[0]) == MMD_TYPE_TABLE_HEADER_CELL)
{
// Header row, no border...
deffont = DOCFONT_BOLD;
}
else
{
// Regular body row, add borders...
deffont = DOCFONT_REGULAR;
set_color(dd, DOCCOLOR_GRAY);
pdfioContentPathRect(dd->st, cols[0].left - TABLE_PADDING, dd->y - row->height,
cols[num_cols - 1].right - cols[0].left +
2.0 * TABLE_PADDING, row->height);
for (col = 1; col < num_cols; col ++)
{
pdfioContentPathMoveTo(dd->st, cols[col].left - TABLE_PADDING, dd->y);
pdfioContentPathLineTo(dd->st, cols[col].left - TABLE_PADDING, dd->y - row->height);
}
pdfioContentStroke(dd->st);
}
```
Then it formats each cell using the `format_block` function described
previously. The page `y` value is reset before formatting each cell:
```c
row_y = dd->y;
for (col = 0; col < num_cols; col ++)
{
dd->y = row_y;
format_block(dd, row->cells[col], deffont, SIZE_TABLE, cols[col].left,
cols[col].right, /*leader*/NULL);
}
dd->y = row_y - row->height;
```

View File

@ -1,7 +1,7 @@
#
# Makefile for PDFio examples.
#
# Copyright © 2024 by Michael R Sweet.
# Copyright © 2024-2025 by Michael R Sweet.
#
# Licensed under Apache License v2.0. See the file "LICENSE" for more
# information.
@ -12,10 +12,10 @@
# Common options
#CFLAGS = -g $(CPPFLAGS)
CFLAGS = -g -fsanitize=address $(CPPFLAGS)
CPPFLAGS = -I..
LIBS = -L.. -lpdfio -lz
CFLAGS = -g $(CPPFLAGS)
#CFLAGS = -g -fsanitize=address $(CPPFLAGS)
CPPFLAGS = -I.. -I/usr/local/include
LIBS = -L.. -L/usr/local/lib -lpdfio -lz -lm
# Targets
@ -23,6 +23,7 @@ TARGETS = \
code128 \
image2pdf \
md2pdf \
pdf2text \
pdfioinfo
@ -50,6 +51,11 @@ md2pdf: md2pdf.c mmd.c mmd.h
$(CC) $(CFLAGS) -o $@ md2pdf.c mmd.c $(LIBS)
# pdfio text extraction (demo, doesn't handle a lot of things yet)
pdf2text: pdf2text.c
$(CC) $(CFLAGS) -o $@ pdf2text.c $(LIBS)
# pdfioinfo
pdfioinfo: pdfioinfo.c
$(CC) $(CFLAGS) -o $@ pdfioinfo.c $(LIBS)

View File

@ -0,0 +1,93 @@
Copyright 2011 The Roboto Project Authors (https://github.com/googlefonts/roboto-classic)
This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
https://openfontlicense.org
-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------
PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.
The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded,
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.
DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.
"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).
"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).
"Modified Version" refers to any derivative made by adding to, deleting,
or substituting -- in part or in whole -- any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.
"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.
PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:
1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.
2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.
3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.
5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.
TERMINATION
This license becomes null and void if any of the above conditions are
not met.
DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.

View File

@ -0,0 +1,343 @@
Copyright 2003 Grandzebu, All Rights Reserved
http://grandzebu.net/informatique/codbar-en/code128.htm
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

Binary file not shown.

View File

@ -209,10 +209,17 @@ static const char * const docfont_names[] =
"FM"
};
#define BQ_PADDING 18.0 // Padding for block quotes
#define BQ_THICKNESS 3.0 // Thickness of block quote bar
#define CODE_PADDING 4.5 // Padding for code blocks
#define IMAGE_PPI 100.0 // Pixels per inch for images
#define LINE_HEIGHT 1.4 // Multiplier for line height
#define LIST_PADDING 36.0 // Padding/indentation for lists
#define SIZE_BODY 11.0 // Size of body text (points)
#define SIZE_CODEBLOCK 10.0 // Size of code block text (points)
#define SIZE_HEADFOOT 9.0 // Size of header/footer text (points)
@ -255,7 +262,7 @@ static double measure_cell(docdata_t *dd, mmd_t *cell, tablecol_t *col);
static mmd_t *mmd_walk_next(mmd_t *top, mmd_t *node);
static void new_page(docdata_t *dd);
static ssize_t output_cb(void *output_cbdata, const void *buffer, size_t bytes);
static void render_line(docdata_t *dd, double margin_left, double margin_top, double need_bottom, double lineheight, size_t num_frags, linefrag_t *frags);
static void render_line(docdata_t *dd, double margin_left, double need_bottom, double lineheight, size_t num_frags, linefrag_t *frags);
static void render_row(docdata_t *dd, size_t num_cols, tablecol_t *cols, tablerow_t *row);
static void set_color(docdata_t *dd, doccolor_t color);
static void set_font(docdata_t *dd, docfont_t font, double fsize);
@ -573,7 +580,6 @@ format_block(docdata_t *dd, // I - Document data
width, // Width of current fragment
wswidth, // Width of whitespace
margin_left, // Left margin
margin_top, // Top margin
need_bottom, // Space needed after this block
height, // Height of current fragment
lineheight; // Height of current line
@ -581,11 +587,6 @@ format_block(docdata_t *dd, // I - Document data
blocktype = mmdGetType(block);
if ((blocktype >= MMD_TYPE_TABLE_HEADER_CELL && blocktype <= MMD_TYPE_TABLE_BODY_CELL_RIGHT) || blocktype == MMD_TYPE_LIST_ITEM)
margin_top = 0.0;
else
margin_top = fsize * LINE_HEIGHT;
if (mmdGetNextSibling(block))
need_bottom = 3.0 * SIZE_BODY * LINE_HEIGHT;
else
@ -664,25 +665,20 @@ format_block(docdata_t *dd, // I - Document data
else
margin_left = 0.0;
render_line(dd, margin_left, margin_top, need_bottom, lineheight, num_frags, frags);
render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
if (deffont == DOCFONT_ITALIC)
{
// Add an orange bar to the left of block quotes...
set_color(dd, DOCCOLOR_ORANGE);
pdfioContentSave(dd->st);
pdfioContentSetLineWidth(dd->st, 3.0);
pdfioContentPathMoveTo(dd->st, left - 6.0, dd->y - (LINE_HEIGHT - 1.0) * fsize);
pdfioContentPathLineTo(dd->st, left - 6.0, dd->y + fsize);
pdfioContentStroke(dd->st);
pdfioContentRestore(dd->st);
pdfioContentPathRect(dd->st, left - BQ_PADDING, dd->y - (LINE_HEIGHT - 1.0) * fsize - BQ_THICKNESS, BQ_THICKNESS, lineheight + 2.0 * BQ_THICKNESS);
pdfioContentFill(dd->st, /*even_odd*/false);
}
num_frags = 0;
frag = frags;
x = left;
lineheight = 0.0;
margin_top = 0.0;
need_bottom = 0.0;
continue;
@ -730,7 +726,13 @@ format_block(docdata_t *dd, // I - Document data
else
margin_left = 0.0;
render_line(dd, margin_left, margin_top, need_bottom, lineheight, num_frags, frags);
render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
num_frags = 0;
frag = frags;
x = left;
lineheight = 0.0;
need_bottom = 0.0;
if (deffont == DOCFONT_ITALIC)
{
@ -743,18 +745,12 @@ format_block(docdata_t *dd, // I - Document data
pdfioContentStroke(dd->st);
pdfioContentRestore(dd->st);
}
num_frags = 0;
frag = frags;
x = left;
lineheight = 0.0;
margin_top = 0.0;
need_bottom = 0.0;
}
// Add the current node to the fragment list
if (num_frags == 0)
{
// No leading whitespace at the start of the line
ws = false;
wswidth = 0.0;
}
@ -786,7 +782,7 @@ format_block(docdata_t *dd, // I - Document data
else
margin_left = 0.0;
render_line(dd, margin_left, margin_top, need_bottom, lineheight, num_frags, frags);
render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
if (deffont == DOCFONT_ITALIC)
{
@ -814,31 +810,30 @@ format_code(docdata_t *dd, // I - Document data
double right) // I - Right margin
{
mmd_t *code; // Current code block
double lineheight, // Line height
margin_top; // Top margin
double lineheight; // Line height
// Compute line height and initial top margin...
// Compute line height...
lineheight = SIZE_CODEBLOCK * LINE_HEIGHT;
margin_top = lineheight;
// Start a new page as needed...
if (!dd->st)
{
new_page(dd);
margin_top = (1.0 - LINE_HEIGHT) * lineheight;
}
dd->y -= lineheight + margin_top;
dd->y -= lineheight + CODE_PADDING;
if ((dd->y - lineheight) < dd->art_box.y1)
{
new_page(dd);
dd->y -= lineheight / LINE_HEIGHT;
dd->y -= lineheight + CODE_PADDING;
}
// Draw the top padding...
set_color(dd, DOCCOLOR_LTGRAY);
pdfioContentPathRect(dd->st, left - CODE_PADDING, dd->y + SIZE_CODEBLOCK, right - left + 2.0 * CODE_PADDING, CODE_PADDING);
pdfioContentFillAndStroke(dd->st, false);
// Start a code text block...
set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
pdfioContentTextBegin(dd->st);
@ -847,7 +842,7 @@ format_code(docdata_t *dd, // I - Document data
for (code = mmdGetFirstChild(block); code; code = mmdGetNextSibling(code))
{
set_color(dd, DOCCOLOR_LTGRAY);
pdfioContentPathRect(dd->st, left - 3.0, dd->y - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK, right - left + 6.0, lineheight);
pdfioContentPathRect(dd->st, left - CODE_PADDING, dd->y - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK, right - left + 2.0 * CODE_PADDING, lineheight);
pdfioContentFillAndStroke(dd->st, false);
set_color(dd, DOCCOLOR_RED);
@ -873,6 +868,11 @@ format_code(docdata_t *dd, // I - Document data
// End the current text block...
pdfioContentTextEnd(dd->st);
dd->y += lineheight;
// Draw the bottom padding...
set_color(dd, DOCCOLOR_LTGRAY);
pdfioContentPathRect(dd->st, left - CODE_PADDING, dd->y - CODE_PADDING - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK, right - left + 2.0 * CODE_PADDING, CODE_PADDING);
pdfioContentFillAndStroke(dd->st, false);
}
@ -918,15 +918,14 @@ format_doc(docdata_t *dd, // I - Document data
break;
case MMD_TYPE_BLOCK_QUOTE :
format_doc(dd, current, DOCFONT_ITALIC, left + 36.0, right - 36.0);
format_doc(dd, current, DOCFONT_ITALIC, left + BQ_PADDING, right - BQ_PADDING);
break;
case MMD_TYPE_ORDERED_LIST :
case MMD_TYPE_UNORDERED_LIST :
if (dd->st)
dd->y -= SIZE_BODY * LINE_HEIGHT;
dd->y -= SIZE_BODY * LINE_HEIGHT;
format_doc(dd, current, deffont, left + 36.0, right);
format_doc(dd, current, deffont, left + LIST_PADDING, right);
break;
case MMD_TYPE_LIST_ITEM :
@ -947,12 +946,17 @@ format_doc(docdata_t *dd, // I - Document data
case MMD_TYPE_HEADING_4 :
case MMD_TYPE_HEADING_5 :
case MMD_TYPE_HEADING_6 :
// Update the current heading
free(dd->heading);
dd->heading = mmdCopyAllText(current);
// Add a blank line before the heading...
dd->y -= heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
// Format the heading...
format_block(dd, current, DOCFONT_BOLD, heading_sizes[curtype - MMD_TYPE_HEADING_1], left, right, /*leader*/NULL);
// Add the heading to the table-of-contents...
if (dd->num_toc < DOCTOC_MAX)
{
doctoc_t *t = dd->toc + dd->num_toc;
@ -976,6 +980,7 @@ format_doc(docdata_t *dd, // I - Document data
dd->num_toc ++;
}
// Add the heading to the list of link targets...
if (dd->num_targets < DOCTARGET_MAX)
{
doctarget_t *t = dd->targets + dd->num_targets;
@ -990,15 +995,27 @@ format_doc(docdata_t *dd, // I - Document data
break;
case MMD_TYPE_PARAGRAPH :
// Add a blank line before the paragraph...
dd->y -= SIZE_BODY * LINE_HEIGHT;
// Format the paragraph...
format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/NULL);
break;
case MMD_TYPE_TABLE :
// Add a blank line before the table...
dd->y -= SIZE_BODY * LINE_HEIGHT;
// Format the table...
format_table(dd, current, left, right);
break;
case MMD_TYPE_CODE_BLOCK :
format_code(dd, current, left + 36.0, right - 36.0);
// Add a blank line before the code block...
dd->y -= SIZE_BODY * LINE_HEIGHT;
// Format the code block...
format_code(dd, current, left + CODE_PADDING, right - CODE_PADDING);
break;
}
}
@ -1032,14 +1049,10 @@ format_table(docdata_t *dd, // I - Document data
// Find all of the rows and columns in the table...
num_cols = num_rows = 0;
memset(cols, 0, sizeof(cols));
memset(rows, 0, sizeof(rows));
rowptr = rows;
for (current = mmdGetFirstChild(table); current && num_rows < TABLEROW_MAX; current = next)
for (num_cols = 0, num_rows = 0, rowptr = rows, current = mmdGetFirstChild(table); current && num_rows < TABLEROW_MAX; current = next)
{
next = mmd_walk_next(table, current);
type = mmdGetType(current);
@ -1131,9 +1144,6 @@ format_table(docdata_t *dd, // I - Document data
}
// Render each table row...
if (dd->st)
dd->y -= SIZE_TABLE * LINE_HEIGHT;
for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
render_row(dd, num_cols, cols, rowptr);
}
@ -1460,7 +1470,6 @@ output_cb(void *output_cbdata, // I - Callback data (not used)
static void
render_line(docdata_t *dd, // I - Document data
double margin_left, // I - Left margin
double margin_top, // I - Top margin
double need_bottom, // I - How much space is needed after
double lineheight, // I - Height of line
size_t num_frags, // I - Number of line fragments
@ -1472,17 +1481,14 @@ render_line(docdata_t *dd, // I - Document data
if (!dd->st)
{
new_page(dd);
margin_top = (1.0 - LINE_HEIGHT) * lineheight;
}
dd->y -= margin_top + lineheight;
dd->y -= lineheight;
if ((dd->y - need_bottom) < dd->art_box.y1)
{
new_page(dd);
dd->y -= lineheight / LINE_HEIGHT;
dd->y -= lineheight;
}
for (i = 0, frag = frags; i < num_frags; i ++, frag ++)

View File

@ -1,22 +1,10 @@
---
title: Markdown to PDF Converter Example
title: Markdown to PDF Converter Test File
...
Markdown to PDF Converter Example
=================================
The `md2pdf` example program reads a markdown file and formats the content onto
pages in a PDF file. It demonstrates how to:
- Embed base and TrueType fonts,
- Format text,
- Embed JPEG and PNG images,
- Add headers and footers, and
- Add hyperlinks.
Source Files
------------
Markdown to PDF Converter Test File
===================================
The `md2pdf` program is organized into three source files: `md2pdf.c` which
contains the code to format the markdown content and `mmd.h` and `mmd.c` (from
@ -24,4 +12,73 @@ the [Miniature Markdown Library][MMD] project) which load the markdown content.
[MMD]: https://www.msweet.org/mmd/
This is a test file for `md2pdf`. Here is a bullet list:
- Embed base and TrueType fonts,
- Format text with embedded JPEG and PNG images and check boxes, with support
for wrapping, alignment in table cells, leader text (as used for lists), and
variable line height,
- Add headers and footers, and
- Add hyperlinks and document platform.
And here is an ordered list:
1. Embed base and TrueType fonts,
2. Format text with embedded JPEG and PNG images and check boxes, with support
for wrapping, alignment in table cells, leader text (as used for lists), and
variable line height,
3. Add headers and footers, and
4. Add hyperlinks and document platform.
Code Blocks
-----------
```
0 1 2 3 4 5 6 7 8
12345678901234567890123456789012345678901234567890123456789012345678901234567890
```
Images
------
PDFio book cover image:
![PDFio](../doc/pdfio-epub.png)
Tables
------
Table with leading/trailing pipes:
| Heading 1 | Heading 2 | Heading 3 |
| --------- | --------- | --------- |
| Cell 1,1 | Cell 1,2 | Cell 1,3 |
| Cell 2,1 | Cell 2,2 | Cell 2,3 |
| Cell 3,1 | Cell 3,2 | Cell 3,3 |
Table without leading/trailing pipes:
Heading 1 | Heading 2 | Heading 3
--------- | --------- | ---------
Cell 1,1 | Cell 1,2 | Cell 1,3
Cell 2,1 | Cell 2,2 | Cell 2,3
Cell 3,1 | Cell 3,2 | Cell 3,3
Table with alignment:
Left Alignment | Center Alignment | Right Alignment
:-------- | :-------: | --------:
Cell 1,1 | Cell 1,2 | 1
Cell 2,1 | Cell 2,2 | 12
Cell 3,1 | Cell 3,2 | 123
Table in block quote:
> Heading 1 | Heading 2 | Heading 3
> --------- | --------- | ---------
> Cell 1,1 | Cell 1,2 | Cell 1,3
> Cell 2,1 | Cell 2,2 | Cell 2,3
> Cell 3,1 | Cell 3,2 | Cell 3,3

View File

@ -1,17 +1,17 @@
//
// PDF to text program for PDFio.
//
// Copyright © 2022 by Michael R Sweet.
// Copyright © 2022-2024 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
//
// Usage:
//
// ./pdfiototext FILENAME.pdf > FILENAME.txt
// ./pdf2text FILENAME.pdf > FILENAME.txt
//
#include "pdfio.h"
#include <pdfio.h>
#include <string.h>
@ -36,16 +36,14 @@ main(int argc, // I - Number of command-line arguments
// Verify command-line arguments...
if (argc != 2)
{
puts("Usage: pdfiototext FILENAME.pdf > FILENAME.txt");
puts("Usage: pdf2text FILENAME.pdf > FILENAME.txt");
return (1);
}
// Open the PDF file...
if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
if ((file = pdfioFileOpen(argv[1], /*password_cb*/NULL, /*password_data*/NULL, /*error_cb*/NULL, /*error_data*/NULL)) == NULL)
return (1);
// printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
// Try grabbing content from all of the pages...
for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
{
@ -54,29 +52,28 @@ main(int argc, // I - Number of command-line arguments
num_streams = pdfioPageGetNumStreams(obj);
// printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
for (j = 0; j < num_streams; j ++)
{
if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
continue;
// printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
// Read PDF tokens from the page stream...
first = true;
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
{
if (buffer[0] == '(')
{
// Text string using an 8-bit encoding
if (first)
first = false;
else
else if (buffer[1] != ' ')
putchar(' ');
fputs(buffer + 1, stdout);
}
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
{
// Text operators that advance to the next line in the block
putchar('\n');
first = true;
}

View File

@ -1,7 +1,7 @@
//
// PDF metadata example for PDFio.
//
// Copyright © 2023-2024 by Michael R Sweet.
// Copyright © 2023-2025 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -25,9 +25,11 @@ main(int argc, // I - Number of command-line arguments
{
const char *filename; // PDF filename
pdfio_file_t *pdf; // PDF file
const char *author; // Author name
time_t creation_date; // Creation date
struct tm *creation_tm; // Creation date/time information
char creation_text[256]; // Creation date/time as a string
const char *title; // Title
// Get the filename from the command-line...
@ -46,15 +48,25 @@ main(int argc, // I - Number of command-line arguments
if (pdf == NULL)
return (1);
// Get the title and author...
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
// Get the creation date and convert to a string...
creation_date = pdfioFileGetCreationDate(pdf);
creation_tm = localtime(&creation_date);
strftime(creation_text, sizeof(creation_text), "%c", creation_tm);
// Convert the creation date to a string.  Fall back to the placeholder text
// when no date is set or when localtime() cannot convert the value (it
// returns NULL in that case, which must not be passed to strftime())...
if ((creation_date = pdfioFileGetCreationDate(pdf)) > 0 && (creation_tm = localtime(&creation_date)) != NULL)
{
  strftime(creation_text, sizeof(creation_text), "%c", creation_tm);
}
else
{
  snprintf(creation_text, sizeof(creation_text), "-- not set --");
}
// Print file information to stdout...
printf("%s:\n", filename);
printf(" Title: %s\n", pdfioFileGetTitle(pdf));
printf(" Author: %s\n", pdfioFileGetAuthor(pdf));
printf(" Title: %s\n", title ? title : "-- not set --");
printf(" Author: %s\n", author ? author : "-- not set --");
printf(" Created On: %s\n", creation_text);
printf(" Number Pages: %u\n", (unsigned)pdfioFileGetNumPages(pdf));

View File

@ -29,6 +29,15 @@ if test $(grep AC_INIT configure.ac | awk '{print $2}') != "[$version],"; then
exit 1
fi
# Validate the fourth line of CHANGES.md: its first field must be "v$version"
# and its third field must no longer be the "YYYY-MM-DD" placeholder.  The
# command substitutions are quoted so that an empty or missing line is
# compared as an empty string instead of making "test" fail with a syntax
# error (which would let the check pass silently).
if test "$(head -4 CHANGES.md | tail -1 | awk '{print $1}')" != "v$version"; then
    echo "Still need to update CHANGES.md version number."
    exit 1
fi
if test "$(head -4 CHANGES.md | tail -1 | awk '{print $3}')" = "YYYY-MM-DD"; then
    echo "Still need to update CHANGES.md release date."
    exit 1
fi
if test $(grep PDFIO_VERSION= configure | awk -F \" '{print $2}') != "$version"; then
echo "Still need to run 'autoconf -f'."
exit 1

View File

@ -1,7 +1,7 @@
//
// Common support functions for pdfio.
//
// Copyright © 2021-2024 by Michael R Sweet.
// Copyright © 2021-2025 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -368,7 +368,7 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file
if (pdf->mode == _PDFIO_MODE_READ)
{
// Reading, see if we already have the data we need...
if (whence != SEEK_END && offset >= pdf->bufpos && offset < (pdf->bufpos + pdf->bufend - pdf->buffer))
if (whence != SEEK_END && offset >= pdf->bufpos && pdf->bufend && offset < (pdf->bufpos + pdf->bufend - pdf->buffer))
{
// Yes, seek within existing buffer...
pdf->bufptr = pdf->buffer + (offset - pdf->bufpos);
@ -398,7 +398,10 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file
}
// Seek within the file...
if ((offset = lseek(pdf->fd, offset, whence)) < 0)
if ((offset = lseek(pdf->fd, offset, whence)) < 0 && whence == SEEK_END && errno == EINVAL)
offset = lseek(pdf->fd, 0, SEEK_SET);
if (offset < 0)
{
_pdfioFileError(pdf, "Unable to seek within file - %s", strerror(errno));
return (-1);

View File

@ -1132,7 +1132,7 @@ pdfioContentTextMeasure(
}
if (i < (sizeof(_pdfio_cp1252) / sizeof(_pdfio_cp1252[0])))
ch = i + 0x80; // Extra characters from 0x80 to 0x9f
ch = (int)(i + 0x80); // Extra characters from 0x80 to 0x9f
else
ch = '?'; // Unsupported chars map to ?
}

View File

@ -465,10 +465,134 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary
else if (value && value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096)
{
// Convert binary string to regular string...
char temp[4096]; // Temporary string
char temp[4096], // Temporary string
*tempptr; // Pointer into temporary string
unsigned char *dataptr; // Pointer into the data string
memcpy(temp, value->value.binary.data, value->value.binary.datalen);
temp[value->value.binary.datalen] = '\0';
if (value->value.binary.datalen >= 2 && !(value->value.binary.datalen & 1) && (!memcmp(value->value.binary.data, "\376\377", 2) || !memcmp(value->value.binary.data, "\377\376", 2)))
{
  // Binary string starts with a UTF-16 byte order mark, convert the 16-bit
  // units that follow it to UTF-8 in "temp"...
  int		ch;			// Unicode character
  int		hi,			// Index of high byte in each 16-bit unit
		lo;			// Index of low byte in each 16-bit unit
  size_t	remaining;		// Remaining bytes

  if (!memcmp(value->value.binary.data, "\376\377", 2))
  {
    // FE FF = UTF-16 BE, high byte comes first
    hi = 0;
    lo = 1;
  }
  else
  {
    // FF FE = UTF-16 LE, low byte comes first
    hi = 1;
    lo = 0;
  }

  // Stop 5 bytes short of the end of "temp" so that a 4-byte UTF-8 sequence
  // plus the trailing nul always fits...
  for (dataptr = value->value.binary.data + 2, remaining = value->value.binary.datalen - 2, tempptr = temp; remaining > 1 && tempptr < (temp + sizeof(temp) - 5); dataptr += 2, remaining -= 2)
  {
    ch = (dataptr[hi] << 8) | dataptr[lo];

    if (ch >= 0xd800 && ch <= 0xdbff && remaining > 3)
    {
      // Multi-word UTF-16 char, combine the high and low surrogates...
      int	lch;			// Lower bits

      lch = (dataptr[2 + hi] << 8) | dataptr[2 + lo];
      if (lch < 0xdc00 || lch > 0xdfff)
        break;				// Not a low surrogate, stop converting

      ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;

      // Consume the second 16-bit unit as well...
      dataptr   += 2;
      remaining -= 2;
    }
    else if ((ch >= 0xd800 && ch <= 0xdfff) || ch >= 0xfffe)
    {
      // Skip unpaired surrogates and the U+FFFE/U+FFFF non-characters since
      // they cannot be represented as valid UTF-8...
      continue;
    }

    if (ch < 0x80)
    {
      // ASCII
      *tempptr++ = (char)ch;
    }
    else if (ch < 0x800)
    {
      // 2-byte UTF-8 (U+0080 to U+07FF)
      *tempptr++ = (char)(0xc0 | (ch >> 6));
      *tempptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else if (ch < 0x10000)
    {
      // 3-byte UTF-8 (U+0800 to U+FFFF)
      *tempptr++ = (char)(0xe0 | (ch >> 12));
      *tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *tempptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else
    {
      // 4-byte UTF-8 (U+10000 to U+10FFFF)
      *tempptr++ = (char)(0xf0 | (ch >> 18));
      *tempptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
      *tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *tempptr++ = (char)(0x80 | (ch & 0x3f));
    }
  }

  *tempptr = '\0';
}
else
{
  // No byte order mark, copy as-is...
  memcpy(temp, value->value.binary.data, value->value.binary.datalen);
  temp[value->value.binary.datalen] = '\0';
}
free(value->value.binary.data);
value->type = PDFIO_VALTYPE_STRING;

View File

@ -1,7 +1,7 @@
//
// PDF file functions for PDFio.
//
// Copyright © 2021-2024 by Michael R Sweet.
// Copyright © 2021-2025 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -1517,6 +1517,7 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
cur_obj, // Current object
num_objs = 0; // Number of objects
pdfio_obj_t *objs[16384]; // Objects
int count; // Count of objects
PDFIO_DEBUG("load_obj_stream(obj=%p(%d))\n", obj, (int)obj->number);
@ -1528,12 +1529,17 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
return (false);
}
count = (int)pdfioDictGetNumber(pdfioObjGetDict(obj), "N");
PDFIO_DEBUG("load_obj_stream: N=%d\n", count);
_pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
// Read the object numbers from the beginning of the stream...
while (_pdfioTokenGet(&tb, buffer, sizeof(buffer)))
while (count > 0 && _pdfioTokenGet(&tb, buffer, sizeof(buffer)))
{
// Stop if this isn't an object number...
PDFIO_DEBUG("load_obj_stream: %s\n", buffer);
if (!isdigit(buffer[0] & 255))
break;
@ -1556,21 +1562,19 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
// Skip offset
_pdfioTokenGet(&tb, buffer, sizeof(buffer));
PDFIO_DEBUG("load_obj_stream: %ld at offset %s\n", (long)number, buffer);
// One less compressed object...
count --;
}
if (!buffer[0])
{
pdfioStreamClose(st);
return (false);
}
_pdfioTokenPush(&tb, buffer);
PDFIO_DEBUG("load_obj_stream: num_objs=%lu\n", (unsigned long)num_objs);
// Read the objects themselves...
for (cur_obj = 0; cur_obj < num_objs; cur_obj ++)
{
if (!_pdfioValueRead(obj->pdf, obj, &tb, &(objs[cur_obj]->value), 0))
{
_pdfioFileError(obj->pdf, "Unable to read compressed object.");
pdfioStreamClose(st);
return (false);
}
@ -1720,7 +1724,7 @@ load_xref(
pdfio_stream_t *st; // Stream
unsigned char buffer[32]; // Read buffer
size_t num_sobjs = 0, // Number of object streams
sobjs[8192]; // Object streams to load
sobjs[16384]; // Object streams to load
pdfio_obj_t *current; // Current object
if ((number = strtoimax(line, &ptr, 10)) < 1)
@ -1938,7 +1942,6 @@ load_xref(
// Save the trailer dictionary and grab the root (catalog) and info
// objects...
pdf->trailer_dict = trailer.value.dict;
pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info");
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
@ -2086,7 +2089,6 @@ load_xref(
// Save the trailer dictionary and grab the root (catalog) and info
// objects...
pdf->trailer_dict = trailer.value.dict;
pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info");
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
@ -2123,6 +2125,8 @@ load_xref(
// Once we have all of the xref tables loaded, get the important objects and
// build the pages array...
pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info");
if ((pdf->root_obj = pdfioDictGetObj(pdf->trailer_dict, "Root")) == NULL)
{
_pdfioFileError(pdf, "Missing Root object.");

View File

@ -1,7 +1,7 @@
//
// Public header file for PDFio.
//
// Copyright © 2021-2024 by Michael R Sweet.
// Copyright © 2021-2025 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -23,7 +23,7 @@ extern "C" {
// Version number...
//
# define PDFIO_VERSION "1.4.0"
# define PDFIO_VERSION "1.4.1"
//

View File

@ -3,7 +3,7 @@
archiveVersion = 1;
classes = {
};
objectVersion = 50;
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */
@ -31,6 +31,7 @@
27F2F0612710BE92008ECD36 /* pdfio-rc4.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */; };
27F2F0622710BE92008ECD36 /* pdfio-crypto.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */; };
27F2F0642711243D008ECD36 /* pdfio-sha256.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F0632711243D008ECD36 /* pdfio-sha256.c */; };
27FCBDE42D19F9B300485EEE /* pdfio-base-font-widths.h in Headers */ = {isa = PBXBuildFile; fileRef = 27FCBDE32D19F9B300485EEE /* pdfio-base-font-widths.h */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
@ -89,6 +90,7 @@
27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-rc4.c"; sourceTree = "<group>"; };
27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-crypto.c"; sourceTree = "<group>"; };
27F2F0632711243D008ECD36 /* pdfio-sha256.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-sha256.c"; sourceTree = "<group>"; };
27FCBDE32D19F9B300485EEE /* pdfio-base-font-widths.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "pdfio-base-font-widths.h"; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -162,6 +164,7 @@
children = (
27CF90432711DFFE00E50FE4 /* pdfio-aes.c */,
273440BA263D727800FBFD63 /* pdfio-array.c */,
27FCBDE32D19F9B300485EEE /* pdfio-base-font-widths.h */,
273440BB263D727800FBFD63 /* pdfio-common.c */,
271EA703265B2B1000ACDD39 /* pdfio-content.c */,
27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */,
@ -205,6 +208,7 @@
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
27FCBDE42D19F9B300485EEE /* pdfio-base-font-widths.h in Headers */,
273440CC263D727800FBFD63 /* pdfio.h in Headers */,
271EA706265B2B1000ACDD39 /* pdfio-content.h in Headers */,
273440C3263D727800FBFD63 /* pdfio-private.h in Headers */,
@ -256,7 +260,8 @@
273440A8263D6FE200FBFD63 /* Project object */ = {
isa = PBXProject;
attributes = {
LastUpgradeCheck = 1300;
BuildIndependentTargetsInParallel = YES;
LastUpgradeCheck = 1600;
TargetAttributes = {
273440AF263D6FE200FBFD63 = {
CreatedOnToolsVersion = 12.5;
@ -373,9 +378,11 @@
CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.1.2;
DEAD_CODE_STRIPPING = YES;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
ENABLE_USER_SCRIPT_SANDBOXING = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_PREPROCESSOR_DEFINITIONS = (
@ -450,15 +457,15 @@
CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.1.2;
DEAD_CODE_STRIPPING = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_USER_SCRIPT_SANDBOXING = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_PREPROCESSOR_DEFINITIONS = (
"$(inherited)",
);
GCC_PREPROCESSOR_DEFINITIONS = "$(inherited)";
GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS = YES;
@ -486,6 +493,7 @@
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
DEAD_CODE_STRIPPING = YES;
DEVELOPMENT_TEAM = RU58A2256H;
EXECUTABLE_PREFIX = lib;
PRODUCT_NAME = "$(TARGET_NAME)";
@ -497,6 +505,7 @@
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
DEAD_CODE_STRIPPING = YES;
DEVELOPMENT_TEAM = RU58A2256H;
EXECUTABLE_PREFIX = lib;
PRODUCT_NAME = "$(TARGET_NAME)";
@ -509,6 +518,7 @@
buildSettings = {
CODE_SIGN_IDENTITY = "-";
CODE_SIGN_STYLE = Automatic;
DEAD_CODE_STRIPPING = YES;
DEVELOPMENT_TEAM = "";
ENABLE_HARDENED_RUNTIME = YES;
GCC_DYNAMIC_NO_PIC = NO;
@ -525,6 +535,7 @@
buildSettings = {
CODE_SIGN_IDENTITY = "-";
CODE_SIGN_STYLE = Automatic;
DEAD_CODE_STRIPPING = YES;
DEVELOPMENT_TEAM = "";
ENABLE_HARDENED_RUNTIME = YES;
MACOSX_DEPLOYMENT_TARGET = 11.0;

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native</id>
<title>PDFio Library for VS2019+</title>
<version>1.3.2</version>
<version>1.4.1</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pdfio</projectUrl>
@ -13,10 +13,10 @@
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2024 by Michael R Sweet</copyright>
<copyright>Copyright © 2019-2025 by Michael R Sweet</copyright>
<tags>pdf file native</tags>
<dependencies>
<dependency id="pdfio_native.redist" version="1.3.2" />
<dependency id="pdfio_native.redist" version="1.4.1" />
<dependency id="zlib_native.redist" version="1.2.11" />
</dependencies>
</metadata>

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native.redist</id>
<title>PDFio Library for VS2019+</title>
<version>1.3.2</version>
<version>1.4.1</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pdfio</projectUrl>
@ -13,7 +13,7 @@
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2024 by Michael R Sweet</copyright>
<copyright>Copyright © 2019-2025 by Michael R Sweet</copyright>
<tags>pdf file native</tags>
<dependencies>
<dependency id="zlib_native.redist" version="1.2.11" />

View File

@ -1,3 +1,7 @@
https://www.color.org/chardata/rgb/rommrgb.xalter
Copyright © 2006 Hewlett-Packard
Terms of use
This profile is made available by ICC, and may be copied, distributed, embedded, made, used, and sold without restriction. Altered versions of this profile shall have the original identification and copyright information removed and shall not be misrepresented as the original profile.