16 Commits

Author SHA1 Message Date
4bbb8b0b38 Update docos. 2025-05-03 10:09:45 -04:00
1657e89ddb Bump version. 2025-05-03 10:08:54 -04:00
afa6d4c4de Don't set extension pointer if object is NULL. 2025-04-25 12:41:11 -04:00
31a086e165 Fix object map to use unique file hash instead of pointer values (Issue #125) 2025-04-24 14:00:30 -04:00
01cc243bcf Add more range checking for streams. 2025-04-24 13:17:53 -04:00
25f5e28e56 Update docos. 2025-04-24 12:30:01 -04:00
e6588d3960 Backport more changes from master. 2025-04-24 12:29:53 -04:00
8f706b9fe7 Backport reading improvements/bug fixes from master. 2025-04-24 12:19:11 -04:00
f9c07a0346 Add support for EncryptMetadata key in encryption dictionary. 2025-04-23 10:12:33 -04:00
a22957baa1 Protect against invalid predictor buffer size. 2025-04-23 10:12:18 -04:00
d7f3c64f63 Fix encrypted PDF files with P values >2^31-1. 2025-04-20 17:34:38 -04:00
29eea131b9 Remove old per-object crypto stuff we don't need. 2025-04-13 09:10:17 -04:00
2dcef0936e Fix decryption of UTF-16 strings (Issue #42) 2025-04-13 09:05:02 -04:00
20dd2a6d28 Fix decryption of RC4-40 files. 2025-04-13 08:37:24 -04:00
4219b8fd77 Update release date. 2025-04-12 15:12:07 -04:00
064e7fa473 Fix makesrcdist script. 2025-04-12 15:11:36 -04:00
22 changed files with 757 additions and 426 deletions

View File

@ -2,7 +2,18 @@ Changes in PDFio
================
v1.5.2 - YYYY-MM-DD
v1.5.3 - 2025-05-03
-------------------
- Fixed decryption of PDF files "protected" by 40-bit RC4 (Issue #42)
- Fixed decryption of UTF-16 strings (Issue #42)
- Fixed decryption of PDF files with large permission values.
- Fixed support for EncryptMetadata key in the encryption dictionary.
- Fixed `pdfioObjCopy` and `pdfioPageCopy` to properly identify the source PDF
file being used (Issue #125)
v1.5.2 - 2025-04-12
-------------------
- Updated maximum allowed PDF string size to 64k (Issue #117)

24
configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for pdfio 1.5.2.
# Generated by GNU Autoconf 2.71 for pdfio 1.5.3.
#
# Report bugs to <https://github.com/michaelrsweet/pdfio/issues>.
#
@ -610,8 +610,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='pdfio'
PACKAGE_TARNAME='pdfio'
PACKAGE_VERSION='1.5.2'
PACKAGE_STRING='pdfio 1.5.2'
PACKAGE_VERSION='1.5.3'
PACKAGE_STRING='pdfio 1.5.3'
PACKAGE_BUGREPORT='https://github.com/michaelrsweet/pdfio/issues'
PACKAGE_URL='https://www.msweet.org/pdfio'
@ -1295,7 +1295,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures pdfio 1.5.2 to adapt to many kinds of systems.
\`configure' configures pdfio 1.5.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1361,7 +1361,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of pdfio 1.5.2:";;
short | recursive ) echo "Configuration of pdfio 1.5.3:";;
esac
cat <<\_ACEOF
@ -1460,7 +1460,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
pdfio configure 1.5.2
pdfio configure 1.5.3
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@ -1678,7 +1678,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by pdfio $as_me 1.5.2, which was
It was created by pdfio $as_me 1.5.3, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@ -2434,9 +2434,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
PDFIO_VERSION="1.5.2"
PDFIO_VERSION_MAJOR="`echo 1.5.2 | awk -F. '{print $1}'`"
PDFIO_VERSION_MINOR="`echo 1.5.2 | awk -F. '{printf("%d\n",$2);}'`"
PDFIO_VERSION="1.5.3"
PDFIO_VERSION_MAJOR="`echo 1.5.3 | awk -F. '{print $1}'`"
PDFIO_VERSION_MINOR="`echo 1.5.3 | awk -F. '{printf("%d\n",$2);}'`"
@ -5099,7 +5099,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by pdfio $as_me 1.5.2, which was
This file was extended by pdfio $as_me 1.5.3, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -5155,7 +5155,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
pdfio config.status 1.5.2
pdfio config.status 1.5.3
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@ -21,7 +21,7 @@ AC_PREREQ([2.70])
dnl Package name and version...
AC_INIT([pdfio], [1.5.2], [https://github.com/michaelrsweet/pdfio/issues], [pdfio], [https://www.msweet.org/pdfio])
AC_INIT([pdfio], [1.5.3], [https://github.com/michaelrsweet/pdfio/issues], [pdfio], [https://www.msweet.org/pdfio])
PDFIO_VERSION="AC_PACKAGE_VERSION"
PDFIO_VERSION_MAJOR="`echo AC_PACKAGE_VERSION | awk -F. '{print $1}'`"

View File

@ -1,4 +1,4 @@
.TH pdfio 3 "pdf read/write library" "2025-04-12" "pdf read/write library"
.TH pdfio 3 "pdf read/write library" "2025-05-03" "pdf read/write library"
.SH NAME
pdfio \- pdf read/write library
.SH Introduction
@ -325,7 +325,7 @@ where the five arguments to the function are the filename ("myinputfile.pdf"), a
}
.fi
.PP
The error callback is called for both errors and warnings and accepts the pdfio_file_t pointer, a message string, and the callback pointer value, for example:
The error callback is called for both errors and warnings and accepts the pdfio_file_t pointer, a message string, and the callback pointer value. It returns true to continue processing the file or false to stop, for example:
.nf
bool
@ -335,12 +335,15 @@ The error callback is called for both errors and warnings and accepts the pdfio_
fprintf(stderr, "%s: %s\\n", pdfioFileGetName(pdf), message);
// Return false to treat warnings as errors
return (false);
// Return true for warning messages (continue) and false for errors (stop)
return (!strncmp(message, "WARNING:", 8));
}
.fi
.PP
The default error callback (NULL) does the equivalent of the above.
.PP
Note: Many errors are unrecoverable; for those errors PDFio ignores the return value from the error callback and always stops processing the PDF file. Warning messages start with the prefix "WARNING:" while errors have no prefix.
.PP
Each PDF file contains one or more pages. The pdfioFileGetNumPages function returns the number of pages in the file while the pdfioFileGetPage function gets the specified page in the PDF file:
.nf
@ -2477,7 +2480,7 @@ PDFIO_ENCRYPTION_RC4_128
.TP 5
PDFIO_ENCRYPTION_RC4_40
.br
40-bit RC4 encryption (PDF 1.3)
40-bit RC4 encryption (PDF 1.3, reading only)
.SS pdfio_filter_e
Compression/decompression filters for streams
.TP 5
@ -2947,6 +2950,8 @@ bool pdfioArrayRemove (
size_t n
);
.fi
.PP
.SS pdfioContentClip
Clip output to the current path.
.PP
@ -3083,6 +3088,8 @@ bool pdfioContentPathEnd (
pdfio_stream_t *st
);
.fi
.PP
.SS pdfioContentPathLineTo
Add a straight line to the current path.
.PP
@ -3423,6 +3430,8 @@ double pdfioContentTextMeasure (
This function measures the given text string "s" and returns its width based
on "size". The text string must always use the UTF-8 (Unicode) encoding but
any control characters (such as newlines) are ignored.
.SS pdfioContentTextMoveLine
Move to the next line and offset.
.PP
@ -3451,6 +3460,8 @@ bool pdfioContentTextNewLine (
pdfio_stream_t *st
);
.fi
.PP
.SS pdfioContentTextNewLineShow
Move to the next line and show text.
.PP
@ -3468,6 +3479,8 @@ This function moves to the next line and then shows some text with optional
word and character spacing in a PDF content stream. The "unicode" argument
specifies that the current font maps to full Unicode. The "s" argument
specifies a UTF-8 encoded string.
.SS pdfioContentTextNewLineShowf
Move to the next line and show formatted text.
.PP
@ -3486,6 +3499,8 @@ This function moves to the next line and shows some formatted text with
optional word and character spacing in a PDF content stream. The "unicode"
argument specifies that the current font maps to full Unicode. The "format"
argument specifies a UTF-8 encoded \fBprintf\fR-style format string.
.SS pdfioContentTextShow
Show text.
.PP
@ -3536,6 +3551,8 @@ bool pdfioDictClear (
const char *key
);
.fi
.PP
.SS pdfioDictCopy
Copy a dictionary to a PDF file.
.PP
@ -3608,6 +3625,8 @@ const char * pdfioDictGetKey (
size_t n
);
.fi
.PP
.SS pdfioDictGetName
Get a key name value from a dictionary.
.PP
@ -3625,6 +3644,8 @@ size_t pdfioDictGetNumPairs (
pdfio_dict_t *dict
);
.fi
.PP
.SS pdfioDictGetNumber
Get a key number value from a dictionary.
.PP
@ -3697,6 +3718,8 @@ function "cb":
The iteration continues as long as the callback returns \fBtrue\fR or all keys
have been iterated.
.SS pdfioDictSetArray
Set a key array in a dictionary.
.PP
@ -3852,8 +3875,9 @@ CropBox for pages in the PDF file - if \fBNULL\fR then a default "Universal" siz
of 8.27x11in (the intersection of US Letter and ISO A4) is used.
.PP
The "error_cb" and "error_cbdata" arguments specify an error handler callback
and its data pointer - if \fBNULL\fR the default error handler is used that
writes error messages to \fBstderr\fR.
and its data pointer - if \fBNULL\fR then the default error handler is used that
writes error messages to \fBstderr\fR. The error handler callback should return
\fBtrue\fR to continue writing the PDF file or \fBfalse\fR to stop.
.SS pdfioFileCreateArrayObj
Create a new object in a PDF file containing an array.
.PP
@ -4011,6 +4035,8 @@ pdfio_obj_t * pdfioFileCreateNameObj (
.PP
This function creates a new object with a name value in a PDF file.
You must call \fIpdfioObjClose\fR to write the object to the file.
.SS pdfioFileCreateNumberObj
Create a new object in a PDF file containing a number.
.PP
@ -4023,6 +4049,8 @@ pdfio_obj_t * pdfioFileCreateNumberObj (
.PP
This function creates a new object with a number value in a PDF file.
You must call \fIpdfioObjClose\fR to write the object to the file.
.SS pdfioFileCreateObj
Create a new object in a PDF file.
.PP
@ -4071,8 +4099,9 @@ CropBox for pages in the PDF file - if \fBNULL\fR then a default "Universal" siz
of 8.27x11in (the intersection of US Letter and ISO A4) is used.
.PP
The "error_cb" and "error_cbdata" arguments specify an error handler callback
and its data pointer - if \fBNULL\fR the default error handler is used that
writes error messages to \fBstderr\fR.
and its data pointer - if \fBNULL\fR then the default error handler is used that
writes error messages to \fBstderr\fR. The error handler callback should return
\fBtrue\fR to continue writing the PDF file or \fBfalse\fR to stop.
.PP
.IP 5
\fINote\fR: Files created using this API are slightly larger than those
@ -4101,6 +4130,8 @@ pdfio_obj_t * pdfioFileCreateStringObj (
.PP
This function creates a new object with a string value in a PDF file.
You must call \fIpdfioObjClose\fR to write the object to the file.
.SS pdfioFileCreateTemporary
.PP
@ -4294,8 +4325,18 @@ cancel the open. If \fBNULL\fR is specified for the callback function and the
PDF file requires a password, the open will always fail.
.PP
The "error_cb" and "error_cbdata" arguments specify an error handler callback
and its data pointer - if \fBNULL\fR the default error handler is used that
writes error messages to \fBstderr\fR.
and its data pointer - if \fBNULL\fR then the default error handler is used that
writes error messages to \fBstderr\fR. The error handler callback should return
\fBtrue\fR to continue reading the PDF file or \fBfalse\fR to stop.
.PP
.IP 5
Note: Error messages starting with "WARNING:" are actually warning
.IP 5
messages - the callback should normally return \fBtrue\fR to allow PDFio to
.IP 5
try to resolve the issue. In addition, some errors are unrecoverable, and
.IP 5
PDFio ignores the return value of the error callback for them.
.SS pdfioFileSetAuthor
Set the author for a PDF file.
.PP
@ -4474,6 +4515,8 @@ const char * pdfioObjGetName (
pdfio_obj_t *obj
);
.fi
.PP
.SS pdfioObjGetNumber
Get the object's number.
.PP
@ -4677,8 +4720,9 @@ bool pdfioStreamPrintf (
.fi
.PP
This function writes a formatted string to a stream. In addition to the
standard \fBprintf\fR format characters, you can use "%N" to format a PDF name
value ("/Name") and "%S" to format a PDF string ("(String)") value.
standard \fBprintf\fR format characters, you can use "%H" to format an HTML/XML
string value, "%N" to format a PDF name value ("/Name"), and "%S" to format
a PDF string ("(String)") value.
.SS pdfioStreamPutChar
Write a single character to a stream.
.PP

View File

@ -1,13 +1,13 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>PDFio Programming Manual v1.5.2</title>
<title>PDFio Programming Manual v1.5.3</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="generator" content="codedoc v3.8">
<meta name="author" content="Michael R Sweet">
<meta name="language" content="en-US">
<meta name="copyright" content="Copyright © 2021-2025 by Michael R Sweet">
<meta name="version" content="1.5.2">
<meta name="version" content="1.5.3">
<style type="text/css"><!--
body {
background: white;
@ -251,7 +251,7 @@ span.string {
<body>
<div class="header">
<p><img class="title" src="pdfio-512.png"></p>
<h1 class="title">PDFio Programming Manual v1.5.2</h1>
<h1 class="title">PDFio Programming Manual v1.5.3</h1>
<p>Michael R Sweet</p>
<p>Copyright © 2021-2025 by Michael R Sweet</p>
</div>
@ -728,7 +728,7 @@ password_cb(<span class="reserved">void</span> *data, <span class="reserved">con
<span class="reserved">return</span> (<span class="string">&quot;Password42&quot;</span>);
}
</code></pre>
<p>The error callback is called for both errors and warnings and accepts the <code>pdfio_file_t</code> pointer, a message string, and the callback pointer value, for example:</p>
<p>The error callback is called for both errors and warnings and accepts the <code>pdfio_file_t</code> pointer, a message string, and the callback pointer value. It returns <code>true</code> to continue processing the file or <code>false</code> to stop, for example:</p>
<pre><code class="language-c"><span class="reserved">bool</span>
error_cb(pdfio_file_t *pdf, <span class="reserved">const</span> <span class="reserved">char</span> *message, <span class="reserved">void</span> *data)
{
@ -736,11 +736,14 @@ error_cb(pdfio_file_t *pdf, <span class="reserved">const</span> <span class="res
fprintf(stderr, <span class="string">&quot;%s: %s\n&quot;</span>, pdfioFileGetName(pdf), message);
<span class="comment">// Return false to treat warnings as errors</span>
<span class="reserved">return</span> (<span class="reserved">false</span>);
<span class="comment">// Return true for warning messages (continue) and false for errors (stop)</span>
<span class="reserved">return</span> (!strncmp(message, <span class="string">&quot;WARNING:&quot;</span>, <span class="number">8</span>));
}
</code></pre>
<p>The default error callback (<code>NULL</code>) does the equivalent of the above.</p>
<blockquote>
<p>Note: Many errors are unrecoverable, so PDFio ignores the return value from the error callback and always stops processing the PDF file. Warning messages start with the prefix &quot;WARNING:&quot; while errors have no prefix.</p>
</blockquote>
<p>Each PDF file contains one or more pages. The <a href="#pdfioFileGetNumPages"><code>pdfioFileGetNumPages</code></a> function returns the number of pages in the file while the <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> function gets the specified page in the PDF file:</p>
<pre><code class="language-c">pdfio_file_t *pdf; <span class="comment">// PDF file</span>
size_t i; <span class="comment">// Looping var</span>
@ -2746,7 +2749,7 @@ size_t pdfioArrayGetSize(<a href="#pdfio_array_t">pdfio_array_t</a> *a);</p>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Value type</p>
<h3 class="function"><a id="pdfioArrayRemove">pdfioArrayRemove</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.4&#160;</span><a id="pdfioArrayRemove">pdfioArrayRemove</a></h3>
<p class="description">Remove an array entry.</p>
<p class="code">
<span class="reserved">bool</span> pdfioArrayRemove(<a href="#pdfio_array_t">pdfio_array_t</a> *a, size_t n);</p>
@ -2951,7 +2954,7 @@ using the <a href="#pdfioPageDictAddImage"><code>pdfioPageDictAddImage</code></a
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioContentPathEnd">pdfioContentPathEnd</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.1&#160;</span><a id="pdfioContentPathEnd">pdfioContentPathEnd</a></h3>
<p class="description">Clear the current path.</p>
<p class="code">
<span class="reserved">bool</span> pdfioContentPathEnd(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st);</p>
@ -3441,7 +3444,7 @@ are 0, a solid line is drawn.</p>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioContentTextMeasure">pdfioContentTextMeasure</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.2&#160;</span><a id="pdfioContentTextMeasure">pdfioContentTextMeasure</a></h3>
<p class="description">Measure a text string and return its width.</p>
<p class="code">
<span class="reserved">double</span> pdfioContentTextMeasure(<a href="#pdfio_obj_t">pdfio_obj_t</a> *font, <span class="reserved">const</span> <span class="reserved">char</span> *s, <span class="reserved">double</span> size);</p>
@ -3459,7 +3462,9 @@ are 0, a solid line is drawn.</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function measures the given text string &quot;s&quot; and returns its width based
on &quot;size&quot;. The text string must always use the UTF-8 (Unicode) encoding but
any control characters (such as newlines) are ignored.</p>
any control characters (such as newlines) are ignored.
</p>
<h3 class="function"><a id="pdfioContentTextMoveLine">pdfioContentTextMoveLine</a></h3>
<p class="description">Move to the next line and offset.</p>
<p class="code">
@ -3490,7 +3495,7 @@ any control characters (such as newlines) are ignored.</p>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioContentTextNewLine">pdfioContentTextNewLine</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.2&#160;</span><a id="pdfioContentTextNewLine">pdfioContentTextNewLine</a></h3>
<p class="description">Move to the next line.</p>
<p class="code">
<span class="reserved">bool</span> pdfioContentTextNewLine(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st);</p>
@ -3501,7 +3506,7 @@ any control characters (such as newlines) are ignored.</p>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioContentTextNewLineShow">pdfioContentTextNewLineShow</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.2&#160;</span><a id="pdfioContentTextNewLineShow">pdfioContentTextNewLineShow</a></h3>
<p class="description">Move to the next line and show text.</p>
<p class="code">
<span class="reserved">bool</span> pdfioContentTextNewLineShow(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st, <span class="reserved">double</span> ws, <span class="reserved">double</span> cs, <span class="reserved">bool</span> unicode, <span class="reserved">const</span> <span class="reserved">char</span> *s);</p>
@ -3524,8 +3529,10 @@ any control characters (such as newlines) are ignored.</p>
<p class="discussion">This function moves to the next line and then shows some text with optional
word and character spacing in a PDF content stream. The &quot;unicode&quot; argument
specifies that the current font maps to full Unicode. The &quot;s&quot; argument
specifies a UTF-8 encoded string.</p>
<h3 class="function"><a id="pdfioContentTextNewLineShowf">pdfioContentTextNewLineShowf</a></h3>
specifies a UTF-8 encoded string.
</p>
<h3 class="function"><span class="info">&#160;PDFio v1.2&#160;</span><a id="pdfioContentTextNewLineShowf">pdfioContentTextNewLineShowf</a></h3>
<p class="description">Show formatted text.</p>
<p class="code">
<span class="reserved">bool</span> pdfioContentTextNewLineShowf(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st, <span class="reserved">double</span> ws, <span class="reserved">double</span> cs, <span class="reserved">bool</span> unicode, <span class="reserved">const</span> <span class="reserved">char</span> *format, ...);</p>
@ -3550,7 +3557,9 @@ specifies a UTF-8 encoded string.</p>
<p class="discussion">This function moves to the next line and shows some formatted text with
optional word and character spacing in a PDF content stream. The &quot;unicode&quot;
argument specifies that the current font maps to full Unicode. The &quot;format&quot;
argument specifies a UTF-8 encoded <code>printf</code>-style format string.</p>
argument specifies a UTF-8 encoded <code>printf</code>-style format string.
</p>
<h3 class="function"><a id="pdfioContentTextShow">pdfioContentTextShow</a></h3>
<p class="description">Show text.</p>
<p class="code">
@ -3613,7 +3622,7 @@ argument specifies an array of UTF-8 encoded strings.</p>
<p class="discussion">This function shows some formatted text in a PDF content stream. The
&quot;unicode&quot; argument specifies that the current font maps to full Unicode.
The &quot;format&quot; argument specifies a UTF-8 encoded <code>printf</code>-style format string.</p>
<h3 class="function"><a id="pdfioDictClear">pdfioDictClear</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.4&#160;</span><a id="pdfioDictClear">pdfioDictClear</a></h3>
<p class="description">Remove a key/value pair from a dictionary.</p>
<p class="code">
<span class="reserved">bool</span> pdfioDictClear(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, <span class="reserved">const</span> <span class="reserved">char</span> *key);</p>
@ -3717,7 +3726,7 @@ time_t pdfioDictGetDate(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, <span cl
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Value</p>
<h3 class="function"><a id="pdfioDictGetKey">pdfioDictGetKey</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.4&#160;</span><a id="pdfioDictGetKey">pdfioDictGetKey</a></h3>
<p class="description">Get the key for the specified pair.</p>
<p class="code">
<span class="reserved">const</span> <span class="reserved">char</span> *pdfioDictGetKey(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, size_t n);</p>
@ -3743,7 +3752,7 @@ time_t pdfioDictGetDate(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, <span cl
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Value</p>
<h3 class="function"><a id="pdfioDictGetNumPairs">pdfioDictGetNumPairs</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.4&#160;</span><a id="pdfioDictGetNumPairs">pdfioDictGetNumPairs</a></h3>
<p class="description">Get the number of key/value pairs in a dictionary.</p>
<p class="code">
size_t pdfioDictGetNumPairs(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict);</p>
@ -3821,7 +3830,7 @@ size_t pdfioDictGetNumPairs(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict);</p>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Value type</p>
<h3 class="function"><a id="pdfioDictIterateKeys">pdfioDictIterateKeys</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.1&#160;</span><a id="pdfioDictIterateKeys">pdfioDictIterateKeys</a></h3>
<p class="description">Iterate the keys in a dictionary.</p>
<p class="code">
<span class="reserved">void</span> pdfioDictIterateKeys(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, <a href="#pdfio_dict_cb_t">pdfio_dict_cb_t</a> cb, <span class="reserved">void</span> *cb_data);</p>
@ -3848,7 +3857,9 @@ my_dict_cb(pdfio_dict_t *dict, const char *key, void *cb_data)
</pre>
The iteration continues as long as the callback returns <code>true</code> or all keys
have been iterated.</p>
have been iterated.
</p>
<h3 class="function"><a id="pdfioDictSetArray">pdfioDictSetArray</a></h3>
<p class="description">Set a key array in a dictionary.</p>
<p class="code">
@ -4076,8 +4087,9 @@ CropBox for pages in the PDF file - if <code>NULL</code> then a default &quot;Un
of 8.27x11in (the intersection of US Letter and ISO A4) is used.<br>
<br>
The &quot;error_cb&quot; and &quot;error_cbdata&quot; arguments specify an error handler callback
and its data pointer - if <code>NULL</code> the default error handler is used that
writes error messages to <code>stderr</code>.</p>
and its data pointer - if <code>NULL</code> then the default error handler is used that
writes error messages to <code>stderr</code>. The error handler callback should return
<code>true</code> to continue writing the PDF file or <code>false</code> to stop.</p>
<h3 class="function"><a id="pdfioFileCreateArrayObj">pdfioFileCreateArrayObj</a></h3>
<p class="description">Create a new object in a PDF file containing an array.</p>
<p class="code">
@ -4260,7 +4272,7 @@ image on the page.<br>
Note: Currently PNG support is limited to grayscale, RGB, or indexed files
without interlacing or alpha. Transparency (masking) based on color/index
is supported.</blockquote>
<h3 class="function"><a id="pdfioFileCreateNameObj">pdfioFileCreateNameObj</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.4&#160;</span><a id="pdfioFileCreateNameObj">pdfioFileCreateNameObj</a></h3>
<p class="description">Create a new object in a PDF file containing a name.</p>
<p class="code">
<a href="#pdfio_obj_t">pdfio_obj_t</a> *pdfioFileCreateNameObj(<a href="#pdfio_file_t">pdfio_file_t</a> *pdf, <span class="reserved">const</span> <span class="reserved">char</span> *name);</p>
@ -4275,8 +4287,10 @@ is supported.</blockquote>
<p class="description">New object</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function creates a new object with a name value in a PDF file.
You must call <a href="#pdfioObjClose"><code>pdfioObjClose</code></a> to write the object to the file.</p>
<h3 class="function"><a id="pdfioFileCreateNumberObj">pdfioFileCreateNumberObj</a></h3>
You must call <a href="#pdfioObjClose"><code>pdfioObjClose</code></a> to write the object to the file.
</p>
<h3 class="function"><span class="info">&#160;PDFio v1.2&#160;</span><a id="pdfioFileCreateNumberObj">pdfioFileCreateNumberObj</a></h3>
<p class="description">Create a new object in a PDF file containing a number.</p>
<p class="code">
<a href="#pdfio_obj_t">pdfio_obj_t</a> *pdfioFileCreateNumberObj(<a href="#pdfio_file_t">pdfio_file_t</a> *pdf, <span class="reserved">double</span> number);</p>
@ -4291,7 +4305,9 @@ You must call <a href="#pdfioObjClose"><code>pdfioObjClose</code></a> to write t
<p class="description">New object</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function creates a new object with a number value in a PDF file.
You must call <a href="#pdfioObjClose"><code>pdfioObjClose</code></a> to write the object to the file.</p>
You must call <a href="#pdfioObjClose"><code>pdfioObjClose</code></a> to write the object to the file.
</p>
<h3 class="function"><a id="pdfioFileCreateObj">pdfioFileCreateObj</a></h3>
<p class="description">Create a new object in a PDF file.</p>
<p class="code">
@ -4352,8 +4368,9 @@ CropBox for pages in the PDF file - if <code>NULL</code> then a default &quot;Un
of 8.27x11in (the intersection of US Letter and ISO A4) is used.<br>
<br>
The &quot;error_cb&quot; and &quot;error_cbdata&quot; arguments specify an error handler callback
and its data pointer - if <code>NULL</code> the default error handler is used that
writes error messages to <code>stderr</code>.<br>
and its data pointer - if <code>NULL</code> then the default error handler is used that
writes error messages to <code>stderr</code>. The error handler callback should return
<code>true</code> to continue writing the PDF file or <code>false</code> to stop.<br>
<br>
</p><blockquote>
<em>Note</em>: Files created using this API are slightly larger than those
@ -4372,7 +4389,7 @@ stored as indirect object references.</blockquote>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Contents stream</p>
<h3 class="function"><a id="pdfioFileCreateStringObj">pdfioFileCreateStringObj</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.2&#160;</span><a id="pdfioFileCreateStringObj">pdfioFileCreateStringObj</a></h3>
<p class="description">Create a new object in a PDF file containing a string.</p>
<p class="code">
<a href="#pdfio_obj_t">pdfio_obj_t</a> *pdfioFileCreateStringObj(<a href="#pdfio_file_t">pdfio_file_t</a> *pdf, <span class="reserved">const</span> <span class="reserved">char</span> *string);</p>
@ -4387,7 +4404,9 @@ stored as indirect object references.</blockquote>
<p class="description">New object</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function creates a new object with a string value in a PDF file.
You must call <a href="#pdfioObjClose"><code>pdfioObjClose</code></a> to write the object to the file.</p>
You must call <a href="#pdfioObjClose"><code>pdfioObjClose</code></a> to write the object to the file.
</p>
<h3 class="function"><a id="pdfioFileCreateTemporary">pdfioFileCreateTemporary</a></h3>
<p class="description"></p>
<p class="code">
@ -4670,8 +4689,15 @@ cancel the open. If <code>NULL</code> is specified for the callback function an
PDF file requires a password, the open will always fail.<br>
<br>
The &quot;error_cb&quot; and &quot;error_cbdata&quot; arguments specify an error handler callback
and its data pointer - if <code>NULL</code> the default error handler is used that
writes error messages to <code>stderr</code>.</p>
and its data pointer - if <code>NULL</code> then the default error handler is used that
writes error messages to <code>stderr</code>. The error handler callback should return
<code>true</code> to continue reading the PDF file or <code>false</code> to stop.<br>
<br>
</p><blockquote>
Note: Error messages starting with &quot;WARNING:&quot; are actually warning
messages - the callback should normally return <code>true</code> to allow PDFio to
try to resolve the issue. In addition, some errors are unrecoverable, and
PDFio ignores the return value of the error callback for them.</blockquote>
<h3 class="function"><a id="pdfioFileSetAuthor">pdfioFileSetAuthor</a></h3>
<p class="description">Set the author for a PDF file.</p>
<p class="code">
@ -4892,7 +4918,7 @@ size_t pdfioObjGetLength(<a href="#pdfio_obj_t">pdfio_obj_t</a> *obj);</p>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Length in bytes or <code>0</code> for none</p>
<h3 class="function"><a id="pdfioObjGetName">pdfioObjGetName</a></h3>
<h3 class="function"><span class="info">&#160;PDFio v1.4&#160;</span><a id="pdfioObjGetName">pdfioObjGetName</a></h3>
<p class="description">Get the name value associated with an object.</p>
<p class="code">
<span class="reserved">const</span> <span class="reserved">char</span> *pdfioObjGetName(<a href="#pdfio_obj_t">pdfio_obj_t</a> *obj);</p>
@ -5168,8 +5194,9 @@ ssize_t pdfioStreamPeek(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st, <span
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function writes a formatted string to a stream. In addition to the
standard <code>printf</code> format characters, you can use &quot;%N&quot; to format a PDF name
value (&quot;/Name&quot;) and &quot;%S&quot; to format a PDF string (&quot;(String)&quot;) value.</p>
standard <code>printf</code> format characters, you can use &quot;%H&quot; to format an HTML/XML
string value, &quot;%N&quot; to format a PDF name value (&quot;/Name&quot;), and &quot;%S&quot; to format
a PDF string (&quot;(String)&quot;) value.</p>
<h3 class="function"><a id="pdfioStreamPutChar">pdfioStreamPutChar</a></h3>
<p class="description">Write a single character to a stream.</p>
<p class="code">
@ -5403,7 +5430,7 @@ typedef enum <a href="#pdfio_valtype_e">pdfio_valtype_e</a> pdfio_valtype_t;
<tr><th>PDFIO_ENCRYPTION_AES_128 </th><td class="description">128-bit AES encryption (PDF 1.6)</td></tr>
<tr><th>PDFIO_ENCRYPTION_NONE </th><td class="description">No encryption</td></tr>
<tr><th>PDFIO_ENCRYPTION_RC4_128 </th><td class="description">128-bit RC4 encryption (PDF 1.4)</td></tr>
<tr><th>PDFIO_ENCRYPTION_RC4_40 </th><td class="description">40-bit RC4 encryption (PDF 1.3)</td></tr>
<tr><th>PDFIO_ENCRYPTION_RC4_40 </th><td class="description">40-bit RC4 encryption (PDF 1.3, reading only)</td></tr>
</tbody></table>
<h3 class="enumeration"><a id="pdfio_filter_e">pdfio_filter_e</a></h3>
<p class="description">Compression/decompression filters for streams</p>

View File

@ -343,8 +343,8 @@ password_cb(void *data, const char *filename)
```
The error callback is called for both errors and warnings and accepts the
`pdfio_file_t` pointer, a message string, and the callback pointer value, for
example:
`pdfio_file_t` pointer, a message string, and the callback pointer value. It
returns `true` to continue processing the file or `false` to stop, for example:
```c
bool
@ -354,13 +354,17 @@ error_cb(pdfio_file_t *pdf, const char *message, void *data)
fprintf(stderr, "%s: %s\n", pdfioFileGetName(pdf), message);
// Return false to treat warnings as errors
return (false);
// Return true for warning messages (continue) and false for errors (stop)
return (!strncmp(message, "WARNING:", 8));
}
```
The default error callback (`NULL`) does the equivalent of the above.
> Note: Many errors are unrecoverable; for those errors PDFio ignores the return
> value from the error callback and always stops processing the PDF file. Warning
> messages start with the prefix "WARNING:" while errors have no prefix.
Each PDF file contains one or more pages. The [`pdfioFileGetNumPages`](@@)
function returns the number of pages in the file while the
[`pdfioFileGetPage`](@@) function gets the specified page in the PDF file:

View File

@ -8,8 +8,8 @@
//
// Usage:
//
// ./pdfmerge [-o OUTPUT.pdf] INPUT.pdf [... INPUT.pdf]
// ./pdfmerge INPUT.pdf [... INPUT.pdf] >OUTPUT.pdf
// ./pdfiomerge [-o OUTPUT.pdf] INPUT.pdf [... INPUT.pdf]
// ./pdfiomerge INPUT.pdf [... INPUT.pdf] >OUTPUT.pdf
//
#include <pdfio.h>
@ -47,7 +47,7 @@ main(int argc, // I - Number of command-line arguments
}
else if (!strncmp(argv[i], "--", 2))
{
fprintf(stderr, "pdfmerge: Unknown option '%s'.\n", argv[i]);
fprintf(stderr, "pdfiomerge: Unknown option '%s'.\n", argv[i]);
return (usage(stderr));
}
else if (argv[i][0] == '-')
@ -59,14 +59,14 @@ main(int argc, // I - Number of command-line arguments
case 'o' : // -o OUTPUT.pdf
if (outpdf)
{
fputs("pdfmerge: Only one output file can be specified.\n", stderr);
fputs("pdfiomerge: Only one output file can be specified.\n", stderr);
return (usage(stderr));
}
i ++;
if (i >= argc)
{
fputs("pdfmerge: Missing output filename after '-o'.\n", stderr);
fputs("pdfiomerge: Missing output filename after '-o'.\n", stderr);
return (usage(stderr));
}
@ -75,7 +75,7 @@ main(int argc, // I - Number of command-line arguments
break;
default :
fprintf(stderr, "pdfmerge: Unknown option '-%c'.\n", *opt);
fprintf(stderr, "pdfiomerge: Unknown option '-%c'.\n", *opt);
return (usage(stderr));
}
}

View File

@ -32,11 +32,11 @@ if test $(grep AC_INIT configure.ac | awk '{print $2}') != "[$version],"; then
status=1
fi
if test $(head -4 CHANGES.md | tail -1 | awk '{print $1}') != "v$version"; then
if test $(head -5 CHANGES.md | tail -1 | awk '{print $1}') != "v$version"; then
echo "Still need to update CHANGES.md version number."
status=1
fi
if test $(head -4 CHANGES.md | tail -1 | awk '{print $3}') = "YYYY-MM-DD"; then
if test $(head -5 CHANGES.md | tail -1 | awk '{print $3}') = "YYYY-MM-DD"; then
echo "Still need to update CHANGES.md release date."
status=1
fi

View File

@ -640,6 +640,8 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
//
// 'pdfioArrayRemove()' - Remove an array entry.
//
// @since PDFio v1.4@
//
bool // O - `true` on success, `false` otherwise
pdfioArrayRemove(pdfio_array_t *a, // I - Array

View File

@ -47,7 +47,7 @@ _pdfioFileConsume(pdfio_file_t *pdf, // I - PDF file
// `false` to halt.
//
bool // O - `false` to stop
bool // O - `false` to stop, `true` to continue
_pdfioFileDefaultError(
pdfio_file_t *pdf, // I - PDF file
const char *message, // I - Error message
@ -57,7 +57,7 @@ _pdfioFileDefaultError(
fprintf(stderr, "%s: %s\n", pdf->filename, message);
return (false);
return (!strncmp(message, "WARNING:", 8));
}
@ -134,19 +134,20 @@ _pdfioFileGetChar(pdfio_file_t *pdf) // I - PDF file
bool // O - `true` on success, `false` on error
_pdfioFileGets(pdfio_file_t *pdf, // I - PDF file
char *buffer, // I - Line buffer
size_t bufsize) // I - Size of line buffer
size_t bufsize, // I - Size of line buffer
bool discard) // I - OK to discard excess line chars?
{
bool eol = false; // End of line?
char *bufptr = buffer, // Pointer into buffer
*bufend = buffer + bufsize - 1; // Pointer to end of buffer
PDFIO_DEBUG("_pdfioFileGets(pdf=%p, buffer=%p, bufsize=%lu) bufpos=%ld, buffer=%p, bufptr=%p, bufend=%p, offset=%lu\n", pdf, buffer, (unsigned long)bufsize, (long)pdf->bufpos, pdf->buffer, pdf->bufptr, pdf->bufend, (unsigned long)(pdf->bufpos + (pdf->bufptr - pdf->buffer)));
PDFIO_DEBUG("_pdfioFileGets(pdf=%p, buffer=%p, bufsize=%lu, discard=%s) bufpos=%ld, buffer=%p, bufptr=%p, bufend=%p, offset=%lu\n", pdf, buffer, (unsigned long)bufsize, discard ? "true" : "false", (long)pdf->bufpos, pdf->buffer, pdf->bufptr, pdf->bufend, (unsigned long)(pdf->bufpos + (pdf->bufptr - pdf->buffer)));
while (!eol)
{
// If there are characters ready in the buffer, use them...
while (!eol && pdf->bufptr < pdf->bufend && bufptr < bufend)
while (!eol && pdf->bufptr < pdf->bufend)
{
char ch = *(pdf->bufptr++); // Next character in buffer
@ -168,8 +169,10 @@ _pdfioFileGets(pdfio_file_t *pdf, // I - PDF file
pdf->bufptr ++;
}
}
else
else if (bufptr < bufend)
*bufptr++ = ch;
else if (!discard)
break;
}
// Fill the read buffer as needed...
@ -424,7 +427,7 @@ off_t // O - Offset from beginning of file
_pdfioFileTell(pdfio_file_t *pdf) // I - PDF file
{
if (pdf->bufptr)
return (pdf->bufpos + (pdf->bufptr - pdf->buffer));
return (pdf->bufpos + (off_t)(pdf->bufptr - pdf->buffer));
else
return (pdf->bufpos);
}
@ -452,7 +455,7 @@ _pdfioFileWrite(pdfio_file_t *pdf, // I - PDF file
if (!write_buffer(pdf, buffer, bytes))
return (false);
pdf->bufpos += bytes;
pdf->bufpos += (off_t)bytes;
return (true);
}
@ -478,7 +481,7 @@ fill_buffer(pdfio_file_t *pdf) // I - PDF file
// Advance current position in file as needed...
if (pdf->bufend)
pdf->bufpos += pdf->bufend - pdf->buffer;
pdf->bufpos += (off_t)(pdf->bufend - pdf->buffer);
// Try reading from the file...
if ((bytes = read_buffer(pdf, pdf->buffer, sizeof(pdf->buffer))) <= 0)

View File

@ -632,6 +632,8 @@ pdfioContentPathCurve23(
//
// 'pdfioContentPathEnd()' - Clear the current path.
//
// @since PDFio v1.1@
//
bool // O - `true` on success, `false` on failure
pdfioContentPathEnd(pdfio_stream_t *st) // I - Stream
@ -1111,6 +1113,8 @@ pdfioContentTextEnd(pdfio_stream_t *st) // I - Stream
// on "size". The text string must always use the UTF-8 (Unicode) encoding but
// any control characters (such as newlines) are ignored.
//
// @since PDFio v1.2@
//
double // O - Width
pdfioContentTextMeasure(
@ -1267,6 +1271,8 @@ pdfioContentTextMoveTo(
//
// 'pdfioContentTextNewLine()' - Move to the next line.
//
// @since PDFio v1.2@
//
bool // O - `true` on success, `false` on failure
pdfioContentTextNewLine(
@ -1298,6 +1304,8 @@ pdfioContentTextNextLine(
// specifies that the current font maps to full Unicode. The "s" argument
// specifies a UTF-8 encoded string.
//
// @since PDFio v1.2@
//
bool // O - `true` on success, `false` on failure
pdfioContentTextNewLineShow(
@ -1346,6 +1354,8 @@ pdfioContentTextNewLineShow(
// argument specifies that the current font maps to full Unicode. The "format"
// argument specifies a UTF-8 encoded `printf`-style format string.
//
// @since PDFio v1.2@
//
bool // O - `true` on success, `false` on failure
pdfioContentTextNewLineShowf(
@ -1935,6 +1945,7 @@ pdfioFileCreateFontObjFromFile(
if (fd >= 0)
close(fd);
if (obj)
_pdfioObjSetExtension(obj, font, (_pdfio_extfree_t)ttfDelete);
return (obj);

View File

@ -98,7 +98,7 @@ static uint8_t pdf_passpad[32] = // Padding for passwords
static void decrypt_user_key(pdfio_encryption_t encryption, const uint8_t *file_key, uint8_t user_key[32]);
static void encrypt_user_key(pdfio_encryption_t encryption, const uint8_t *file_key, uint8_t user_key[32]);
static void make_file_key(pdfio_encryption_t encryption, pdfio_permission_t permissions, const unsigned char *file_id, size_t file_idlen, const uint8_t *user_pad, const uint8_t *owner_key, uint8_t file_key[16]);
static void make_file_key(pdfio_encryption_t encryption, pdfio_permission_t permissions, const unsigned char *file_id, size_t file_idlen, const uint8_t *user_pad, const uint8_t *owner_key, bool encrypt_metadata, uint8_t file_key[16]);
static void make_owner_key(pdfio_encryption_t encryption, const uint8_t *owner_pad, const uint8_t *user_pad, uint8_t owner_key[32]);
static void make_user_key(const unsigned char *file_id, size_t file_idlen, uint8_t user_key[32]);
static void pad_password(const char *password, uint8_t pad[32]);
@ -158,7 +158,7 @@ _pdfioCryptoLock(
// Generate the encryption key
file_id = pdfioArrayGetBinary(pdf->id_array, 0, &file_idlen);
make_file_key(encryption, permissions, file_id, file_idlen, user_pad, pdf->owner_key, pdf->file_key);
make_file_key(encryption, permissions, file_id, file_idlen, user_pad, pdf->owner_key, pdf->encrypt_metadata, pdf->file_key);
pdf->file_keylen = 16;
// Generate the user key...
@ -409,13 +409,6 @@ _pdfioCryptoMakeReader(
uint8_t data[21]; // Key data
_pdfio_md5_t md5; // MD5 state
uint8_t digest[16]; // MD5 digest value
#if PDFIO_OBJ_CRYPT
pdfio_array_t *id_array; // Object ID array
unsigned char *id_value; // Object ID value
size_t id_len; // Length of object ID
uint8_t temp_key[16]; // File key for object
#endif // PDFIO_OBJ_CRYPT
uint8_t *file_key; // Computed file key to use
PDFIO_DEBUG("_pdfioCryptoMakeReader(pdf=%p, obj=%p(%d), ctx=%p, iv=%p, ivlen=%p(%d))\n", pdf, obj, (int)obj->number, ctx, iv, ivlen, (int)*ivlen);
@ -427,60 +420,6 @@ _pdfioCryptoMakeReader(
return (NULL);
}
#if PDFIO_OBJ_CRYPT
if ((id_array = pdfioDictGetArray(pdfioObjGetDict(obj), "ID")) != NULL)
{
// Object has its own ID that will get used for encryption...
_pdfio_md5_t md5; // MD5 context
uint8_t file_digest[16]; // MD5 digest of file ID and pad
uint8_t user_pad[32], // Padded user password
own_user_key[32], // Calculated user key
pdf_user_key[32]; // Decrypted user key
PDFIO_DEBUG("_pdfioCryptoMakeReader: Per-object file ID.\n");
if ((id_value = pdfioArrayGetBinary(id_array, 0, &id_len)) == NULL)
{
*ivlen = 0;
return (NULL);
}
_pdfioCryptoMD5Init(&md5);
_pdfioCryptoMD5Append(&md5, pdf_passpad, 32);
_pdfioCryptoMD5Append(&md5, id_value, id_len);
_pdfioCryptoMD5Finish(&md5, file_digest);
make_owner_key(pdf->encryption, pdf->password, pdf->owner_key, user_pad);
make_file_key(pdf->encryption, pdf->permissions, id_value, id_len, user_pad, pdf->owner_key, temp_key);
make_user_key(id_value, id_len, own_user_key);
if (memcmp(own_user_key, pdf->user_key, sizeof(own_user_key)))
{
PDFIO_DEBUG("_pdfioCryptoMakeReader: Not user password, trying owner password.\n");
make_file_key(pdf->encryption, pdf->permissions, id_value, id_len, pdf->password, pdf->owner_key, temp_key);
make_user_key(id_value, id_len, own_user_key);
memcpy(pdf_user_key, pdf->user_key, sizeof(pdf_user_key));
decrypt_user_key(pdf->encryption, temp_key, pdf_user_key);
if (memcmp(pdf->password, pdf_user_key, 32) && memcmp(own_user_key, pdf_user_key, 16))
{
_pdfioFileError(pdf, "Unable to unlock file.");
*ivlen = 0;
return (NULL);
}
}
file_key = temp_key;
}
else
#endif // PDFIO_OBJ_CRYPT
{
// Use the default file key...
file_key = pdf->file_key;
}
switch (pdf->encryption)
{
default :
@ -490,20 +429,20 @@ _pdfioCryptoMakeReader(
case PDFIO_ENCRYPTION_RC4_40 :
// Copy the key data for the MD5 hash.
memcpy(data, file_key, 16);
data[16] = (uint8_t)obj->number;
data[17] = (uint8_t)(obj->number >> 8);
data[18] = (uint8_t)(obj->number >> 16);
data[19] = (uint8_t)obj->generation;
data[20] = (uint8_t)(obj->generation >> 8);
memcpy(data, pdf->file_key, 5);
data[5] = (uint8_t)obj->number;
data[6] = (uint8_t)(obj->number >> 8);
data[7] = (uint8_t)(obj->number >> 16);
data[8] = (uint8_t)obj->generation;
data[9] = (uint8_t)(obj->generation >> 8);
// Hash it...
_pdfioCryptoMD5Init(&md5);
_pdfioCryptoMD5Append(&md5, data, sizeof(data));
_pdfioCryptoMD5Append(&md5, data, 10);
_pdfioCryptoMD5Finish(&md5, digest);
// Initialize the RC4 context using 40 bits of the digest...
_pdfioCryptoRC4Init(&ctx->rc4, digest, 5);
// Initialize the RC4 context using 80 bits of the digest...
_pdfioCryptoRC4Init(&ctx->rc4, digest, 10);
*ivlen = 0;
return ((_pdfio_crypto_cb_t)_pdfioCryptoRC4Crypt);
@ -517,7 +456,7 @@ _pdfioCryptoMakeReader(
case PDFIO_ENCRYPTION_RC4_128 :
// Copy the key data for the MD5 hash.
memcpy(data, file_key, 16);
memcpy(data, pdf->file_key, 16);
data[16] = (uint8_t)obj->number;
data[17] = (uint8_t)(obj->number >> 8);
data[18] = (uint8_t)(obj->number >> 16);
@ -643,6 +582,8 @@ _pdfioCryptoUnlock(
file_idlen; // Length of file ID
_pdfio_md5_t md5; // MD5 context
uint8_t file_digest[16]; // MD5 digest of file ID and pad
double p; // Permissions value as a double
_pdfio_value_t *value; // Encrypt dictionary value, if any
// See if we support the type of encryption specified by the Encrypt object
@ -658,7 +599,12 @@ _pdfioCryptoUnlock(
revision = (int)pdfioDictGetNumber(encrypt_dict, "R");
length = (int)pdfioDictGetNumber(encrypt_dict, "Length");
PDFIO_DEBUG("_pdfioCryptoUnlock: handler=%p(%s), version=%d, revision=%d, length=%d\n", (void *)handler, handler ? handler : "(null)", version, revision, length);
if ((value = _pdfioDictGetValue(encrypt_dict, "EncryptMetadata")) != NULL && value->type == PDFIO_VALTYPE_BOOLEAN)
pdf->encrypt_metadata = value->value.boolean;
else
pdf->encrypt_metadata = true;
PDFIO_DEBUG("_pdfioCryptoUnlock: handler=%p(%s), version=%d, revision=%d, length=%d, encrypt_metadata=%s\n", (void *)handler, handler ? handler : "(null)", version, revision, length, pdf->encrypt_metadata ? "true" : "false");
if (!handler || strcmp(handler, "Standard"))
{
@ -750,8 +696,13 @@ _pdfioCryptoUnlock(
// Grab the remaining values we need to unlock the PDF...
pdf->file_keylen = (size_t)(length / 8);
pdf->permissions = (pdfio_permission_t)pdfioDictGetNumber(encrypt_dict, "P");
p = pdfioDictGetNumber(encrypt_dict, "P");
PDFIO_DEBUG("_pdfioCryptoUnlock: P=%.0f\n", p);
if (p < 0x7fffffff) // Handle integers > 2^31-1
pdf->permissions = (pdfio_permission_t)p;
else
pdf->permissions = (pdfio_permission_t)(p - 4294967296.0);
PDFIO_DEBUG("_pdfioCryptoUnlock: permissions=%d\n", pdf->permissions);
owner_key = pdfioDictGetBinary(encrypt_dict, "O", &owner_keylen);
@ -823,7 +774,7 @@ _pdfioCryptoUnlock(
make_owner_key(pdf->encryption, pad, pdf->owner_key, user_pad);
PDFIO_DEBUG("_pdfioCryptoUnlock: Upad=%02X%02X%02X%02X...%02X%02X%02X%02X\n", user_pad[0], user_pad[1], user_pad[2], user_pad[3], user_pad[28], user_pad[29], user_pad[30], user_pad[31]);
make_file_key(pdf->encryption, pdf->permissions, file_id, file_idlen, user_pad, pdf->owner_key, file_key);
make_file_key(pdf->encryption, pdf->permissions, file_id, file_idlen, user_pad, pdf->owner_key, pdf->encrypt_metadata, file_key);
PDFIO_DEBUG("_pdfioCryptoUnlock: Fown=%02X%02X%02X%02X...%02X%02X%02X%02X\n", file_key[0], file_key[1], file_key[2], file_key[3], file_key[12], file_key[13], file_key[14], file_key[15]);
make_user_key(file_id, file_idlen, own_user_key);
@ -841,7 +792,7 @@ _pdfioCryptoUnlock(
}
// Not the owner password, try the user password...
make_file_key(pdf->encryption, pdf->permissions, file_id, file_idlen, pad, pdf->owner_key, file_key);
make_file_key(pdf->encryption, pdf->permissions, file_id, file_idlen, pad, pdf->owner_key, pdf->encrypt_metadata, file_key);
PDFIO_DEBUG("_pdfioCryptoUnlock: Fuse=%02X%02X%02X%02X...%02X%02X%02X%02X\n", file_key[0], file_key[1], file_key[2], file_key[3], file_key[12], file_key[13], file_key[14], file_key[15]);
make_user_key(file_id, file_idlen, own_user_key);
@ -973,6 +924,8 @@ make_file_key(
size_t file_idlen, // I - Length of file ID
const uint8_t *user_pad, // I - Padded user password
const uint8_t *owner_key, // I - Owner key
bool encrypt_metadata,
// I - Encrypt metadata?
uint8_t file_key[16]) // O - Encryption key
{
size_t i; // Looping var
@ -986,13 +939,25 @@ make_file_key(
perm_bytes[2] = (uint8_t)(permissions >> 16);
perm_bytes[3] = (uint8_t)(permissions >> 24);
PDFIO_DEBUG("make_file_key: user_pad[32]=<%02X%02X%02X%02X...%02X%02X%02X%02X>\n", user_pad[0], user_pad[1], user_pad[2], user_pad[3], user_pad[28], user_pad[29], user_pad[30], user_pad[31]);
PDFIO_DEBUG("make_file_key: owner_key[32]=<%02X%02X%02X%02X...%02X%02X%02X%02X>\n", owner_key[0], owner_key[1], owner_key[2], owner_key[3], owner_key[28], owner_key[29], owner_key[30], owner_key[31]);
PDFIO_DEBUG("make_file_key: permissions(%d)=<%02X%02X%02X%02X>\n", permissions, perm_bytes[0], perm_bytes[1], perm_bytes[2], perm_bytes[3]);
_pdfioCryptoMD5Init(&md5);
_pdfioCryptoMD5Append(&md5, user_pad, 32);
_pdfioCryptoMD5Append(&md5, owner_key, 32);
_pdfioCryptoMD5Append(&md5, perm_bytes, 4);
_pdfioCryptoMD5Append(&md5, file_id, file_idlen);
if (!encrypt_metadata)
{
uint8_t meta_bytes[4] = { 0xff, 0xff, 0xff, 0xff };
// Metadata bytes
_pdfioCryptoMD5Append(&md5, meta_bytes, 4);
}
_pdfioCryptoMD5Finish(&md5, digest);
PDFIO_DEBUG("make_file_key: first md5=<%02X%02X%02X%02X...%02X%02X%02X%02X>\n", digest[0], digest[1], digest[2], digest[3], digest[12], digest[13], digest[14], digest[15]);
if (encryption != PDFIO_ENCRYPTION_RC4_40)
{
// MD5 the result 50 times..
@ -1004,6 +969,8 @@ make_file_key(
}
}
PDFIO_DEBUG("make_file_key: file_key[16]=<%02X%02X%02X%02X...%02X%02X%02X%02X>\n", digest[0], digest[1], digest[2], digest[3], digest[12], digest[13], digest[14], digest[15]);
memcpy(file_key, digest, 16);
}
@ -1054,9 +1021,11 @@ make_owner_key(
// Encrypt 20 times...
uint8_t encrypt_key[16]; // RC4 encryption key
for (i = 0; i < 20; i ++)
for (i = 20; i > 0;)
{
// XOR each byte in the digest with the loop counter to make a key...
i --;
for (j = 0; j < sizeof(encrypt_key); j ++)
encrypt_key[j] = (uint8_t)(digest[j] ^ i);

View File

@ -20,6 +20,8 @@ static int compare_pairs(_pdfio_pair_t *a, _pdfio_pair_t *b);
//
// 'pdfioDictClear()' - Remove a key/value pair from a dictionary.
//
// @since PDFio v1.4@
//
bool // O - `true` if cleared, `false` otherwise
pdfioDictClear(pdfio_dict_t *dict, // I - Dictionary
@ -345,6 +347,8 @@ pdfioDictGetDict(pdfio_dict_t *dict, // I - Dictionary
//
// 'pdfioDictGetKey()' - Get the key for the specified pair.
//
// @since PDFio v1.4@
//
const char * // O - Key for specified pair
pdfioDictGetKey(pdfio_dict_t *dict, // I - Dictionary
@ -375,6 +379,8 @@ pdfioDictGetName(pdfio_dict_t *dict, // I - Dictionary
//
// 'pdfioDictGetNumPairs()' - Get the number of key/value pairs in a dictionary.
//
// @since PDFio v1.4@
//
size_t // O - Number of pairs
pdfioDictGetNumPairs(pdfio_dict_t *dict)// I - Dictionary
@ -465,127 +471,12 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary
else if (value && value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096)
{
// Convert binary string to regular string...
char temp[4096], // Temporary string
*tempptr; // Pointer into temporary string
unsigned char *dataptr; // Pointer into the data string
char temp[4096]; // Temporary UTF-8 string
if (!(value->value.binary.datalen & 1) && !memcmp(value->value.binary.data, "\376\377", 2))
if (!(value->value.binary.datalen & 1) && (!memcmp(value->value.binary.data, "\376\377", 2) || !memcmp(value->value.binary.data, "\377\376", 2)))
{
// Copy UTF-16 BE
int ch; // Unicode character
size_t remaining; // Remaining bytes
for (dataptr = value->value.binary.data + 2, remaining = value->value.binary.datalen - 2, tempptr = temp; remaining > 1 && tempptr < (temp + sizeof(temp) - 5); dataptr += 2, remaining -= 2)
{
ch = (dataptr[0] << 8) | dataptr[1];
if (ch >= 0xd800 && ch <= 0xdbff && remaining > 3)
{
// Multi-word UTF-16 char...
int lch; // Lower bits
lch = (dataptr[2] << 8) | dataptr[3];
if (lch < 0xdc00 || lch >= 0xdfff)
break;
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
dataptr += 2;
remaining -= 2;
}
else if (ch >= 0xfffe)
{
continue;
}
if (ch < 128)
{
// ASCII
*tempptr++ = (char)ch;
}
else if (ch < 4096)
{
// 2-byte UTF-8
*tempptr++ = (char)(0xc0 | (ch >> 6));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
else if (ch < 65536)
{
// 3-byte UTF-8
*tempptr++ = (char)(0xe0 | (ch >> 12));
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
else
{
// 4-byte UTF-8
*tempptr++ = (char)(0xe0 | (ch >> 18));
*tempptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
}
*tempptr = '\0';
}
else if (!(value->value.binary.datalen & 1) && !memcmp(value->value.binary.data, "\377\376", 2))
{
// Copy UTF-16 LE
int ch; // Unicode character
size_t remaining; // Remaining bytes
for (dataptr = value->value.binary.data + 2, remaining = value->value.binary.datalen - 2, tempptr = temp; remaining > 1 && tempptr < (temp + sizeof(temp) - 5); dataptr += 2, remaining -= 2)
{
ch = (dataptr[1] << 8) | dataptr[0];
if (ch >= 0xd800 && ch <= 0xdbff && remaining > 3)
{
// Multi-word UTF-16 char...
int lch; // Lower bits
lch = (dataptr[3] << 8) | dataptr[2];
if (lch < 0xdc00 || lch >= 0xdfff)
break;
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
dataptr += 2;
remaining -= 2;
}
else if (ch >= 0xfffe)
{
continue;
}
if (ch < 128)
{
// ASCII
*tempptr++ = (char)ch;
}
else if (ch < 4096)
{
// 2-byte UTF-8
*tempptr++ = (char)(0xc0 | (ch >> 6));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
else if (ch < 65536)
{
// 3-byte UTF-8
*tempptr++ = (char)(0xe0 | (ch >> 12));
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
else
{
// 4-byte UTF-8
*tempptr++ = (char)(0xe0 | (ch >> 18));
*tempptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
}
*tempptr = '\0';
// Copy UTF-16...
_pdfio_utf16cpy(temp, value->value.binary.data, value->value.binary.datalen, sizeof(temp));
}
else
{
@ -677,6 +568,8 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary
// The iteration continues as long as the callback returns `true` or all keys
// have been iterated.
//
// @since PDFio v1.1@
//
void
pdfioDictIterateKeys(
@ -750,9 +643,11 @@ _pdfioDictRead(pdfio_file_t *pdf, // I - PDF file
{
// Issue 118: Discard duplicate key/value pairs, in the future this will
// be a warning message...
_pdfioFileError(pdf, "WARNING: Discarding value for duplicate dictionary key '%s'.", key + 1);
_pdfioValueDelete(&value);
if (_pdfioFileError(pdf, "WARNING: Discarding value for duplicate dictionary key '%s'.", key + 1))
continue;
else
break;
}
else if (!_pdfioDictSetValue(dict, pdfioStringCreate(pdf, key + 1), &value))
break;

View File

@ -61,9 +61,10 @@ _pdfioFileAddMappedObj(
pdf->num_objmaps ++;
map->obj = dst_obj;
map->src_pdf = src_obj->pdf;
map->src_number = src_obj->number;
memcpy(map->src_id, src_obj->pdf->file_id, sizeof(map->src_id));
// Sort as needed...
if (pdf->num_objmaps > 1 && compare_objmaps(map, pdf->objmaps + pdf->num_objmaps - 2) < 0)
qsort(pdf->objmaps, pdf->num_objmaps, sizeof(_pdfio_objmap_t), (int (*)(const void *, const void *))compare_objmaps);
@ -181,8 +182,9 @@ pdfioFileClose(pdfio_file_t *pdf) // I - PDF file
// of 8.27x11in (the intersection of US Letter and ISO A4) is used.
//
// The "error_cb" and "error_cbdata" arguments specify an error handler callback
// and its data pointer - if `NULL` the default error handler is used that
// writes error messages to `stderr`.
// and its data pointer - if `NULL` then the default error handler is used that
// writes error messages to `stderr`. The error handler callback should return
// `true` to continue writing the PDF file or `false` to stop.
//
pdfio_file_t * // O - PDF file or `NULL` on error
@ -260,6 +262,8 @@ pdfioFileCreateArrayObj(
// This function creates a new object with a name value in a PDF file.
// You must call @link pdfioObjClose@ to write the object to the file.
//
// @since PDFio v1.4@
//
pdfio_obj_t * // O - New object
pdfioFileCreateNameObj(
@ -289,6 +293,8 @@ pdfioFileCreateNameObj(
// This function creates a new object with a number value in a PDF file.
// You must call @link pdfioObjClose@ to write the object to the file.
//
// @since PDFio v1.2@
//
pdfio_obj_t * // O - New object
pdfioFileCreateNumberObj(
@ -415,8 +421,9 @@ _pdfioFileCreateObj(
// of 8.27x11in (the intersection of US Letter and ISO A4) is used.
//
// The "error_cb" and "error_cbdata" arguments specify an error handler callback
// and its data pointer - if `NULL` the default error handler is used that
// writes error messages to `stderr`.
// and its data pointer - if `NULL` then the default error handler is used that
// writes error messages to `stderr`. The error handler callback should return
// `true` to continue writing the PDF file or `false` to stop.
//
// > *Note*: Files created using this API are slightly larger than those
// > created using the @link pdfioFileCreate@ function since stream lengths are
@ -518,6 +525,8 @@ pdfioFileCreatePage(pdfio_file_t *pdf, // I - PDF file
// This function creates a new object with a string value in a PDF file.
// You must call @link pdfioObjClose@ to write the object to the file.
//
// @since PDFio v1.2@
//
pdfio_obj_t * // O - New object
pdfioFileCreateStringObj(
@ -669,7 +678,7 @@ _pdfioFileFindMappedObj(
return (NULL);
// Otherwise search for a match...
key.src_pdf = src_pdf;
memcpy(key.src_id, src_pdf->file_id, sizeof(key.src_id));
key.src_number = src_number;
if ((match = (_pdfio_objmap_t *)bsearch(&key, pdf->objmaps, pdf->num_objmaps, sizeof(_pdfio_objmap_t), (int (*)(const void *, const void *))compare_objmaps)) != NULL)
@ -987,8 +996,14 @@ pdfioFileGetVersion(
// PDF file requires a password, the open will always fail.
//
// The "error_cb" and "error_cbdata" arguments specify an error handler callback
// and its data pointer - if `NULL` the default error handler is used that
// writes error messages to `stderr`.
// and its data pointer - if `NULL` then the default error handler is used that
// writes error messages to `stderr`. The error handler callback should return
// `true` to continue reading the PDF file or `false` to stop.
//
// > Note: Error messages starting with "WARNING:" are actually warning
// > messages - the callback should normally return `true` to allow PDFio to
// > try to resolve the issue. In addition, some errors are unrecoverable; for
// > those, PDFio ignores the return value of the error callback.
//
pdfio_file_t * // O - PDF file
@ -1006,6 +1021,10 @@ pdfioFileOpen(
*end; // End of line
ssize_t bytes; // Bytes read
off_t xref_offset; // Offset to xref table
time_t curtime; // Creation date/time
unsigned char *id_value; // ID value
size_t id_valuelen; // Length of ID value
_pdfio_sha256_t ctx; // Hashing context
PDFIO_DEBUG("pdfioFileOpen(filename=\"%s\", password_cb=%p, password_cbdata=%p, error_cb=%p, error_cbdata=%p)\n", filename, (void *)password_cb, (void *)password_cbdata, (void *)error_cb, (void *)error_cbdata);
@ -1049,7 +1068,7 @@ pdfioFileOpen(
}
// Read the header from the first line...
if (!_pdfioFileGets(pdf, line, sizeof(line)))
if (!_pdfioFileGets(pdf, line, sizeof(line), true))
goto error;
if ((strncmp(line, "%PDF-1.", 7) && strncmp(line, "%PDF-2.", 7)) || !isdigit(line[7] & 255))
@ -1063,7 +1082,7 @@ pdfioFileOpen(
pdf->version = strdup(line + 5);
// Grab the last 1k of the file to find the start of the xref table...
if (_pdfioFileSeek(pdf, -1024, SEEK_END) < 0)
if (_pdfioFileSeek(pdf, 1 - sizeof(line), SEEK_END) < 0)
{
_pdfioFileError(pdf, "Unable to read startxref data.");
goto error;
@ -1075,28 +1094,48 @@ pdfioFileOpen(
goto error;
}
PDFIO_DEBUG("pdfioOpen: Read %d bytes at end of file.\n", (int)bytes);
line[bytes] = '\0';
end = line + bytes - 9;
for (ptr = line; ptr < end; ptr ++)
{
if (!memcmp(ptr, "startxref", 9))
if (!strncmp(ptr, "startxref", 9) && !strstr(ptr + 9, "startxref") && strtol(ptr + 9, NULL, 10) > 0)
break;
}
if (ptr >= end)
{
_pdfioFileError(pdf, "Unable to find start of xref table.");
if (!_pdfioFileError(pdf, "WARNING: Unable to find start of cross-reference table, will attempt to rebuild."))
goto error;
}
xref_offset = (off_t)strtol(ptr + 9, NULL, 10);
if (!load_xref(pdf, xref_offset, password_cb, password_cbdata))
{
if (!repair_xref(pdf, password_cb, password_cbdata))
goto error;
}
else
{
PDFIO_DEBUG("pdfioFileOpen: line=%p,ptr=%p(\"%s\")\n", line, ptr, ptr);
xref_offset = (off_t)strtol(ptr + 9, NULL, 10);
PDFIO_DEBUG("pdfioFileOpen: xref_offset=%lu\n", (unsigned long)xref_offset);
if (!load_xref(pdf, xref_offset, password_cb, password_cbdata))
goto error;
}
// Create the unique file identifier string for the object map...
curtime = pdfioFileGetCreationDate(pdf);
_pdfioCryptoSHA256Init(&ctx);
_pdfioCryptoSHA256Append(&ctx, (uint8_t *)pdf->filename, strlen(pdf->filename));
_pdfioCryptoSHA256Append(&ctx, (uint8_t *)&curtime, sizeof(curtime));
if ((id_value = pdfioArrayGetBinary(pdf->id_array, 0, &id_valuelen)) != NULL)
_pdfioCryptoSHA256Append(&ctx, id_value, id_valuelen);
if ((id_value = pdfioArrayGetBinary(pdf->id_array, 1, &id_valuelen)) != NULL)
_pdfioCryptoSHA256Append(&ctx, id_value, id_valuelen);
_pdfioCryptoSHA256Finish(&ctx, pdf->file_id);
return (pdf);
@ -1210,6 +1249,8 @@ pdfioFileSetPermissions(
if (encryption == PDFIO_ENCRYPTION_NONE)
return (true);
pdf->encrypt_metadata = true;
return (_pdfioCryptoLock(pdf, permissions, encryption, owner_password, user_password));
}
@ -1350,10 +1391,11 @@ static int // O - Result of comparison
compare_objmaps(_pdfio_objmap_t *a, // I - First object map
_pdfio_objmap_t *b) // I - Second object map
{
if (a->src_pdf < b->src_pdf)
return (-1);
else if (a->src_pdf > b->src_pdf)
return (1);
int ret = memcmp(a->src_id, b->src_id, sizeof(a->src_id));
// Result of comparison
if (ret)
return (ret);
else if (a->src_number < b->src_number)
return (-1);
else if (a->src_number > b->src_number)
@ -1382,6 +1424,8 @@ create_common(
pdfio_file_t *pdf; // New PDF file
pdfio_dict_t *dict; // Dictionary
unsigned char id_value[16]; // File ID value
time_t curtime; // Creation date/time
_pdfio_sha256_t ctx; // Hashing context
PDFIO_DEBUG("create_common(filename=\"%s\", fd=%d, output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, fd, (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
@ -1472,7 +1516,9 @@ create_common(
if ((dict = pdfioDictCreate(pdf)) == NULL)
goto error;
pdfioDictSetDate(dict, "CreationDate", time(NULL));
curtime = time(NULL);
pdfioDictSetDate(dict, "CreationDate", curtime);
pdfioDictSetString(dict, "Producer", "pdfio/" PDFIO_VERSION);
if ((pdf->info_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
@ -1497,6 +1543,14 @@ create_common(
pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value));
}
// Create the unique file identifier string for the object map...
_pdfioCryptoSHA256Init(&ctx);
_pdfioCryptoSHA256Append(&ctx, (uint8_t *)pdf->filename, strlen(pdf->filename));
_pdfioCryptoSHA256Append(&ctx, (uint8_t *)&curtime, sizeof(curtime));
_pdfioCryptoSHA256Append(&ctx, id_value, sizeof(id_value));
_pdfioCryptoSHA256Append(&ctx, id_value, sizeof(id_value));
_pdfioCryptoSHA256Finish(&ctx, pdf->file_id);
return (pdf);
// Common error handling code...
@ -1675,7 +1729,10 @@ load_pages(pdfio_file_t *pdf, // I - PDF file
}
if ((type = pdfioDictGetName(dict, "Type")) == NULL || (strcmp(type, "Pages") && strcmp(type, "Page")))
{
if (!_pdfioFileError(pdf, "WARNING: No Type value for pages object."))
return (false);
}
// If there is a Kids array, then this is a parent node and we have to look
// at the child objects...
@ -1742,31 +1799,32 @@ load_xref(
int generation; // Generation number
_pdfio_token_t tb; // Token buffer/stack
off_t line_offset; // Offset to start of line
pdfio_obj_t *pages_obj; // Pages object
while (!done)
{
if (_pdfioFileSeek(pdf, xref_offset, SEEK_SET) != xref_offset)
{
_pdfioFileError(pdf, "Unable to seek to start of xref table.");
return (false);
PDFIO_DEBUG("load_xref: Unable to seek to %lu.\n", (unsigned long)xref_offset);
goto repair;
}
do
{
line_offset = _pdfioFileTell(pdf);
if (!_pdfioFileGets(pdf, line, sizeof(line)))
if (!_pdfioFileGets(pdf, line, sizeof(line), true))
{
_pdfioFileError(pdf, "Unable to read start of xref table.");
return (false);
PDFIO_DEBUG("load_xref: Unable to read line at offset %lu.\n", (unsigned long)line_offset);
goto repair;
}
}
while (!line[0]);
PDFIO_DEBUG("load_xref: line_offset=%lu, line='%s'\n", (unsigned long)line_offset, line);
if (isdigit(line[0] & 255) && strlen(line) > 4 && (!strcmp(line + strlen(line) - 4, " obj") || ((ptr = strstr(line, " obj")) != NULL && ptr[4] == '<')))
if (isdigit(line[0] & 255) && strlen(line) > 4 && (!strcmp(line + strlen(line) - 4, " obj") || ((ptr = strstr(line, " obj")) != NULL && (ptr[4] == '<' || isspace(ptr[4])))))
{
// Cross-reference stream
pdfio_obj_t *obj; // Object
@ -1788,14 +1846,14 @@ load_xref(
if ((number = strtoimax(line, &ptr, 10)) < 1)
{
_pdfioFileError(pdf, "Bad xref table header '%s'.", line);
return (false);
PDFIO_DEBUG("load_xref: Unable to scan object number.\n");
goto repair;
}
if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || (generation > 65535 && number != 0))
{
_pdfioFileError(pdf, "Bad xref table header '%s'.", line);
return (false);
PDFIO_DEBUG("load_xref: Unable to scan generation number (%u).\n", (unsigned)generation);
goto repair;
}
while (isspace(*ptr & 255))
@ -1803,14 +1861,14 @@ load_xref(
if (strncmp(ptr, "obj", 3))
{
_pdfioFileError(pdf, "Bad xref table header '%s'.", line);
return (false);
PDFIO_DEBUG("load_xref: No 'obj' after object number and generation (saw '%s').\n", ptr);
goto repair;
}
if (_pdfioFileSeek(pdf, line_offset + (off_t)(ptr + 3 - line), SEEK_SET) < 0)
{
_pdfioFileError(pdf, "Unable to seek to xref object %lu %u.", (unsigned long)number, (unsigned)generation);
return (false);
PDFIO_DEBUG("load_xref: Unable to seek to start of cross-reference object dictionary.\n");
goto repair;
}
PDFIO_DEBUG("load_xref: Loading object %lu %u.\n", (unsigned long)number, (unsigned)generation);
@ -1825,21 +1883,21 @@ load_xref(
if (!_pdfioValueRead(pdf, obj, &tb, &trailer, 0))
{
_pdfioFileError(pdf, "Unable to read cross-reference stream dictionary.");
return (false);
PDFIO_DEBUG("load_xref: Unable to read cross-reference object dictionary.\n");
goto repair;
}
else if (trailer.type != PDFIO_VALTYPE_DICT)
{
_pdfioFileError(pdf, "Cross-reference stream does not have a dictionary.");
return (false);
PDFIO_DEBUG("load_xref: Expected dictionary for cross-reference object (type=%d).", trailer.type);
goto repair;
}
obj->value = trailer;
if (!_pdfioTokenGet(&tb, line, sizeof(line)) || strcmp(line, "stream"))
{
_pdfioFileError(pdf, "Unable to get stream after xref dictionary.");
return (false);
PDFIO_DEBUG("load_xref: No stream token after dictionary (got '%s').\n", line);
goto repair;
}
PDFIO_DEBUG("load_xref: tb.bufptr=%p, tb.bufend=%p, tb.bufptr[0]=0x%02x, tb.bufptr[0]=0x%02x\n", tb.bufptr, tb.bufend, tb.bufptr[0], tb.bufptr[1]);
@ -1857,8 +1915,8 @@ load_xref(
if ((w_array = pdfioDictGetArray(trailer.value.dict, "W")) == NULL)
{
_pdfioFileError(pdf, "Cross-reference stream does not have required W key.");
return (false);
PDFIO_DEBUG("load_xref: Missing W array in cross-reference objection dictionary.\n");
goto repair;
}
w[0] = (size_t)pdfioArrayGetNumber(w_array, 0);
@ -1868,16 +1926,18 @@ load_xref(
w_2 = w[0];
w_3 = w[0] + w[1];
if (w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
PDFIO_DEBUG("W=[%u %u %u], w_total=%u\n", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2], (unsigned)w_total);
if (pdfioArrayGetSize(w_array) > 3 || w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
{
_pdfioFileError(pdf, "Cross-reference stream has invalid W key [%u %u %u].", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2]);
return (false);
PDFIO_DEBUG("load_xref: Bad W array in cross-reference objection dictionary.\n");
goto repair;
}
if ((st = pdfioObjOpenStream(obj, true)) == NULL)
{
_pdfioFileError(pdf, "Unable to open cross-reference stream.");
return (false);
PDFIO_DEBUG("load_xref: Unable to open cross-reference stream.\n");
goto repair;
}
for (index_n = 0; index_n < index_count; index_n += 2)
@ -1897,7 +1957,20 @@ load_xref(
{
count --;
#ifdef DEBUG
if (w_total > 5)
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X%02X...\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]);
else if (w_total == 5)
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]);
else if (w_total == 4)
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3]);
else if (w_total == 3)
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2]);
else if (w_total == 2)
PDFIO_DEBUG("load_xref: number=%u %02X%02X\n", (unsigned)number, buffer[0], buffer[1]);
else
PDFIO_DEBUG("load_xref: number=%u %02X\n", (unsigned)number, buffer[0]);
#endif // DEBUG
// Check whether this is an object definition...
if (w[0] > 0)
@ -1979,6 +2052,7 @@ load_xref(
else
{
_pdfioFileError(pdf, "Too many object streams.");
pdfioStreamClose(st);
return (false);
}
}
@ -1987,8 +2061,11 @@ load_xref(
{
// Add this object...
if (!add_obj(pdf, (size_t)number, (unsigned short)generation, (off_t)offset))
{
pdfioStreamClose(st);
return (false);
}
}
number ++;
}
@ -2035,7 +2112,7 @@ load_xref(
// Offset of current line
PDFIO_DEBUG("load_xref: Reading xref table starting at offset %lu\n", (unsigned long)trailer_offset);
while (_pdfioFileGets(pdf, line, sizeof(line)))
while (_pdfioFileGets(pdf, line, sizeof(line), false))
{
PDFIO_DEBUG("load_xref: '%s' at offset %lu\n", line, (unsigned long)trailer_offset);
@ -2060,8 +2137,8 @@ load_xref(
if (sscanf(line, "%jd%jd", &number, &num_objects) != 2)
{
_pdfioFileError(pdf, "Malformed xref table section '%s'.", line);
return (false);
PDFIO_DEBUG("load_xref: Unable to scan START COUNT from line.\n");
goto repair;
}
// Read this group of objects...
@ -2069,41 +2146,45 @@ load_xref(
{
// Read a line from the file and validate it...
if (_pdfioFileRead(pdf, line, 20) != 20)
return (false);
{
PDFIO_DEBUG("load_xref: Unable to read 20 byte xref record.\n");
goto repair;
}
line[20] = '\0';
if (strcmp(line + 18, "\r\n") && strcmp(line + 18, " \n") && strcmp(line + 18, " \r"))
if (strcmp(line + 18, "\r\n") && strcmp(line + 18, "\r\r") && strcmp(line + 18, " \n") && strcmp(line + 18, " \r"))
{
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
return (false);
PDFIO_DEBUG("load_xref: Bad end-of-line <%02X%02X>\n", line[18], line[19]);
goto repair;
}
line[18] = '\0';
// Parse the line
if ((offset = strtoimax(line, &ptr, 10)) < 0)
{
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
return (false);
PDFIO_DEBUG("load_xref: Unable to scan offset.\n");
goto repair;
}
if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || (generation > 65535 && offset != 0))
{
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
return (false);
PDFIO_DEBUG("load_xref: Unable to scan generation (%u).\n", (unsigned)generation);
goto repair;
}
if (*ptr != ' ')
{
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
return (false);
PDFIO_DEBUG("load_xref: Missing space before type.\n");
goto repair;
}
ptr ++;
if (*ptr != 'f' && *ptr != 'n')
{
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
return (false);
PDFIO_DEBUG("load_xref: Bad type '%c'.\n", *ptr);
goto repair;
}
if (*ptr == 'f')
@ -2122,21 +2203,21 @@ load_xref(
if (strncmp(line, "trailer", 7))
{
_pdfioFileError(pdf, "Missing trailer.");
return (false);
PDFIO_DEBUG("load_xref: No trailer after xref table.\n");
goto repair;
}
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0))
{
_pdfioFileError(pdf, "Unable to read trailer dictionary.");
return (false);
PDFIO_DEBUG("load_xref: Unable to read trailer dictionary.\n");
goto repair;
}
else if (trailer.type != PDFIO_VALTYPE_DICT)
{
_pdfioFileError(pdf, "Trailer is not a dictionary.");
return (false);
PDFIO_DEBUG("load_xref: Trailer not a dictionary (type=%d).\n", trailer.type);
goto repair;
}
PDFIO_DEBUG("load_xref: Got trailer dict.\n");
@ -2158,8 +2239,7 @@ load_xref(
}
else
{
_pdfioFileError(pdf, "Bad xref table header '%s'.", line);
return (false);
goto repair;
}
PDFIO_DEBUG("load_xref: Contents of trailer dictionary:\n");
@ -2188,13 +2268,31 @@ load_xref(
if ((pdf->root_obj = pdfioDictGetObj(pdf->trailer_dict, "Root")) == NULL)
{
_pdfioFileError(pdf, "Missing Root object.");
return (false);
PDFIO_DEBUG("load_xref: Missing Root object.\n");
goto repair;
}
PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);
return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages"), 0));
if ((pages_obj = pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages")) == NULL)
{
PDFIO_DEBUG("load_xref: Missing Pages object.\n");
goto repair;
}
PDFIO_DEBUG("load_xref: Pages=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);
return (load_pages(pdf, pages_obj, 0));
// If we get here the cross-reference table is busted - try repairing if the
// error callback says to proceed...
repair:
if (_pdfioFileError(pdf, "WARNING: Cross-reference is damaged, will attempt to rebuild."))
return (repair_xref(pdf, password_cb, password_data));
else
return (false);
}
@ -2208,7 +2306,7 @@ repair_xref(
pdfio_password_cb_t password_cb, // I - Password callback or `NULL` for none
void *password_data) // I - Password callback data, if any
{
char line[16384], // Line from file
char line[1024], // Line from file
*ptr; // Pointer into line
off_t line_offset; // Offset in file
intmax_t number; // Object number
@ -2216,16 +2314,22 @@ repair_xref(
size_t i; // Looping var
size_t num_sobjs = 0; // Number of object streams
pdfio_obj_t *sobjs[16384]; // Object streams to load
pdfio_dict_t *backup_trailer = NULL; // Backup trailer dictionary
pdfio_obj_t *pages_obj; // Pages object
// Let caller know something is wrong...
_pdfioFileError(pdf, "WARNING: Cross-reference table is damaged, attempting to rebuild.");
// Clear trailer data...
pdf->trailer_dict = NULL;
pdf->root_obj = NULL;
pdf->info_obj = NULL;
pdf->pages_obj = NULL;
pdf->encrypt_obj = NULL;
// Read from the beginning of the file, looking for
// Read from the beginning of the file, looking for objects...
if ((line_offset = _pdfioFileSeek(pdf, 0, SEEK_SET)) < 0)
return (false);
while (_pdfioFileGets(pdf, line, sizeof(line)))
while (_pdfioFileGets(pdf, line, sizeof(line), true))
{
// See if this is the start of an object...
if (line[0] >= '1' && line[0] <= '9')
@ -2242,44 +2346,76 @@ repair_xref(
pdfio_obj_t *obj; // Object
_pdfio_token_t tb; // Token buffer/stack
PDFIO_DEBUG("OBJECT %ld %d at offset %ld\n", (long)number, generation, (long)line_offset);
PDFIO_DEBUG("repair_xref: OBJECT %ld %d at offset %ld\n", (long)number, generation, (long)line_offset);
if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, line_offset)) == NULL)
if ((obj = pdfioFileFindObj(pdf, (size_t)number)) != NULL)
{
obj->offset = line_offset;
}
else if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, line_offset)) == NULL)
{
_pdfioFileError(pdf, "Unable to allocate memory for object.");
return (false);
}
if (ptr[3])
{
// Probably the start of the object dictionary, rewind the file so
// we can read it...
_pdfioFileSeek(pdf, line_offset + (ptr - line + 3), SEEK_SET);
}
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, obj, &tb, &obj->value, 0))
{
_pdfioFileError(pdf, "Unable to read cross-reference stream dictionary.");
if (!_pdfioFileError(pdf, "WARNING: Unable to read object dictionary/value."))
return (false);
else
continue;
}
if (_pdfioTokenGet(&tb, line, sizeof(line)) && strcmp(line, "stream"))
if (_pdfioTokenGet(&tb, line, sizeof(line)))
{
const char *type = pdfioObjGetType(obj);
// Object type
_pdfioTokenFlush(&tb);
if (type && !strcmp(line, "stream"))
{
// Possible object or XRef stream...
obj->stream_offset = _pdfioFileTell(pdf);
if (type && !strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
if (!strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
{
PDFIO_DEBUG("repair_xref: Object stream...\n");
sobjs[num_sobjs] = obj;
num_sobjs ++;
}
if (type && !strcmp(type, "XRef") && !pdf->trailer_dict)
if (!strcmp(type, "XRef") && !pdf->trailer_dict)
{
// Save the trailer dictionary...
PDFIO_DEBUG("repair_xref: XRef stream...\n");
pdf->trailer_dict = pdfioObjGetDict(obj);
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
}
}
else if (type && !strcmp(line, "endobj"))
{
// Possible catalog or pages object...
if (!strcmp(type, "Catalog"))
{
PDFIO_DEBUG("repair_xref: Catalog (root) object...\n");
if (!backup_trailer)
backup_trailer = pdfioDictCreate(pdf);
pdfioDictSetObj(backup_trailer, "Root", obj);
}
}
}
}
}
}
@ -2289,6 +2425,8 @@ repair_xref(
_pdfio_token_t tb; // Token buffer/stack
_pdfio_value_t trailer; // Trailer
PDFIO_DEBUG("repair_xref: line=\"%s\"\n", line);
if (line[7])
{
// Probably the start of the trailer dictionary, rewind the file so
@ -2296,7 +2434,7 @@ repair_xref(
_pdfioFileSeek(pdf, line_offset + 7, SEEK_SET);
}
PDFIO_DEBUG("TRAILER at offset %ld\n", (long)line_offset);
PDFIO_DEBUG("repair_xref: TRAILER at offset %ld\n", (long)line_offset);
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0))
@ -2312,10 +2450,12 @@ repair_xref(
_pdfioTokenFlush(&tb);
if (!pdf->trailer_dict)
if (_pdfioDictGetValue(trailer.value.dict, "Root"))
{
// Save the trailer dictionary and grab the root (catalog) and info
// objects...
PDFIO_DEBUG("repair_xref: Using this trailer dictionary.\n");
pdf->trailer_dict = trailer.value.dict;
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
@ -2326,11 +2466,18 @@ repair_xref(
line_offset = _pdfioFileTell(pdf);
}
PDFIO_DEBUG("repair_xref: Stopped at line_offset=%lu\n", (unsigned long)line_offset);
if (!pdf->trailer_dict && backup_trailer)
pdf->trailer_dict = backup_trailer;
// If the trailer contains an Encrypt key, try unlocking the file...
if (pdf->encrypt_obj && !_pdfioCryptoUnlock(pdf, password_cb, password_data))
return (false);
// Load any stream objects...
PDFIO_DEBUG("repair_xref: Found %lu stream objects.\n", (unsigned long)num_sobjs);
for (i = 0; i < num_sobjs; i ++)
{
if (!load_obj_stream(sobjs[i]))
@ -2349,8 +2496,16 @@ repair_xref(
PDFIO_DEBUG("repair_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);
if ((pages_obj = pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages")) == NULL)
{
_pdfioFileError(pdf, "Missing Pages object.");
return (false);
}
PDFIO_DEBUG("repair_xref: Pages=%p(%lu)\n", pages_obj, (unsigned long)pages_obj->number);
// Load pages...
return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages"), 0));
return (load_pages(pdf, pages_obj, 0));
}
@ -2500,12 +2655,16 @@ write_trailer(pdfio_file_t *pdf) // I - PDF file
buffer[2] = (obj->offset >> 8) & 255;
buffer[3] = obj->offset & 255;
break;
#ifdef _WIN32
default :
#endif // _WIN32
case 4 :
buffer[1] = (obj->offset >> 24) & 255;
buffer[2] = (obj->offset >> 16) & 255;
buffer[3] = (obj->offset >> 8) & 255;
buffer[4] = obj->offset & 255;
break;
#ifndef _WIN32 // Windows off_t is 32-bits?!?
case 5 :
buffer[1] = (obj->offset >> 32) & 255;
buffer[2] = (obj->offset >> 24) & 255;
@ -2540,6 +2699,7 @@ write_trailer(pdfio_file_t *pdf) // I - PDF file
buffer[7] = (obj->offset >> 8) & 255;
buffer[8] = obj->offset & 255;
break;
#endif // !_WIN32
}
if (!pdfioStreamWrite(xref_st, buffer, offsize + 2))

View File

@ -79,6 +79,10 @@ pdfioObjCopy(pdfio_file_t *pdf, // I - PDF file
if (srcobj->value.type == PDFIO_VALTYPE_NONE)
_pdfioObjLoad(srcobj);
// See if we have already mapped this object...
if ((dstobj = _pdfioFileFindMappedObj(pdf, srcobj->pdf, srcobj->number)) != NULL)
return (dstobj); // Yes, return that one...
// Create the new object...
if ((dstobj = _pdfioFileCreateObj(pdf, srcobj->pdf, NULL)) == NULL)
return (NULL);
@ -141,6 +145,7 @@ pdfioObjCreateStream(
pdfio_obj_t *obj, // I - Object
pdfio_filter_t filter) // I - Type of compression to apply
{
pdfio_stream_t *st; // Stream
pdfio_obj_t *length_obj = NULL; // Length object, if any
@ -195,10 +200,12 @@ pdfioObjCreateStream(
return (NULL);
obj->stream_offset = _pdfioFileTell(obj->pdf);
obj->pdf->current_obj = obj;
// Return the new stream...
return (_pdfioStreamCreate(obj, length_obj, 0, filter));
if ((st = _pdfioStreamCreate(obj, length_obj, 0, filter)) != NULL)
obj->pdf->current_obj = obj;
return (st);
}
@ -330,6 +337,8 @@ pdfioObjGetLength(pdfio_obj_t *obj) // I - Object
//
// 'pdfioObjGetName()' - Get the name value associated with an object.
//
// @since PDFio v1.4@
//
const char * // O - Dictionary or `NULL` on error
pdfioObjGetName(pdfio_obj_t *obj) // I - Object
@ -505,7 +514,7 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object
}
// Decrypt as needed...
if (obj->pdf->encryption)
if (obj->pdf->encryption && obj->pdf->encrypt_metadata)
{
PDFIO_DEBUG("_pdfioObjLoad: Decrypting value...\n");
@ -532,6 +541,9 @@ pdfio_stream_t * // O - Stream or `NULL` on error
pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
bool decode) // I - Decode/decompress data?
{
pdfio_stream_t *st; // Stream
// Range check input...
if (!obj)
return (NULL);
@ -554,9 +566,10 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
return (NULL);
// Open the stream...
if ((st = _pdfioStreamOpen(obj, decode)) != NULL)
obj->pdf->current_obj = obj;
return (_pdfioStreamOpen(obj, decode));
return (st);
}

View File

@ -10,7 +10,7 @@
#ifndef PDFIO_PRIVATE_H
# define PDFIO_PRIVATE_H
# ifdef _WIN32
# define _CRT_SECURE_NO_WARNINGS // Disable bogus VS warnings/errors...
# define _CRT_SECURE_NO_WARNINGS 1 // Disable bogus VS warnings/errors...
# endif // _WIN32
# include "pdfio.h"
# include <stdarg.h>
@ -28,16 +28,16 @@
# define access _access // Map standard POSIX/C99 names
# define close _close
# define fileno _fileno
# define lseek _lseek
# define lseek(f,o,w) (off_t)_lseek((f),(long)(o),(w))
# define mkdir(d,p) _mkdir(d)
# define open _open
# define read _read
# define read(f,b,s) _read((f),(b),(unsigned)(s))
# define rmdir _rmdir
# define snprintf _snprintf
# define strdup _strdup
# define unlink _unlink
# define vsnprintf _vsnprintf
# define write _write
# define write(f,b,s) _write((f),(b),(unsigned)(s))
# ifndef F_OK
# define F_OK 00 // POSIX parameters/flags
# define W_OK 02
@ -221,7 +221,7 @@ struct _pdfio_dict_s // Dictionary
typedef struct _pdfio_objmap_s // PDF object map
{
pdfio_obj_t *obj; // Object for this file
pdfio_file_t *src_pdf; // Source PDF file
unsigned char src_id[32]; // Source PDF file file identifier
size_t src_number; // Source object number
} _pdfio_objmap_t;
@ -236,6 +236,7 @@ typedef struct _pdfio_strbuf_s // PDF string buffer
struct _pdfio_file_s // PDF file structure
{
char *filename; // Filename
unsigned char file_id[32]; // File identifier bytes
struct lconv *loc; // Locale data
char *version; // Version number
pdfio_rect_t media_box, // Default MediaBox value
@ -270,6 +271,7 @@ struct _pdfio_file_s // PDF file structure
pdfio_obj_t *cp1252_obj, // CP1252 font encoding object
*unicode_obj; // Unicode font encoding object
pdfio_array_t *id_array; // ID array
bool encrypt_metadata; // Encrypt metadata?
// Allocated data elements
size_t num_arrays, // Number of arrays
@ -339,6 +341,7 @@ struct _pdfio_stream_s // Stream
extern size_t _pdfio_strlcpy(char *dst, const char *src, size_t dstsize) _PDFIO_INTERNAL;
extern double _pdfio_strtod(pdfio_file_t *pdf, const char *s) _PDFIO_INTERNAL;
extern void _pdfio_utf16cpy(char *dst, const unsigned char *src, size_t srclen, size_t dstsize) _PDFIO_INTERNAL;
extern ssize_t _pdfio_vsnprintf(pdfio_file_t *pdf, char *buffer, size_t bufsize, const char *format, va_list ap) _PDFIO_INTERNAL;
extern bool _pdfioArrayDecrypt(pdfio_file_t *pdf, pdfio_obj_t *obj, pdfio_array_t *a, size_t depth) _PDFIO_INTERNAL;
@ -382,7 +385,7 @@ extern bool _pdfioFileError(pdfio_file_t *pdf, const char *format, ...) _PDFIO_
extern pdfio_obj_t *_pdfioFileFindMappedObj(pdfio_file_t *pdf, pdfio_file_t *src_pdf, size_t src_number) _PDFIO_INTERNAL;
extern bool _pdfioFileFlush(pdfio_file_t *pdf) _PDFIO_INTERNAL;
extern int _pdfioFileGetChar(pdfio_file_t *pdf) _PDFIO_INTERNAL;
extern bool _pdfioFileGets(pdfio_file_t *pdf, char *buffer, size_t bufsize) _PDFIO_INTERNAL;
extern bool _pdfioFileGets(pdfio_file_t *pdf, char *buffer, size_t bufsize, bool discard) _PDFIO_INTERNAL;
extern ssize_t _pdfioFilePeek(pdfio_file_t *pdf, void *buffer, size_t bytes) _PDFIO_INTERNAL;
extern bool _pdfioFilePrintf(pdfio_file_t *pdf, const char *format, ...) _PDFIO_INTERNAL;
extern bool _pdfioFilePuts(pdfio_file_t *pdf, const char *s) _PDFIO_INTERNAL;

View File

@ -259,7 +259,7 @@ _pdfioStreamCreate(
{
colors = 1;
}
else if (colors < 0 || colors > 4)
else if (colors < 0 || colors > 32)
{
_pdfioFileError(st->pdf, "Unsupported Colors value %d.", colors);
free(st);
@ -270,7 +270,7 @@ _pdfioStreamCreate(
{
columns = 1;
}
else if (columns < 0)
else if (columns < 0 || columns > 65536)
{
_pdfioFileError(st->pdf, "Unsupported Columns value %d.", columns);
free(st);
@ -532,7 +532,7 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
{
colors = 1;
}
else if (colors < 0 || colors > 4)
else if (colors < 0 || colors > 32)
{
_pdfioFileError(st->pdf, "Unsupported Colors value %d.", colors);
goto error;
@ -542,7 +542,7 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
{
columns = 1;
}
else if (columns < 0)
else if (columns < 0 || columns > 65536)
{
_pdfioFileError(st->pdf, "Unsupported Columns value %d.", columns);
goto error;
@ -562,6 +562,13 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
if (predictor >= 10)
st->pbsize ++; // Add PNG predictor byte
if (st->pbsize < 2)
{
_pdfioFileError(st->pdf, "Bad Predictor buffer size %lu.", (unsigned long)st->pbsize);
goto error;
}
PDFIO_DEBUG("_pdfioStreamOpen: st->predictor=%d, st->pbpixel=%u, st->pbsize=%lu\n", st->predictor, (unsigned)st->pbpixel, (unsigned long)st->pbsize);
if ((st->prbuffer = calloc(1, st->pbsize - 1)) == NULL || (st->psbuffer = calloc(1, st->pbsize)) == NULL)
{
_pdfioFileError(st->pdf, "Unable to allocate %lu bytes for Predictor buffers.", (unsigned long)st->pbsize);
@ -690,8 +697,9 @@ pdfioStreamPeek(pdfio_stream_t *st, // I - Stream
// 'pdfioStreamPrintf()' - Write a formatted string to a stream.
//
// This function writes a formatted string to a stream. In addition to the
// standard `printf` format characters, you can use "%N" to format a PDF name
// value ("/Name") and "%S" to format a PDF string ("(String)") value.
// standard `printf` format characters, you can use "%H" to format a HTML/XML
// string value, "%N" to format a PDF name value ("/Name"), and "%S" to format
// a PDF string ("(String)") value.
//
bool // O - `true` on success, `false` on failure
@ -1227,7 +1235,18 @@ stream_read(pdfio_stream_t *st, // I - Stream
}
// Apply predictor for this line
PDFIO_DEBUG("stream_read: Line %02X %02X %02X %02X %02X.\n", sptr[-1], sptr[0], sptr[0], sptr[2], sptr[3]);
#ifdef DEBUG
if (remaining > 4)
PDFIO_DEBUG("stream_read: Line %02X %02X %02X %02X %02X ...\n", sptr[-1], sptr[0], sptr[1], sptr[2], sptr[3]);
else if (remaining > 3)
PDFIO_DEBUG("stream_read: Line %02X %02X %02X %02X %02X.\n", sptr[-1], sptr[0], sptr[1], sptr[2], sptr[3]);
else if (remaining > 2)
PDFIO_DEBUG("stream_read: Line %02X %02X %02X %02X.\n", sptr[-1], sptr[0], sptr[1], sptr[2]);
else if (remaining > 1)
PDFIO_DEBUG("stream_read: Line %02X %02X %02X.\n", sptr[-1], sptr[0], sptr[1]);
else
PDFIO_DEBUG("stream_read: Line %02X %02X.\n", sptr[-1], sptr[0]);
#endif // DEBUG
switch (sptr[-1])
{

View File

@ -158,6 +158,89 @@ _pdfio_strtod(pdfio_file_t *pdf, // I - PDF file
}
//
// '_pdfio_utf16cpy()' - Convert UTF-16 to UTF-8.
//
// The first two bytes of "src" are treated as the byte order mark and are not
// copied: "\376\377" selects big-endian, anything else is read as
// little-endian.  Conversion stops at the end of the source, when fewer than
// 5 bytes (a 4-byte UTF-8 sequence plus the nul) remain in the destination,
// or when a high surrogate is followed by something other than a low
// surrogate.  Unpaired surrogates and the U+FFFE/U+FFFF non-characters are
// skipped so the output never contains an invalid UTF-8 sequence.  The
// destination is always nul-terminated when "dstsize" is greater than 0.
//

void
_pdfio_utf16cpy(
    char                *dst,           // I - Destination buffer for UTF-8
    const unsigned char *src,           // I - Source UTF-16
    size_t              srclen,         // I - Length of UTF-16
    size_t              dstsize)        // I - Destination buffer size
{
  char  *dstptr = dst,                  // Pointer into buffer
        *dstend;                        // End of buffer
  int   ch;                             // Unicode character
  bool  is_be;                          // Big-endian strings?


  // Range check input...
  if (!dst || dstsize == 0)
    return;

  *dst = '\0';

  // Need at least a BOM in the source (otherwise "srclen - 2" would wrap
  // around) and room for one 4-byte sequence plus the nul in the destination
  // (otherwise "dst + dstsize - 5" would point before the buffer)...
  if (!src || srclen < 2 || dstsize < 5)
    return;

  dstend = dst + dstsize - 5;
  is_be  = !memcmp(src, "\376\377", 2);

  // Loop through the UTF-16 string, converting to Unicode then UTF-8...
  for (src += 2, srclen -= 2; srclen > 1 && dstptr < dstend; src += 2, srclen -= 2)
  {
    // Initial character...
    if (is_be)
      ch = (src[0] << 8) | src[1];
    else
      ch = (src[1] << 8) | src[0];

    if (ch >= 0xd800 && ch <= 0xdbff && srclen > 3)
    {
      // Multi-word UTF-16 char...
      int lch;                          // Lower bits

      if (is_be)
        lch = (src[2] << 8) | src[3];
      else
        lch = (src[3] << 8) | src[2];

      // Low surrogates are 0xDC00 through 0xDFFF inclusive...
      if (lch < 0xdc00 || lch > 0xdfff)
        break;

      ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;

      src += 2;
      srclen -= 2;
    }
    else if ((ch >= 0xd800 && ch <= 0xdfff) || ch >= 0xfffe)
    {
      // Skip unpaired surrogates (not encodable as UTF-8) and the
      // U+FFFE/U+FFFF non-characters...
      continue;
    }

    // Convert Unicode to UTF-8...
    if (ch < 0x80)
    {
      // ASCII
      *dstptr++ = (char)ch;
    }
    else if (ch < 0x800)
    {
      // 2-byte UTF-8 (11 bits of payload)
      *dstptr++ = (char)(0xc0 | (ch >> 6));
      *dstptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else if (ch < 0x10000)
    {
      // 3-byte UTF-8 (16 bits of payload)
      *dstptr++ = (char)(0xe0 | (ch >> 12));
      *dstptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *dstptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else
    {
      // 4-byte UTF-8 (lead byte is 11110xxx)
      *dstptr++ = (char)(0xf0 | (ch >> 18));
      *dstptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
      *dstptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *dstptr++ = (char)(0x80 | (ch & 0x3f));
    }
  }

  // Nul-terminate the UTF-8 string...
  *dstptr = '\0';
}
//
// '_pdfio_vsnprintf()' - Format a string.
//
@ -403,6 +486,59 @@ _pdfio_vsnprintf(pdfio_file_t *pdf, // I - PDF file
}
break;
case 'H' : // XML/HTML string
if ((s = va_arg(ap, char *)) == NULL)
s = "(null)";
// Loop through the literal string...
while (*s)
{
// Escape special characters
if (*s == '&')
{
// &amp;
if (bufptr < bufend)
{
_pdfio_strlcpy(bufptr, "&amp;", (size_t)(bufend - bufptr + 1));
bufptr += strlen(bufptr);
}
bytes += 5;
}
else if (*s == '<')
{
// &lt;
if (bufptr < bufend)
{
_pdfio_strlcpy(bufptr, "&lt;", (size_t)(bufend - bufptr + 1));
bufptr += strlen(bufptr);
}
bytes += 4;
}
else if (*s == '>')
{
// &gt;
if (bufptr < bufend)
{
_pdfio_strlcpy(bufptr, "&gt;", (size_t)(bufend - bufptr + 1));
bufptr += strlen(bufptr);
}
bytes += 4;
}
else
{
// Literal character...
if (bufptr < bufend)
*bufptr++ = *s;
bytes ++;
}
s ++;
}
break;
case 'S' : // PDF string
if ((s = va_arg(ap, char *)) == NULL)
s = "(null)";

View File

@ -172,7 +172,7 @@ _pdfioValueDecrypt(pdfio_file_t *pdf, // I - PDF file
// Copy the decrypted string back to the value and adjust the length...
memcpy(v->value.binary.data, temp, templen);
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128)
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128 && temp[templen - 1] <= templen)
v->value.binary.datalen = templen - temp[templen - 1];
else
v->value.binary.datalen = templen;
@ -183,20 +183,48 @@ _pdfioValueDecrypt(pdfio_file_t *pdf, // I - PDF file
case PDFIO_VALTYPE_STRING :
// Decrypt regular string...
templen = strlen(v->value.string);
if (templen > (sizeof(temp) - 33))
if (templen > (PDFIO_MAX_STRING - 1))
{
_pdfioFileError(pdf, "Unable to read encrypted string - too long.");
return (false);
}
else if ((temp = (uint8_t *)_pdfioStringAllocBuffer(pdf)) == NULL)
{
_pdfioFileError(pdf, "Unable to read encrypted binary string - out of memory.");
return (false);
}
ivlen = templen;
if ((cb = _pdfioCryptoMakeReader(pdf, obj, &ctx, (uint8_t *)v->value.string, &ivlen)) == NULL)
return (false);
templen = (cb)(&ctx, temp, (uint8_t *)v->value.string + ivlen, templen - ivlen);
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128 && temp[templen - 1] <= templen)
templen -= temp[templen - 1];
temp[templen] = '\0';
if ((timeval = get_date_time((char *)temp)) != 0)
if ((templen & 1) == 0 && (!memcmp(temp, "\376\377", 2) || !memcmp(temp, "\377\376", 2)))
{
// Convert UTF-16 to UTF-8...
char utf8[4096]; // Temporary string
_pdfio_utf16cpy(utf8, temp, templen, sizeof(utf8));
if ((timeval = get_date_time((char *)utf8)) != 0)
{
// Change the type to date...
v->type = PDFIO_VALTYPE_DATE;
v->value.date = timeval;
}
else
{
// Copy the decrypted string back to the value...
v->value.string = pdfioStringCreate(pdf, utf8);
}
}
else if ((timeval = get_date_time((char *)temp)) != 0)
{
// Change the type to date...
v->type = PDFIO_VALTYPE_DATE;
@ -207,6 +235,8 @@ _pdfioValueDecrypt(pdfio_file_t *pdf, // I - PDF file
// Copy the decrypted string back to the value...
v->value.string = pdfioStringCreate(pdf, (char *)temp);
}
_pdfioStringFreeBuffer(pdf, (char *)temp);
break;
}
@ -367,19 +397,23 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
ret = v;
}
else if ((timeval = get_date_time(token + 1)) != 0)
else if (token[0] == '(')
{
if ((timeval = get_date_time(token + 1)) != 0)
{
// Date
v->type = PDFIO_VALTYPE_DATE;
v->value.date = timeval;
ret = v;
}
else if (token[0] == '(')
else
{
// String
v->type = PDFIO_VALTYPE_STRING;
v->value.string = pdfioStringCreate(pdf, token + 1);
ret = v;
}
}
else if (token[0] == '/')
{
// Name

View File

@ -23,7 +23,7 @@ extern "C" {
// Version numbers...
//
# define PDFIO_VERSION "1.5.2"
# define PDFIO_VERSION "1.5.3"
# define PDFIO_VERSION_MAJOR 1
# define PDFIO_VERSION_MINOR 5
@ -46,7 +46,7 @@ extern "C" {
//
# if _WIN32
typedef __int64 ssize_t; // POSIX type not present on Windows... @private@
typedef __int64 ssize_t; // POSIX type not present on Windows @private@
# endif // _WIN32
typedef struct _pdfio_array_s pdfio_array_t;
@ -62,7 +62,7 @@ typedef bool (*pdfio_error_cb_t)(pdfio_file_t *pdf, const char *message, void *d
typedef enum pdfio_encryption_e // PDF encryption modes
{
PDFIO_ENCRYPTION_NONE = 0, // No encryption
PDFIO_ENCRYPTION_RC4_40, // 40-bit RC4 encryption (PDF 1.3)
PDFIO_ENCRYPTION_RC4_40, // 40-bit RC4 encryption (PDF 1.3, reading only)
PDFIO_ENCRYPTION_RC4_128, // 128-bit RC4 encryption (PDF 1.4)
PDFIO_ENCRYPTION_AES_128, // 128-bit AES encryption (PDF 1.6)
PDFIO_ENCRYPTION_AES_256 // 256-bit AES encryption (PDF 2.0) @exclude all@

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native</id>
<title>PDFio Library for VS2019+</title>
<version>1.5.2</version>
<version>1.5.3</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
@ -16,7 +16,7 @@
<copyright>Copyright © 2019-2025 by Michael R Sweet</copyright>
<tags>pdf file native</tags>
<dependencies>
<dependency id="pdfio_native.redist" version="1.5.2" />
<dependency id="pdfio_native.redist" version="1.5.3" />
<dependency id="libpng_native.redist" version="1.6.30" />
<dependency id="zlib_native.redist" version="1.2.11" />
</dependencies>

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native.redist</id>
<title>PDFio Library for VS2019+</title>
<version>1.5.2</version>
<version>1.5.3</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>