12 Commits

Author SHA1 Message Date
9c04d1dc20 Update changelog. 2024-10-15 13:10:06 -04:00
335472023e Bump version in header. 2024-10-15 13:06:40 -04:00
8f2c47cb07 Make sure memory is freed on error conditions. 2024-10-09 15:32:48 -04:00
74dfefdcc1 Update documentation (Issue #77)
- Explain pdfioObjGetSubtype and pdfioObjGetType values
- Provide example code and documentation for accessing common page object values
2024-10-09 15:07:57 -04:00
206f75403a Add debug printfs. 2024-08-26 09:19:34 -04:00
7d22477917 Fix opening of certain encrypted PDF files (Issue #62) 2024-08-21 11:28:39 -04:00
7c3651671b Add NULL checks in the private debug APIs that testpdfio calls. 2024-08-21 09:22:58 -04:00
6cb661f0f4 Cleanup changelog. 2024-08-21 08:25:11 -04:00
7e01451b18 Merge 0-character font fix from TTF. 2024-08-21 08:22:31 -04:00
138f3955d1 Add --password option to PDFio test program. 2024-08-19 17:12:16 -04:00
82844ad2ce Merge TTF v1.0.0 source files. 2024-08-19 16:59:00 -04:00
d7cce4dfbc Merge TTF v1.0.0 source files. 2024-08-19 16:58:38 -04:00
15 changed files with 498 additions and 79 deletions

View File

@ -2,22 +2,24 @@ Changes in PDFio
================
v1.3.2 (Month DD, YYYY)
-----------------------
v1.3.2 - 2024-08-15
-------------------
- Added some more sanity checks to the TrueType font reader.
- Updated documentation (Issue #77)
- Fixed an issue when opening certain encrypted PDF files (Issue #62)
v1.3.1 (August 5, 2024)
-----------------------
v1.3.1 - 2024-08-05
-------------------
- CVE-2024-42358: Updated TrueType font reader to avoid large memory
allocations.
- Fixed some documentation errors and added examples (Issue #68, Issue #69)
v1.3.0 (June 28, 2024)
----------------------
v1.3.0 - 2024-06-28
-------------------
- Added `pdfioFileGetCatalog` API for accessing the root/catalog object of a
PDF file (Issue #67)
@ -27,8 +29,8 @@ v1.3.0 (June 28, 2024)
- Optimized string pool code.
v1.2.0 (January 24, 2024)
-------------------------
v1.2.0 - 2024-01-24
-------------------
- Now use autoconf to configure the PDFio sources (Issue #54)
- Added `pdfioFileCreateNumberObj` and `pdfioFileCreateStringObj` functions
@ -51,8 +53,8 @@ v1.2.0 (January 24, 2024)
65536 in the xref table (Issue #59)
v1.1.4 (December 3, 2023)
-------------------------
v1.1.4 - 2023-12-03
-------------------
- Fixed detection of encrypted strings that are too short (Issue #52)
- Fixed a TrueType CMAP decoding bug.
@ -60,15 +62,15 @@ v1.1.4 (December 3, 2023)
- Added a ToUnicode map for Unicode text to support text copying.
v1.1.3 (November 15, 2023)
--------------------------
v1.1.3 - 2023-11-15
-------------------
- Fixed Unicode font support (Issue #16)
- Fixed missing initializer for 40-bit RC4 encryption (Issue #51)
v1.1.2 (October 10, 2023)
-------------------------
v1.1.2 - 2023-10-10
-------------------
- Updated `pdfioContentSetDashPattern` to support setting a solid (0 length)
dash pattern (Issue #41)
@ -83,15 +85,15 @@ v1.1.2 (October 10, 2023)
(Issue #48)
v1.1.1 (March 20, 2023)
-----------------------
v1.1.1 - 2023-03-20
-------------------
- CVE-2023-28428: Fixed a potential denial-of-service with corrupt PDF files.
- Fixed a few build issues.
v1.1.0 (February 6, 2023)
-------------------------
v1.1.0 - 2023-02-06
-------------------
- CVE-2023-24808: Fixed a potential denial-of-service with corrupt PDF files.
- Added `pdfioFileCreateTemporary` function (Issue #29)
@ -105,28 +107,28 @@ v1.1.0 (February 6, 2023)
- Fixed `pdfioContentMatrixRotate` function.
v1.0.1 (March 2, 2022)
----------------------
v1.0.1 - 2022-03-02
-------------------
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
- Added demo pdfiototext utility.
- Fixed bug in `pdfioStreamGetToken`.
v1.0.0 (December 14, 2021)
--------------------------
v1.0.0 - 2021-12-14
-------------------
- First stable release.
v1.0rc1 (November 30, 2021)
---------------------------
v1.0rc1 - 2021-11-30
--------------------
- Fixed a few stack/buffer overflow bugs discovered via fuzzing.
v1.0b2 (November 7, 2021)
-------------------------
v1.0b2 - 2021-11-07
-------------------
- Added `pdfioFileCreateOutput` API to support streaming output of PDF
(Issue #21)
@ -137,7 +139,7 @@ v1.0b2 (November 7, 2021)
- Fixed some issues identified by a Coverity scan.
v1.0b1 (August 30, 2021)
------------------------
v1.0b1 - 2021-08-30
-------------------
- Initial release

View File

@ -1,4 +1,4 @@
.TH pdfio 3 "pdf read/write library" "2024-08-05" "pdf read/write library"
.TH pdfio 3 "pdf read/write library" "2024-10-09" "pdf read/write library"
.SH NAME
pdfio \- pdf read/write library
.SH Introduction
@ -218,7 +218,90 @@ Each PDF file contains one or more pages. The pdfioFileGetNumPages function retu
}
.fi
.PP
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page. Use the pdfioPageGetNumStreams and pdfioPageOpenStream functions to access the content streams for each page.
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page. Use the pdfioPageGetNumStreams and pdfioPageOpenStream functions to access the content streams for each page, and pdfioObjGetDict to get the associated page object dictionary. For example, if you want to display the media and crop boxes for a given page:
.nf
pdfio_file_t *pdf; // PDF file
size_t i; // Looping var
size_t count; // Number of pages
pdfio_obj_t *page; // Current page
pdfio_dict_t *dict; // Current page dictionary
pdfio_array_t *media_box; // MediaBox array
double media_values[4]; // MediaBox values
pdfio_array_t *crop_box; // CropBox array
double crop_values[4]; // CropBox values
// Iterate the pages in the PDF file
for (i = 0, count = pdfioFileGetNumPages(pdf); i < count; i ++)
{
page = pdfioFileGetPage(pdf, i);
dict = pdfioObjGetDict(page);
media_box = pdfioDictGetArray(dict, "MediaBox");
media_values[0] = pdfioArrayGetNumber(media_box, 0);
media_values[1] = pdfioArrayGetNumber(media_box, 1);
media_values[2] = pdfioArrayGetNumber(media_box, 2);
media_values[3] = pdfioArrayGetNumber(media_box, 3);
crop_box = pdfioDictGetArray(dict, "CropBox");
crop_values[0] = pdfioArrayGetNumber(crop_box, 0);
crop_values[1] = pdfioArrayGetNumber(crop_box, 1);
crop_values[2] = pdfioArrayGetNumber(crop_box, 2);
crop_values[3] = pdfioArrayGetNumber(crop_box, 3);
printf("Page %u: MediaBox=[%g %g %g %g], CropBox=[%g %g %g %g]\\n",
(unsigned)(i + 1),
media_values[0], media_values[1], media_values[2], media_values[3],
crop_values[0], crop_values[1], crop_values[2], crop_values[3]);
}
.fi
.PP
Page object dictionaries have several (mostly optional) key/value pairs, including:
.IP \(bu 5
.PP
"Annots": An array of annotation dictionaries for the page; use pdfioDictGetArray to get the array
.IP \(bu 5
.PP
"CropBox": The crop box as an array of four numbers for the left, bottom, right, and top coordinates of the target media; use pdfioDictGetArray to get a pointer to the array of numbers
.IP \(bu 5
.PP
"Dur": The number of seconds the page should be displayed; use pdfioDictGetNumber to get the page duration value
.IP \(bu 5
.PP
"Group": The dictionary of transparency group values for the page; use pdfioDictGetDict to get a pointer to the transparency group dictionary
.IP \(bu 5
.PP
"LastModified": The date and time when this page was last modified; use pdfioDictGetDate to get the Unix time_t value
.IP \(bu 5
.PP
"Parent": The parent page tree node object for this page; use pdfioDictGetObj to get a pointer to the object
.IP \(bu 5
.PP
"MediaBox": The media box as an array of four numbers for the left, bottom, right, and top coordinates of the target media; use pdfioDictGetArray to get a pointer to the array of numbers
.IP \(bu 5
.PP
"Resources": The dictionary of resources for the page; use pdfioDictGetDict to get a pointer to the resources dictionary
.IP \(bu 5
.PP
"Rotate": A number indicating the number of degrees of counter\-clockwise rotation to apply to the page when viewing; use pdfioDictGetNumber to get the rotation angle
.IP \(bu 5
.PP
"Thumb": A thumbnail image object for the page; use pdfioDictGetObj to get a pointer to the thumbnail image object
.IP \(bu 5
.PP
"Trans": The page transition dictionary; use pdfioDictGetDict to get a pointer to the dictionary
.PP
The pdfioFileClose function closes a PDF file and frees all memory that was used for it:
.nf
@ -2869,6 +2952,29 @@ const char * pdfioObjGetSubtype (
pdfio_obj_t *obj
);
.fi
.PP
This function returns an object's PDF subtype name, if any. Common subtype
names include:
.PP
.IP \(bu 5
"CIDFontType0": A CID Type0 font
.IP \(bu 5
"CIDFontType2": A CID TrueType font
.IP \(bu 5
"Image": An image or image mask
.IP \(bu 5
"Form": A fillable form
.IP \(bu 5
"OpenType": An OpenType font
.IP \(bu 5
"Type0": A composite font
.IP \(bu 5
"Type1": A PostScript Type1 font
.IP \(bu 5
"Type3": A PDF Type3 font
.IP \(bu 5
"TrueType": A TrueType font
.SS pdfioObjGetType
Get an object's type.
.PP
@ -2877,6 +2983,27 @@ const char * pdfioObjGetType (
pdfio_obj_t *obj
);
.fi
.PP
This function returns an object's PDF type name, if any. Common type names
include:
.PP
.IP \(bu 5
"CMap": A character map for composite fonts
.IP \(bu 5
"Font": An embedded font (\fIpdfioObjGetSubtype\fR will tell you the
font format)
.IP \(bu 5
"FontDescriptor": A font descriptor
.IP \(bu 5
"Page": A (visible) page
.IP \(bu 5
"Pages": A page tree node
.IP \(bu 5
"Template": An invisible template page
.IP \(bu 5
"XObject": An image, image mask, or form (\fIpdfioObjGetSubtype\fR will
tell you which)
.SS pdfioObjOpenStream
Open an object's (data) stream for reading.
.PP

View File

@ -1,13 +1,13 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>PDFio Programming Manual v1.3.0</title>
<title>PDFio Programming Manual v1.3.2</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="generator" content="codedoc v3.7">
<meta name="author" content="Michael R Sweet">
<meta name="language" content="en-US">
<meta name="copyright" content="Copyright © 2021-2024 by Michael R Sweet">
<meta name="version" content="1.3.0">
<meta name="version" content="1.3.2">
<style type="text/css"><!--
body {
background: white;
@ -251,7 +251,7 @@ span.string {
<body>
<div class="header">
<p><img class="title" src="pdfio-512.png"></p>
<h1 class="title">PDFio Programming Manual v1.3.0</h1>
<h1 class="title">PDFio Programming Manual v1.3.2</h1>
<p>Michael R Sweet</p>
<p>Copyright © 2021-2024 by Michael R Sweet</p>
</div>
@ -628,7 +628,66 @@ pdfio_obj_t *page; <span class="comment">// Current page</span>
<span class="comment">// do something with page</span>
}
</code></pre>
<p>Each page is represented by a &quot;page tree&quot; object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more &quot;content&quot; objects that contain the images, fonts, text, and graphics that appear on the page. Use the <a href="#pdfioPageGetNumStreams"><code>pdfioPageGetNumStreams</code></a> and <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> functions to access the content streams for each page.</p>
<p>Each page is represented by a &quot;page tree&quot; object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more &quot;content&quot; objects that contain the images, fonts, text, and graphics that appear on the page. Use the <a href="#pdfioPageGetNumStreams"><code>pdfioPageGetNumStreams</code></a> and <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> functions to access the content streams for each page, and <a href="#pdfioObjGetDict"><code>pdfioObjGetDict</code></a> to get the associated page object dictionary. For example, if you want to display the media and crop boxes for a given page:</p>
<pre><code class="language-c">pdfio_file_t *pdf; <span class="comment">// PDF file</span>
size_t i; <span class="comment">// Looping var</span>
size_t count; <span class="comment">// Number of pages</span>
pdfio_obj_t *page; <span class="comment">// Current page</span>
pdfio_dict_t *dict; <span class="comment">// Current page dictionary</span>
pdfio_array_t *media_box; <span class="comment">// MediaBox array</span>
<span class="reserved">double</span> media_values[<span class="number">4</span>]; <span class="comment">// MediaBox values</span>
pdfio_array_t *crop_box; <span class="comment">// CropBox array</span>
<span class="reserved">double</span> crop_values[<span class="number">4</span>]; <span class="comment">// CropBox values</span>
<span class="comment">// Iterate the pages in the PDF file</span>
<span class="reserved">for</span> (i = <span class="number">0</span>, count = pdfioFileGetNumPages(pdf); i &lt; count; i ++)
{
page = pdfioFileGetPage(pdf, i);
dict = pdfioObjGetDict(page);
media_box = pdfioDictGetArray(dict, <span class="string">&quot;MediaBox&quot;</span>);
media_values[<span class="number">0</span>] = pdfioArrayGetNumber(media_box, <span class="number">0</span>);
media_values[<span class="number">1</span>] = pdfioArrayGetNumber(media_box, <span class="number">1</span>);
media_values[<span class="number">2</span>] = pdfioArrayGetNumber(media_box, <span class="number">2</span>);
media_values[<span class="number">3</span>] = pdfioArrayGetNumber(media_box, <span class="number">3</span>);
crop_box = pdfioDictGetArray(dict, <span class="string">&quot;CropBox&quot;</span>);
crop_values[<span class="number">0</span>] = pdfioArrayGetNumber(crop_box, <span class="number">0</span>);
crop_values[<span class="number">1</span>] = pdfioArrayGetNumber(crop_box, <span class="number">1</span>);
crop_values[<span class="number">2</span>] = pdfioArrayGetNumber(crop_box, <span class="number">2</span>);
crop_values[<span class="number">3</span>] = pdfioArrayGetNumber(crop_box, <span class="number">3</span>);
printf(<span class="string">&quot;Page %u: MediaBox=[%g %g %g %g], CropBox=[%g %g %g %g]\n&quot;</span>,
(<span class="reserved">unsigned</span>)(i + <span class="number">1</span>),
media_values[<span class="number">0</span>], media_values[<span class="number">1</span>], media_values[<span class="number">2</span>], media_values[<span class="number">3</span>],
crop_values[<span class="number">0</span>], crop_values[<span class="number">1</span>], crop_values[<span class="number">2</span>], crop_values[<span class="number">3</span>]);
}
</code></pre>
<p>Page object dictionaries have several (mostly optional) key/value pairs, including:</p>
<ul>
<li><p>&quot;Annots&quot;: An array of annotation dictionaries for the page; use <a href="#pdfioDictGetArray"><code>pdfioDictGetArray</code></a> to get the array</p>
</li>
<li><p>&quot;CropBox&quot;: The crop box as an array of four numbers for the left, bottom, right, and top coordinates of the target media; use <a href="#pdfioDictGetArray"><code>pdfioDictGetArray</code></a> to get a pointer to the array of numbers</p>
</li>
<li><p>&quot;Dur&quot;: The number of seconds the page should be displayed; use <a href="#pdfioDictGetNumber"><code>pdfioDictGetNumber</code></a> to get the page duration value</p>
</li>
<li><p>&quot;Group&quot;: The dictionary of transparency group values for the page; use <a href="#pdfioDictGetDict"><code>pdfioDictGetDict</code></a> to get a pointer to the transparency group dictionary</p>
</li>
<li><p>&quot;LastModified&quot;: The date and time when this page was last modified; use <a href="#pdfioDictGetDate"><code>pdfioDictGetDate</code></a> to get the Unix <code>time_t</code> value</p>
</li>
<li><p>&quot;Parent&quot;: The parent page tree node object for this page; use <a href="#pdfioDictGetObj"><code>pdfioDictGetObj</code></a> to get a pointer to the object</p>
</li>
<li><p>&quot;MediaBox&quot;: The media box as an array of four numbers for the left, bottom, right, and top coordinates of the target media; use <a href="#pdfioDictGetArray"><code>pdfioDictGetArray</code></a> to get a pointer to the array of numbers</p>
</li>
<li><p>&quot;Resources&quot;: The dictionary of resources for the page; use <a href="#pdfioDictGetDict"><code>pdfioDictGetDict</code></a> to get a pointer to the resources dictionary</p>
</li>
<li><p>&quot;Rotate&quot;: A number indicating the number of degrees of counter-clockwise rotation to apply to the page when viewing; use <a href="#pdfioDictGetNumber"><code>pdfioDictGetNumber</code></a> to get the rotation angle</p>
</li>
<li><p>&quot;Thumb&quot;: A thumbnail image object for the page; use <a href="#pdfioDictGetObj"><code>pdfioDictGetObj</code></a> to get a pointer to the thumbnail image object</p>
</li>
<li><p>&quot;Trans&quot;: The page transition dictionary; use <a href="#pdfioDictGetDict"><code>pdfioDictGetDict</code></a> to get a pointer to the dictionary</p>
</li>
</ul>
<p>The <a href="#pdfioFileClose"><code>pdfioFileClose</code></a> function closes a PDF file and frees all memory that was used for it:</p>
<pre><code class="language-c">pdfioFileClose(pdf);
</code></pre>
@ -3490,7 +3549,30 @@ size_t pdfioObjGetNumber(<a href="#pdfio_obj_t">pdfio_obj_t</a> *obj);</p>
<td class="description">Object</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Object subtype</p>
<p class="description">Object subtype name or <code>NULL</code> for none</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function returns an object's PDF subtype name, if any. Common subtype
names include:
</p><ul>
<li>&quot;CIDFontType0&quot;: A CID Type0 font
</li>
<li>&quot;CIDFontType2&quot;: A CID TrueType font
</li>
<li>&quot;Image&quot;: An image or image mask
</li>
<li>&quot;Form&quot;: A fillable form
</li>
<li>&quot;OpenType&quot;: An OpenType font
</li>
<li>&quot;Type0&quot;: A composite font
</li>
<li>&quot;Type1&quot;: A PostScript Type1 font
</li>
<li>&quot;Type3&quot;: A PDF Type3 font
</li>
<li>&quot;TrueType&quot;: A TrueType font</li>
</ul>
<h3 class="function"><a id="pdfioObjGetType">pdfioObjGetType</a></h3>
<p class="description">Get an object's type.</p>
<p class="code">
@ -3501,7 +3583,28 @@ size_t pdfioObjGetNumber(<a href="#pdfio_obj_t">pdfio_obj_t</a> *obj);</p>
<td class="description">Object</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Object type</p>
<p class="description">Object type name or <code>NULL</code> for none</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function returns an object's PDF type name, if any. Common type names
include:
</p><ul>
<li>&quot;CMap&quot;: A character map for composite fonts
</li>
<li>&quot;Font&quot;: An embedded font (<a href="#pdfioObjGetSubtype"><code>pdfioObjGetSubtype</code></a> will tell you the
font format)
</li>
<li>&quot;FontDescriptor&quot;: A font descriptor
</li>
<li>&quot;Page&quot;: A (visible) page
</li>
<li>&quot;Pages&quot;: A page tree node
</li>
<li>&quot;Template&quot;: An invisible template page
</li>
<li>&quot;XObject&quot;: An image, image mask, or form (<a href="#pdfioObjGetSubtype"><code>pdfioObjGetSubtype</code></a> will
tell you which)</li>
</ul>
<h3 class="function"><a id="pdfioObjOpenStream">pdfioObjOpenStream</a></h3>
<p class="description">Open an object's (data) stream for reading.</p>
<p class="code">

View File

@ -202,7 +202,74 @@ Each page is represented by a "page tree" object (what [`pdfioFileGetPage`](@@)
returns) that specifies information about the page and one or more "content"
objects that contain the images, fonts, text, and graphics that appear on the
page. Use the [`pdfioPageGetNumStreams`](@@) and [`pdfioPageOpenStream`](@@)
functions to access the content streams for each page.
functions to access the content streams for each page, and
[`pdfioObjGetDict`](@@) to get the associated page object dictionary. For
example, if you want to display the media and crop boxes for a given page:
```c
pdfio_file_t *pdf; // PDF file
size_t i; // Looping var
size_t count; // Number of pages
pdfio_obj_t *page; // Current page
pdfio_dict_t *dict; // Current page dictionary
pdfio_array_t *media_box; // MediaBox array
double media_values[4]; // MediaBox values
pdfio_array_t *crop_box; // CropBox array
double crop_values[4]; // CropBox values
// Iterate the pages in the PDF file
for (i = 0, count = pdfioFileGetNumPages(pdf); i < count; i ++)
{
page = pdfioFileGetPage(pdf, i);
dict = pdfioObjGetDict(page);
media_box = pdfioDictGetArray(dict, "MediaBox");
media_values[0] = pdfioArrayGetNumber(media_box, 0);
media_values[1] = pdfioArrayGetNumber(media_box, 1);
media_values[2] = pdfioArrayGetNumber(media_box, 2);
media_values[3] = pdfioArrayGetNumber(media_box, 3);
crop_box = pdfioDictGetArray(dict, "CropBox");
crop_values[0] = pdfioArrayGetNumber(crop_box, 0);
crop_values[1] = pdfioArrayGetNumber(crop_box, 1);
crop_values[2] = pdfioArrayGetNumber(crop_box, 2);
crop_values[3] = pdfioArrayGetNumber(crop_box, 3);
printf("Page %u: MediaBox=[%g %g %g %g], CropBox=[%g %g %g %g]\n",
(unsigned)(i + 1),
media_values[0], media_values[1], media_values[2], media_values[3],
crop_values[0], crop_values[1], crop_values[2], crop_values[3]);
}
```
Page object dictionaries have several (mostly optional) key/value pairs,
including:
- "Annots": An array of annotation dictionaries for the page; use
[`pdfioDictGetArray`](@@) to get the array
- "CropBox": The crop box as an array of four numbers for the left, bottom,
right, and top coordinates of the target media; use [`pdfioDictGetArray`](@@)
to get a pointer to the array of numbers
- "Dur": The number of seconds the page should be displayed; use
[`pdfioDictGetNumber`](@@) to get the page duration value
- "Group": The dictionary of transparency group values for the page; use
[`pdfioDictGetDict`](@@) to get a pointer to the transparency group dictionary
- "LastModified": The date and time when this page was last modified; use
[`pdfioDictGetDate`](@@) to get the Unix `time_t` value
- "Parent": The parent page tree node object for this page; use
[`pdfioDictGetObj`](@@) to get a pointer to the object
- "MediaBox": The media box as an array of four numbers for the left, bottom,
right, and top coordinates of the target media; use [`pdfioDictGetArray`](@@)
to get a pointer to the array of numbers
- "Resources": The dictionary of resources for the page; use
[`pdfioDictGetDict`](@@) to get a pointer to the resources dictionary
- "Rotate": A number indicating the number of degrees of counter-clockwise
rotation to apply to the page when viewing; use [`pdfioDictGetNumber`](@@)
to get the rotation angle
- "Thumb": A thumbnail image object for the page; use [`pdfioDictGetObj`](@@)
to get a pointer to the thumbnail image object
- "Trans": The page transition dictionary; use [`pdfioDictGetDict`](@@) to get
a pointer to the dictionary
The [`pdfioFileClose`](@@) function closes a PDF file and frees all memory that
was used for it:

View File

@ -363,6 +363,9 @@ _pdfioArrayDebug(pdfio_array_t *a, // I - Array
_pdfio_value_t *v; // Current value
if (!a)
return;
putc('[', fp);
for (i = a->num_values, v = a->values; i > 0; i --, v ++)
_pdfioValueDebug(v, fp);

View File

@ -194,6 +194,9 @@ _pdfioDictDebug(pdfio_dict_t *dict, // I - Dictionary
_pdfio_pair_t *pair; // Current pair
if (!dict)
return;
for (i = dict->num_pairs, pair = dict->pairs; i > 0; i --, pair ++)
{
fprintf(fp, "/%s", pair->key);

View File

@ -188,6 +188,8 @@ pdfioFileCreate(
int fd; // File descriptor
PDFIO_DEBUG("pdfioFileCreate(filename=\"%s\", version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
// Range check input...
if (!filename)
return (NULL);
@ -390,6 +392,8 @@ pdfioFileCreateOutput(
pdfio_error_cb_t error_cb, // I - Error callback or `NULL` for default
void *error_cbdata) // I - Error callback data, if any
{
PDFIO_DEBUG("pdfioFileCreate(output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
return (create_common("output.pdf", /*fd*/-1, output_cb, output_cbdata, version, media_box, crop_box, error_cb, error_cbdata));
}
@ -524,6 +528,8 @@ pdfioFileCreateTemporary(
unsigned tmpnum; // Temporary filename number
PDFIO_DEBUG("pdfioFileCreate(buffer=%p, bufsize=%lu, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", (void *)buffer, (unsigned long)bufsize, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
// Range check input...
if (!buffer || bufsize < 32)
{
@ -648,11 +654,12 @@ pdfioFileFindObj(
if ((current = number - 1) >= pdf->num_objs)
current = pdf->num_objs / 2;
PDFIO_DEBUG("pdfioFileFindObj: objs[current=%lu]=%p\n", (unsigned long)current, (void *)pdf->objs[current]);
PDFIO_DEBUG("pdfioFileFindObj: objs[current=%lu]=%p(%lu)\n", (unsigned long)current, (void *)pdf->objs[current], (unsigned long)(pdf->objs[current] ? pdf->objs[current]->number : 0));
if (number == pdf->objs[current]->number)
{
// Fast match...
PDFIO_DEBUG("pdfioFileFindObj: Returning %lu (%p)\n", (unsigned long)current, pdf->objs[current]);
return (pdf->objs[current]);
}
else if (number < pdf->objs[current]->number)
@ -679,11 +686,20 @@ pdfioFileFindObj(
}
if (number == pdf->objs[left]->number)
{
PDFIO_DEBUG("pdfioFileFindObj: Returning %lu (%p)\n", (unsigned long)left, pdf->objs[left]);
return (pdf->objs[left]);
}
else if (number == pdf->objs[right]->number)
{
PDFIO_DEBUG("pdfioFileFindObj: Returning %lu (%p)\n", (unsigned long)right, pdf->objs[right]);
return (pdf->objs[right]);
}
else
{
PDFIO_DEBUG("pdfioFileFindObj: Returning NULL\n");
return (NULL);
}
}
@ -928,6 +944,8 @@ pdfioFileOpen(
off_t xref_offset; // Offset to xref table
PDFIO_DEBUG("pdfioFileOpen(filename=\"%s\", password_cb=%p, password_cbdata=%p, error_cb=%p, error_cbdata=%p)\n", filename, (void *)password_cb, (void *)password_cbdata, (void *)error_cb, (void *)error_cbdata);
// Range check input...
if (!filename)
return (NULL);
@ -1285,6 +1303,8 @@ create_common(
unsigned char id_value[16]; // File ID value
PDFIO_DEBUG("create_common(filename=\"%s\", fd=%d, output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, fd, (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
// Range check input...
if (!filename || (fd < 0 && !output_cb))
return (NULL);

View File

@ -1,7 +1,7 @@
//
// PDF object functions for PDFio.
//
// Copyright © 2021-2023 by Michael R Sweet.
// Copyright © 2021-2024 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -347,8 +347,21 @@ pdfioObjGetNumber(pdfio_obj_t *obj) // I - Object
//
// 'pdfioObjGetSubtype()' - Get an object's subtype.
//
// This function returns an object's PDF subtype name, if any. Common subtype
// names include:
//
// - "CIDFontType0": A CID Type0 font
// - "CIDFontType2": A CID TrueType font
// - "Image": An image or image mask
// - "Form": A fillable form
// - "OpenType": An OpenType font
// - "Type0": A composite font
// - "Type1": A PostScript Type1 font
// - "Type3": A PDF Type3 font
// - "TrueType": A TrueType font
//
const char * // O - Object subtype
const char * // O - Object subtype name or `NULL` for none
pdfioObjGetSubtype(pdfio_obj_t *obj) // I - Object
{
pdfio_dict_t *dict; // Object dictionary
@ -364,8 +377,21 @@ pdfioObjGetSubtype(pdfio_obj_t *obj) // I - Object
//
// 'pdfioObjGetType()' - Get an object's type.
//
// This function returns an object's PDF type name, if any. Common type names
// include:
//
// - "CMap": A character map for composite fonts
// - "Font": An embedded font (@link pdfioObjGetSubtype@ will tell you the
// font format)
// - "FontDescriptor": A font descriptor
// - "Page": A (visible) page
// - "Pages": A page tree node
// - "Template": An invisible template page
// - "XObject": An image, image mask, or form (@link pdfioObjGetSubtype@ will
// tell you which)
//
const char * // O - Object type
const char * // O - Object type name or `NULL` for none
pdfioObjGetType(pdfio_obj_t *obj) // I - Object
{
pdfio_dict_t *dict; // Object dictionary

View File

@ -408,6 +408,7 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
pdfio_stream_t *st; // Stream
pdfio_dict_t *dict = pdfioObjGetDict(obj);
// Object dictionary
const char *type; // Object type
PDFIO_DEBUG("_pdfioStreamOpen(obj=%p(%u), decode=%s)\n", obj, (unsigned)obj->number, decode ? "true" : "false");
@ -434,7 +435,9 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
return (NULL);
}
if (obj->pdf->encryption)
type = pdfioObjGetType(obj);
if (obj->pdf->encryption && (!type || strcmp(type, "XRef")))
{
uint8_t iv[64]; // Initialization vector
size_t ivlen; // Length of initialization vector, if any
@ -1061,19 +1064,11 @@ stream_read(pdfio_stream_t *st, // I - Stream
st->flate.next_out = (Bytef *)buffer;
st->flate.avail_out = (uInt)bytes;
avail_in = st->flate.avail_in;
avail_out = st->flate.avail_out;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
return (-1);
}
else if (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out)
{
_pdfioFileError(st->pdf, "Corrupt stream data.");
return (-1);
}
return (st->flate.next_out - (Bytef *)buffer);
}

View File

@ -215,6 +215,9 @@ void
_pdfioValueDebug(_pdfio_value_t *v, // I - Value
FILE *fp) // I - Output file
{
if (!v)
return;
switch (v->type)
{
case PDFIO_VALTYPE_ARRAY :

View File

@ -23,7 +23,7 @@ extern "C" {
// Version number...
//
# define PDFIO_VERSION "1.3.1"
# define PDFIO_VERSION "1.3.2"
//

View File

@ -27,7 +27,7 @@
//
static int do_crypto_tests(void);
static int do_test_file(const char *filename, int objnum, bool verbose);
static int do_test_file(const char *filename, int objnum, const char *password, bool verbose);
static int do_unit_tests(void);
static int draw_image(pdfio_stream_t *st, const char *name, double x, double y, double w, double h, const char *label);
static bool error_cb(pdfio_file_t *pdf, const char *message, bool *error);
@ -37,6 +37,7 @@ static const char *password_cb(void *data, const char *filename);
static int read_unit_file(const char *filename, size_t num_pages, size_t first_image, bool is_output);
static ssize_t token_consume_cb(const char **s, size_t bytes);
static ssize_t token_peek_cb(const char **s, char *buffer, size_t bytes);
static int usage(FILE *fp);
static int verify_image(pdfio_file_t *pdf, size_t number);
static int write_alpha_test(pdfio_file_t *pdf, int number, pdfio_obj_t *font);
static int write_color_patch(pdfio_stream_t *st, bool device);
@ -59,22 +60,33 @@ int // O - Exit status
main(int argc, // I - Number of command-line arguments
char *argv[]) // I - Command-line arguments
{
int ret = 0; // Return value
int ret = 0; // Return value
fprintf(stderr, "testpdfio: Test locale is \"%s\".\n", setlocale(LC_ALL, getenv("LANG")));
if (argc > 1)
{
int i; // Looping var
const char *password = NULL; // Password
bool verbose = false; // Be verbose?
for (i = 1; i < argc; i ++)
{
if (!strcmp(argv[i], "--help"))
{
puts("Usage: ./testpdfio [--help] [--verbose] [filename [objnum] ...]");
return (0);
return (usage(stdout));
}
else if (!strcmp(argv[i], "--password"))
{
i ++;
if (i < argc)
{
password = argv[i];
}
else
{
fputs("testpdfio: Missing password after '--password'.\n", stderr);
return (usage(stderr));
}
}
else if (!strcmp(argv[i], "--verbose"))
{
@ -82,24 +94,27 @@ main(int argc, // I - Number of command-line arguments
}
else if (argv[i][0] == '-')
{
printf("Unknown option '%s'.\n\n", argv[i]);
puts("Usage: ./testpdfio [--help] [--verbose] [filename [objnum] ...]");
return (1);
fprintf(stderr, "testpdfio: Unknown option '%s'.\n", argv[i]);
return (usage(stderr));
}
else if ((i + 1) < argc && isdigit(argv[i + 1][0] & 255))
{
// filename.pdf object-number
if (do_test_file(argv[i], atoi(argv[i + 1]), verbose))
if (do_test_file(argv[i], atoi(argv[i + 1]), password, verbose))
ret = 1;
i ++;
}
else if (do_test_file(argv[i], 0, verbose))
else if (do_test_file(argv[i], 0, password, verbose))
{
ret = 1;
}
}
}
else
{
fprintf(stderr, "testpdfio: Test locale is \"%s\".\n", setlocale(LC_ALL, getenv("LANG")));
#if _WIN32
// Windows puts executables in Platform/Configuration subdirs...
if (!_access("../../testfiles", 0))
@ -363,6 +378,7 @@ do_crypto_tests(void)
static int // O - Exit status
do_test_file(const char *filename, // I - PDF filename
int objnum, // I - Object number to dump, if any
const char *password, // I - Password for file
bool verbose) // I - Be verbose?
{
bool error = false; // Have we shown an error yet?
@ -381,7 +397,7 @@ do_test_file(const char *filename, // I - PDF filename
fflush(stdout);
}
if ((pdf = pdfioFileOpen(filename, /*password_cb*/NULL, /*password_data*/NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL)
if ((pdf = pdfioFileOpen(filename, password_cb, (void *)password, (pdfio_error_cb_t)error_cb, &error)) != NULL)
{
if (objnum)
{
@ -1559,6 +1575,23 @@ token_peek_cb(const char **s, // IO - Test string
}
//
// 'usage()' - Show program usage.
//
// Writes the command-line synopsis followed by the list of supported options
// to "fp".  The return value is meant to be used directly as the program exit
// status: 0 when the text was written to `stdout` and 1 for any other stream.
//
static int // O - Exit status (0 for `stdout`, 1 otherwise)
usage(FILE *fp) // I - Output file
{
  // Help text, one entry per output line...
  static const char * const help_lines[] =
  {
    "Usage: ./testpdfio [OPTIONS] [FILENAME [OBJNUM]] ...\n",
    "Options:\n",
    " --help Show program help.\n",
    " --password PASSWORD Set PDF password.\n",
    " --verbose Be verbose.\n"
  };
  size_t idx; // Current help line


  for (idx = 0; idx < (sizeof(help_lines) / sizeof(help_lines[0])); idx ++)
    fputs(help_lines[idx], fp);

  // Anything other than standard output counts as an error exit...
  if (fp == stdout)
    return (0);

  return (1);
}
//
// 'verify_image()' - Verify an image object.
//

View File

@ -3,7 +3,7 @@
//
// https://github.com/michaelrsweet/ttf
//
// Copyright © 2018-2023 by Michael R Sweet.
// Copyright © 2018-2024 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -120,6 +120,7 @@ test_font(const char *filename) // I - Font filename
printf("ttfCreate(\"%s\"): ", filename);
fflush(stdout);
if ((font = ttfCreate(filename, 0, error_cb, NULL)) != NULL)
puts("PASS");
else

50
ttf.c
View File

@ -62,7 +62,7 @@
# define O_CREAT _O_CREAT
# define O_TRUNC _O_TRUNC
typedef __int64 ssize_t; // POSIX type not present on Windows...
typedef __int64 ssize_t; // POSIX type not present on Windows... @private@
#else
# include <unistd.h>
@ -299,7 +299,28 @@ static unsigned seek_table(ttf_t *font, unsigned tag, unsigned offset, bool requ
//
// 'ttfCreate()' - Create a new font object for the named font family.
// 'ttfCreate()' - Create a new font object for the named font file.
//
// This function creates a new font object for the named TrueType or OpenType
// font file or collection. The "filename" argument specifies the name of the
// file to read.
//
// The "idx" argument specifies the font to load from a collection - the first
// font is number `0`. Once created, you can call the @link ttfGetNumFonts@
// function to determine whether the loaded font file is a collection with more
// than one font.
//
// The "err_cb" and "err_data" arguments specify a callback function and data
// pointer for receiving error messages. If "err_cb" is `NULL`, errors are sent
// to the `stderr` file. The callback function receives the data pointer and a
// text message string, for example:
//
// ```
// void my_err_cb(void *err_data, const char *message)
// {
// fprintf(stderr, "ERROR: %s\n", message);
// }
// ```
//
ttf_t * // O - New font object
@ -552,6 +573,10 @@ ttfGetAscent(ttf_t *font) // I - Font
//
// 'ttfGetBounds()' - Get the bounds of all characters in a font.
//
// This function gets the bounds of all characters in a font. The "bounds"
// argument is a pointer to a `ttf_rect_t` structure that will be filled with
// the limits for characters in the font scaled to a 1000x1000 unit square.
//
ttf_rect_t * // O - Bounds or `NULL` on error
ttfGetBounds(ttf_t *font, // I - Font
@ -633,8 +658,11 @@ ttfGetDescent(ttf_t *font) // I - Font
//
// 'ttfGetExtents()' - Get the extents of a UTF-8 string.
//
// This function computes the extents of a UTF-8 string when rendered using the
// specified font and size.
// This function computes the extents of the UTF-8 string "s" when rendered
// using the specified font "font" and size "size". The "extents" argument is
// a pointer to a `ttf_rect_t` structure that is filled with the extents of a
// simple rendering of the string with no kerning or rewriting applied. The
// values are scaled using the specified font size.
//
ttf_rect_t * // O - Pointer to extents or `NULL` on error
@ -1281,6 +1309,8 @@ read_cmap(ttf_t *font) // I - Font
if (segment->startCode > segment->endCode)
{
errorf(font, "Bad cmap table segment %u to %u.", segments->startCode, segment->endCode);
free(segments);
free(glyphIdArray);
return (false);
}
@ -1295,9 +1325,11 @@ read_cmap(ttf_t *font) // I - Font
TTF_DEBUG("read_cmap: glyphIdArray[%d]=%d\n", i, glyphIdArray[i]);
#endif /* DEBUG */
if (font->num_cmap > TTF_FONT_MAX_CHAR)
if (font->num_cmap == 0 || font->num_cmap > TTF_FONT_MAX_CHAR)
{
errorf(font, "Invalid cmap table with %u characters.", (unsigned)font->num_cmap);
free(segments);
free(glyphIdArray);
return (false);
}
@ -1394,6 +1426,7 @@ read_cmap(ttf_t *font) // I - Font
if (group->startCharCode > group->endCharCode)
{
errorf(font, "Bad cmap table segment %u to %u.", group->startCharCode, group->endCharCode);
free(groups);
return (false);
}
@ -1405,9 +1438,10 @@ read_cmap(ttf_t *font) // I - Font
// uncompressed cmap table...
TTF_DEBUG("read_cmap: num_cmap=%u\n", (unsigned)font->num_cmap);
if (font->num_cmap > TTF_FONT_MAX_CHAR)
if (font->num_cmap == 0 || font->num_cmap > TTF_FONT_MAX_CHAR)
{
errorf(font, "Invalid cmap table with %u characters.", (unsigned)font->num_cmap);
free(groups);
return (false);
}
@ -1483,6 +1517,7 @@ read_cmap(ttf_t *font) // I - Font
if (group->startCharCode > group->endCharCode)
{
errorf(font, "Bad cmap table segment %u to %u.", group->startCharCode, group->endCharCode);
free(groups);
return (false);
}
@ -1494,9 +1529,10 @@ read_cmap(ttf_t *font) // I - Font
// uncompressed cmap table...
TTF_DEBUG("read_cmap: num_cmap=%u\n", (unsigned)font->num_cmap);
if (font->num_cmap > TTF_FONT_MAX_CHAR)
if (font->num_cmap == 0 || font->num_cmap > TTF_FONT_MAX_CHAR)
{
errorf(font, "Invalid cmap table with %u characters.", (unsigned)font->num_cmap);
free(groups);
return (false);
}

16
ttf.h
View File

@ -3,7 +3,7 @@
//
// https://github.com/michaelrsweet/ttf
//
// Copyright © 2018-2023 by Michael R Sweet.
// Copyright © 2018-2024 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -22,12 +22,12 @@ extern "C" {
// Types...
//
typedef struct _ttf_s ttf_t; //// Font object
typedef struct _ttf_s ttf_t; // Font object
typedef void (*ttf_err_cb_t)(void *data, const char *message);
//// Font error callback
// Font error callback
typedef enum ttf_stretch_e //// Font stretch
typedef enum ttf_stretch_e // Font stretch
{
TTF_STRETCH_NORMAL, // normal
TTF_STRETCH_ULTRA_CONDENSED, // ultra-condensed
@ -40,20 +40,20 @@ typedef enum ttf_stretch_e //// Font stretch
TTF_STRETCH_ULTRA_EXPANDED // ultra-expanded
} ttf_stretch_t;
typedef enum ttf_style_e //// Font style
typedef enum ttf_style_e // Font style
{
TTF_STYLE_NORMAL, // Normal font
TTF_STYLE_ITALIC, // Italic font
TTF_STYLE_OBLIQUE // Oblique (angled) font
} ttf_style_t;
typedef enum ttf_variant_e //// Font variant
typedef enum ttf_variant_e // Font variant
{
TTF_VARIANT_NORMAL, // Normal font
TTF_VARIANT_SMALL_CAPS // Font whose lowercase letters are small capitals
} ttf_variant_t;
typedef enum ttf_weight_e //// Font weight
typedef enum ttf_weight_e // Font weight
{
TTF_WEIGHT_100 = 100, // Weight 100 (Thin)
TTF_WEIGHT_200 = 200, // Weight 200 (Extra/Ultra-Light)
@ -66,7 +66,7 @@ typedef enum ttf_weight_e //// Font weight
TTF_WEIGHT_900 = 900 // Weight 900 (Black/Heavy)
} ttf_weight_t;
typedef struct ttf_rect_s //// Bounding rectangle
typedef struct ttf_rect_s // Bounding rectangle
{
float left; // Left offset
float top; // Top offset