Update pdfioinfo example to support Acrobat Form dictionaries as well as indirect references (Issue #114)

This commit is contained in:
Michael R Sweet 2025-04-04 21:24:42 -04:00
parent 0bd9edc845
commit 130cef8702
No known key found for this signature in database
GPG Key ID: BE67C75EC81F3244
5 changed files with 311 additions and 47 deletions

View File

@ -6,6 +6,7 @@ v1.5.2 - YYYY-MM-DD
-------------------
- Updated maximum allowed PDF string size to 64k (Issue #117)
- Fixed form detection in `pdfioinfo` example code (Issue #114)
- Fixed parsing of certain date/time values (Issue #115)
- Fixed support for empty name values (Issue #116)

View File

@ -1,4 +1,4 @@
.TH pdfio 3 "pdf read/write library" "2025-03-06" "pdf read/write library"
.TH pdfio 3 "pdf read/write library" "2025-04-04" "pdf read/write library"
.SH NAME
pdfio \- pdf read/write library
.SH Introduction
@ -1047,11 +1047,26 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c
{
const char *filename; // PDF filename
pdfio_file_t *pdf; // PDF file
const char *author; // Author name
time_t creation_date; // Creation date
struct tm *creation_tm; // Creation date/time information
char creation_text[256]; // Creation date/time as a string
const char *title; // Title
pdfio_dict_t *catalog; // Catalog dictionary
const char *author, // Author name
*creator, // Creator name
*producer, // Producer name
*title; // Title
time_t creation_date, // Creation date
modification_date; // Modification date
struct tm *creation_tm, // Creation date/time information
*modification_tm; // Modification date/time information
char creation_text[256], // Creation date/time as a string
modification_text[256], // Modification date/time as a string
range_text[255]; // Page range text
size_t num_pages; // PDF number of pages
bool has_acroform; // Does the file have an AcroForm?
pdfio_obj_t *page; // Object
pdfio_dict_t *page_dict; // Object dictionary
size_t cur, // Current page index
prev; // Previous page index
pdfio_rect_t cur_box, // Current MediaBox
prev_box; // Previous MediaBox
// Get the filename from the command\-line...
@ -1064,14 +1079,20 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c
filename = argv[1];
// Open the PDF file with the default callbacks...
pdf = pdfioFileOpen(filename, /*password_cb*/NULL, /*password_cbdata*/NULL,
/*error_cb*/NULL, /*error_cbdata*/NULL);
pdf = pdfioFileOpen(filename, /*password_cb*/NULL,
/*password_cbdata*/NULL, /*error_cb*/NULL,
/*error_cbdata*/NULL);
if (pdf == NULL)
return (1);
// Get the title and author...
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
// Get the title, author, etc...
catalog = pdfioFileGetCatalog(pdf);
author = pdfioFileGetAuthor(pdf);
creator = pdfioFileGetCreator(pdf);
has_acroform = pdfioDictGetType(catalog, "AcroForm") != PDFIO_VALTYPE_NONE;
num_pages = pdfioFileGetNumPages(pdf);
producer = pdfioFileGetProducer(pdf);
title = pdfioFileGetTitle(pdf);
// Get the creation date and convert to a string...
if ((creation_date = pdfioFileGetCreationDate(pdf)) > 0)
@ -1084,12 +1105,76 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c
snprintf(creation_text, sizeof(creation_text), "\-\- not set \-\-");
}
// Get the modification date and convert to a string...
if ((modification_date = pdfioFileGetModificationDate(pdf)) > 0)
{
modification_tm = localtime(&modification_date);
strftime(modification_text, sizeof(modification_text), "%c", modification_tm);
}
else
{
snprintf(modification_text, sizeof(modification_text), "\-\- not set \-\-");
}
// Print file information to stdout...
printf("%s:\\n", filename);
printf(" Title: %s\\n", title ? title : "\-\- not set \-\-");
printf(" Author: %s\\n", author ? author : "\-\- not set \-\-");
printf(" Created On: %s\\n", creation_text);
printf(" Number Pages: %u\\n", (unsigned)pdfioFileGetNumPages(pdf));
printf(" Title: %s\\n", title ? title : "\-\- not set \-\-");
printf(" Author: %s\\n", author ? author : "\-\- not set \-\-");
printf(" Creator: %s\\n", creator ? creator : "\-\- not set \-\-");
printf(" Producer: %s\\n", producer ? producer : "\-\- not set \-\-");
printf(" Created On: %s\\n", creation_text);
printf(" Modified On: %s\\n", modification_text);
printf(" Version: %s\\n", pdfioFileGetVersion(pdf));
printf(" AcroForm: %s\\n", has_acroform ? "Yes" : "No");
printf(" Number of Pages: %u\\n", (unsigned)num_pages);
// Report the MediaBox for all of the pages
prev_box.x1 = prev_box.x2 = prev_box.y1 = prev_box.y2 = 0.0;
for (cur = 0, prev = 0; cur < num_pages; cur ++)
{
// Find the MediaBox for this page in the page tree...
for (page = pdfioFileGetPage(pdf, cur);
page != NULL;
page = pdfioDictGetObj(page_dict, "Parent"))
{
cur_box.x1 = cur_box.x2 = cur_box.y1 = cur_box.y2 = 0.0;
page_dict = pdfioObjGetDict(page);
if (pdfioDictGetRect(page_dict, "MediaBox", &cur_box))
break;
}
// If this MediaBox is different from the previous one, show the range of
// pages that have that size...
if (cur == 0 ||
fabs(cur_box.x1 \- prev_box.x1) > 0.01 ||
fabs(cur_box.y1 \- prev_box.y1) > 0.01 ||
fabs(cur_box.x2 \- prev_box.x2) > 0.01 ||
fabs(cur_box.y2 \- prev_box.y2) > 0.01)
{
if (cur > prev)
{
snprintf(range_text, sizeof(range_text), "Pages %u\-%u",
(unsigned)(prev + 1), (unsigned)cur);
printf("%16s: [%g %g %g %g]\\n", range_text,
prev_box.x1, prev_box.y1, prev_box.x2, prev_box.y2);
}
// Start a new series of pages with the new size...
prev = cur;
prev_box = cur_box;
}
}
// Show the last range as needed...
if (cur > prev)
{
snprintf(range_text, sizeof(range_text), "Pages %u\-%u",
(unsigned)(prev + 1), (unsigned)cur);
printf("%16s: [%g %g %g %g]\\n", range_text,
prev_box.x1, prev_box.y1, prev_box.x2, prev_box.y2);
}
// Close the PDF file...
pdfioFileClose(pdf);
@ -4590,6 +4675,10 @@ bool pdfioStreamPrintf (
...
);
.fi
.PP
This function writes a formatted string to a stream. In addition to the
standard \fBprintf\fR format characters, you can use "%N" to format a PDF name
value ("/Name") and "%S" to format a PDF string value ("(String)").
.SS pdfioStreamPutChar
Write a single character to a stream.
.PP

View File

@ -1,13 +1,13 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>PDFio Programming Manual v1.5.0</title>
<title>PDFio Programming Manual v1.5.2</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="generator" content="codedoc v3.8">
<meta name="author" content="Michael R Sweet">
<meta name="language" content="en-US">
<meta name="copyright" content="Copyright © 2021-2025 by Michael R Sweet">
<meta name="version" content="1.5.0">
<meta name="version" content="1.5.2">
<style type="text/css"><!--
body {
background: white;
@ -251,7 +251,7 @@ span.string {
<body>
<div class="header">
<p><img class="title" src="pdfio-512.png"></p>
<h1 class="title">PDFio Programming Manual v1.5.0</h1>
<h1 class="title">PDFio Programming Manual v1.5.2</h1>
<p>Michael R Sweet</p>
<p>Copyright © 2021-2025 by Michael R Sweet</p>
</div>
@ -1165,11 +1165,26 @@ main(<span class="reserved">int</span> argc, <span clas
{
<span class="reserved">const</span> <span class="reserved">char</span> *filename; <span class="comment">// PDF filename</span>
pdfio_file_t *pdf; <span class="comment">// PDF file</span>
<span class="reserved">const</span> <span class="reserved">char</span> *author; <span class="comment">// Author name</span>
time_t creation_date; <span class="comment">// Creation date</span>
<span class="reserved">struct</span> tm *creation_tm; <span class="comment">// Creation date/time information</span>
<span class="reserved">char</span> creation_text[<span class="number">256</span>]; <span class="comment">// Creation date/time as a string</span>
<span class="reserved">const</span> <span class="reserved">char</span> *title; <span class="comment">// Title</span>
pdfio_dict_t *catalog; <span class="comment">// Catalog dictionary</span>
<span class="reserved">const</span> <span class="reserved">char</span> *author, <span class="comment">// Author name</span>
*creator, <span class="comment">// Creator name</span>
*producer, <span class="comment">// Producer name</span>
*title; <span class="comment">// Title</span>
time_t creation_date, <span class="comment">// Creation date</span>
modification_date; <span class="comment">// Modification date</span>
<span class="reserved">struct</span> tm *creation_tm, <span class="comment">// Creation date/time information</span>
*modification_tm; <span class="comment">// Modification date/time information</span>
<span class="reserved">char</span> creation_text[<span class="number">256</span>], <span class="comment">// Creation date/time as a string</span>
modification_text[<span class="number">256</span>], <span class="comment">// Modification date/time as a string</span>
range_text[<span class="number">255</span>]; <span class="comment">// Page range text</span>
size_t num_pages; <span class="comment">// PDF number of pages</span>
<span class="reserved">bool</span> has_acroform; <span class="comment">// Does the file have an AcroForm?</span>
pdfio_obj_t *page; <span class="comment">// Object</span>
pdfio_dict_t *page_dict; <span class="comment">// Object dictionary</span>
size_t cur, <span class="comment">// Current page index</span>
prev; <span class="comment">// Previous page index</span>
pdfio_rect_t cur_box, <span class="comment">// Current MediaBox</span>
prev_box; <span class="comment">// Previous MediaBox</span>
<span class="comment">// Get the filename from the command-line...</span>
@ -1182,14 +1197,20 @@ main(<span class="reserved">int</span> argc, <span clas
filename = argv[<span class="number">1</span>];
<span class="comment">// Open the PDF file with the default callbacks...</span>
pdf = pdfioFileOpen(filename, <span class="comment">/*password_cb*/</span>NULL, <span class="comment">/*password_cbdata*/</span>NULL,
<span class="comment">/*error_cb*/</span>NULL, <span class="comment">/*error_cbdata*/</span>NULL);
pdf = pdfioFileOpen(filename, <span class="comment">/*password_cb*/</span>NULL,
<span class="comment">/*password_cbdata*/</span>NULL, <span class="comment">/*error_cb*/</span>NULL,
<span class="comment">/*error_cbdata*/</span>NULL);
<span class="reserved">if</span> (pdf == NULL)
<span class="reserved">return</span> (<span class="number">1</span>);
<span class="comment">// Get the title and author...</span>
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
<span class="comment">// Get the title, author, etc...</span>
catalog = pdfioFileGetCatalog(pdf);
author = pdfioFileGetAuthor(pdf);
creator = pdfioFileGetCreator(pdf);
has_acroform = pdfioDictGetType(catalog, <span class="string">&quot;AcroForm&quot;</span>) != PDFIO_VALTYPE_NONE;
num_pages = pdfioFileGetNumPages(pdf);
producer = pdfioFileGetProducer(pdf);
title = pdfioFileGetTitle(pdf);
<span class="comment">// Get the creation date and convert to a string...</span>
<span class="reserved">if</span> ((creation_date = pdfioFileGetCreationDate(pdf)) &gt; <span class="number">0</span>)
@ -1202,12 +1223,76 @@ main(<span class="reserved">int</span> argc, <span clas
snprintf(creation_text, <span class="reserved">sizeof</span>(creation_text), <span class="string">&quot;-- not set --&quot;</span>);
}
<span class="comment">// Get the modification date and convert to a string...</span>
<span class="reserved">if</span> ((modification_date = pdfioFileGetModificationDate(pdf)) &gt; <span class="number">0</span>)
{
modification_tm = localtime(&amp;modification_date);
strftime(modification_text, <span class="reserved">sizeof</span>(modification_text), <span class="string">&quot;%c&quot;</span>, modification_tm);
}
<span class="reserved">else</span>
{
snprintf(modification_text, <span class="reserved">sizeof</span>(modification_text), <span class="string">&quot;-- not set --&quot;</span>);
}
<span class="comment">// Print file information to stdout...</span>
printf(<span class="string">&quot;%s:\n&quot;</span>, filename);
printf(<span class="string">&quot; Title: %s\n&quot;</span>, title ? title : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Author: %s\n&quot;</span>, author ? author : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Created On: %s\n&quot;</span>, creation_text);
printf(<span class="string">&quot; Number Pages: %u\n&quot;</span>, (<span class="reserved">unsigned</span>)pdfioFileGetNumPages(pdf));
printf(<span class="string">&quot; Title: %s\n&quot;</span>, title ? title : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Author: %s\n&quot;</span>, author ? author : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Creator: %s\n&quot;</span>, creator ? creator : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Producer: %s\n&quot;</span>, producer ? producer : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Created On: %s\n&quot;</span>, creation_text);
printf(<span class="string">&quot; Modified On: %s\n&quot;</span>, modification_text);
printf(<span class="string">&quot; Version: %s\n&quot;</span>, pdfioFileGetVersion(pdf));
printf(<span class="string">&quot; AcroForm: %s\n&quot;</span>, has_acroform ? <span class="string">&quot;Yes&quot;</span> : <span class="string">&quot;No&quot;</span>);
printf(<span class="string">&quot; Number of Pages: %u\n&quot;</span>, (<span class="reserved">unsigned</span>)num_pages);
<span class="comment">// Report the MediaBox for all of the pages</span>
prev_box.x1 = prev_box.x2 = prev_box.y1 = prev_box.y2 = <span class="number">0.0</span>;
<span class="reserved">for</span> (cur = <span class="number">0</span>, prev = <span class="number">0</span>; cur &lt; num_pages; cur ++)
{
<span class="comment">// Find the MediaBox for this page in the page tree...</span>
<span class="reserved">for</span> (page = pdfioFileGetPage(pdf, cur);
page != NULL;
page = pdfioDictGetObj(page_dict, <span class="string">&quot;Parent&quot;</span>))
{
cur_box.x1 = cur_box.x2 = cur_box.y1 = cur_box.y2 = <span class="number">0.0</span>;
page_dict = pdfioObjGetDict(page);
<span class="reserved">if</span> (pdfioDictGetRect(page_dict, <span class="string">&quot;MediaBox&quot;</span>, &amp;cur_box))
<span class="reserved">break</span>;
}
<span class="comment">// If this MediaBox is different from the previous one, show the range of</span>
<span class="comment">// pages that have that size...</span>
<span class="reserved">if</span> (cur == <span class="number">0</span> ||
fabs(cur_box.x1 - prev_box.x1) &gt; <span class="number">0.01</span> ||
fabs(cur_box.y1 - prev_box.y1) &gt; <span class="number">0.01</span> ||
fabs(cur_box.x2 - prev_box.x2) &gt; <span class="number">0.01</span> ||
fabs(cur_box.y2 - prev_box.y2) &gt; <span class="number">0.01</span>)
{
<span class="reserved">if</span> (cur &gt; prev)
{
snprintf(range_text, <span class="reserved">sizeof</span>(range_text), <span class="string">&quot;Pages %u-%u&quot;</span>,
(<span class="reserved">unsigned</span>)(prev + <span class="number">1</span>), (<span class="reserved">unsigned</span>)cur);
printf(<span class="string">&quot;%16s: [%g %g %g %g]\n&quot;</span>, range_text,
prev_box.x1, prev_box.y1, prev_box.x2, prev_box.y2);
}
<span class="comment">// Start a new series of pages with the new size...</span>
prev = cur;
prev_box = cur_box;
}
}
<span class="comment">// Show the last range as needed...</span>
<span class="reserved">if</span> (cur &gt; prev)
{
snprintf(range_text, <span class="reserved">sizeof</span>(range_text), <span class="string">&quot;Pages %u-%u&quot;</span>,
(<span class="reserved">unsigned</span>)(prev + <span class="number">1</span>), (<span class="reserved">unsigned</span>)cur);
printf(<span class="string">&quot;%16s: [%g %g %g %g]\n&quot;</span>, range_text,
prev_box.x1, prev_box.y1, prev_box.x2, prev_box.y2);
}
<span class="comment">// Close the PDF file...</span>
pdfioFileClose(pdf);
@ -5081,6 +5166,10 @@ ssize_t pdfioStreamPeek(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st, <span
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function writes a formatted string to a stream. In addition to the
standard <code>printf</code> format characters, you can use &quot;%N&quot; to format a PDF name
value (&quot;/Name&quot;) and &quot;%S&quot; to format a PDF string value (&quot;(String)&quot;).</p>
<h3 class="function"><a id="pdfioStreamPutChar">pdfioStreamPutChar</a></h3>
<p class="description">Write a single character to a stream.</p>
<p class="code">

View File

@ -889,11 +889,26 @@ main(int argc, // I - Number of command-line arguments
{
const char *filename; // PDF filename
pdfio_file_t *pdf; // PDF file
const char *author; // Author name
time_t creation_date; // Creation date
struct tm *creation_tm; // Creation date/time information
char creation_text[256]; // Creation date/time as a string
const char *title; // Title
pdfio_dict_t *catalog; // Catalog dictionary
const char *author, // Author name
*creator, // Creator name
*producer, // Producer name
*title; // Title
time_t creation_date, // Creation date
modification_date; // Modification date
struct tm *creation_tm, // Creation date/time information
*modification_tm; // Modification date/time information
char creation_text[256], // Creation date/time as a string
modification_text[256], // Modification date/time as a string
range_text[255]; // Page range text
size_t num_pages; // PDF number of pages
bool has_acroform; // Does the file have an AcroForm?
pdfio_obj_t *page; // Object
pdfio_dict_t *page_dict; // Object dictionary
size_t cur, // Current page index
prev; // Previous page index
pdfio_rect_t cur_box, // Current MediaBox
prev_box; // Previous MediaBox
// Get the filename from the command-line...
@ -906,14 +921,20 @@ main(int argc, // I - Number of command-line arguments
filename = argv[1];
// Open the PDF file with the default callbacks...
pdf = pdfioFileOpen(filename, /*password_cb*/NULL, /*password_cbdata*/NULL,
/*error_cb*/NULL, /*error_cbdata*/NULL);
pdf = pdfioFileOpen(filename, /*password_cb*/NULL,
/*password_cbdata*/NULL, /*error_cb*/NULL,
/*error_cbdata*/NULL);
if (pdf == NULL)
return (1);
// Get the title and author...
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
// Get the title, author, etc...
catalog = pdfioFileGetCatalog(pdf);
author = pdfioFileGetAuthor(pdf);
creator = pdfioFileGetCreator(pdf);
has_acroform = pdfioDictGetType(catalog, "AcroForm") != PDFIO_VALTYPE_NONE;
num_pages = pdfioFileGetNumPages(pdf);
producer = pdfioFileGetProducer(pdf);
title = pdfioFileGetTitle(pdf);
// Get the creation date and convert to a string...
if ((creation_date = pdfioFileGetCreationDate(pdf)) > 0)
@ -926,12 +947,76 @@ main(int argc, // I - Number of command-line arguments
snprintf(creation_text, sizeof(creation_text), "-- not set --");
}
// Get the modification date and convert to a string...
if ((modification_date = pdfioFileGetModificationDate(pdf)) > 0)
{
modification_tm = localtime(&modification_date);
strftime(modification_text, sizeof(modification_text), "%c", modification_tm);
}
else
{
snprintf(modification_text, sizeof(modification_text), "-- not set --");
}
// Print file information to stdout...
printf("%s:\n", filename);
printf(" Title: %s\n", title ? title : "-- not set --");
printf(" Author: %s\n", author ? author : "-- not set --");
printf(" Created On: %s\n", creation_text);
printf(" Number Pages: %u\n", (unsigned)pdfioFileGetNumPages(pdf));
printf(" Title: %s\n", title ? title : "-- not set --");
printf(" Author: %s\n", author ? author : "-- not set --");
printf(" Creator: %s\n", creator ? creator : "-- not set --");
printf(" Producer: %s\n", producer ? producer : "-- not set --");
printf(" Created On: %s\n", creation_text);
printf(" Modified On: %s\n", modification_text);
printf(" Version: %s\n", pdfioFileGetVersion(pdf));
printf(" AcroForm: %s\n", has_acroform ? "Yes" : "No");
printf(" Number of Pages: %u\n", (unsigned)num_pages);
// Report the MediaBox for all of the pages
prev_box.x1 = prev_box.x2 = prev_box.y1 = prev_box.y2 = 0.0;
for (cur = 0, prev = 0; cur < num_pages; cur ++)
{
// Find the MediaBox for this page in the page tree...
for (page = pdfioFileGetPage(pdf, cur);
page != NULL;
page = pdfioDictGetObj(page_dict, "Parent"))
{
cur_box.x1 = cur_box.x2 = cur_box.y1 = cur_box.y2 = 0.0;
page_dict = pdfioObjGetDict(page);
if (pdfioDictGetRect(page_dict, "MediaBox", &cur_box))
break;
}
// If this MediaBox is different from the previous one, show the range of
// pages that have that size...
if (cur == 0 ||
fabs(cur_box.x1 - prev_box.x1) > 0.01 ||
fabs(cur_box.y1 - prev_box.y1) > 0.01 ||
fabs(cur_box.x2 - prev_box.x2) > 0.01 ||
fabs(cur_box.y2 - prev_box.y2) > 0.01)
{
if (cur > prev)
{
snprintf(range_text, sizeof(range_text), "Pages %u-%u",
(unsigned)(prev + 1), (unsigned)cur);
printf("%16s: [%g %g %g %g]\n", range_text,
prev_box.x1, prev_box.y1, prev_box.x2, prev_box.y2);
}
// Start a new series of pages with the new size...
prev = cur;
prev_box = cur_box;
}
}
// Show the last range as needed...
if (cur > prev)
{
snprintf(range_text, sizeof(range_text), "Pages %u-%u",
(unsigned)(prev + 1), (unsigned)cur);
printf("%16s: [%g %g %g %g]\n", range_text,
prev_box.x1, prev_box.y1, prev_box.x2, prev_box.y2);
}
// Close the PDF file...
pdfioFileClose(pdf);

View File

@ -68,7 +68,7 @@ main(int argc, // I - Number of command-line arguments
catalog = pdfioFileGetCatalog(pdf);
author = pdfioFileGetAuthor(pdf);
creator = pdfioFileGetCreator(pdf);
has_acroform = pdfioDictGetObj(catalog, "AcroForm") != NULL ? true : false;
has_acroform = pdfioDictGetType(catalog, "AcroForm") != PDFIO_VALTYPE_NONE;
num_pages = pdfioFileGetNumPages(pdf);
producer = pdfioFileGetProducer(pdf);
title = pdfioFileGetTitle(pdf);