From 130cef87024c451e14b5db086b3a38c4731f91af Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Fri, 4 Apr 2025 21:24:42 -0400 Subject: [PATCH] Update pdfioinfo example to support Acrobat Form dictionaries as well as indirect references (Issue #114) --- CHANGES.md | 1 + doc/pdfio.3 | 119 +++++++++++++++++++++++++++++++++++------ doc/pdfio.html | 123 +++++++++++++++++++++++++++++++++++++------ doc/pdfio.md | 113 ++++++++++++++++++++++++++++++++++----- examples/pdfioinfo.c | 2 +- 5 files changed, 311 insertions(+), 47 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index a181270..9f06329 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,7 @@ v1.5.2 - YYYY-MM-DD ------------------- - Updated maximum allowed PDF string size to 64k (Issue #117) +- Fixed form detection in `pdfioinfo` example code (Issue #114) - Fixed parsing of certain date/time values (Issue #115) - Fixed support for empty name values (Issue #116) diff --git a/doc/pdfio.3 b/doc/pdfio.3 index 0ec0417..9b2ab62 100644 --- a/doc/pdfio.3 +++ b/doc/pdfio.3 @@ -1,4 +1,4 @@ -.TH pdfio 3 "pdf read/write library" "2025-03-06" "pdf read/write library" +.TH pdfio 3 "pdf read/write library" "2025-04-04" "pdf read/write library" .SH NAME pdfio \- pdf read/write library .SH Introduction @@ -1047,11 +1047,26 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c { const char *filename; // PDF filename pdfio_file_t *pdf; // PDF file - const char *author; // Author name - time_t creation_date; // Creation date - struct tm *creation_tm; // Creation date/time information - char creation_text[256]; // Creation date/time as a string - const char *title; // Title + pdfio_dict_t *catalog; // Catalog dictionary + const char *author, // Author name + *creator, // Creator name + *producer, // Producer name + *title; // Title + time_t creation_date, // Creation date + modification_date; // Modification date + struct tm *creation_tm, // Creation date/time information + *modification_tm; // Modification date/time information + char creation_text[256], // Creation date/time as a string + modification_text[256], // Modification date/time human fmt string + range_text[255]; // Page range text + size_t num_pages; // PDF number of pages + bool has_acroform; // Does the file have an AcroForm? + pdfio_obj_t *page; // Object + pdfio_dict_t *page_dict; // Object dictionary + size_t cur, // Current page index + prev; // Previous page index + pdfio_rect_t cur_box, // Current MediaBox + prev_box; // Previous MediaBox // Get the filename from the command\-line... @@ -1064,14 +1079,20 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c filename = argv[1]; // Open the PDF file with the default callbacks... - pdf = pdfioFileOpen(filename, /*password_cb*/NULL, /*password_cbdata*/NULL, - /*error_cb*/NULL, /*error_cbdata*/NULL); + pdf = pdfioFileOpen(filename, /*password_cb*/NULL, + /*password_cbdata*/NULL, /*error_cb*/NULL, + /*error_cbdata*/NULL); if (pdf == NULL) return (1); - // Get the title and author... - author = pdfioFileGetAuthor(pdf); - title = pdfioFileGetTitle(pdf); + // Get the title, author, etc... + catalog = pdfioFileGetCatalog(pdf); + author = pdfioFileGetAuthor(pdf); + creator = pdfioFileGetCreator(pdf); + has_acroform = pdfioDictGetType(catalog, "AcroForm") != PDFIO_VALTYPE_NONE; + num_pages = pdfioFileGetNumPages(pdf); + producer = pdfioFileGetProducer(pdf); + title = pdfioFileGetTitle(pdf); // Get the creation date and convert to a string... if ((creation_date = pdfioFileGetCreationDate(pdf)) > 0) @@ -1084,12 +1105,76 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c snprintf(creation_text, sizeof(creation_text), "\-\- not set \-\-"); } + // Get the modification date and convert to a string... + if ((modification_date = pdfioFileGetModificationDate(pdf)) > 0) + { + modification_tm = localtime(&modification_date); + strftime(modification_text, sizeof(modification_text), "%c", modification_tm); + } + else + { + snprintf(modification_text, sizeof(modification_text), "\-\- not set \-\-"); + } + // Print file information to stdout... printf("%s:\\n", filename); - printf(" Title: %s\\n", title ? title : "\-\- not set \-\-"); - printf(" Author: %s\\n", author ? author : "\-\- not set \-\-"); - printf(" Created On: %s\\n", creation_text); - printf(" Number Pages: %u\\n", (unsigned)pdfioFileGetNumPages(pdf)); + printf(" Title: %s\\n", title ? title : "\-\- not set \-\-"); + printf(" Author: %s\\n", author ? author : "\-\- not set \-\-"); + printf(" Creator: %s\\n", creator ? creator : "\-\- not set \-\-"); + printf(" Producer: %s\\n", producer ? producer : "\-\- not set \-\-"); + printf(" Created On: %s\\n", creation_text); + printf(" Modified On: %s\\n", modification_text); + printf(" Version: %s\\n", pdfioFileGetVersion(pdf)); + printf(" AcroForm: %s\\n", has_acroform ? "Yes" : "No"); + printf(" Number of Pages: %u\\n", (unsigned)num_pages); + + // Report the MediaBox for all of the pages + prev_box.x1 = prev_box.x2 = prev_box.y1 = prev_box.y2 = 0.0; + + for (cur = 0, prev = 0; cur < num_pages; cur ++) + { + // Find the MediaBox for this page in the page tree... + for (page = pdfioFileGetPage(pdf, cur); + page != NULL; + page = pdfioDictGetObj(page_dict, "Parent")) + { + cur_box.x1 = cur_box.x2 = cur_box.y1 = cur_box.y2 = 0.0; + page_dict = pdfioObjGetDict(page); + + if (pdfioDictGetRect(page_dict, "MediaBox", &cur_box)) + break; + } + + // If this MediaBox is different from the previous one, show the range of + // pages that have that size... + if (cur == 0 || + fabs(cur_box.x1 \- prev_box.x1) > 0.01 || + fabs(cur_box.y1 \- prev_box.y1) > 0.01 || + fabs(cur_box.x2 \- prev_box.x2) > 0.01 || + fabs(cur_box.y2 \- prev_box.y2) > 0.01) + { + if (cur > prev) + { + snprintf(range_text, sizeof(range_text), "Pages %u\-%u", + (unsigned)(prev + 1), (unsigned)cur); + printf("%16s: [%g %g %g %g]\\n", range_text, + prev_box.x1, prev_box.y1, prev_box.x2, prev_box.y2); + } + + // Start a new series of pages with the new size... + prev = cur; + prev_box = cur_box; + } + } + + // Show the last range as needed... + if (cur > prev) + { + snprintf(range_text, sizeof(range_text), "Pages %u\-%u", + (unsigned)(prev + 1), (unsigned)cur); + printf("%16s: [%g %g %g %g]\\n", range_text, + prev_box.x1, prev_box.y1, prev_box.x2, prev_box.y2); + } // Close the PDF file... pdfioFileClose(pdf); @@ -4590,6 +4675,10 @@ bool pdfioStreamPrintf ( ... ); .fi +.PP +This function writes a formatted string to a stream. In addition to the +standard \fBprintf\fR format characters, you can use "%N" to format a PDF name +value ("/Name") and "%S" to format a PDF string ("(String)") value. .SS pdfioStreamPutChar Write a single character to a stream. .PP diff --git a/doc/pdfio.html b/doc/pdfio.html index 1ec01f5..7cd05ea 100644 --- a/doc/pdfio.html +++ b/doc/pdfio.html @@ -1,13 +1,13 @@ -PDFio Programming Manual v1.5.0 +PDFio Programming Manual v1.5.2 - +