Compare commits

...

3 Commits

Author SHA1 Message Date
Thierry LARONDE
51748c98a2
Merge 8b2b013b36ff05b3f98afed416fe3e87023b121a into 5b5de3aff6901b4553015eb40028504e64a25524 2025-02-05 13:34:09 +01:00
Michael R Sweet
5b5de3aff6
Update pdf2txt example to support font encodings. 2025-01-28 14:26:33 -05:00
Thierry LARONDE
8b2b013b36 Extend by adding pdfioFileGetModDate and extend the pdfioinfo example
When exploring a PDF, it may be convenient to have the typical
information delivered by some "Document Properties"---plus some more
about the MediaBox(es).

So just add the function to get the ModDate and extend the
pdfioinfo example, both as an illustration of what the library offers
and so that pdfioinfo can also serve as a debugging tool.

Signed-off-by: Thierry LARONDE <tlaronde@kergis.com>
2025-01-18 11:25:36 +01:00
6 changed files with 1385 additions and 13 deletions

View File

@ -1,6 +1,12 @@
Changes in PDFio
================
v1.?.? - YYYY-MM-DD
-------------------
- Updated the pdf2txt example to support font encodings.
v1.4.1 - 2025-01-24
-------------------

File diff suppressed because it is too large Load Diff

View File

@ -25,11 +25,18 @@ main(int argc, // I - Number of command-line arguments
{
const char *filename; // PDF filename
pdfio_file_t *pdf; // PDF file
const char *author; // Author name
time_t creation_date; // Creation date
struct tm *creation_tm; // Creation date/time information
char creation_text[256]; // Creation date/time as a string
const char *author, // Author name
*creator, // Creator name
*producer; // Producer name
time_t creation_date, // Creation date
mod_date; // Modification date
struct tm *creation_tm, // Creation date/time information
*mod_tm; // Mod. date/time information
char creation_text[256], // Creation date/time as a string
mod_text[256]; // Mod. date/time human fmt string
const char *title; // Title
size_t num_pages; // PDF number of pages
bool has_acroform; // AcroForm or not
// Get the filename from the command-line...
@ -48,9 +55,12 @@ main(int argc, // I - Number of command-line arguments
if (pdf == NULL)
return (1);
// Get the title and author...
// Get the title, author...
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
creator = pdfioFileGetCreator(pdf);
producer = pdfioFileGetProducer(pdf);
num_pages = pdfioFileGetNumPages(pdf);
// Get the creation date and convert to a string...
if ((creation_date = pdfioFileGetCreationDate(pdf)) > 0)
@ -63,12 +73,82 @@ main(int argc, // I - Number of command-line arguments
snprintf(creation_text, sizeof(creation_text), "-- not set --");
}
// Get the modification date and convert to a string...
if ((mod_date = pdfioFileGetModDate(pdf)) > 0)
{
mod_tm = localtime(&mod_date);
strftime(mod_text, sizeof(mod_text), "%c", mod_tm);
}
else
{
snprintf(mod_text, sizeof(mod_text), "-- not set --");
}
// Detect simply if AcroForm is a dict in catalog
{
pdfio_dict_t *dict; // some Object dictionary
dict = pdfioFileGetCatalog(pdf);
has_acroform = (dict != NULL && pdfioDictGetObj(dict, "AcroForm") != NULL)?
true : false;
}
// Print file information to stdout...
printf("%s:\n", filename);
printf(" Title: %s\n", title ? title : "-- not set --");
printf(" Author: %s\n", author ? author : "-- not set --");
printf(" Creator: %s\n", creator ? creator : "-- not set --");
printf(" Producer: %s\n", producer ? producer : "-- not set --");
printf(" Created On: %s\n", creation_text);
printf(" Number Pages: %u\n", (unsigned)pdfioFileGetNumPages(pdf));
printf(" Modified On: %s\n", mod_text);
printf(" Version: %s\n", pdfioFileGetVersion(pdf));
printf(" AcroForm: %s\n", has_acroform ? "Yes" : "No");
printf(" Number Pages: %u\n", (unsigned)num_pages);
printf(" MediaBoxes:");
// There can be a different MediaBox per page
// Loop and report MediaBox and number of consecutive pages of this size
{
pdfio_obj_t *obj; // Object
pdfio_dict_t *dict; // Object dictionary
pdfio_rect_t prev, // MediaBox previous
now; // MediaBox now
size_t n, // Page index
nprev; // Number previous prev size
// MediaBox should be set at least on the root
for (n = nprev = 0; n < num_pages; n++)
{
obj = pdfioFileGetPage(pdf, n);
while (obj != NULL)
{
dict = pdfioObjGetDict(obj);
if (pdfioDictGetRect(dict, "MediaBox", &now))
{
if (
nprev == 0
|| (
now.x1 != prev.x1 || now.y1 != prev.y1
|| now.x2 != prev.x2 || now.y2 != prev.y2
)
)
{
if (nprev) printf("(%zd) ", nprev);
prev = now;
printf("[%.7g %.7g %.7g %.7g]", now.x1, now.y1, now.x2, now.y2);
nprev = 1;
}
else
++nprev;
obj = NULL;
}
else
obj = pdfioDictGetObj(dict, "Parent");
}
}
printf("(%zd)", nprev);
}
printf("\n");
// Close the PDF file...
pdfioFileClose(pdf);

View File

@ -801,6 +801,18 @@ pdfioFileGetKeywords(pdfio_file_t *pdf) // I - PDF file
}
//
// 'pdfioFileGetModDate()' - Get the most recent modification date for a PDF file.
//
// Looks up the "ModDate" key in the dictionary of the file's info object.
// Returns `0` when `pdf` is `NULL` or the file has no info object.
//
time_t // O - Modification date or `0` for none
pdfioFileGetModDate(
pdfio_file_t *pdf) // I - PDF file
{
  // Nothing to look up without a file and its info object...
  if (!pdf || !pdf->info_obj)
    return (0);

  // Otherwise the date comes from the info dictionary's "ModDate" entry...
  return (pdfioDictGetDate(pdfioObjGetDict(pdf->info_obj), "ModDate"));
}
//
// 'pdfioFileGetName()' - Get a PDF's filename.
//

View File

@ -201,6 +201,7 @@ extern time_t pdfioFileGetCreationDate(pdfio_file_t *pdf) _PDFIO_PUBLIC;
extern const char *pdfioFileGetCreator(pdfio_file_t *pdf) _PDFIO_PUBLIC;
extern pdfio_array_t *pdfioFileGetID(pdfio_file_t *pdf) _PDFIO_PUBLIC;
extern const char *pdfioFileGetKeywords(pdfio_file_t *pdf) _PDFIO_PUBLIC;
extern time_t pdfioFileGetModDate(pdfio_file_t *pdf) _PDFIO_PUBLIC;
extern const char *pdfioFileGetName(pdfio_file_t *pdf) _PDFIO_PUBLIC;
extern size_t pdfioFileGetNumObjs(pdfio_file_t *pdf) _PDFIO_PUBLIC;
extern size_t pdfioFileGetNumPages(pdfio_file_t *pdf) _PDFIO_PUBLIC;

View File

@ -204,6 +204,7 @@ pdfioFileGetCreationDate
pdfioFileGetCreator
pdfioFileGetID
pdfioFileGetKeywords
pdfioFileGetModDate
pdfioFileGetName
pdfioFileGetNumObjs
pdfioFileGetNumPages