diff --git a/CHANGES.md b/CHANGES.md index 42baa8b..3748dd8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,7 @@ v1.3.2 - YYYY-MM-DD ------------------- - Added some more sanity checks to the TrueType font reader. +- Updated documentation (Issue #77) - Fixed an issue when opening certain encrypted PDF files (Issue #62) diff --git a/doc/pdfio.3 b/doc/pdfio.3 index 70439eb..dd802c3 100644 --- a/doc/pdfio.3 +++ b/doc/pdfio.3 @@ -1,4 +1,4 @@ -.TH pdfio 3 "pdf read/write library" "2024-08-05" "pdf read/write library" +.TH pdfio 3 "pdf read/write library" "2024-10-09" "pdf read/write library" .SH NAME pdfio \- pdf read/write library .SH Introduction @@ -218,7 +218,90 @@ Each PDF file contains one or more pages. The pdfioFileGetNumPages function retu } .fi .PP -Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page. Use the pdfioPageGetNumStreams and pdfioPageOpenStream functions to access the content streams for each page. +Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page. Use the pdfioPageGetNumStreams and pdfioPageOpenStream functions to access the content streams for each page, and pdfioObjGetDict to get the associated page object dictionary. 
For example, if you want to display the media and crop boxes for a given page: +.nf + + pdfio_file_t *pdf; // PDF file + size_t i; // Looping var + size_t count; // Number of pages + pdfio_obj_t *page; // Current page + pdfio_dict_t *dict; // Current page dictionary + pdfio_array_t *media_box; // MediaBox array + double media_values[4]; // MediaBox values + pdfio_array_t *crop_box; // CropBox array + double crop_values[4]; // CropBox values + + // Iterate the pages in the PDF file + for (i = 0, count = pdfioFileGetNumPages(pdf); i < count; i ++) + { + page = pdfioFileGetPage(pdf, i); + dict = pdfioObjGetDict(page); + + media_box = pdfioDictGetArray(dict, "MediaBox"); + media_values[0] = pdfioArrayGetNumber(media_box, 0); + media_values[1] = pdfioArrayGetNumber(media_box, 1); + media_values[2] = pdfioArrayGetNumber(media_box, 2); + media_values[3] = pdfioArrayGetNumber(media_box, 3); + + crop_box = pdfioDictGetArray(dict, "CropBox"); + crop_values[0] = pdfioArrayGetNumber(crop_box, 0); + crop_values[1] = pdfioArrayGetNumber(crop_box, 1); + crop_values[2] = pdfioArrayGetNumber(crop_box, 2); + crop_values[3] = pdfioArrayGetNumber(crop_box, 3); + + printf("Page %u: MediaBox=[%g %g %g %g], CropBox=[%g %g %g %g]\\n", + (unsigned)(i + 1), + media_values[0], media_values[1], media_values[2], media_values[3], + crop_values[0], crop_values[1], crop_values[2], crop_values[3]); + } +.fi +.PP +Page object dictionaries have several (mostly optional) key/value pairs, including: +.IP \(bu 5 +.PP +"Annots": An array of annotation dictionaries for the page; use pdfioDictGetArray to get the array + +.IP \(bu 5 +.PP +"CropBox": The crop box as an array of four numbers for the left, bottom, right, and top coordinates of the target media; use pdfioDictGetArray to get a pointer to the array of numbers + +.IP \(bu 5 +.PP +"Dur": The number of seconds the page should be displayed; use pdfioDictGetNumber to get the page duration value + +.IP \(bu 5 +.PP +"Group": The dictionary of 
transparency group values for the page; use pdfioDictGetDict to get a pointer to the transparency group dictionary + +.IP \(bu 5 +.PP +"LastModified": The date and time when this page was last modified; use pdfioDictGetDate to get the Unix time_t value + +.IP \(bu 5 +.PP +"Parent": The parent page tree node object for this page; use pdfioDictGetObj to get a pointer to the object + +.IP \(bu 5 +.PP +"MediaBox": The media box as an array of four numbers for the left, bottom, right, and top coordinates of the target media; use pdfioDictGetArray to get a pointer to the array of numbers + +.IP \(bu 5 +.PP +"Resources": The dictionary of resources for the page; use pdfioDictGetDict to get a pointer to the resources dictionary + +.IP \(bu 5 +.PP +"Rotate": A number indicating the number of degrees of counter\-clockwise rotation to apply to the page when viewing; use pdfioDictGetNumber to get the rotation angle + +.IP \(bu 5 +.PP +"Thumb": A thumbnail image object for the page; use pdfioDictGetObj to get a pointer to the thumbnail image object + +.IP \(bu 5 +.PP +"Trans": The page transition dictionary; use pdfioDictGetDict to get a pointer to the dictionary + + .PP The pdfioFileClose function closes a PDF file and frees all memory that was used for it: .nf @@ -2869,6 +2952,29 @@ const char * pdfioObjGetSubtype ( pdfio_obj_t *obj ); .fi +.PP +This function returns an object's PDF subtype name, if any. Common subtype +names include: +.PP +.IP \(bu 5 +"CIDFontType0": A CID Type0 font +.IP \(bu 5 +"CIDFontType2": A CID TrueType font +.IP \(bu 5 +"Image": An image or image mask +.IP \(bu 5 +"Form": A fillable form +.IP \(bu 5 +"OpenType": An OpenType font +.IP \(bu 5 +"Type0": A composite font +.IP \(bu 5 +"Type1": A PostScript Type1 font +.IP \(bu 5 +"Type3": A PDF Type3 font +.IP \(bu 5 +"TrueType": A TrueType font + .SS pdfioObjGetType Get an object's type. 
.PP @@ -2877,6 +2983,27 @@ const char * pdfioObjGetType ( pdfio_obj_t *obj ); .fi +.PP +This function returns an object's PDF type name, if any. Common type names +include: +.PP +.IP \(bu 5 +"CMap": A character map for composite fonts +.IP \(bu 5 +"Font": An embedded font (\fIpdfioObjGetSubtype\fR will tell you the + font format) +.IP \(bu 5 +"FontDescriptor": A font descriptor +.IP \(bu 5 +"Page": A (visible) page +.IP \(bu 5 +"Pages": A page tree node +.IP \(bu 5 +"Template": An invisible template page +.IP \(bu 5 +"XObject": An image, image mask, or form (\fIpdfioObjGetSubtype\fR will + tell you which) + .SS pdfioObjOpenStream Open an object's (data) stream for reading. .PP diff --git a/doc/pdfio.html b/doc/pdfio.html index 7b9edbb..6f23b08 100644 --- a/doc/pdfio.html +++ b/doc/pdfio.html @@ -1,13 +1,13 @@
-