From 7733fbeb1032d8606966fdc7bb76a4322c8434ee Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Tue, 4 May 2021 16:06:14 -0400 Subject: [PATCH] Rework object and xref processing to allow for sparse object lists and generation numbers. --- pdfio-file.c | 78 ++++++++++++++++++++++++++++----- pdfio-private.h | 1 + pdfio.h | 5 ++- pdfio.xcodeproj/project.pbxproj | 14 ++++-- 4 files changed, 81 insertions(+), 17 deletions(-) diff --git a/pdfio-file.c b/pdfio-file.c index ca2b36c..e209879 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -21,7 +21,8 @@ // Local functions... // -static pdfio_obj_t *add_object(pdfio_file_t *pdf); +static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset); +static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b); static bool load_xref(pdfio_file_t *pdf, off_t xref_offset); static bool write_trailer(pdfio_file_t *pdf); @@ -221,6 +222,34 @@ pdfioFileCreatePage(pdfio_file_t *pdf, // I - PDF file } +// +// 'pdfioFileFindObject()' - Find an object using its object number. +// +// This differs from @link pdfioFileGetObject@ which takes an index into the +// list of objects while this function takes the object number. +// + +pdfio_obj_t * // O - Object or `NULL` if not found +pdfioFileFindObject( + pdfio_file_t *pdf, // I - PDF file + size_t number) // I - Object number (1 to N) +{ + pdfio_obj_t key, // Search key + *keyptr; // Pointer to key + + + if (pdf->num_objs > 0) + { + key.number = number; + keyptr = &key; + + return ((pdfio_obj_t *)bsearch(&keyptr, pdf->objs, pdf->num_objs, sizeof(pdfio_obj_t *), (int (*)(const void *, const void *))compare_objs)); + } + + return (NULL); +} + + // // 'pdfioFileGetID()' - Get the PDF file's ID strings. // @@ -407,13 +436,16 @@ pdfioFileOpen( // -// 'add_object()' - Add an object to a PDF file. +// '_pdfioObjAdd()' - Add an object to a file. // -static pdfio_obj_t * // O - New object -add_object(pdfio_file_t *pdf) // I - PDF file +pdfio_obj_t * // O - Object +add_obj(pdfio_file_t *pdf, // I - PDF file + size_t number, // I - Object number + unsigned short generation, // I - Object generation + off_t offset) // I - Offset in file { - pdfio_obj_t *obj; // New object + pdfio_obj_t *obj; // Object // Allocate memory for the object... @@ -441,7 +473,33 @@ add_object(pdfio_file_t *pdf) // I - PDF file pdf->objs[pdf->num_objs ++] = obj; + obj->number = number; + obj->generation = generation; + obj->offset = offset; + + // Re-sort object array as needed... + if (pdf->num_objs > 1 && pdf->objs[pdf->num_objs - 2]->number > number) + qsort(pdf->objs, pdf->num_objs, sizeof(pdfio_obj_t *), (int (*)(const void *, const void *))compare_objs); + return (obj); + +} + + +// +// 'compare_objs()' - Compare the object numbers of two objects. +// + +static int +compare_objs(pdfio_obj_t **a, // I - First object + pdfio_obj_t **b) // I - Second object +{ + if ((*a)->number < (*b)->number) + return (-1); + else if ((*a)->number == (*b)->number) + return (0); + else + return (1); } @@ -499,7 +557,6 @@ load_xref(pdfio_file_t *pdf, // I - PDF file { intmax_t offset; // Offset in file int generation; // Generation number - pdfio_obj_t *obj; // Object // Read a line from the file and validate it... if (_pdfioFileRead(pdf, line, 20) != 20) @@ -544,12 +601,11 @@ load_xref(pdfio_file_t *pdf, // I - PDF file continue; // Don't care about free objects... // Create a placeholder for the object in memory... - if ((obj = add_object(pdf)) == NULL) - return (false); + if (pdfioFileFindObject(pdf, (size_t)number)) + continue; // Don't replace newer object... - obj->number = (size_t)number; - obj->generation = (unsigned short)generation; - obj->offset = offset; + if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset)) + return (false); } } diff --git a/pdfio-private.h b/pdfio-private.h index ae2458a..ed43dea 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -214,6 +214,7 @@ extern off_t _pdfioFileSeek(pdfio_file_t *pdf, off_t offset, int whence) PDFIO_ extern off_t _pdfioFileTell(pdfio_file_t *pdf) PDFIO_INTERNAL; extern bool _pdfioFileWrite(pdfio_file_t *pdf, const void *buffer, size_t bytes) PDFIO_INTERNAL; +extern pdfio_obj_t *_pdfioObjAdd(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset) PDFIO_INTERNAL; extern void _pdfioObjDelete(pdfio_obj_t *obj) PDFIO_INTERNAL; extern void _pdfioStreamDelete(pdfio_stream_t *st) PDFIO_INTERNAL; diff --git a/pdfio.h b/pdfio.h index 7950de3..ad00b8d 100644 --- a/pdfio.h +++ b/pdfio.h @@ -141,12 +141,13 @@ extern bool pdfioFileClose(pdfio_file_t *pdf) PDFIO_PUBLIC; extern pdfio_file_t *pdfioFileCreate(const char *filename, const char *version, pdfio_error_cb_t error_cb, void *error_data) PDFIO_PUBLIC; extern pdfio_obj_t *pdfioFileCreateObject(pdfio_file_t *pdf, pdfio_dict_t *dict) PDFIO_PUBLIC; extern pdfio_obj_t *pdfioFileCreatePage(pdfio_file_t *pdf, pdfio_dict_t *dict) PDFIO_PUBLIC; +extern pdfio_obj_t *pdfioFileFindObject(pdfio_file_t *pdf, size_t number) PDFIO_PUBLIC; extern pdfio_array_t *pdfioFileGetID(pdfio_file_t *pdf) PDFIO_PUBLIC; extern const char *pdfioFileGetName(pdfio_file_t *pdf) PDFIO_PUBLIC; extern size_t pdfioFileGetNumObjects(pdfio_file_t *pdf) PDFIO_PUBLIC; extern size_t pdfioFileGetNumPages(pdfio_file_t *pdf) PDFIO_PUBLIC; -extern pdfio_obj_t *pdfioFileGetObject(pdfio_file_t *pdf, size_t number) PDFIO_PUBLIC; -extern pdfio_obj_t *pdfioFileGetPage(pdfio_file_t *pdf, size_t number) PDFIO_PUBLIC; +extern pdfio_obj_t *pdfioFileGetObject(pdfio_file_t *pdf, size_t n) PDFIO_PUBLIC; +extern pdfio_obj_t *pdfioFileGetPage(pdfio_file_t *pdf, size_t n) PDFIO_PUBLIC; extern const char *pdfioFileGetVersion(pdfio_file_t *pdf) PDFIO_PUBLIC; extern pdfio_file_t *pdfioFileOpen(const char *filename, pdfio_error_cb_t error_cb, void *error_data) PDFIO_PUBLIC; diff --git a/pdfio.xcodeproj/project.pbxproj b/pdfio.xcodeproj/project.pbxproj index c1cbb3f..58c7175 100644 --- a/pdfio.xcodeproj/project.pbxproj +++ b/pdfio.xcodeproj/project.pbxproj @@ -442,24 +442,30 @@ 273440D5263D72AE00FBFD63 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { + CODE_SIGN_IDENTITY = "-"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = RU58A2256H; + DEVELOPMENT_TEAM = ""; ENABLE_HARDENED_RUNTIME = YES; GCC_DYNAMIC_NO_PIC = NO; GCC_OPTIMIZATION_LEVEL = 0; - MACOSX_DEPLOYMENT_TARGET = 11.3; + MACOSX_DEPLOYMENT_TARGET = 10.14; + PRODUCT_BUNDLE_IDENTIFIER = org.msweet.testpdfio; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; }; name = Debug; }; 273440D6263D72AE00FBFD63 /* Release */ = { isa = XCBuildConfiguration; buildSettings = { + CODE_SIGN_IDENTITY = "-"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = RU58A2256H; + DEVELOPMENT_TEAM = ""; ENABLE_HARDENED_RUNTIME = YES; - MACOSX_DEPLOYMENT_TARGET = 11.3; + MACOSX_DEPLOYMENT_TARGET = 10.14; + PRODUCT_BUNDLE_IDENTIFIER = org.msweet.testpdfio; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; }; name = Release; };