From aac04a2a960ab4c0dd275f8a58531a0951f66ae0 Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Fri, 16 Jan 2026 16:54:37 -0500 Subject: [PATCH] Fix repaired xref stream offsets and support indirect Contents arrays for pages. --- CHANGES.md | 2 ++ pdfio-file.c | 6 +++--- pdfio-object.c | 5 +++++ pdfio-page.c | 27 +++++++++++++++++++++++++-- testpdfio.c | 2 +- 5 files changed, 36 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ce2b071..4f4736e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -19,6 +19,8 @@ v1.6.2 - YYYY-MM-DD - Added missing range checks to `pdfioArrayCopy` and `pdfioDictCopy`. - Refactored PDF encryption code to fix unlocking with certain files. - Improved xref table loop detection (Issue #148) +- Fixed xref reconstruction for objects lacking a `Type` value. +- Fixed `pdfioPageOpenStream` for indirect `Contents` arrays. - Fixed an error propagation bug when reading too-long values (Issue #146) - Fixed a Clang warning. diff --git a/pdfio-file.c b/pdfio-file.c index 5bcc4d7..9461b39 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -2621,19 +2621,19 @@ repair_xref( _pdfioTokenFlush(&tb); - if (type && !strcmp(line, "stream")) + if (!strcmp(line, "stream")) { // Possible object or XRef stream... obj->stream_offset = _pdfioFileTell(pdf); - if (!strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0]))) + if (type && !strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0]))) { PDFIO_DEBUG("repair_xref: Object stream...\n"); sobjs[num_sobjs] = obj; num_sobjs ++; } - if (!strcmp(type, "XRef") && !pdf->trailer_dict) + if (type && !strcmp(type, "XRef") && !pdf->trailer_dict) { // Save the trailer dictionary... 
pdfio_obj_t *encrypt_obj; diff --git a/pdfio-object.c b/pdfio-object.c index 814a923..ac287d5 100644 --- a/pdfio-object.c +++ b/pdfio-object.c @@ -547,6 +547,8 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object pdfio_stream_t *st; // Stream + PDFIO_DEBUG("pdfioObjOpenStream(obj=%p(%lu), decode=%s)\n", (void *)obj, obj ? (unsigned long)obj->number : 0, decode ? "true" : "false"); + // Range check input... if (!obj) return (NULL); @@ -566,7 +568,10 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object // No stream if there is no dict or offset to a stream... if (obj->value.type != PDFIO_VALTYPE_DICT || !obj->stream_offset) + { + PDFIO_DEBUG("pdfioObjOpenStream: value.type=%d, stream_offset=%ld\n", obj->value.type, (long)obj->stream_offset); return (NULL); + } // Open the stream... if ((st = _pdfioStreamOpen(obj, decode)) != NULL) diff --git a/pdfio-page.c b/pdfio-page.c index a7b45de..3b9b331 100644 --- a/pdfio-page.c +++ b/pdfio-page.c @@ -106,7 +106,7 @@ pdfioPageOpenStream( } else { - PDFIO_DEBUG("pdfioPageOpenStream: Opening single content stream.\n"); + PDFIO_DEBUG("pdfioPageOpenStream: Opening single content stream %d.\n", (int)contents->value.indirect.number); return (pdfioObjOpenStream(pdfioFileFindObj(page->pdf, contents->value.indirect.number), decode)); } } @@ -119,6 +119,10 @@ pdfioPageOpenStream( static _pdfio_value_t * // O - Value or NULL on error get_contents(pdfio_obj_t *page) // I - Page object { + _pdfio_value_t *contents; // Contents value + pdfio_obj_t *obj; // Contents object + + // Range check input... if (!page) return (NULL); @@ -133,5 +137,24 @@ get_contents(pdfio_obj_t *page) // I - Page object if (page->value.type != PDFIO_VALTYPE_DICT) return (NULL); - return (_pdfioDictGetValue(page->value.value.dict, "Contents")); + contents = _pdfioDictGetValue(page->value.value.dict, "Contents"); + + if (contents && contents->type == PDFIO_VALTYPE_INDIRECT) + { + // See if the indirect object is a stream or an array of indirect object + // references... 
+ if ((obj = pdfioFileFindObj(page->pdf, contents->value.indirect.number)) != NULL) + { + if (obj->value.type == PDFIO_VALTYPE_NONE) + { + if (!_pdfioObjLoad(obj)) + return (NULL); + } + + if (obj->value.type == PDFIO_VALTYPE_ARRAY) + contents = &(obj->value); + } + } + + return (contents); } diff --git a/testpdfio.c b/testpdfio.c index 9028d0d..a7cc362 100644 --- a/testpdfio.c +++ b/testpdfio.c @@ -687,7 +687,7 @@ do_test_file(const char *filename, // I - PDF filename } else { - testEndMessage(false, "unable to open content stream"); + testEndMessage(false, "page #%d/obj %d is %gx%g, unable to open content stream", (int)n + 1, (int)pdfioObjGetNumber(obj), media_box.x2, media_box.y2); status = 1; } }