Fix stream offsets for objects without a Type during xref repair and support indirect Contents arrays for pages.

This commit is contained in:
Michael R Sweet
2026-01-16 16:54:37 -05:00
parent 65098b5509
commit aac04a2a96
5 changed files with 36 additions and 6 deletions

View File

@@ -19,6 +19,8 @@ v1.6.2 - YYYY-MM-DD
- Added missing range checks to `pdfioArrayCopy` and `pdfioDictCopy`.
- Refactored PDF encryption code to fix unlocking with certain files.
- Improved xref table loop detection (Issue #148)
- Fixed xref reconstruction for objects lacking a `Type` value.
- Fixed `pdfioPageOpenStream` for indirect `Contents` arrays.
- Fixed an error propagation bug when reading too-long values (Issue #146)
- Fixed a Clang warning.

View File

@@ -2621,19 +2621,19 @@ repair_xref(
_pdfioTokenFlush(&tb);
if (type && !strcmp(line, "stream"))
if (!strcmp(line, "stream"))
{
// Possible object or XRef stream...
obj->stream_offset = _pdfioFileTell(pdf);
if (!strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
if (type && !strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
{
PDFIO_DEBUG("repair_xref: Object stream...\n");
sobjs[num_sobjs] = obj;
num_sobjs ++;
}
if (!strcmp(type, "XRef") && !pdf->trailer_dict)
if (type && !strcmp(type, "XRef") && !pdf->trailer_dict)
{
// Save the trailer dictionary...
pdfio_obj_t *encrypt_obj;

View File

@@ -547,6 +547,8 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
pdfio_stream_t *st; // Stream
PDFIO_DEBUG("pdfioObjOpenStream(obj=%p(%lu), decode=%s)\n", (void *)obj, obj ? (unsigned long)obj->number : 0, decode ? "true" : "false");
// Range check input...
if (!obj)
return (NULL);
@@ -566,7 +568,10 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
// No stream if there is no dict or offset to a stream...
if (obj->value.type != PDFIO_VALTYPE_DICT || !obj->stream_offset)
{
PDFIO_DEBUG("pdfioObjOpenStream: value.type=%d, stream_offset=%ld\n", obj->value.type, (long)obj->stream_offset);
return (NULL);
}
// Open the stream...
if ((st = _pdfioStreamOpen(obj, decode)) != NULL)

View File

@@ -106,7 +106,7 @@ pdfioPageOpenStream(
}
else
{
PDFIO_DEBUG("pdfioPageOpenStream: Opening single content stream.\n");
PDFIO_DEBUG("pdfioPageOpenStream: Opening single content stream %d.\n", (int)contents->value.indirect.number);
return (pdfioObjOpenStream(pdfioFileFindObj(page->pdf, contents->value.indirect.number), decode));
}
}
@@ -119,6 +119,10 @@ pdfioPageOpenStream(
static _pdfio_value_t * // O - Value or NULL on error
get_contents(pdfio_obj_t *page) // I - Page object
{
_pdfio_value_t *contents; // Contents value
pdfio_obj_t *obj; // Contents object
// Range check input...
if (!page)
return (NULL);
@@ -133,5 +137,24 @@ get_contents(pdfio_obj_t *page) // I - Page object
if (page->value.type != PDFIO_VALTYPE_DICT)
return (NULL);
return (_pdfioDictGetValue(page->value.value.dict, "Contents"));
contents = _pdfioDictGetValue(page->value.value.dict, "Contents");
if (contents->type == PDFIO_VALTYPE_INDIRECT)
{
// See if the indirect object is a stream or an array of indirect object
// references...
if ((obj = pdfioFileFindObj(page->pdf, contents->value.indirect.number)) != NULL)
{
if (obj->value.type == PDFIO_VALTYPE_NONE)
{
if (!_pdfioObjLoad(obj))
return (NULL);
}
if (obj->value.type == PDFIO_VALTYPE_ARRAY)
contents = &(obj->value);
}
}
return (contents);
}

View File

@@ -687,7 +687,7 @@ do_test_file(const char *filename, // I - PDF filename
}
else
{
testEndMessage(false, "unable to open content stream");
testEndMessage(false, "page #%d/obj %d is %gx%g, unable to open content stream", (int)n + 1, (int)pdfioObjGetNumber(obj), media_box.x2, media_box.y2);
status = 1;
}
}