Multiple fixes to allow PDFio to read more edge-case PDFs.

- Update _pdfioFileGets to allow for really long lines where it
  doesn't matter if we lose the end of the line.
- Update "startxref" detection at the end of the file.
- Refactor repair logic so that you just get a single WARNING about
  the repair (debug messages are available for testing).
- Allow whitespace after the "obj" in the object header.
- Make sure to close xref stream on error.
- Update predictor code to support Colors <= 32 (some implementations
  set Colors to the number of bytes per record in the xref stream,
  which prevents the predictor from doing anything...)
- Allow CR CR in xref table.
- Clear old trailer/root/pages/etc. objects when repairing, update
  existing objects that were already found in load_xref.
- Don't set current object in pdfioObjCreateStream/pdfioObjOpenStream if
  the stream can't be created/opened.
This commit is contained in:
Michael R Sweet
2025-04-24 11:09:54 -04:00
parent 278ddb7fa7
commit cad8f450ab
6 changed files with 148 additions and 84 deletions

View File

@@ -141,6 +141,7 @@ pdfioObjCreateStream(
pdfio_obj_t *obj, // I - Object
pdfio_filter_t filter) // I - Type of compression to apply
{
pdfio_stream_t *st; // Stream
pdfio_obj_t *length_obj = NULL; // Length object, if any
@@ -194,11 +195,13 @@ pdfioObjCreateStream(
if (!_pdfioFilePuts(obj->pdf, "stream\n"))
return (NULL);
obj->stream_offset = _pdfioFileTell(obj->pdf);
obj->pdf->current_obj = obj;
obj->stream_offset = _pdfioFileTell(obj->pdf);
// Return the new stream...
return (_pdfioStreamCreate(obj, length_obj, 0, filter));
if ((st = _pdfioStreamCreate(obj, length_obj, 0, filter)) != NULL)
obj->pdf->current_obj = obj;
return (st);
}
@@ -534,6 +537,9 @@ pdfio_stream_t * // O - Stream or `NULL` on error
pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
bool decode) // I - Decode/decompress data?
{
pdfio_stream_t *st; // Stream
// Range check input...
if (!obj)
return (NULL);
@@ -556,9 +562,10 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
return (NULL);
// Open the stream...
obj->pdf->current_obj = obj;
if ((st = _pdfioStreamOpen(obj, decode)) != NULL)
obj->pdf->current_obj = obj;
return (_pdfioStreamOpen(obj, decode));
return (st);
}