From a83f7f50ff828de3da903132e7cb21de00b48e00 Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Mon, 2 Oct 2023 05:06:33 -0400 Subject: [PATCH] Allow extra whitespace/newlines after stream tokens (Issue #40) --- CHANGES.md | 7 +++++++ Makefile | 2 +- pdfio-object.c | 8 +++++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index f607e06..32713bf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,13 @@ Changes in PDFio ================ +v1.1.2 (TBD) +------------ + +- Fixed an issue with broken PDF files containing extra CR and/or LF separators + after the object stream token (Issue #40) + + v1.1.1 (March 20, 2023) ----------------------- diff --git a/Makefile b/Makefile index 26d41b2..e6952a9 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ DSONAME = LDFLAGS = LIBS = -lm -lz RANLIB = ranlib -VERSION = 1.1.1 +VERSION = 1.1.2 prefix = /usr/local diff --git a/pdfio-object.c b/pdfio-object.c index 88ab25f..c0184c0 100644 --- a/pdfio-object.c +++ b/pdfio-object.c @@ -420,6 +420,9 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object } ptr += 3; + while (*ptr && isspace(*ptr & 255)) + ptr ++; + _pdfioFileConsume(obj->pdf, (size_t)(ptr - line)); // Then grab the object value... @@ -438,11 +441,14 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object return (false); } + if (tb.bufptr && tb.bufptr < tb.bufend && (*(tb.bufptr) == 0x0d || *(tb.bufptr) == 0x0a)) + tb.bufptr ++; // Skip trailing CR or LF after token + _pdfioTokenFlush(&tb); if (!strcmp(line, "stream")) { - // Yes, save its location... + // Yes, this is an embedded stream so save its location... obj->stream_offset = _pdfioFileTell(obj->pdf); PDFIO_DEBUG("_pdfioObjLoad: stream_offset=%lu.\n", (unsigned long)obj->stream_offset); }