From 7f6ffcda2234960c75ce76d1c1cfc4a7738cbb95 Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Fri, 6 Oct 2023 10:46:30 -0400 Subject: [PATCH] Fix a couple issues with parsing PDF files produced by Microsoft Reporting Services (Issue #46) - Odd cross-reference stream containing 3-byte generation number field for this 16-bit value - Odd empty hex strings --- CHANGES.md | 2 ++ pdfio-file.c | 19 +++++++++++++++++-- pdfio-token.c | 7 +++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 32713bf..6a1f4ef 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,8 @@ v1.1.2 (TBD) - Fixed an issue with broken PDF files containing extra CR and/or LF separators after the object stream token (Issue #40) +- Fixed an issue with PDF files produced by Microsoft Reporting Services + (Issue #46) v1.1.1 (March 20, 2023) diff --git a/pdfio-file.c b/pdfio-file.c index a95378c..e045230 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -1759,9 +1759,9 @@ load_xref( w_2 = w[0]; w_3 = w[0] + w[1]; - if (w[1] == 0 || w[2] > 2 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer)) + if (w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer)) { - _pdfioFileError(pdf, "Cross-reference stream has invalid W key."); + _pdfioFileError(pdf, "Cross-reference stream has invalid W key [%u %u %u].", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2]); return (false); } @@ -1801,9 +1801,11 @@ load_xref( } } + // Offset for (i = 1, offset = buffer[w_2]; i < w[1]; i ++) offset = (offset << 8) | buffer[w_2 + i]; + // Generation number switch (w[2]) { default : @@ -1815,6 +1817,19 @@ load_xref( case 2 : generation = (buffer[w_3] << 8) | buffer[w_3 + 1]; break; + case 3 : + // Issue #46: Stupid Microsoft PDF generator using 3 bytes to + // encode 16-bit generation numbers == 0 (probably a lazy coder + // stuffing things into an array of 64-bit unsigned integers) + generation = (buffer[w_3] << 16) | (buffer[w_3 + 1] << 8) | buffer[w_3 + 2]; + if (generation > 65535) + generation = 65535; + break; + case 4 : // Even stupider :) + generation = (buffer[w_3] << 24) | (buffer[w_3 + 1] << 16) | (buffer[w_3 + 2] << 8) | buffer[w_3 + 3]; + if (generation > 65535) + generation = 65535; + break; } // Create a placeholder for the object in memory... diff --git a/pdfio-token.c b/pdfio-token.c index 559a7ff..437bd9c 100644 --- a/pdfio-token.c +++ b/pdfio-token.c @@ -495,6 +495,13 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack *bufptr++ = (char)ch; break; } + else if (ch == '>') + { + // Issue #46: Empty hex string from Microsoft PDF generator; treat as + // empty literal string... + *buffer = '('; + break; + } else if (!isspace(ch & 255) && !isxdigit(ch & 255)) { _pdfioFileError(tb->pdf, "Syntax error: '<%c'", ch);