Fix a couple of issues with parsing PDF files produced by Microsoft Reporting Services (Issue #46)

- Odd cross-reference streams that use a 3-byte field for the 16-bit generation
  number
- Odd empty hex strings
This commit is contained in:
Michael R Sweet 2023-10-06 10:46:30 -04:00
parent 87ca4db73f
commit 7f6ffcda22
No known key found for this signature in database
GPG Key ID: BE67C75EC81F3244
3 changed files with 26 additions and 2 deletions

View File

@ -7,6 +7,8 @@ v1.1.2 (TBD)
- Fixed an issue with broken PDF files containing extra CR and/or LF separators - Fixed an issue with broken PDF files containing extra CR and/or LF separators
after the object stream token (Issue #40) after the object stream token (Issue #40)
- Fixed an issue with PDF files produced by Microsoft Reporting Services
(Issue #46)
v1.1.1 (March 20, 2023) v1.1.1 (March 20, 2023)

View File

@ -1759,9 +1759,9 @@ load_xref(
w_2 = w[0]; w_2 = w[0];
w_3 = w[0] + w[1]; w_3 = w[0] + w[1];
if (w[1] == 0 || w[2] > 2 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer)) if (w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
{ {
_pdfioFileError(pdf, "Cross-reference stream has invalid W key."); _pdfioFileError(pdf, "Cross-reference stream has invalid W key [%u %u %u].", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2]);
return (false); return (false);
} }
@ -1801,9 +1801,11 @@ load_xref(
} }
} }
// Offset
for (i = 1, offset = buffer[w_2]; i < w[1]; i ++) for (i = 1, offset = buffer[w_2]; i < w[1]; i ++)
offset = (offset << 8) | buffer[w_2 + i]; offset = (offset << 8) | buffer[w_2 + i];
// Generation number
switch (w[2]) switch (w[2])
{ {
default : default :
@ -1815,6 +1817,19 @@ load_xref(
case 2 : case 2 :
generation = (buffer[w_3] << 8) | buffer[w_3 + 1]; generation = (buffer[w_3] << 8) | buffer[w_3 + 1];
break; break;
case 3 :
// Issue #46: Stupid Microsoft PDF generator using 3 bytes to
// encode 16-bit generation numbers == 0 (probably a lazy coder
// stuffing things into an array of 64-bit unsigned integers)
generation = (buffer[w_3] << 16) | (buffer[w_3 + 1] << 8) | buffer[w_3 + 2];
if (generation > 65535)
generation = 65535;
break;
case 4 : // Even stupider :)
generation = (buffer[w_3] << 24) | (buffer[w_3 + 1] << 16) | (buffer[w_3 + 2] << 8) | buffer[w_3 + 3];
if (generation > 65535)
generation = 65535;
break;
} }
// Create a placeholder for the object in memory... // Create a placeholder for the object in memory...

View File

@ -495,6 +495,13 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr++ = (char)ch; *bufptr++ = (char)ch;
break; break;
} }
else if (ch == '>')
{
// Issue #46: Empty hex string from Microsoft PDF generator; treat as
// empty literal string...
*buffer = '(';
break;
}
else if (!isspace(ch & 255) && !isxdigit(ch & 255)) else if (!isspace(ch & 255) && !isxdigit(ch & 255))
{ {
_pdfioFileError(tb->pdf, "Syntax error: '<%c'", ch); _pdfioFileError(tb->pdf, "Syntax error: '<%c'", ch);