Update the token reading code to protect against obvious format abuses.

Update the xref loading code to protect against looping xref tables.
This commit is contained in:
Michael R Sweet 2023-12-07 17:50:52 -05:00
parent ed723a46dc
commit c992b2ba89
No known key found for this signature in database
GPG Key ID: BE67C75EC81F3244
3 changed files with 62 additions and 5 deletions

View File

@ -12,6 +12,9 @@ v1.2.0 (Month DD, YYYY)
functions (Issue #24)
- Renamed `pdfioContentTextNextLine` to `pdfioContentTextNewLine`.
- Now use autoconf to configure the PDFio sources (Issue #54)
- Updated the token reading code to protect against some obvious abuses of the
PDF format.
- Updated the xref reading code to protect against loops.
v1.1.4 (December 3, 2023)

View File

@ -2091,9 +2091,20 @@ load_xref(
PDFIO_DEBUG_VALUE(&trailer); PDFIO_DEBUG_VALUE(&trailer);
PDFIO_DEBUG("\n"); PDFIO_DEBUG("\n");
if ((xref_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev")) <= 0) off_t new_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev");
if (new_offset <= 0)
{
done = true; done = true;
} }
else if (new_offset == xref_offset)
{
_pdfioFileError(pdf, "Recursive xref table.");
return (false);
}
xref_offset = new_offset;
}
// Once we have all of the xref tables loaded, get the important objects and // Once we have all of the xref tables loaded, get the important objects and
// build the pages array... // build the pages array...

View File

@ -208,9 +208,10 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufend, // End of buffer *bufend, // End of buffer
state = '\0'; // Current state state = '\0'; // Current state
bool saw_nul = false; // Did we see a nul character? bool saw_nul = false; // Did we see a nul character?
size_t count = 0; // Number of whitespace/comment bytes
//
// "state" is: // "state" is:
// //
// - '\0' for idle // - '\0' for idle
@ -229,18 +230,39 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
// Skip leading whitespace... // Skip leading whitespace...
while ((ch = get_char(tb)) != EOF) while ((ch = get_char(tb)) != EOF)
{ {
count ++;
if (ch == '%') if (ch == '%')
{ {
// Skip comment // Skip comment
PDFIO_DEBUG("_pdfioTokenRead: Skipping comment...\n");
while ((ch = get_char(tb)) != EOF) while ((ch = get_char(tb)) != EOF)
{ {
count ++;
if (ch == '\n' || ch == '\r') if (ch == '\n' || ch == '\r')
{
break; break;
} }
else if (count > 2048)
{
_pdfioFileError(tb->pdf, "Comment too long.");
*bufptr = '\0';
return (false);
}
}
} }
else if (!isspace(ch)) else if (!isspace(ch))
{
break; break;
} }
else if (count > 2048)
{
_pdfioFileError(tb->pdf, "Too much whitespace.");
*bufptr = '\0';
return (false);
}
}
if (ch == EOF) if (ch == EOF)
{ {
@ -266,6 +288,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr++ = (char)ch; *bufptr++ = (char)ch;
} }
PDFIO_DEBUG("_pdfioTokenRead: state='%c'\n", state);
switch (state) switch (state)
{ {
case '(' : // Literal string case '(' : // Literal string
@ -431,6 +455,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
if (!isdigit(ch) && ch != '.') if (!isdigit(ch) && ch != '.')
{ {
// End of number... // End of number...
PDFIO_DEBUG("_pdfioTokenRead: End of number with ch=0x%02x\n", ch);
tb->bufptr --; tb->bufptr --;
break; break;
} }
@ -496,6 +521,13 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
return (false); return (false);
} }
} }
if (bufptr == (buffer + 1))
{
_pdfioFileError(tb->pdf, "Empty name.");
*bufptr = '\0';
return (false);
}
break; break;
case '<' : // Potential hex string case '<' : // Potential hex string
@ -519,6 +551,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
return (false); return (false);
} }
count = 0;
do do
{ {
if (isxdigit(ch)) if (isxdigit(ch))
@ -527,6 +561,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
{ {
// Hex digit // Hex digit
*bufptr++ = (char)ch; *bufptr++ = (char)ch;
count = 0;
} }
else else
{ {
@ -542,6 +577,16 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr = '\0'; *bufptr = '\0';
return (false); return (false);
} }
else
{
count ++;
if (count > 2048)
{
_pdfioFileError(tb->pdf, "Too much whitespace.");
*bufptr = '\0';
return (false);
}
}
} }
while ((ch = get_char(tb)) != EOF && ch != '>'); while ((ch = get_char(tb)) != EOF && ch != '>');
@ -569,7 +614,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr = '\0'; *bufptr = '\0';
// PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer); PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
return (bufptr > buffer); return (bufptr > buffer);
} }
@ -606,7 +651,6 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
tb->bufptr = tb->buffer; tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + bytes; tb->bufend = tb->buffer + bytes;
#if 0
#ifdef DEBUG #ifdef DEBUG
unsigned char *ptr; // Pointer into buffer unsigned char *ptr; // Pointer into buffer
@ -620,7 +664,6 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
} }
PDFIO_DEBUG("'\n"); PDFIO_DEBUG("'\n");
#endif // DEBUG #endif // DEBUG
#endif // 0
} }
// Return the next character... // Return the next character...