Update the token reading code to protect against obvious format abuses.

Update the xref loading code to protect against looping xref tables.
This commit is contained in:
Michael R Sweet 2023-12-07 17:50:52 -05:00
parent ed723a46dc
commit c992b2ba89
No known key found for this signature in database
GPG Key ID: BE67C75EC81F3244
3 changed files with 62 additions and 5 deletions

View File

@ -12,6 +12,9 @@ v1.2.0 (Month DD, YYYY)
functions (Issue #24)
- Renamed `pdfioContentTextNextLine` to `pdfioContentTextNewLine`.
- Now use autoconf to configure the PDFio sources (Issue #54)
- Updated the token reading code to protect against some obvious abuses of the
PDF format.
- Updated the xref reading code to protect against loops.
v1.1.4 (December 3, 2023)

View File

@ -2091,8 +2091,19 @@ load_xref(
PDFIO_DEBUG_VALUE(&trailer);
PDFIO_DEBUG("\n");
if ((xref_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev")) <= 0)
off_t new_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev");
if (new_offset <= 0)
{
done = true;
}
else if (new_offset == xref_offset)
{
_pdfioFileError(pdf, "Recursive xref table.");
return (false);
}
xref_offset = new_offset;
}
// Once we have all of the xref tables loaded, get the important objects and

View File

@ -208,9 +208,10 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufend, // End of buffer
state = '\0'; // Current state
bool saw_nul = false; // Did we see a nul character?
size_t count = 0; // Number of whitespace/comment bytes
//
// "state" is:
//
// - '\0' for idle
@ -229,17 +230,38 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
// Skip leading whitespace...
while ((ch = get_char(tb)) != EOF)
{
count ++;
if (ch == '%')
{
// Skip comment
PDFIO_DEBUG("_pdfioTokenRead: Skipping comment...\n");
while ((ch = get_char(tb)) != EOF)
{
count ++;
if (ch == '\n' || ch == '\r')
{
break;
}
else if (count > 2048)
{
_pdfioFileError(tb->pdf, "Comment too long.");
*bufptr = '\0';
return (false);
}
}
}
else if (!isspace(ch))
{
break;
}
else if (count > 2048)
{
_pdfioFileError(tb->pdf, "Too much whitespace.");
*bufptr = '\0';
return (false);
}
}
if (ch == EOF)
@ -266,6 +288,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr++ = (char)ch;
}
PDFIO_DEBUG("_pdfioTokenRead: state='%c'\n", state);
switch (state)
{
case '(' : // Literal string
@ -431,6 +455,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
if (!isdigit(ch) && ch != '.')
{
// End of number...
PDFIO_DEBUG("_pdfioTokenRead: End of number with ch=0x%02x\n", ch);
tb->bufptr --;
break;
}
@ -496,6 +521,13 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
return (false);
}
}
if (bufptr == (buffer + 1))
{
_pdfioFileError(tb->pdf, "Empty name.");
*bufptr = '\0';
return (false);
}
break;
case '<' : // Potential hex string
@ -519,6 +551,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
return (false);
}
count = 0;
do
{
if (isxdigit(ch))
@ -527,6 +561,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
{
// Hex digit
*bufptr++ = (char)ch;
count = 0;
}
else
{
@ -542,6 +577,16 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr = '\0';
return (false);
}
else
{
count ++;
if (count > 2048)
{
_pdfioFileError(tb->pdf, "Too much whitespace.");
*bufptr = '\0';
return (false);
}
}
}
while ((ch = get_char(tb)) != EOF && ch != '>');
@ -569,7 +614,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr = '\0';
// PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
return (bufptr > buffer);
}
@ -606,7 +651,6 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + bytes;
#if 0
#ifdef DEBUG
unsigned char *ptr; // Pointer into buffer
@ -620,7 +664,6 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
}
PDFIO_DEBUG("'\n");
#endif // DEBUG
#endif // 0
}
// Return the next character...