From c992b2ba8900238c3a50f26dcdab34b4ee8296a7 Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Thu, 7 Dec 2023 17:50:52 -0500 Subject: [PATCH] Update the token reading code to protect against obvious format abuses. Update the xref loading code to protect against looping xref tables. --- CHANGES.md | 3 +++ pdfio-file.c | 13 ++++++++++++- pdfio-token.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 245b28a..6a8ad9c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,9 @@ v1.2.0 (Month DD, YYYY) functions (Issue #24) - Renamed `pdfioContentTextNextLine` to `pdfioContentTextNewLine`. - Now use autoconf to configure the PDFio sources (Issue #54) +- Updated the token reading code to protect against some obvious abuses of the + PDF format. +- Updated the xref reading code to protect against loops. v1.1.4 (December 3, 2023) diff --git a/pdfio-file.c b/pdfio-file.c index a9e0ac6..5e7d97d 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -2091,8 +2091,19 @@ load_xref( PDFIO_DEBUG_VALUE(&trailer); PDFIO_DEBUG("\n"); - if ((xref_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev")) <= 0) + off_t new_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev"); + + if (new_offset <= 0) + { done = true; + } + else if (new_offset == xref_offset) + { + _pdfioFileError(pdf, "Recursive xref table."); + return (false); + } + + xref_offset = new_offset; } // Once we have all of the xref tables loaded, get the important objects and diff --git a/pdfio-token.c b/pdfio-token.c index 685138e..652c850 100644 --- a/pdfio-token.c +++ b/pdfio-token.c @@ -208,9 +208,10 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack *bufend, // End of buffer state = '\0'; // Current state bool saw_nul = false; // Did we see a nul character? + size_t count = 0; // Number of whitespace/comment bytes + - // // "state" is: // // - '\0' for idle @@ -229,17 +230,38 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack // Skip leading whitespace... while ((ch = get_char(tb)) != EOF) { + count ++; + if (ch == '%') { // Skip comment + PDFIO_DEBUG("_pdfioTokenRead: Skipping comment...\n"); while ((ch = get_char(tb)) != EOF) { + count ++; + if (ch == '\n' || ch == '\r') + { break; + } + else if (count > 2048) + { + _pdfioFileError(tb->pdf, "Comment too long."); + *bufptr = '\0'; + return (false); + } } } else if (!isspace(ch)) + { break; + } + else if (count > 2048) + { + _pdfioFileError(tb->pdf, "Too much whitespace."); + *bufptr = '\0'; + return (false); + } } if (ch == EOF) @@ -266,6 +288,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack *bufptr++ = (char)ch; } + PDFIO_DEBUG("_pdfioTokenRead: state='%c'\n", state); + switch (state) { case '(' : // Literal string @@ -431,6 +455,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack if (!isdigit(ch) && ch != '.') { // End of number... + PDFIO_DEBUG("_pdfioTokenRead: End of number with ch=0x%02x\n", ch); tb->bufptr --; break; } @@ -496,6 +521,13 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack return (false); } } + + if (bufptr == (buffer + 1)) + { + _pdfioFileError(tb->pdf, "Empty name."); + *bufptr = '\0'; + return (false); + } break; case '<' : // Potential hex string @@ -519,6 +551,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack return (false); } + count = 0; + do { if (isxdigit(ch)) @@ -527,6 +561,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack { // Hex digit *bufptr++ = (char)ch; + count = 0; } else { @@ -542,6 +577,16 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack *bufptr = '\0'; return (false); } + else + { + count ++; + if (count > 2048) + { + _pdfioFileError(tb->pdf, "Too much whitespace."); + *bufptr = '\0'; + return (false); + } + } } while ((ch = get_char(tb)) != EOF && ch != '>'); @@ -569,7 +614,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack *bufptr = '\0'; -// PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer); + PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer); return (bufptr > buffer); } @@ -606,7 +651,6 @@ get_char(_pdfio_token_t *tb) // I - Token buffer tb->bufptr = tb->buffer; tb->bufend = tb->buffer + bytes; -#if 0 #ifdef DEBUG unsigned char *ptr; // Pointer into buffer @@ -620,7 +664,6 @@ get_char(_pdfio_token_t *tb) // I - Token buffer } PDFIO_DEBUG("'\n"); #endif // DEBUG -#endif // 0 } // Return the next character...