Update the token reading code to protect against obvious format abuses.

Update the xref loading code to protect against looping xref tables.
2025-08-29 23:32:15 +02:00 · 2023-12-07 17:50:52 -05:00
parent ed723a46dc
commit c992b2ba89
3 changed files with 62 additions and 5 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -12,6 +12,9 @@ v1.2.0 (Month DD, YYYY)
  functions (Issue #24)
 - Renamed `pdfioContentTextNextLine` to `pdfioContentTextNewLine`.
 - Now use autoconf to configure the PDFio sources (Issue #54)
 - Updated the token reading code to protect against some obvious abuses of the
  PDF format.
 - Updated the xref reading code to protect against loops.
 v1.1.4 (December 3, 2023)
--- a/pdfio-file.c
+++ b/pdfio-file.c
@@ -2091,9 +2091,20 @@ load_xref(
    PDFIO_DEBUG_VALUE(&trailer);
    PDFIO_DEBUG("\n");
-    if ((xref_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev")) <= 0)
+    off_t new_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev");
    if (new_offset <= 0)
    {
      done = true;
    }
    else if (new_offset == xref_offset)
    {
      _pdfioFileError(pdf, "Recursive xref table.");
      return (false);
    }
    xref_offset = new_offset;
  }
  // Once we have all of the xref tables loaded, get the important objects and
  // build the pages array...
--- a/pdfio-token.c
+++ b/pdfio-token.c
@@ -208,9 +208,10 @@ _pdfioTokenRead(_pdfio_token_t *tb,	// I - Token buffer/stack
 	*bufend,			// End of buffer
 	state = '\0';			// Current state
  bool	saw_nul = false;		// Did we see a nul character?
  size_t count = 0;			// Number of whitespace/comment bytes
  //
  // "state" is:
  //
  // - '\0' for idle
@@ -229,18 +230,39 @@ _pdfioTokenRead(_pdfio_token_t *tb,	// I - Token buffer/stack
  // Skip leading whitespace...
  while ((ch = get_char(tb)) != EOF)
  {
    count ++;
    if (ch == '%')
    {
      // Skip comment
      PDFIO_DEBUG("_pdfioTokenRead: Skipping comment...\n");
      while ((ch = get_char(tb)) != EOF)
      {
        count ++;
 	if (ch == '\n' || ch == '\r')
 	{
 	  break;
 	}
 	else if (count > 2048)
 	{
 	  _pdfioFileError(tb->pdf, "Comment too long.");
 	  *bufptr = '\0';
 	  return (false);
 	}
      }
    }
    else if (!isspace(ch))
    {
      break;
    }
    else if (count > 2048)
    {
      _pdfioFileError(tb->pdf, "Too much whitespace.");
      *bufptr = '\0';
      return (false);
    }
  }
  if (ch == EOF)
  {
@@ -266,6 +288,8 @@ _pdfioTokenRead(_pdfio_token_t *tb,	// I - Token buffer/stack
    *bufptr++ = (char)ch;
  }
  PDFIO_DEBUG("_pdfioTokenRead: state='%c'\n", state);
  switch (state)
  {
    case '(' : // Literal string
@@ -431,6 +455,7 @@ _pdfioTokenRead(_pdfio_token_t *tb,	// I - Token buffer/stack
 	  if (!isdigit(ch) && ch != '.')
 	  {
 	    // End of number...
 	    PDFIO_DEBUG("_pdfioTokenRead: End of number with ch=0x%02x\n", ch);
 	    tb->bufptr --;
 	    break;
 	  }
@@ -496,6 +521,13 @@ _pdfioTokenRead(_pdfio_token_t *tb,	// I - Token buffer/stack
 	    return (false);
 	  }
 	}
 	if (bufptr == (buffer + 1))
 	{
 	  _pdfioFileError(tb->pdf, "Empty name.");
 	  *bufptr = '\0';
 	  return (false);
 	}
 	break;
    case '<' : // Potential hex string
@@ -519,6 +551,8 @@ _pdfioTokenRead(_pdfio_token_t *tb,	// I - Token buffer/stack
 	  return (false);
 	}
        count = 0;
        do
 	{
 	  if (isxdigit(ch))
@@ -527,6 +561,7 @@ _pdfioTokenRead(_pdfio_token_t *tb,	// I - Token buffer/stack
 	    {
 	      // Hex digit
 	      *bufptr++ = (char)ch;
 	      count = 0;
 	    }
 	    else
 	    {
@@ -542,6 +577,16 @@ _pdfioTokenRead(_pdfio_token_t *tb,	// I - Token buffer/stack
 	    *bufptr = '\0';
 	    return (false);
 	  }
 	  else
 	  {
 	    count ++;
 	    if (count > 2048)
 	    {
 	      _pdfioFileError(tb->pdf, "Too much whitespace.");
 	      *bufptr = '\0';
 	      return (false);
 	    }
 	  }
 	}
 	while ((ch = get_char(tb)) != EOF && ch != '>');
@@ -569,7 +614,7 @@ _pdfioTokenRead(_pdfio_token_t *tb,	// I - Token buffer/stack
  *bufptr = '\0';
-//  PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
+  PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
  return (bufptr > buffer);
 }
@@ -606,7 +651,6 @@ get_char(_pdfio_token_t *tb)		// I - Token buffer
    tb->bufptr = tb->buffer;
    tb->bufend = tb->buffer + bytes;
 #if 0
 #ifdef DEBUG
    unsigned char *ptr;			// Pointer into buffer
@@ -620,7 +664,6 @@ get_char(_pdfio_token_t *tb)		// I - Token buffer
    }
    PDFIO_DEBUG("'\n");
 #endif // DEBUG
 #endif // 0
  }
  // Return the next character...