Increase token buffer to 256 bytes, to avoid overflow scenarios.

Add _pdfioTokenFlush to manually flush the token buffer.

Add more debug printfs.
This commit is contained in:
Michael R Sweet 2021-05-10 17:37:57 -04:00
parent cfb5ca0ddc
commit 18853ca3d5
No known key found for this signature in database
GPG Key ID: 999559A027815955
7 changed files with 93 additions and 37 deletions

View File

@ -524,6 +524,8 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
_pdfio_value_t value; // Value
PDFIO_DEBUG("_pdfioArrayRead(pdf=%p, tb=%p)\n", pdf, tb);
// Create an array...
array = pdfioArrayCreate(pdf);
@ -541,6 +543,10 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
if (!_pdfioValueRead(pdf, tb, &value))
break;
PDFIO_DEBUG("_pdfioArrayRead(%p): Appending ", (void *)array);
PDFIO_DEBUG_VALUE(&value);
PDFIO_DEBUG("\n");
append_value(array, &value);
}

View File

@ -205,6 +205,8 @@ _pdfioFilePeek(pdfio_file_t *pdf, // I - PDF file
// Yes, try reading more...
ssize_t rbytes; // Bytes read
PDFIO_DEBUG("_pdfioFilePeek: Sliding buffer, total=%ld\n", (long)total);
memmove(pdf->buffer, pdf->bufptr, total);
pdf->bufpos += pdf->bufptr - pdf->buffer;
pdf->bufptr = pdf->buffer;
@ -334,6 +336,8 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file
off_t offset, // I - Offset
int whence) // I - Offset base
{
PDFIO_DEBUG("_pdfioFileSeek(pdf=%p, offset=%ld, whence=%d)\n", pdf, (long)offset, whence);
// Adjust offset for relative seeks...
if (whence == SEEK_CUR)
{

View File

@ -762,8 +762,8 @@ _pdfioDictSetValue(
qsort(dict->pairs, dict->num_pairs, sizeof(_pdfio_pair_t), (int (*)(const void *, const void *))compare_pairs);
#ifdef DEBUG
PDFIO_DEBUG("_pdfioDictSetValue: %lu pairs\n", (unsigned long)dict->num_pairs);
PDFIO_DEBUG("_pdfioDictSetValue: ");
PDFIO_DEBUG("_pdfioDictSetValue(%p): %lu pairs\n", (void *)dict, (unsigned long)dict->num_pairs);
PDFIO_DEBUG("_pdfioDictSetValue(%p): ", (void *)dict);
PDFIO_DEBUG_DICT(dict);
PDFIO_DEBUG("\n");
#endif // DEBUG

View File

@ -760,6 +760,8 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
return (false);
}
_pdfioTokenFlush(&tb);
obj->stream_offset = _pdfioFileTell(pdf);
if ((index_array = pdfioDictGetArray(trailer.value.dict, "Index")) != NULL)
@ -974,6 +976,8 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
_pdfioFileError(pdf, "Trailer is not a dictionary.");
return (false);
}
_pdfioTokenFlush(&tb);
}
else
{

View File

@ -215,12 +215,14 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object
}
// Now see if there is an associated stream...
if (!_pdfioFileGets(obj->pdf, line, sizeof(line)))
if (!_pdfioTokenGet(&tb, line, sizeof(line)))
{
_pdfioFileError(obj->pdf, "Early end-of-file for object %lu.", (unsigned long)obj->number);
return (false);
}
_pdfioTokenFlush(&tb);
if (!strcmp(line, "stream"))
{
// Yes, save its location...

View File

@ -89,7 +89,7 @@ typedef struct _pdfio_token_s // Token buffer/stack
_pdfio_tconsume_cb_t consume_cb; // Consume callback
_pdfio_tpeek_cb_t peek_cb; // Peek callback
void *cb_data; // Callback data
unsigned char buffer[32], // Buffer
unsigned char buffer[256], // Buffer
*bufptr, // Pointer into buffer
*bufend; // Last valid byte in buffer
size_t num_tokens; // Number of tokens in stack
@ -252,11 +252,12 @@ extern pdfio_stream_t *_pdfioStreamOpen(pdfio_obj_t *obj, bool decode) PDFIO_INT
extern bool _pdfioStringIsAllocated(pdfio_file_t *pdf, const char *s) PDFIO_INTERNAL;
extern void _pdfioTokenClear(_pdfio_token_t *ts) PDFIO_INTERNAL;
extern bool _pdfioTokenGet(_pdfio_token_t *ts, char *buffer, size_t bufsize) PDFIO_INTERNAL;
extern void _pdfioTokenInit(_pdfio_token_t *ts, pdfio_file_t *pdf, _pdfio_tconsume_cb_t consume_cb, _pdfio_tpeek_cb_t peek_cb, void *cb_data);
extern void _pdfioTokenPush(_pdfio_token_t *ts, const char *token) PDFIO_INTERNAL;
extern bool _pdfioTokenRead(_pdfio_token_t *ts, char *buffer, size_t bufsize);
extern void _pdfioTokenClear(_pdfio_token_t *tb) PDFIO_INTERNAL;
extern void _pdfioTokenFlush(_pdfio_token_t *tb) PDFIO_INTERNAL;
extern bool _pdfioTokenGet(_pdfio_token_t *tb, char *buffer, size_t bufsize) PDFIO_INTERNAL;
extern void _pdfioTokenInit(_pdfio_token_t *tb, pdfio_file_t *pdf, _pdfio_tconsume_cb_t consume_cb, _pdfio_tpeek_cb_t peek_cb, void *cb_data);
extern void _pdfioTokenPush(_pdfio_token_t *tb, const char *token) PDFIO_INTERNAL;
extern bool _pdfioTokenRead(_pdfio_token_t *tb, char *buffer, size_t bufsize);
extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) PDFIO_INTERNAL;
extern void _pdfioValueDebug(_pdfio_value_t *v, FILE *fp) PDFIO_INTERNAL;

View File

@ -69,6 +69,52 @@ _pdfioTokenClear(_pdfio_token_t *tb) // I - Token buffer/stack
}
//
// '_pdfioTokenFlush()' - Flush (consume) any bytes that have been used.
//
void
_pdfioTokenFlush(_pdfio_token_t *tb) // I - Token buffer/stack
{
if (tb->bufptr > tb->buffer)
{
size_t remaining = (size_t)(tb->bufend - tb->bufptr);
// Remaining bytes in buffer
// Consume what we've used...
PDFIO_DEBUG("_pdfioTokenFlush: Consuming %d bytes.\n", (int)(tb->bufptr - tb->buffer));
(tb->consume_cb)(tb->cb_data, (size_t)(tb->bufptr - tb->buffer));
if (remaining > 0)
{
// Shuffle remaining bytes for next call...
memmove(tb->buffer, tb->bufptr, remaining);
tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + remaining;
#ifdef DEBUG
unsigned char *ptr; // Pointer into buffer
PDFIO_DEBUG("_pdfioTokenFlush: Remainder '");
for (ptr = tb->buffer; ptr < tb->bufend; ptr ++)
{
if (*ptr < ' ' || *ptr == 0x7f)
PDFIO_DEBUG("\\%03o", *ptr);
else
PDFIO_DEBUG("%c", *ptr);
}
PDFIO_DEBUG("'\n");
#endif // DEBUG
}
else
{
// Nothing left, reset pointers...
tb->bufptr = tb->bufend = tb->buffer;
}
}
}
//
// '_pdfioTokenGet()' - Get a token.
//
@ -453,30 +499,6 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
while (tb->bufptr < tb->bufend && isspace(*(tb->bufptr)))
tb->bufptr ++;
#if 0
if (tb->bufptr > tb->buffer)
{
size_t remaining = (size_t)(tb->bufend - tb->bufptr);
// Remaining bytes in buffer
// Consume what we've used...
(tb->consume_cb)(tb->cb_data, (size_t)(tb->bufptr - tb->buffer));
if (remaining > 0)
{
// Shuffle remaining bytes for next call...
memmove(tb->buffer, tb->bufptr, remaining);
tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + remaining;
}
else
{
// Nothing left, reset pointers...
tb->bufptr = tb->bufend = tb->buffer;
}
}
#endif // 0
*bufptr = '\0';
PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
@ -500,7 +522,10 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
{
// Consume previous bytes...
if (tb->bufend > tb->buffer)
{
PDFIO_DEBUG("get_char: Consuming %d bytes.\n", (int)(tb->bufend - tb->buffer));
(tb->consume_cb)(tb->cb_data, (size_t)(tb->bufend - tb->buffer));
}
// Peek new bytes...
if ((bytes = (tb->peek_cb)(tb->cb_data, tb->buffer, sizeof(tb->buffer))) <= 0)
@ -512,6 +537,20 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
// Update pointers...
tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + bytes;
#ifdef DEBUG
unsigned char *ptr; // Pointer into buffer
PDFIO_DEBUG("get_char: Read '");
for (ptr = tb->buffer; ptr < tb->bufend; ptr ++)
{
if (*ptr < ' ' || *ptr == 0x7f)
PDFIO_DEBUG("\\%03o", *ptr);
else
PDFIO_DEBUG("%c", *ptr);
}
PDFIO_DEBUG("'\n");
#endif // DEBUG
}
// Return the next character...