Increase token buffer to 256 bytes to avoid overflow scenarios.

Add _pdfioTokenFlush to manually flush the token buffer.

Add more debug printfs.
This commit is contained in:
Michael R Sweet 2021-05-10 17:37:57 -04:00
parent cfb5ca0ddc
commit 18853ca3d5
No known key found for this signature in database
GPG Key ID: 999559A027815955
7 changed files with 93 additions and 37 deletions

View File

@ -524,6 +524,8 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
_pdfio_value_t value; // Value _pdfio_value_t value; // Value
PDFIO_DEBUG("_pdfioArrayRead(pdf=%p, tb=%p)\n", pdf, tb);
// Create an array... // Create an array...
array = pdfioArrayCreate(pdf); array = pdfioArrayCreate(pdf);
@ -541,6 +543,10 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
if (!_pdfioValueRead(pdf, tb, &value)) if (!_pdfioValueRead(pdf, tb, &value))
break; break;
PDFIO_DEBUG("_pdfioArrayRead(%p): Appending ", (void *)array);
PDFIO_DEBUG_VALUE(&value);
PDFIO_DEBUG("\n");
append_value(array, &value); append_value(array, &value);
} }

View File

@ -205,6 +205,8 @@ _pdfioFilePeek(pdfio_file_t *pdf, // I - PDF file
// Yes, try reading more... // Yes, try reading more...
ssize_t rbytes; // Bytes read ssize_t rbytes; // Bytes read
PDFIO_DEBUG("_pdfioFilePeek: Sliding buffer, total=%ld\n", (long)total);
memmove(pdf->buffer, pdf->bufptr, total); memmove(pdf->buffer, pdf->bufptr, total);
pdf->bufpos += pdf->bufptr - pdf->buffer; pdf->bufpos += pdf->bufptr - pdf->buffer;
pdf->bufptr = pdf->buffer; pdf->bufptr = pdf->buffer;
@ -334,6 +336,8 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file
off_t offset, // I - Offset off_t offset, // I - Offset
int whence) // I - Offset base int whence) // I - Offset base
{ {
PDFIO_DEBUG("_pdfioFileSeek(pdf=%p, offset=%ld, whence=%d)\n", pdf, (long)offset, whence);
// Adjust offset for relative seeks... // Adjust offset for relative seeks...
if (whence == SEEK_CUR) if (whence == SEEK_CUR)
{ {

View File

@ -762,8 +762,8 @@ _pdfioDictSetValue(
qsort(dict->pairs, dict->num_pairs, sizeof(_pdfio_pair_t), (int (*)(const void *, const void *))compare_pairs); qsort(dict->pairs, dict->num_pairs, sizeof(_pdfio_pair_t), (int (*)(const void *, const void *))compare_pairs);
#ifdef DEBUG #ifdef DEBUG
PDFIO_DEBUG("_pdfioDictSetValue: %lu pairs\n", (unsigned long)dict->num_pairs); PDFIO_DEBUG("_pdfioDictSetValue(%p): %lu pairs\n", (void *)dict, (unsigned long)dict->num_pairs);
PDFIO_DEBUG("_pdfioDictSetValue: "); PDFIO_DEBUG("_pdfioDictSetValue(%p): ", (void *)dict);
PDFIO_DEBUG_DICT(dict); PDFIO_DEBUG_DICT(dict);
PDFIO_DEBUG("\n"); PDFIO_DEBUG("\n");
#endif // DEBUG #endif // DEBUG

View File

@ -760,6 +760,8 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
return (false); return (false);
} }
_pdfioTokenFlush(&tb);
obj->stream_offset = _pdfioFileTell(pdf); obj->stream_offset = _pdfioFileTell(pdf);
if ((index_array = pdfioDictGetArray(trailer.value.dict, "Index")) != NULL) if ((index_array = pdfioDictGetArray(trailer.value.dict, "Index")) != NULL)
@ -974,6 +976,8 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
_pdfioFileError(pdf, "Trailer is not a dictionary."); _pdfioFileError(pdf, "Trailer is not a dictionary.");
return (false); return (false);
} }
_pdfioTokenFlush(&tb);
} }
else else
{ {

View File

@ -215,12 +215,14 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object
} }
// Now see if there is an associated stream... // Now see if there is an associated stream...
if (!_pdfioFileGets(obj->pdf, line, sizeof(line))) if (!_pdfioTokenGet(&tb, line, sizeof(line)))
{ {
_pdfioFileError(obj->pdf, "Early end-of-file for object %lu.", (unsigned long)obj->number); _pdfioFileError(obj->pdf, "Early end-of-file for object %lu.", (unsigned long)obj->number);
return (false); return (false);
} }
_pdfioTokenFlush(&tb);
if (!strcmp(line, "stream")) if (!strcmp(line, "stream"))
{ {
// Yes, save its location... // Yes, save its location...

View File

@ -47,10 +47,10 @@
// //
# ifdef DEBUG # ifdef DEBUG
# define PDFIO_DEBUG(...) fprintf(stderr, __VA_ARGS__) # define PDFIO_DEBUG(...) fprintf(stderr, __VA_ARGS__)
# define PDFIO_DEBUG_ARRAY(array) _pdfioArrayDebug(array, stderr) # define PDFIO_DEBUG_ARRAY(array) _pdfioArrayDebug(array, stderr)
# define PDFIO_DEBUG_DICT(dict) _pdfioDictDebug(dict, stderr) # define PDFIO_DEBUG_DICT(dict) _pdfioDictDebug(dict, stderr)
# define PDFIO_DEBUG_VALUE(value) _pdfioValueDebug(value, stderr) # define PDFIO_DEBUG_VALUE(value) _pdfioValueDebug(value, stderr)
# else # else
# define PDFIO_DEBUG(...) # define PDFIO_DEBUG(...)
# define PDFIO_DEBUG_ARRAY(array) # define PDFIO_DEBUG_ARRAY(array)
@ -89,7 +89,7 @@ typedef struct _pdfio_token_s // Token buffer/stack
_pdfio_tconsume_cb_t consume_cb; // Consume callback _pdfio_tconsume_cb_t consume_cb; // Consume callback
_pdfio_tpeek_cb_t peek_cb; // Peek callback _pdfio_tpeek_cb_t peek_cb; // Peek callback
void *cb_data; // Callback data void *cb_data; // Callback data
unsigned char buffer[32], // Buffer unsigned char buffer[256], // Buffer
*bufptr, // Pointer into buffer *bufptr, // Pointer into buffer
*bufend; // Last valid byte in buffer *bufend; // Last valid byte in buffer
size_t num_tokens; // Number of tokens in stack size_t num_tokens; // Number of tokens in stack
@ -252,11 +252,12 @@ extern pdfio_stream_t *_pdfioStreamOpen(pdfio_obj_t *obj, bool decode) PDFIO_INT
extern bool _pdfioStringIsAllocated(pdfio_file_t *pdf, const char *s) PDFIO_INTERNAL; extern bool _pdfioStringIsAllocated(pdfio_file_t *pdf, const char *s) PDFIO_INTERNAL;
extern void _pdfioTokenClear(_pdfio_token_t *ts) PDFIO_INTERNAL; extern void _pdfioTokenClear(_pdfio_token_t *tb) PDFIO_INTERNAL;
extern bool _pdfioTokenGet(_pdfio_token_t *ts, char *buffer, size_t bufsize) PDFIO_INTERNAL; extern void _pdfioTokenFlush(_pdfio_token_t *tb) PDFIO_INTERNAL;
extern void _pdfioTokenInit(_pdfio_token_t *ts, pdfio_file_t *pdf, _pdfio_tconsume_cb_t consume_cb, _pdfio_tpeek_cb_t peek_cb, void *cb_data); extern bool _pdfioTokenGet(_pdfio_token_t *tb, char *buffer, size_t bufsize) PDFIO_INTERNAL;
extern void _pdfioTokenPush(_pdfio_token_t *ts, const char *token) PDFIO_INTERNAL; extern void _pdfioTokenInit(_pdfio_token_t *tb, pdfio_file_t *pdf, _pdfio_tconsume_cb_t consume_cb, _pdfio_tpeek_cb_t peek_cb, void *cb_data);
extern bool _pdfioTokenRead(_pdfio_token_t *ts, char *buffer, size_t bufsize); extern void _pdfioTokenPush(_pdfio_token_t *tb, const char *token) PDFIO_INTERNAL;
extern bool _pdfioTokenRead(_pdfio_token_t *tb, char *buffer, size_t bufsize);
extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) PDFIO_INTERNAL; extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) PDFIO_INTERNAL;
extern void _pdfioValueDebug(_pdfio_value_t *v, FILE *fp) PDFIO_INTERNAL; extern void _pdfioValueDebug(_pdfio_value_t *v, FILE *fp) PDFIO_INTERNAL;

View File

@ -69,6 +69,52 @@ _pdfioTokenClear(_pdfio_token_t *tb) // I - Token buffer/stack
} }
//
// '_pdfioTokenFlush()' - Report used buffer bytes to the consume callback.
//
// Bytes in front of the current buffer position are handed to the consume
// callback.  Any bytes that have not been used yet are moved to the start of
// the buffer; otherwise the buffer pointers are reset to an empty buffer.
//

void
_pdfioTokenFlush(_pdfio_token_t *tb)	// I - Token buffer/stack
{
  size_t	used,			// Bytes in front of the current position
		unread;			// Bytes from the current position to the end
#ifdef DEBUG
  unsigned char	*cur;			// Current byte for debug output
#endif // DEBUG


  // Nothing has been used yet, so there is nothing to flush...
  if (tb->bufptr <= tb->buffer)
    return;

  used   = (size_t)(tb->bufptr - tb->buffer);
  unread = (size_t)(tb->bufend - tb->bufptr);

  // Hand the used bytes to the consume callback...
  PDFIO_DEBUG("_pdfioTokenFlush: Consuming %d bytes.\n", (int)(tb->bufptr - tb->buffer));
  (tb->consume_cb)(tb->cb_data, used);

  if (unread == 0)
  {
    // Buffer is fully used, start over with an empty buffer...
    tb->bufptr = tb->bufend = tb->buffer;
    return;
  }

  // Move the unread tail to the front of the buffer for the next call...
  memmove(tb->buffer, tb->bufptr, unread);
  tb->bufptr = tb->buffer;
  tb->bufend = tb->buffer + unread;

#ifdef DEBUG
  // Show the unread bytes, escaping control characters as octal...
  PDFIO_DEBUG("_pdfioTokenFlush: Remainder '");
  for (cur = tb->buffer; cur < tb->bufend; cur ++)
  {
    if (*cur < ' ' || *cur == 0x7f)
      PDFIO_DEBUG("\\%03o", *cur);
    else
      PDFIO_DEBUG("%c", *cur);
  }
  PDFIO_DEBUG("'\n");
#endif // DEBUG
}
// //
// '_pdfioTokenGet()' - Get a token. // '_pdfioTokenGet()' - Get a token.
// //
@ -453,30 +499,6 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
while (tb->bufptr < tb->bufend && isspace(*(tb->bufptr))) while (tb->bufptr < tb->bufend && isspace(*(tb->bufptr)))
tb->bufptr ++; tb->bufptr ++;
#if 0
if (tb->bufptr > tb->buffer)
{
size_t remaining = (size_t)(tb->bufend - tb->bufptr);
// Remaining bytes in buffer
// Consume what we've used...
(tb->consume_cb)(tb->cb_data, (size_t)(tb->bufptr - tb->buffer));
if (remaining > 0)
{
// Shuffle remaining bytes for next call...
memmove(tb->buffer, tb->bufptr, remaining);
tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + remaining;
}
else
{
// Nothing left, reset pointers...
tb->bufptr = tb->bufend = tb->buffer;
}
}
#endif // 0
*bufptr = '\0'; *bufptr = '\0';
PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer); PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
@ -500,7 +522,10 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
{ {
// Consume previous bytes... // Consume previous bytes...
if (tb->bufend > tb->buffer) if (tb->bufend > tb->buffer)
{
PDFIO_DEBUG("get_char: Consuming %d bytes.\n", (int)(tb->bufend - tb->buffer));
(tb->consume_cb)(tb->cb_data, (size_t)(tb->bufend - tb->buffer)); (tb->consume_cb)(tb->cb_data, (size_t)(tb->bufend - tb->buffer));
}
// Peek new bytes... // Peek new bytes...
if ((bytes = (tb->peek_cb)(tb->cb_data, tb->buffer, sizeof(tb->buffer))) <= 0) if ((bytes = (tb->peek_cb)(tb->cb_data, tb->buffer, sizeof(tb->buffer))) <= 0)
@ -512,6 +537,20 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
// Update pointers... // Update pointers...
tb->bufptr = tb->buffer; tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + bytes; tb->bufend = tb->buffer + bytes;
#ifdef DEBUG
unsigned char *ptr; // Pointer into buffer
PDFIO_DEBUG("get_char: Read '");
for (ptr = tb->buffer; ptr < tb->bufend; ptr ++)
{
if (*ptr < ' ' || *ptr == 0x7f)
PDFIO_DEBUG("\\%03o", *ptr);
else
PDFIO_DEBUG("%c", *ptr);
}
PDFIO_DEBUG("'\n");
#endif // DEBUG
} }
// Return the next character... // Return the next character...