From 18853ca3d5ee5d7e59e61d2de7c1dbf714d17b64 Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Mon, 10 May 2021 17:37:57 -0400 Subject: [PATCH] Increase token buffer to 256 bytes, to avoid overflow scenarios. Add _pdfioTokenFlush to manually flush the token buffer. Add more debug printfs. --- pdfio-array.c | 6 ++++ pdfio-common.c | 4 +++ pdfio-dict.c | 4 +-- pdfio-file.c | 4 +++ pdfio-object.c | 4 ++- pdfio-private.h | 21 ++++++------ pdfio-token.c | 87 +++++++++++++++++++++++++++++++++++-------------- 7 files changed, 93 insertions(+), 37 deletions(-) diff --git a/pdfio-array.c b/pdfio-array.c index 82fcc02..f272edb 100644 --- a/pdfio-array.c +++ b/pdfio-array.c @@ -524,6 +524,8 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file _pdfio_value_t value; // Value + PDFIO_DEBUG("_pdfioArrayRead(pdf=%p, tb=%p)\n", pdf, tb); + // Create an array... array = pdfioArrayCreate(pdf); @@ -541,6 +543,10 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file if (!_pdfioValueRead(pdf, tb, &value)) break; + PDFIO_DEBUG("_pdfioArrayRead(%p): Appending ", (void *)array); + PDFIO_DEBUG_VALUE(&value); + PDFIO_DEBUG("\n"); + append_value(array, &value); } diff --git a/pdfio-common.c b/pdfio-common.c index 1de953e..a460d85 100644 --- a/pdfio-common.c +++ b/pdfio-common.c @@ -205,6 +205,8 @@ _pdfioFilePeek(pdfio_file_t *pdf, // I - PDF file // Yes, try reading more... ssize_t rbytes; // Bytes read + PDFIO_DEBUG("_pdfioFilePeek: Sliding buffer, total=%ld\n", (long)total); + memmove(pdf->buffer, pdf->bufptr, total); pdf->bufpos += pdf->bufptr - pdf->buffer; pdf->bufptr = pdf->buffer; @@ -334,6 +336,8 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file off_t offset, // I - Offset int whence) // I - Offset base { + PDFIO_DEBUG("_pdfioFileSeek(pdf=%p, offset=%ld, whence=%d)\n", pdf, (long)offset, whence); + // Adjust offset for relative seeks... if (whence == SEEK_CUR) { diff --git a/pdfio-dict.c b/pdfio-dict.c index 8d473a7..1374f63 100644 --- a/pdfio-dict.c +++ b/pdfio-dict.c @@ -762,8 +762,8 @@ _pdfioDictSetValue( qsort(dict->pairs, dict->num_pairs, sizeof(_pdfio_pair_t), (int (*)(const void *, const void *))compare_pairs); #ifdef DEBUG - PDFIO_DEBUG("_pdfioDictSetValue: %lu pairs\n", (unsigned long)dict->num_pairs); - PDFIO_DEBUG("_pdfioDictSetValue: "); + PDFIO_DEBUG("_pdfioDictSetValue(%p): %lu pairs\n", (void *)dict, (unsigned long)dict->num_pairs); + PDFIO_DEBUG("_pdfioDictSetValue(%p): ", (void *)dict); PDFIO_DEBUG_DICT(dict); PDFIO_DEBUG("\n"); #endif // DEBUG diff --git a/pdfio-file.c b/pdfio-file.c index cfec7a8..25fedba 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -760,6 +760,8 @@ load_xref(pdfio_file_t *pdf, // I - PDF file return (false); } + _pdfioTokenFlush(&tb); + obj->stream_offset = _pdfioFileTell(pdf); if ((index_array = pdfioDictGetArray(trailer.value.dict, "Index")) != NULL) @@ -974,6 +976,8 @@ load_xref(pdfio_file_t *pdf, // I - PDF file _pdfioFileError(pdf, "Trailer is not a dictionary."); return (false); } + + _pdfioTokenFlush(&tb); } else { diff --git a/pdfio-object.c b/pdfio-object.c index c05f670..2027313 100644 --- a/pdfio-object.c +++ b/pdfio-object.c @@ -215,12 +215,14 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object } // Now see if there is an associated stream... - if (!_pdfioFileGets(obj->pdf, line, sizeof(line))) + if (!_pdfioTokenGet(&tb, line, sizeof(line))) { _pdfioFileError(obj->pdf, "Early end-of-file for object %lu.", (unsigned long)obj->number); return (false); } + _pdfioTokenFlush(&tb); + if (!strcmp(line, "stream")) { // Yes, save its location... diff --git a/pdfio-private.h b/pdfio-private.h index 01e2352..a6f2980 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -47,10 +47,10 @@ // # ifdef DEBUG -# define PDFIO_DEBUG(...) fprintf(stderr, __VA_ARGS__) -# define PDFIO_DEBUG_ARRAY(array) _pdfioArrayDebug(array, stderr) -# define PDFIO_DEBUG_DICT(dict) _pdfioDictDebug(dict, stderr) -# define PDFIO_DEBUG_VALUE(value) _pdfioValueDebug(value, stderr) +# define PDFIO_DEBUG(...) fprintf(stderr, __VA_ARGS__) +# define PDFIO_DEBUG_ARRAY(array) _pdfioArrayDebug(array, stderr) +# define PDFIO_DEBUG_DICT(dict) _pdfioDictDebug(dict, stderr) +# define PDFIO_DEBUG_VALUE(value) _pdfioValueDebug(value, stderr) # else # define PDFIO_DEBUG(...) # define PDFIO_DEBUG_ARRAY(array) @@ -89,7 +89,7 @@ typedef struct _pdfio_token_s // Token buffer/stack _pdfio_tconsume_cb_t consume_cb; // Consume callback _pdfio_tpeek_cb_t peek_cb; // Peek callback void *cb_data; // Callback data - unsigned char buffer[32], // Buffer + unsigned char buffer[256], // Buffer *bufptr, // Pointer into buffer *bufend; // Last valid byte in buffer size_t num_tokens; // Number of tokens in stack @@ -252,11 +252,12 @@ extern pdfio_stream_t *_pdfioStreamOpen(pdfio_obj_t *obj, bool decode) PDFIO_INT extern bool _pdfioStringIsAllocated(pdfio_file_t *pdf, const char *s) PDFIO_INTERNAL; -extern void _pdfioTokenClear(_pdfio_token_t *ts) PDFIO_INTERNAL; -extern bool _pdfioTokenGet(_pdfio_token_t *ts, char *buffer, size_t bufsize) PDFIO_INTERNAL; -extern void _pdfioTokenInit(_pdfio_token_t *ts, pdfio_file_t *pdf, _pdfio_tconsume_cb_t consume_cb, _pdfio_tpeek_cb_t peek_cb, void *cb_data); -extern void _pdfioTokenPush(_pdfio_token_t *ts, const char *token) PDFIO_INTERNAL; -extern bool _pdfioTokenRead(_pdfio_token_t *ts, char *buffer, size_t bufsize); +extern void _pdfioTokenClear(_pdfio_token_t *tb) PDFIO_INTERNAL; +extern void _pdfioTokenFlush(_pdfio_token_t *tb) PDFIO_INTERNAL; +extern bool _pdfioTokenGet(_pdfio_token_t *tb, char *buffer, size_t bufsize) PDFIO_INTERNAL; +extern void _pdfioTokenInit(_pdfio_token_t *tb, pdfio_file_t *pdf, _pdfio_tconsume_cb_t consume_cb, _pdfio_tpeek_cb_t peek_cb, void *cb_data); +extern void _pdfioTokenPush(_pdfio_token_t *tb, const char *token) PDFIO_INTERNAL; +extern bool _pdfioTokenRead(_pdfio_token_t *tb, char *buffer, size_t bufsize); extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) PDFIO_INTERNAL; extern void _pdfioValueDebug(_pdfio_value_t *v, FILE *fp) PDFIO_INTERNAL; diff --git a/pdfio-token.c b/pdfio-token.c index 4a2e6e5..6d8e773 100644 --- a/pdfio-token.c +++ b/pdfio-token.c @@ -69,6 +69,52 @@ _pdfioTokenClear(_pdfio_token_t *tb) // I - Token buffer/stack } +// +// '_pdfioTokenFlush()' - Flush (consume) any bytes that have been used. +// + +void +_pdfioTokenFlush(_pdfio_token_t *tb) // I - Token buffer/stack +{ + if (tb->bufptr > tb->buffer) + { + size_t remaining = (size_t)(tb->bufend - tb->bufptr); + // Remaining bytes in buffer + + // Consume what we've used... + PDFIO_DEBUG("_pdfioTokenFlush: Consuming %d bytes.\n", (int)(tb->bufptr - tb->buffer)); + (tb->consume_cb)(tb->cb_data, (size_t)(tb->bufptr - tb->buffer)); + + if (remaining > 0) + { + // Shuffle remaining bytes for next call... + memmove(tb->buffer, tb->bufptr, remaining); + tb->bufptr = tb->buffer; + tb->bufend = tb->buffer + remaining; + +#ifdef DEBUG + unsigned char *ptr; // Pointer into buffer + + PDFIO_DEBUG("_pdfioTokenFlush: Remainder '"); + for (ptr = tb->buffer; ptr < tb->bufend; ptr ++) + { + if (*ptr < ' ' || *ptr == 0x7f) + PDFIO_DEBUG("\\%03o", *ptr); + else + PDFIO_DEBUG("%c", *ptr); + } + PDFIO_DEBUG("'\n"); +#endif // DEBUG + } + else + { + // Nothing left, reset pointers... + tb->bufptr = tb->bufend = tb->buffer; + } + } +} + + // // '_pdfioTokenGet()' - Get a token. // @@ -453,30 +499,6 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack while (tb->bufptr < tb->bufend && isspace(*(tb->bufptr))) tb->bufptr ++; -#if 0 - if (tb->bufptr > tb->buffer) - { - size_t remaining = (size_t)(tb->bufend - tb->bufptr); - // Remaining bytes in buffer - - // Consume what we've used... - (tb->consume_cb)(tb->cb_data, (size_t)(tb->bufptr - tb->buffer)); - - if (remaining > 0) - { - // Shuffle remaining bytes for next call... - memmove(tb->buffer, tb->bufptr, remaining); - tb->bufptr = tb->buffer; - tb->bufend = tb->buffer + remaining; - } - else - { - // Nothing left, reset pointers... - tb->bufptr = tb->bufend = tb->buffer; - } - } -#endif // 0 - *bufptr = '\0'; PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer); @@ -500,7 +522,10 @@ get_char(_pdfio_token_t *tb) // I - Token buffer { // Consume previous bytes... if (tb->bufend > tb->buffer) + { + PDFIO_DEBUG("get_char: Consuming %d bytes.\n", (int)(tb->bufend - tb->buffer)); (tb->consume_cb)(tb->cb_data, (size_t)(tb->bufend - tb->buffer)); + } // Peek new bytes... if ((bytes = (tb->peek_cb)(tb->cb_data, tb->buffer, sizeof(tb->buffer))) <= 0) @@ -512,6 +537,20 @@ get_char(_pdfio_token_t *tb) // I - Token buffer // Update pointers... tb->bufptr = tb->buffer; tb->bufend = tb->buffer + bytes; + +#ifdef DEBUG + unsigned char *ptr; // Pointer into buffer + + PDFIO_DEBUG("get_char: Read '"); + for (ptr = tb->buffer; ptr < tb->bufend; ptr ++) + { + if (*ptr < ' ' || *ptr == 0x7f) + PDFIO_DEBUG("\\%03o", *ptr); + else + PDFIO_DEBUG("%c", *ptr); + } + PDFIO_DEBUG("'\n"); +#endif // DEBUG } // Return the next character...