From 9a919d478f4d6a544bd76db1ba4e719e8048dfb6 Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Sat, 8 May 2021 07:38:44 -0400 Subject: [PATCH] Rework token reading to be separate from the PDF file so we can add support for compressed object streams. --- pdfio-array.c | 9 +- pdfio-common.c | 63 -------------- pdfio-dict.c | 7 +- pdfio-file.c | 11 ++- pdfio-object.c | 5 +- pdfio-private.h | 36 +++++--- pdfio-stream.c | 7 +- pdfio-token.c | 220 +++++++++++++++++++++++++++++++++--------------- pdfio-value.c | 19 +++-- 9 files changed, 211 insertions(+), 166 deletions(-) diff --git a/pdfio-array.c b/pdfio-array.c index 7e9af25..7d13bfd 100644 --- a/pdfio-array.c +++ b/pdfio-array.c @@ -517,7 +517,8 @@ _pdfioArrayGetValue(pdfio_array_t *a, // I - Array // pdfio_array_t * // O - New array -_pdfioArrayRead(pdfio_file_t *pdf) // I - PDF file +_pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file + _pdfio_token_t *tb) // I - Token buffer/stack { pdfio_array_t *array; // New array char token[8192]; // Token from file @@ -528,7 +529,7 @@ _pdfioArrayRead(pdfio_file_t *pdf) // I - PDF file array = pdfioArrayCreate(pdf); // Read until we get "]" to end the array... - while (_pdfioFileGetToken(pdf, token, sizeof(token))) + while (_pdfioTokenGet(tb, token, sizeof(token))) { if (!strcmp(token, "]")) { @@ -537,8 +538,8 @@ _pdfioArrayRead(pdfio_file_t *pdf) // I - PDF file } // Push the token and decode the value... - _pdfioFilePushToken(pdf, token); - if (!_pdfioValueRead(pdf, &value)) + _pdfioTokenPush(tb, token); + if (!_pdfioValueRead(pdf, tb, &value)) break; append_value(array, &value); diff --git a/pdfio-common.c b/pdfio-common.c index 2ddc303..1de953e 100644 --- a/pdfio-common.c +++ b/pdfio-common.c @@ -23,24 +23,6 @@ static ssize_t read_buffer(pdfio_file_t *pdf, char *buffer, size_t bytes); static bool write_buffer(pdfio_file_t *pdf, const void *buffer, size_t bytes); -// -// '_pdfioFileClearTokens()' - Clear the token stack. -// - -void -_pdfioFileClearTokens(pdfio_file_t *pdf)// I - PDF file -{ - PDFIO_DEBUG("_pdfioFileClearTokens(pdf=%p)\n", pdf); - - while (pdf->num_tokens > 0) - { - pdf->num_tokens --; - free(pdf->tokens[pdf->num_tokens]); - pdf->tokens[pdf->num_tokens] = NULL; - } -} - - // // '_pdfioFileConsume()' - Consume bytes from the file. // @@ -144,35 +126,6 @@ _pdfioFileGetChar(pdfio_file_t *pdf) // I - PDF file } -// -// '_pdfioFileGetToken()' - Get a token from a PDF file. -// - -bool // O - `true` on success, `false` on failure -_pdfioFileGetToken(pdfio_file_t *pdf, // I - PDF file - char *buffer,// I - String buffer - size_t bufsize)// I - Size of string buffer -{ - // See if we have a token waiting on the stack... - if (pdf->num_tokens > 0) - { - // Yes, return it... - pdf->num_tokens --; - strncpy(buffer, pdf->tokens[pdf->num_tokens], bufsize - 1); - buffer[bufsize - 1] = '\0'; - - PDFIO_DEBUG("_pdfioFileGetToken(pdf=%p, buffer=%p, bufsize=%u): Popping '%s' from stack.\n", pdf, buffer, (unsigned)bufsize, buffer); - - free(pdf->tokens[pdf->num_tokens]); - pdf->tokens[pdf->num_tokens] = NULL; - return (true); - } - - // No, read a new one... - return (_pdfioTokenRead(pdf, buffer, bufsize, (_pdfio_tpeek_cb_t)_pdfioFilePeek, (_pdfio_tconsume_cb_t)_pdfioFileConsume, pdf)); -} - - // // '_pdfioFileGets()' - Read a line from a PDF file. // @@ -306,22 +259,6 @@ _pdfioFilePrintf(pdfio_file_t *pdf, // I - PDF file } -// -// '()' - Push a token on the token stack. -// - -void -_pdfioFilePushToken(pdfio_file_t *pdf, // I - PDF file - const char *token)// I - Token -{ - if (pdf->num_tokens < (sizeof(pdf->tokens) / sizeof(pdf->tokens[0]))) - { - if ((pdf->tokens[pdf->num_tokens ++] = strdup(token)) == NULL) - pdf->num_tokens --; - } -} - - // // '_pdfioFilePuts()' - Write a literal string to a PDF file. // diff --git a/pdfio-dict.c b/pdfio-dict.c index ffa47f7..ec35b8b 100644 --- a/pdfio-dict.c +++ b/pdfio-dict.c @@ -373,7 +373,8 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary // pdfio_dict_t * // O - New dictionary -_pdfioDictRead(pdfio_file_t *pdf) // I - PDF file +_pdfioDictRead(pdfio_file_t *pdf, // I - PDF file + _pdfio_token_t *tb) // I - Token buffer/stack { pdfio_dict_t *dict; // New dictionary char key[256]; // Dictionary key @@ -385,7 +386,7 @@ _pdfioDictRead(pdfio_file_t *pdf) // I - PDF file // Create a dictionary and start reading... dict = pdfioDictCreate(pdf); - while (_pdfioFileGetToken(pdf, key, sizeof(key))) + while (_pdfioTokenGet(tb, key, sizeof(key))) { // Get the next key or end-of-dictionary... if (!strcmp(key, ">>")) @@ -400,7 +401,7 @@ _pdfioDictRead(pdfio_file_t *pdf) // I - PDF file } // Then get the next value... - if (!_pdfioValueRead(pdf, &value)) + if (!_pdfioValueRead(pdf, tb, &value)) { _pdfioFileError(pdf, "Missing value for dictionary key."); break; diff --git a/pdfio-file.c b/pdfio-file.c index 6b4cd32..1c5a94e 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -521,6 +521,7 @@ load_xref(pdfio_file_t *pdf, // I - PDF file num_objects, // Number of objects offset; // Offset in file int generation; // Generation number + _pdfio_token_t tb; // Token buffer/stack while (!done) @@ -581,7 +582,9 @@ load_xref(pdfio_file_t *pdf, // I - PDF file return (false); } - if (!_pdfioValueRead(pdf, &trailer)) + _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf); + + if (!_pdfioValueRead(pdf, &tb, &trailer)) { _pdfioFileError(pdf, "Unable to read cross-reference stream dictionary."); return (false); @@ -594,7 +597,7 @@ load_xref(pdfio_file_t *pdf, // I - PDF file obj->value = trailer; - if (!_pdfioFileGetToken(pdf, line, sizeof(line)) || strcmp(line, "stream")) + if (!_pdfioTokenGet(&tb, line, sizeof(line)) || strcmp(line, "stream")) { _pdfioFileError(pdf, "Unable to get stream after xref dictionary."); return (false); @@ -748,7 +751,9 @@ load_xref(pdfio_file_t *pdf, // I - PDF file return (false); } - if (!_pdfioValueRead(pdf, &trailer)) + _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf); + + if (!_pdfioValueRead(pdf, &tb, &trailer)) { _pdfioFileError(pdf, "Unable to read trailer dictionary."); return (false); diff --git a/pdfio-object.c b/pdfio-object.c index a9146bf..e6f86b2 100644 --- a/pdfio-object.c +++ b/pdfio-object.c @@ -128,6 +128,7 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object { char line[1024], // Line from file *ptr; // Pointer into line + _pdfio_token_t tb; // Token buffer/stack PDFIO_DEBUG("_pdfioObjLoad(obj=%p(%lu)), offset=%lu\n", obj, (unsigned long)obj->number, (unsigned long)obj->offset); @@ -167,9 +168,9 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object } // Then grab the object value... - _pdfioFileClearTokens(obj->pdf); + _pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, obj->pdf); - if (!_pdfioValueRead(obj->pdf, &obj->value)) + if (!_pdfioValueRead(obj->pdf, &tb, &obj->value)) { _pdfioFileError(obj->pdf, "Unable to read value for object %lu.", (unsigned long)obj->number); return (false); diff --git a/pdfio-private.h b/pdfio-private.h index 0f1727f..ed3c488 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -80,6 +80,22 @@ typedef enum _pdfio_predictor_e // PNG predictor constants _PDFIO_PREDICTOR_PNG_PAETH = 14 // PNG Paeth predictor } _pdfio_predictor_t; +typedef ssize_t (*_pdfio_tconsume_cb_t)(void *data, size_t bytes); +typedef ssize_t (*_pdfio_tpeek_cb_t)(void *data, void *buffer, size_t bytes); + +typedef struct _pdfio_token_s // Token buffer/stack +{ + pdfio_file_t *pdf; // PDF file + _pdfio_tconsume_cb_t consume_cb; // Consume callback + _pdfio_tpeek_cb_t peek_cb; // Peek callback + void *cb_data; // Callback data + unsigned char buffer[32], // Buffer + *bufptr, // Pointer into buffer + *bufend; // Last valid byte in buffer + size_t num_tokens; // Number of tokens in stack + char *tokens[4]; // Token stack +} _pdfio_token_t; + typedef struct _pdfio_value_s // Value structure { pdfio_valtype_t type; // Type of value @@ -163,8 +179,6 @@ struct _pdfio_file_s // PDF file structure size_t num_strings, // Number of strings alloc_strings; // Allocated strings char **strings; // Nul-terminated strings - size_t num_tokens; // Number of tokens in stack - char *tokens[4]; // Token stack }; struct _pdfio_obj_s // Object @@ -198,9 +212,6 @@ struct _pdfio_stream_s // Stream *pbuffers[2]; // Predictor buffers, as needed }; -typedef ssize_t (*_pdfio_tconsume_cb_t)(void *data, size_t bytes); -typedef ssize_t (*_pdfio_tpeek_cb_t)(void *data, void *buffer, size_t bytes); - // // Functions... @@ -211,7 +222,7 @@ extern void _pdfioArrayDebug(pdfio_array_t *a) PDFIO_INTERNAL; # endif // DEBUG extern void _pdfioArrayDelete(pdfio_array_t *a) PDFIO_INTERNAL; extern _pdfio_value_t *_pdfioArrayGetValue(pdfio_array_t *a, size_t n) PDFIO_INTERNAL; -extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf) PDFIO_INTERNAL; +extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf, _pdfio_token_t *ts) PDFIO_INTERNAL; extern bool _pdfioArrayWrite(pdfio_array_t *a) PDFIO_INTERNAL; # ifdef DEBUG @@ -219,21 +230,18 @@ extern void _pdfioDictDebug(pdfio_dict_t *dict) PDFIO_INTERNAL; # endif // DEBUG extern void _pdfioDictDelete(pdfio_dict_t *dict) PDFIO_INTERNAL; extern _pdfio_value_t *_pdfioDictGetValue(pdfio_dict_t *dict, const char *key) PDFIO_INTERNAL; -extern pdfio_dict_t *_pdfioDictRead(pdfio_file_t *pdf) PDFIO_INTERNAL; +extern pdfio_dict_t *_pdfioDictRead(pdfio_file_t *pdf, _pdfio_token_t *ts) PDFIO_INTERNAL; extern bool _pdfioDictSetValue(pdfio_dict_t *dict, const char *key, _pdfio_value_t *value) PDFIO_INTERNAL; extern bool _pdfioDictWrite(pdfio_dict_t *dict, off_t *length) PDFIO_INTERNAL; -extern void _pdfioFileClearTokens(pdfio_file_t *pdf) PDFIO_INTERNAL; extern bool _pdfioFileConsume(pdfio_file_t *pdf, size_t bytes) PDFIO_INTERNAL; extern bool _pdfioFileDefaultError(pdfio_file_t *pdf, const char *message, void *data) PDFIO_INTERNAL; extern bool _pdfioFileError(pdfio_file_t *pdf, const char *format, ...) PDFIO_FORMAT(2,3) PDFIO_INTERNAL; extern bool _pdfioFileFlush(pdfio_file_t *pdf) PDFIO_INTERNAL; extern int _pdfioFileGetChar(pdfio_file_t *pdf) PDFIO_INTERNAL; -extern bool _pdfioFileGetToken(pdfio_file_t *pdf, char *buffer, size_t bufsize) PDFIO_INTERNAL; extern bool _pdfioFileGets(pdfio_file_t *pdf, char *buffer, size_t bufsize) PDFIO_INTERNAL; extern ssize_t _pdfioFilePeek(pdfio_file_t *pdf, void *buffer, size_t bytes) PDFIO_INTERNAL; extern bool _pdfioFilePrintf(pdfio_file_t *pdf, const char *format, ...) PDFIO_FORMAT(2,3) PDFIO_INTERNAL; -extern void _pdfioFilePushToken(pdfio_file_t *pdf, const char *token) PDFIO_INTERNAL; extern bool _pdfioFilePuts(pdfio_file_t *pdf, const char *s) PDFIO_INTERNAL; extern ssize_t _pdfioFileRead(pdfio_file_t *pdf, void *buffer, size_t bytes) PDFIO_INTERNAL; extern off_t _pdfioFileSeek(pdfio_file_t *pdf, off_t offset, int whence) PDFIO_INTERNAL; @@ -248,14 +256,18 @@ extern pdfio_stream_t *_pdfioStreamOpen(pdfio_obj_t *obj, bool decode) PDFIO_INT extern bool _pdfioStringIsAllocated(pdfio_file_t *pdf, const char *s) PDFIO_INTERNAL; -extern bool _pdfioTokenRead(pdfio_file_t *pdf, char *buffer, size_t bufsize, _pdfio_tpeek_cb_t peek_cb, _pdfio_tconsume_cb_t consume_cb, void *data); +extern void _pdfioTokenClear(_pdfio_token_t *ts) PDFIO_INTERNAL; +extern bool _pdfioTokenGet(_pdfio_token_t *ts, char *buffer, size_t bufsize) PDFIO_INTERNAL; +extern void _pdfioTokenInit(_pdfio_token_t *ts, pdfio_file_t *pdf, _pdfio_tconsume_cb_t consume_cb, _pdfio_tpeek_cb_t peek_cb, void *cb_data); +extern void _pdfioTokenPush(_pdfio_token_t *ts, const char *token) PDFIO_INTERNAL; +extern bool _pdfioTokenRead(_pdfio_token_t *ts, char *buffer, size_t bufsize); extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) PDFIO_INTERNAL; # ifdef DEBUG extern void _pdfioValueDebug(_pdfio_value_t *v) PDFIO_INTERNAL; # endif // DEBUG extern void _pdfioValueDelete(_pdfio_value_t *v) PDFIO_INTERNAL; -extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, _pdfio_value_t *v) PDFIO_INTERNAL; +extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, _pdfio_token_t *ts, _pdfio_value_t *v) PDFIO_INTERNAL; extern bool _pdfioValueWrite(pdfio_file_t *pdf, _pdfio_value_t *v) PDFIO_INTERNAL; #endif // !PDFIO_PRIVATE_H diff --git a/pdfio-stream.c b/pdfio-stream.c index 7e27eeb..d674b79 100644 --- a/pdfio-stream.c +++ b/pdfio-stream.c @@ -121,12 +121,17 @@ pdfioStreamGetToken( char *buffer, // I - String buffer size_t bufsize) // I - Size of string buffer { + _pdfio_token_t tb; // Token buffer/stack + + // Range check input... if (!st || st->pdf->mode != _PDFIO_MODE_READ || !buffer || !bufsize) return (false); // Read using the token engine... - return (_pdfioTokenRead(st->pdf, buffer, bufsize, (_pdfio_tpeek_cb_t)pdfioStreamPeek, (_pdfio_tconsume_cb_t)pdfioStreamConsume, st)); + _pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st); + + return (_pdfioTokenRead(&tb, buffer, bufsize)); } diff --git a/pdfio-token.c b/pdfio-token.c index 3bb6368..b69ea2f 100644 --- a/pdfio-token.c +++ b/pdfio-token.c @@ -44,26 +44,99 @@ #define PDFIO_DELIM_CHARS "<>(){}[]/%" -// -// Types... -// - -typedef struct _pdfio_tbuffer_s // Token reading buffer -{ - unsigned char buffer[32], // Buffer - *bufptr, // Pointer into buffer - *bufend; // Last valid byte in buffer - _pdfio_tpeek_cb_t peek_cb; // Peek callback - _pdfio_tconsume_cb_t consume_cb; // Consume callback - void *data; // Callback data -} _pdfio_tbuffer_t; - - // // Local functions... // -static int get_char(_pdfio_tbuffer_t *tb); +static int get_char(_pdfio_token_t *tb); + + +// +// '_pdfioTokenClear()' - Clear the token stack. +// + +void +_pdfioTokenClear(_pdfio_token_t *tb) // I - Token buffer/stack +{ + PDFIO_DEBUG("_pdfioTokenClear(tb=%p)\n", tb); + + while (tb->num_tokens > 0) + { + tb->num_tokens --; + free(tb->tokens[tb->num_tokens]); + tb->tokens[tb->num_tokens] = NULL; + } +} + + +// +// '_pdfioTokenGet()' - Get a token. +// + +bool // O - `true` on success, `false` on failure +_pdfioTokenGet(_pdfio_token_t *tb, // I - Token buffer/stack + char *buffer, // I - String buffer + size_t bufsize) // I - Size of string buffer +{ + // See if we have a token waiting on the stack... + if (tb->num_tokens > 0) + { + // Yes, return it... + tb->num_tokens --; + strncpy(buffer, tb->tokens[tb->num_tokens], bufsize - 1); + buffer[bufsize - 1] = '\0'; + + PDFIO_DEBUG("_pdfioTokenGet(tb=%p, buffer=%p, bufsize=%u): Popping '%s' from stack.\n", tb, buffer, (unsigned)bufsize, buffer); + + free(tb->tokens[tb->num_tokens]); + tb->tokens[tb->num_tokens] = NULL; + + return (true); + } + + // No, read a new one... + return (_pdfioTokenRead(tb, buffer, bufsize)); +} + + +// +// '_pdfioTokenInit()' - Initialize a token buffer/stack. +// + +void +_pdfioTokenInit( + _pdfio_token_t *ts, // I - Token buffer/stack + pdfio_file_t *pdf, // I - PDF file + _pdfio_tconsume_cb_t consume_cb, // I - Consume callback + _pdfio_tpeek_cb_t peek_cb, // I - Peek callback + void *cb_data) // I - Callback data +{ + // Zero everything out and then initialize key pointers... + memset(ts, 0, sizeof(_pdfio_token_t)); + + ts->pdf = pdf; + ts->consume_cb = consume_cb; + ts->peek_cb = peek_cb; + ts->cb_data = cb_data; + ts->bufptr = ts->buffer; + ts->bufend = ts->buffer; +} + + +// +// '_pdfioTokenPush()' - Push a token on the token stack. +// + +void +_pdfioTokenPush(_pdfio_token_t *tb, // I - Token buffer/stack + const char *token) // I - Token to push +{ + if (tb->num_tokens < (sizeof(tb->tokens) / sizeof(tb->tokens[0]))) + { + if ((tb->tokens[tb->num_tokens ++] = strdup(token)) == NULL) + tb->num_tokens --; + } +} // @@ -71,19 +144,14 @@ static int get_char(_pdfio_tbuffer_t *tb); // bool // O - `true` on success, `false` on failure -_pdfioTokenRead( - pdfio_file_t *pdf, // I - PDF file - char *buffer, // I - String buffer - size_t bufsize, // I - Size of string buffer - _pdfio_tpeek_cb_t peek_cb, // I - "peek" callback - _pdfio_tconsume_cb_t consume_cb, // I - "consume" callback - void *data) // I - Callback data +_pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack + char *buffer, // I - String buffer + size_t bufsize) // I - Size of string buffer { - _pdfio_tbuffer_t tb; // Token buffer - int ch; // Character - char *bufptr, // Pointer into buffer - *bufend, // End of buffer - state = '\0'; // Current state + int ch; // Character + char *bufptr, // Pointer into buffer + *bufend, // End of buffer + state = '\0'; // Current state // @@ -99,21 +167,16 @@ _pdfioTokenRead( // - 'N' for number // Read the next token, skipping any leading whitespace... - memset(&tb, 0, sizeof(tb)); - tb.peek_cb = peek_cb; - tb.consume_cb = consume_cb; - tb.data = data; - bufptr = buffer; bufend = buffer + bufsize - 1; // Skip leading whitespace... - while ((ch = get_char(&tb)) != EOF) + while ((ch = get_char(tb)) != EOF) { if (ch == '%') { // Skip comment - while ((ch = get_char(&tb)) != EOF) + while ((ch = get_char(tb)) != EOF) { if (ch == '\n' || ch == '\r') break; @@ -147,14 +210,14 @@ _pdfioTokenRead( switch (state) { case '(' : // Literal string - while ((ch = get_char(&tb)) != EOF && ch != ')') + while ((ch = get_char(tb)) != EOF && ch != ')') { if (ch == '\\') { // Quoted character... int i; // Looping var - switch (ch = get_char(&tb)) + switch (ch = get_char(tb)) { case '0' : // Octal character escape case '1' : @@ -166,13 +229,13 @@ _pdfioTokenRead( case '7' : for (ch -= '0', i = 0; i < 2; i ++) { - int tch = get_char(&tb); // Next char + int tch = get_char(tb); // Next char if (tch >= '0' && tch <= '7') ch = (char)((ch << 3) | (tch - '0')); else { - tb.bufptr --; + tb->bufptr --; break; } } @@ -204,7 +267,7 @@ _pdfioTokenRead( break; default : - _pdfioFileError(pdf, "Unknown escape '\\%c' in literal string.", ch); + _pdfioFileError(tb->pdf, "Unknown escape '\\%c' in literal string.", ch); return (false); } } @@ -217,25 +280,25 @@ _pdfioTokenRead( else { // Out of space - _pdfioFileError(pdf, "Token too large."); + _pdfioFileError(tb->pdf, "Token too large."); return (false); } } if (ch != ')') { - _pdfioFileError(pdf, "Unterminated string literal."); + _pdfioFileError(tb->pdf, "Unterminated string literal."); return (false); } break; case 'K' : // keyword - while ((ch = get_char(&tb)) != EOF && !isspace(ch)) + while ((ch = get_char(tb)) != EOF && !isspace(ch)) { if (strchr(PDFIO_DELIM_CHARS, ch) != NULL) { // End of keyword... - tb.bufptr --; + tb->bufptr --; break; } else if (bufptr < bufend) @@ -246,19 +309,19 @@ _pdfioTokenRead( else { // Out of space... - _pdfioFileError(pdf, "Token too large."); + _pdfioFileError(tb->pdf, "Token too large."); return (false); } } break; case 'N' : // number - while ((ch = get_char(&tb)) != EOF && !isspace(ch)) + while ((ch = get_char(tb)) != EOF && !isspace(ch)) { if (!isdigit(ch) && ch != '.') { // End of number... - tb.bufptr --; + tb->bufptr --; break; } else if (bufptr < bufend) @@ -269,19 +332,19 @@ _pdfioTokenRead( else { // Out of space... - _pdfioFileError(pdf, "Token too large."); + _pdfioFileError(tb->pdf, "Token too large."); return (false); } } break; case '/' : // "/name" - while ((ch = get_char(&tb)) != EOF && !isspace(ch)) + while ((ch = get_char(tb)) != EOF && !isspace(ch)) { if (strchr(PDFIO_DELIM_CHARS, ch) != NULL) { // End of keyword... - tb.bufptr --; + tb->bufptr --; break; } else if (ch == '#') @@ -291,11 +354,11 @@ _pdfioTokenRead( for (i = 0, ch = 0; i < 2; i ++) { - int tch = get_char(&tb); + int tch = get_char(tb); if (!isxdigit(tch & 255)) { - _pdfioFileError(pdf, "Bad # escape in name."); + _pdfioFileError(tb->pdf, "Bad # escape in name."); return (false); } else if (isdigit(tch)) @@ -312,14 +375,14 @@ _pdfioTokenRead( else { // Out of space - _pdfioFileError(pdf, "Token too large."); + _pdfioFileError(tb->pdf, "Token too large."); return (false); } } break; case '<' : // Potential hex string - if ((ch = get_char(&tb)) == '<') + if ((ch = get_char(tb)) == '<') { // Dictionary delimiter *bufptr++ = (char)ch; @@ -327,11 +390,11 @@ _pdfioTokenRead( } else if (!isspace(ch & 255) && !isxdigit(ch & 255)) { - _pdfioFileError(pdf, "Syntax error: '<%c'", ch); + _pdfioFileError(tb->pdf, "Syntax error: '<%c'", ch); return (false); } - while ((ch = get_char(&tb)) != EOF && ch != '>') + while ((ch = get_char(tb)) != EOF && ch != '>') { if (isxdigit(ch)) { @@ -343,46 +406,65 @@ _pdfioTokenRead( else { // Too large - _pdfioFileError(pdf, "Token too large."); + _pdfioFileError(tb->pdf, "Token too large."); return (false); } } else if (!isspace(ch)) { - _pdfioFileError(pdf, "Invalid hex string character '%c'.", ch); + _pdfioFileError(tb->pdf, "Invalid hex string character '%c'.", ch); return (false); } } if (ch == EOF) { - _pdfioFileError(pdf, "Unterminated hex string."); + _pdfioFileError(tb->pdf, "Unterminated hex string."); return (false); } break; case '>' : // Dictionary - if ((ch = get_char(&tb)) == '>') + if ((ch = get_char(tb)) == '>') { *bufptr++ = '>'; } else { - _pdfioFileError(pdf, "Syntax error: '>%c'.", ch); + _pdfioFileError(tb->pdf, "Syntax error: '>%c'.", ch); return (false); } break; } - while (tb.bufptr < tb.bufend && isspace(*(tb.bufptr))) - tb.bufptr ++; + while (tb->bufptr < tb->bufend && isspace(*(tb->bufptr))) + tb->bufptr ++; - if (tb.bufptr > tb.buffer) - (consume_cb)(data, (size_t)(tb.bufptr - tb.buffer)); + if (tb->bufptr > tb->buffer) + { + size_t remaining = (size_t)(tb->bufend - tb->bufptr); + // Remaining bytes in buffer + + // Consume what we've used... + (tb->consume_cb)(tb->cb_data, (size_t)(tb->bufptr - tb->buffer)); + + if (remaining > 0) + { + // Shuffle remaining bytes for next call... + memmove(tb->buffer, tb->bufptr, remaining); + tb->bufptr = tb->buffer; + tb->bufend = tb->buffer + remaining; + } + else + { + // Nothing left, reset pointers... + tb->bufptr = tb->bufend = tb->buffer; + } + } *bufptr = '\0'; - PDFIO_DEBUG("_pdfioTokenRead(pdf=%p, ...): Read '%s'.\n", pdf, buffer); + PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer); return (bufptr > buffer); } @@ -393,7 +475,7 @@ _pdfioTokenRead( // static int // O - Character or `EOF` on end-of-file -get_char(_pdfio_tbuffer_t *tb) // I - Token buffer +get_char(_pdfio_token_t *tb) // I - Token buffer { ssize_t bytes; // Bytes peeked @@ -403,10 +485,10 @@ get_char(_pdfio_tbuffer_t *tb) // I - Token buffer { // Consume previous bytes... if (tb->bufend > tb->buffer) - (tb->consume_cb)(tb->data, (size_t)(tb->bufend - tb->buffer)); + (tb->consume_cb)(tb->cb_data, (size_t)(tb->bufend - tb->buffer)); // Peek new bytes... - if ((bytes = (tb->peek_cb)(tb->data, tb->buffer, sizeof(tb->buffer))) < 0) + if ((bytes = (tb->peek_cb)(tb->cb_data, tb->buffer, sizeof(tb->buffer))) <= 0) { tb->bufptr = tb->bufend = tb->buffer; return (EOF); diff --git a/pdfio-value.c b/pdfio-value.c index f888c0e..927f165 100644 --- a/pdfio-value.c +++ b/pdfio-value.c @@ -157,6 +157,7 @@ _pdfioValueDelete(_pdfio_value_t *v) // I - Value _pdfio_value_t * // O - Value or `NULL` on error/EOF _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file + _pdfio_token_t *tb, // I - Token buffer/stack _pdfio_value_t *v) // I - Value { char token[8192]; // Token buffer @@ -180,21 +181,21 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file PDFIO_DEBUG("_pdfioValueRead(pdf=%p, v=%p)\n", pdf, v); - if (!_pdfioFileGetToken(pdf, token, sizeof(token))) + if (!_pdfioTokenGet(tb, token, sizeof(token))) return (NULL); if (!strcmp(token, "[")) { // Start of array v->type = PDFIO_VALTYPE_ARRAY; - if ((v->value.array = _pdfioArrayRead(pdf)) == NULL) + if ((v->value.array = _pdfioArrayRead(pdf, tb)) == NULL) return (NULL); } else if (!strcmp(token, "<<")) { // Start of dictionary v->type = PDFIO_VALTYPE_DICT; - if ((v->value.dict = _pdfioDictRead(pdf)) == NULL) + if ((v->value.dict = _pdfioDictRead(pdf, tb)) == NULL) return (NULL); } else if (token[0] == '(') @@ -259,7 +260,7 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file token3[8192], // Third token ("R") *tokptr; // Pointer into token - if (_pdfioFileGetToken(pdf, token2, sizeof(token2))) + if (_pdfioTokenGet(tb, token2, sizeof(token2))) { // Got the second token, is it an integer? for (tokptr = token2; *tokptr; tokptr ++) @@ -271,12 +272,12 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file if (*tokptr) { // Not an object reference, push this token for later use... - _pdfioFilePushToken(pdf, token2); + _pdfioTokenPush(tb, token2); } else { // A possible reference, get one more... - if (_pdfioFileGetToken(pdf, token3, sizeof(token3))) + if (_pdfioTokenGet(tb, token3, sizeof(token3))) { if (!strcmp(token3, "R")) { @@ -292,14 +293,14 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file else { // Not a reference, push the tokens back... - _pdfioFilePushToken(pdf, token3); - _pdfioFilePushToken(pdf, token2); + _pdfioTokenPush(tb, token3); + _pdfioTokenPush(tb, token2); } } else { // Not a reference... - _pdfioFilePushToken(pdf, token2); + _pdfioTokenPush(tb, token2); } } }