diff --git a/pdfio-array.c b/pdfio-array.c index 2044180..1fde7d5 100644 --- a/pdfio-array.c +++ b/pdfio-array.c @@ -196,8 +196,9 @@ pdfioArrayAppendObject( return (false); // Add an indirect reference... - v.type = PDFIO_VALTYPE_INDIRECT; - v.value.obj = value; + v.type = PDFIO_VALTYPE_INDIRECT; + v.value.indirect.number = value->number; + v.value.indirect.generation = value->generation; return (append_value(a, &v)); } @@ -429,7 +430,7 @@ pdfioArrayGetObject(pdfio_array_t *a, // I - Array if (!a || n >= a->num_values || a->values[n].type != PDFIO_VALTYPE_INDIRECT) return (NULL); else - return (a->values[n].value.obj); + return (pdfioFileGetObject(a->pdf, a->values[n].value.indirect.number)); } @@ -489,6 +490,49 @@ _pdfioArrayGetValue(pdfio_array_t *a, // I - Array } +// +// '_pdfioArrayRead()' - Read an array from a file. +// +// At this point the initial "[" has been seen. Values are read and appended +// until the closing "]"; `NULL` is returned if the array cannot be created, +// a value cannot be read or appended, or the tokens run out first. +// + +pdfio_array_t * // O - New array or `NULL` on error +_pdfioArrayRead(pdfio_file_t *pdf) // I - PDF file +{ + pdfio_array_t *array; // New array + char token[8192]; // Token from file + _pdfio_value_t value; // Value + + + // Create an array... + if ((array = pdfioArrayCreate(pdf)) == NULL) + return (NULL); + + // Read until we get "]" to end the array... + while (_pdfioFileGetToken(pdf, token, sizeof(token))) + { + if (!strcmp(token, "]")) + { + // End of array... + return (array); + } + + // Push the token and decode the value... + _pdfioFilePushToken(pdf, token); + if (!_pdfioValueRead(pdf, &value)) + break; + + // Stop on the first value that cannot be stored... + if (!append_value(array, &value)) + break; + } + + return (NULL); +} + + // // '_pdfioArrayWrite()' - Write an array to a PDF file. // diff --git a/pdfio-common.c b/pdfio-common.c index bcf217e..2a63712 100644 --- a/pdfio-common.c +++ b/pdfio-common.c @@ -23,6 +23,22 @@ static ssize_t read_buffer(pdfio_file_t *pdf, char *buffer, size_t bytes); static bool write_buffer(pdfio_file_t *pdf, const void *buffer, size_t bytes); +// +// '_pdfioFileClearTokens()' - Clear the token stack. 
+// + +void +_pdfioFileClearTokens(pdfio_file_t *pdf)// I - PDF file +{ + while (pdf->num_tokens > 0) + { + pdf->num_tokens --; + free(pdf->tokens[pdf->num_tokens]); + pdf->tokens[pdf->num_tokens] = NULL; + } +} + + // // '_pdfioFileConsume()' - Consume bytes from the file. // @@ -131,6 +147,20 @@ _pdfioFileGetToken(pdfio_file_t *pdf, // I - PDF file char *buffer,// I - String buffer size_t bufsize)// I - Size of string buffer { + // See if we have a token waiting on the stack... + if (pdf->num_tokens > 0) + { + // Yes, pop the most recently pushed token and return a copy (truncated to fit the buffer)... + pdf->num_tokens --; + strncpy(buffer, pdf->tokens[pdf->num_tokens], bufsize - 1); + buffer[bufsize - 1] = '\0'; + + free(pdf->tokens[pdf->num_tokens]); + pdf->tokens[pdf->num_tokens] = NULL; + return (true); + } + + // No, read a new one... return (_pdfioTokenRead(pdf, buffer, bufsize, (_pdfio_tpeek_cb_t)_pdfioFilePeek, (_pdfio_tconsume_cb_t)_pdfioFileConsume, pdf)); } @@ -268,6 +298,22 @@ _pdfioFilePrintf(pdfio_file_t *pdf, // I - PDF file } +// +// '_pdfioFilePushToken()' - Push a copy of a token on the token stack; the token is dropped if the stack is full or the copy cannot be allocated. +// + +void +_pdfioFilePushToken(pdfio_file_t *pdf, // I - PDF file + const char *token)// I - Token +{ + if (pdf->num_tokens < (sizeof(pdf->tokens) / sizeof(pdf->tokens[0]))) + { + if ((pdf->tokens[pdf->num_tokens ++] = strdup(token)) == NULL) + pdf->num_tokens --; + } +} + + // // '_pdfioFilePuts()' - Write a literal string to a PDF file. 
// diff --git a/pdfio-dict.c b/pdfio-dict.c index 28f0f19..a2e043c 100644 --- a/pdfio-dict.c +++ b/pdfio-dict.c @@ -244,7 +244,7 @@ pdfioDictGetObject(pdfio_dict_t *dict, // I - Dictionary _pdfio_value_t *value = _pdfioDictGetValue(dict, key); if (value && value->type == PDFIO_VALTYPE_INDIRECT) - return (value->value.obj); + return (pdfioFileGetObject(dict->pdf, value->value.indirect.number)); else return (NULL); } @@ -341,18 +341,41 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary pdfio_dict_t * // O - New dictionary _pdfioDictRead(pdfio_file_t *pdf) // I - PDF file { - pdfio_dict_t *dict; // New dictionary - char token[8192], // Token buffer - key[256]; // Dictionary key - _pdfio_value_t value; // Dictionary value + pdfio_dict_t *dict; // New dictionary + char key[256]; // Key token (name with leading '/') or ">>" + _pdfio_value_t value; // Dictionary value - (void)pdf; - (void)dict; - (void)token; - (void)key; - (void)value; + // Create a dictionary and start reading... + dict = pdfioDictCreate(pdf); + while (_pdfioFileGetToken(pdf, key, sizeof(key))) + { + // Get the next key or end-of-dictionary... + if (!strcmp(key, ">>")) + { + // End of dictionary... + return (dict); + } + else if (key[0] != '/') + { + _pdfioFileError(pdf, "Invalid dictionary contents."); + break; + } + + // Then get the next value and store the pair (NOTE(review): key is a local buffer that still starts with '/' - confirm _pdfioDictSetValue copies it and expects the slash)... + if (!_pdfioValueRead(pdf, &value)) + { + _pdfioFileError(pdf, "Missing value for dictionary key."); + break; + } + + if (!_pdfioDictSetValue(dict, key, &value)) + break; + } + + // Dictionary is invalid - pdfioFileClose will free the memory, return NULL + // to indicate an error... 
return (NULL); } @@ -543,8 +566,8 @@ pdfioDictSetNumber(pdfio_dict_t *dict, // I - Dictionary bool // O - `true` on success, `false` on failure pdfioDictSetObject(pdfio_dict_t *dict, // I - Dictionary - const char *key, // I - Key - pdfio_obj_t *value) // I - Value + const char *key, // I - Key + pdfio_obj_t *value)// I - Value { _pdfio_value_t temp; // New value @@ -554,8 +577,9 @@ pdfioDictSetObject(pdfio_dict_t *dict, // I - Dictionary return (false); // Set the key/value pair... - temp.type = PDFIO_VALTYPE_INDIRECT; - temp.value.obj = value; + temp.type = PDFIO_VALTYPE_INDIRECT; + temp.value.indirect.number = value->number; + temp.value.indirect.generation = value->generation; return (_pdfioDictSetValue(dict, key, &temp)); } diff --git a/pdfio-private.h b/pdfio-private.h index e9a84ca..807fb2a 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -77,7 +77,11 @@ typedef struct _pdfio_value_s // Value structure bool boolean; // Boolean value time_t date; // Date/time value pdfio_dict_t *dict; // Dictionary value - pdfio_obj_t *obj; // Indirect object (N G obj) value + struct + { + size_t number; // Object number + unsigned short generation; // Generation number + } indirect; // Indirect object reference (N G R) const char *name; // Name value float number; // Number value const char *string; // String value @@ -140,6 +144,8 @@ struct _pdfio_file_s // PDF file structure size_t num_strings, // Number of strings alloc_strings; // Allocated strings char **strings; // Nul-terminated strings + size_t num_tokens; // Number of tokens in stack + char *tokens[4]; // Token stack (heap copies of pushed-back tokens, last in/first out) }; struct _pdfio_obj_s // Object @@ -175,6 +181,7 @@ typedef ssize_t (*_pdfio_tpeek_cb_t)(void *data, void *buffer, size_t bytes); extern void _pdfioArrayDelete(pdfio_array_t *a) PDFIO_INTERNAL; extern _pdfio_value_t *_pdfioArrayGetValue(pdfio_array_t *a, size_t n) PDFIO_INTERNAL; +extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf) PDFIO_INTERNAL; extern bool _pdfioArrayWrite(pdfio_array_t *a) 
PDFIO_INTERNAL; extern void _pdfioDictDelete(pdfio_dict_t *dict) PDFIO_INTERNAL; @@ -183,6 +190,7 @@ extern pdfio_dict_t *_pdfioDictRead(pdfio_file_t *pdf) PDFIO_INTERNAL; extern bool _pdfioDictSetValue(pdfio_dict_t *dict, const char *key, _pdfio_value_t *value) PDFIO_INTERNAL; extern bool _pdfioDictWrite(pdfio_dict_t *dict, off_t *length) PDFIO_INTERNAL; +extern void _pdfioFileClearTokens(pdfio_file_t *pdf) PDFIO_INTERNAL; extern bool _pdfioFileConsume(pdfio_file_t *pdf, size_t bytes) PDFIO_INTERNAL; extern bool _pdfioFileDefaultError(pdfio_file_t *pdf, const char *message, void *data) PDFIO_INTERNAL; extern bool _pdfioFileError(pdfio_file_t *pdf, const char *format, ...) PDFIO_FORMAT(2,3) PDFIO_INTERNAL; @@ -192,6 +200,7 @@ extern bool _pdfioFileGetToken(pdfio_file_t *pdf, char *buffer, size_t bufsize) extern bool _pdfioFileGets(pdfio_file_t *pdf, char *buffer, size_t bufsize) PDFIO_INTERNAL; extern ssize_t _pdfioFilePeek(pdfio_file_t *pdf, void *buffer, size_t bytes) PDFIO_INTERNAL; extern bool _pdfioFilePrintf(pdfio_file_t *pdf, const char *format, ...) 
PDFIO_FORMAT(2,3) PDFIO_INTERNAL; +extern void _pdfioFilePushToken(pdfio_file_t *pdf, const char *token) PDFIO_INTERNAL; extern bool _pdfioFilePuts(pdfio_file_t *pdf, const char *s) PDFIO_INTERNAL; extern ssize_t _pdfioFileRead(pdfio_file_t *pdf, void *buffer, size_t bytes) PDFIO_INTERNAL; extern off_t _pdfioFileSeek(pdfio_file_t *pdf, off_t offset, int whence) PDFIO_INTERNAL; @@ -208,6 +217,7 @@ extern bool _pdfioTokenRead(pdfio_file_t *pdf, char *buffer, size_t bufsize, _p extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) PDFIO_INTERNAL; extern void _pdfioValueDelete(_pdfio_value_t *v) PDFIO_INTERNAL; +extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, _pdfio_value_t *v) PDFIO_INTERNAL; extern bool _pdfioValueWrite(pdfio_file_t *pdf, _pdfio_value_t *v) PDFIO_INTERNAL; #endif // !PDFIO_PRIVATE_H diff --git a/pdfio-value.c b/pdfio-value.c index f2ba655..2ac9a48 100644 --- a/pdfio-value.c +++ b/pdfio-value.c @@ -92,6 +92,169 @@ _pdfioValueDelete(_pdfio_value_t *v) // I - Value } +// +// '_pdfioValueRead()' - Read a value from a file. +// +// Strings, names, hex strings, numbers, indirect references ("N G R"), +// booleans, null, arrays, and dictionaries are recognized. Tokens read ahead +// while testing for a reference are pushed back when it is not one. +// + +_pdfio_value_t * // O - Value or `NULL` on error/EOF +_pdfioValueRead(pdfio_file_t *pdf, // I - PDF file + _pdfio_value_t *v) // I - Value +{ + char token[8192]; // Token buffer + + + if (!_pdfioFileGetToken(pdf, token, sizeof(token))) + return (NULL); + + if (token[0] == '(') + { + // TODO: Add date value support + // String + v->type = PDFIO_VALTYPE_STRING; + v->value.string = pdfioStringCreate(pdf, token + 1); + } + else if (token[0] == '/') + { + // Name + v->type = PDFIO_VALTYPE_NAME; + v->value.name = pdfioStringCreate(pdf, token + 1); + } + else if (token[0] == '<' && strcmp(token, "<<")) + { + // Hex string ("<<" is left for the dictionary case below); strlen(token) counts the leading '<', so half of it is the digit count rounded up to whole bytes + const char *tokptr; // Pointer into token + unsigned char *dataptr; // Pointer into data + + v->type = PDFIO_VALTYPE_BINARY; + v->value.binary.datalen = strlen(token) / 2; + if ((v->value.binary.data = (unsigned char *)malloc(v->value.binary.datalen)) == NULL) + { + _pdfioFileError(pdf, "Out of memory for hex string."); + return (NULL); + } + + // Convert hex to binary (ctype calls get unsigned char values so negative chars are not undefined behavior)... 
+ tokptr = token + 1; + dataptr = v->value.binary.data; + + while (*tokptr) + { + int d; // Data value + + if (isdigit((unsigned char)*tokptr)) + d = (*tokptr++ - '0') << 4; + else + d = (tolower((unsigned char)*tokptr++) - 'a' + 10) << 4; + + if (*tokptr) + { + // PDF allows writers to drop a trailing 0... + if (isdigit((unsigned char)*tokptr)) + d |= *tokptr++ - '0'; + else + d |= tolower((unsigned char)*tokptr++) - 'a' + 10; + } + + *dataptr++ = (unsigned char)d; + } + } + else if (strchr("0123456789-+.", token[0]) != NULL) + { + // Number or indirect object reference + if (isdigit((unsigned char)token[0]) && !strchr(token, '.')) + { + // Integer or object ref... + char token2[8192], // Second token (generation number) + token3[8192], // Third token ("R") + *tokptr; // Pointer into token + + if (_pdfioFileGetToken(pdf, token2, sizeof(token2))) + { + // Got the second token, is it an integer? + for (tokptr = token2; *tokptr; tokptr ++) + { + if (!isdigit((unsigned char)*tokptr)) + break; + } + + if (*tokptr) + { + // Not an object reference, push this token for later use... + _pdfioFilePushToken(pdf, token2); + } + else + { + // A possible reference, get one more... + if (_pdfioFileGetToken(pdf, token3, sizeof(token3))) + { + if (!strcmp(token3, "R")) + { + // Reference! + v->type = PDFIO_VALTYPE_INDIRECT; + v->value.indirect.number = (size_t)strtoimax(token, NULL, 10); + v->value.indirect.generation = (unsigned short)strtol(token2, NULL, 10); + + return (v); + } + else + { + // Not a reference, push the tokens back in reverse order so token2 is read first... + _pdfioFilePushToken(pdf, token3); + _pdfioFilePushToken(pdf, token2); + } + } + else + { + // Not a reference... + _pdfioFilePushToken(pdf, token2); + } + } + } + } + + // If we get here, we have a number... 
+ v->type = PDFIO_VALTYPE_NUMBER; + v->value.number = (float)strtod(token, NULL); + } + else if (!strcmp(token, "true") || !strcmp(token, "false")) + { + // Boolean value + v->type = PDFIO_VALTYPE_BOOLEAN; + v->value.boolean = !strcmp(token, "true"); + } + else if (!strcmp(token, "null")) + { + // null value + v->type = PDFIO_VALTYPE_NULL; + } + else if (!strcmp(token, "[")) + { + // Start of array + v->type = PDFIO_VALTYPE_ARRAY; + if ((v->value.array = _pdfioArrayRead(pdf)) == NULL) + return (NULL); + } + else if (!strcmp(token, "<<")) + { + // Start of dictionary + v->type = PDFIO_VALTYPE_DICT; + if ((v->value.dict = _pdfioDictRead(pdf)) == NULL) + return (NULL); + } + else + { + _pdfioFileError(pdf, "Unexpected '%s' token seen.", token); + return (NULL); + } + + return (v); +} + + // // '_pdfioValueWrite()' - Write a value to a PDF file. // @@ -146,7 +309,7 @@ _pdfioValueWrite(pdfio_file_t *pdf, // I - PDF file return (_pdfioDictWrite(v->value.dict, NULL)); case PDFIO_VALTYPE_INDIRECT : - return (_pdfioFilePrintf(pdf, " %lu %lu obj", (unsigned long)v->value.obj->number, (unsigned long)v->value.obj->generation)); + return (_pdfioFilePrintf(pdf, " %lu %u R", (unsigned long)v->value.indirect.number, v->value.indirect.generation)); case PDFIO_VALTYPE_NAME : return (_pdfioFilePrintf(pdf, "/%s", v->value.name));