diff --git a/Makefile b/Makefile index dc57847..f4afacd 100644 --- a/Makefile +++ b/Makefile @@ -67,7 +67,7 @@ all-shared: fi debug: - $(MAKE) -$(MAKEFLAGS) COMMONFLAGS="$(COMMONFLAGS) -DDEBUG=1" clean all + $(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-g -fsanitize=address -DDEBUG=1" clean all # Clean everything diff --git a/pdfio-dict.c b/pdfio-dict.c index 5598071..ea24faf 100644 --- a/pdfio-dict.c +++ b/pdfio-dict.c @@ -727,6 +727,8 @@ _pdfioDictSetValue( _pdfio_pair_t *pair; // Current pair + PDFIO_DEBUG("_pdfioDictSetValue(dict=%p, key=\"%s\", value=%p)\n", dict, key, (void *)value); + // See if the key is already set... if (dict->num_pairs > 0) { @@ -765,6 +767,11 @@ _pdfioDictSetValue( if (dict->num_pairs > 1) qsort(dict->pairs, dict->num_pairs, sizeof(_pdfio_pair_t), (int (*)(const void *, const void *))compare_pairs); +#ifdef DEBUG + PDFIO_DEBUG("_pdfioDictSetValue: %lu pairs\n", (unsigned long)dict->num_pairs); + PDFIO_DEBUG_DICT(dict, "_pdfioDictSetValue"); +#endif // DEBUG + return (true); } diff --git a/pdfio-file.c b/pdfio-file.c index 5955a0c..ecd5edd 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -302,12 +302,12 @@ pdfioFileGetNumPages(pdfio_file_t *pdf) // I - PDF file pdfio_obj_t * // O - Object pdfioFileGetObject(pdfio_file_t *pdf, // I - PDF file - size_t number) // I - Object number (starting at 1) + size_t n) // I - Object index (starting at 0) { - if (!pdf || number < 1 || number > pdf->num_objs) + if (!pdf || n >= pdf->num_objs) return (NULL); else - return (pdf->objs[number - 1]); + return (pdf->objs[n]); } @@ -317,12 +317,12 @@ pdfioFileGetObject(pdfio_file_t *pdf, // I - PDF file pdfio_obj_t * // O - Object pdfioFileGetPage(pdfio_file_t *pdf, // I - PDF file - size_t number) // I - Page number (starting at 1) + size_t n) // I - Page index (starting at 0) { - if (!pdf || number < 1 || number > pdf->num_pages) + if (!pdf || n >= pdf->num_pages) return (NULL); else - return (pdf->pages[number - 1]); + return (pdf->pages[n]); } @@ -474,6 +474,7 @@ 
add_obj(pdfio_file_t *pdf, // I - PDF file pdf->objs[pdf->num_objs ++] = obj; + obj->pdf = pdf; obj->number = number; obj->generation = generation; obj->offset = offset; diff --git a/pdfio-object.c b/pdfio-object.c index 2c99ee9..fe972bb 100644 --- a/pdfio-object.c +++ b/pdfio-object.c @@ -67,10 +67,16 @@ _pdfioObjDelete(pdfio_obj_t *obj) // I - Object pdfio_dict_t * // O - Dictionary or `NULL` on error pdfioObjGetDict(pdfio_obj_t *obj) // I - Object { - // TODO: Implement me - (void)obj; + if (!obj) + return (NULL); - return (NULL); + if (obj->value.type == PDFIO_VALTYPE_NONE) + _pdfioObjLoad(obj); + + if (obj->value.type == PDFIO_VALTYPE_DICT) + return (obj->value.value.dict); + else + return (NULL); } @@ -103,29 +109,18 @@ pdfioObjGetNumber(pdfio_obj_t *obj) // I - Object const char * // O - Object type pdfioObjGetType(pdfio_obj_t *obj) // I - Object { - // TODO: Implement me - (void)obj; + pdfio_dict_t *dict; // Object dictionary - return (NULL); + + if ((dict = pdfioObjGetDict(obj)) == NULL) + return (NULL); + else + return (pdfioDictGetName(dict, "Type")); } // -// 'pdfioObjOpenStream()' - Open an object's (data) stream for reading. -// - -pdfio_stream_t * // O - Stream or `NULL` on error -pdfioObjOpenStream(pdfio_obj_t *obj) // I - Object -{ - // TODO: Implement me - (void)obj; - - return (NULL); -} - - -// -// '()' - Load an object dictionary/value. +// '_pdfioObjLoad()' - Load an object dictionary/value. // bool // O - `true` on success, `false` otherwise @@ -135,6 +130,8 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object *ptr; // Pointer into line + PDFIO_DEBUG("_pdfioObjLoad(obj=%p(%lu)), offset=%lu\n", obj, (unsigned long)obj->number, (unsigned long)obj->offset); + // Seek to the start of the object and read its header... if (_pdfioFileSeek(obj->pdf, obj->offset, SEEK_SET) != obj->offset) { @@ -170,6 +167,8 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object } // Then grab the object value... 
+ _pdfioFileClearTokens(obj->pdf); + if (!_pdfioValueRead(obj->pdf, &obj->value)) { _pdfioFileError(obj->pdf, "Unable to read value for object %lu.", (unsigned long)obj->number); @@ -191,3 +190,17 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object return (true); } + + +// +// 'pdfioObjOpenStream()' - Open an object's (data) stream for reading. +// + +pdfio_stream_t * // O - Stream or `NULL` on error +pdfioObjOpenStream(pdfio_obj_t *obj) // I - Object +{ + // TODO: Implement me + (void)obj; + + return (NULL); +} diff --git a/pdfio-string.c b/pdfio-string.c index a4e41cf..10fa73f 100644 --- a/pdfio-string.c +++ b/pdfio-string.c @@ -39,6 +39,8 @@ pdfioStringCreate( char *news; // New string + PDFIO_DEBUG("pdfioStringCreate(pdf=%p, s=\"%s\")\n", pdf, s); + // Range check input... if (!pdf || !s) return (NULL); @@ -54,7 +56,7 @@ pdfioStringCreate( if (pdf->num_strings >= pdf->alloc_strings) { // Expand the string array... - char **temp = realloc(pdf->strings, (pdf->alloc_strings + 32) * sizeof(char *)); + char **temp = (char **)realloc(pdf->strings, (pdf->alloc_strings + 32) * sizeof(char *)); if (!temp) { @@ -72,6 +74,16 @@ pdfioStringCreate( if (pdf->num_strings > 1) qsort(pdf->strings, pdf->num_strings, sizeof(char *), (int (*)(const void *, const void *))compare_strings); +#ifdef DEBUG + { + size_t i; // Looping var + + PDFIO_DEBUG("pdfioStringCreate: %lu strings\n", (unsigned long)pdf->num_strings); + for (i = 0; i < pdf->num_strings; i ++) + PDFIO_DEBUG("pdfioStringCreate: strings[%lu]=%p(\"%s\")\n", (unsigned long)i, pdf->strings[i], pdf->strings[i]); + } +#endif // DEBUG + return (news); } diff --git a/pdfio-token.c b/pdfio-token.c index 1a55e08..3bb6368 100644 --- a/pdfio-token.c +++ b/pdfio-token.c @@ -90,14 +90,13 @@ _pdfioTokenRead( // "state" is: // // - '\0' for idle - // - ')' for literal string + // - '(' for literal string // - '/' for name // - '<' for possible hex string or dict // - '>' for possible dict // - '%' for comment // - 'K' for keyword // - 
'N' for number - // - 'X' for hex string // Read the next token, skipping any leading whitespace... memset(&tb, 0, sizeof(tb)); @@ -147,7 +146,7 @@ _pdfioTokenRead( switch (state) { - case ')' : // Literal string + case '(' : // Literal string while ((ch = get_char(&tb)) != EOF && ch != ')') { if (ch == '\\') @@ -259,6 +258,7 @@ _pdfioTokenRead( if (!isdigit(ch) && ch != '.') { // End of number... + tb.bufptr --; break; } else if (bufptr < bufend) @@ -278,7 +278,13 @@ _pdfioTokenRead( case '/' : // "/name" while ((ch = get_char(&tb)) != EOF && !isspace(ch)) { - if (ch == '#') + if (strchr(PDFIO_DELIM_CHARS, ch) != NULL) + { + // End of name... + tb.bufptr --; + break; + } + else if (ch == '#') { // Quoted character (#xx) in name... int i; // Looping var @@ -325,9 +331,6 @@ _pdfioTokenRead( return (false); } - // Fall through to parse a hex string... - - case 'X' : // Hex string while ((ch = get_char(&tb)) != EOF && ch != '>') { if (isxdigit(ch)) diff --git a/pdfio-value.c b/pdfio-value.c index be88f96..f8a6f1d 100644 --- a/pdfio-value.c +++ b/pdfio-value.c @@ -145,6 +145,12 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file v->type = PDFIO_VALTYPE_STRING; v->value.string = pdfioStringCreate(pdf, token + 1); } + else if (token[0] == '/') + { + // Name + v->type = PDFIO_VALTYPE_NAME; + v->value.name = pdfioStringCreate(pdf, token + 1); + } else if (token[0] == '<') { // Hex string @@ -220,6 +226,8 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file v->value.indirect.number = (size_t)strtoimax(token, NULL, 10); v->value.indirect.generation = (unsigned short)strtol(token2, NULL, 10); + PDFIO_DEBUG("_pdfioValueRead: Returning indirect value %lu %u R.\n", (unsigned long)v->value.indirect.number, v->value.indirect.generation); + return (v); } else diff --git a/testpdfio.c b/testpdfio.c index 8398550..cc2683e 100644 --- a/testpdfio.c +++ b/testpdfio.c @@ -11,7 +11,7 @@ // Include necessary headers... 
// -#include "pdfio.h" +#include "pdfio-private.h" // @@ -26,12 +26,69 @@ main(int argc, // I - Number of command-line arguments { int i; // Looping var pdfio_file_t *pdf; // PDF file + size_t n, // Object/page index + num_objs, // Number of objects + num_pages; // Number of pages + pdfio_obj_t *obj; // Object + pdfio_dict_t *dict; // Object dictionary + const char *type; // Object type for (i = 1; i < argc; i ++) { if ((pdf = pdfioFileOpen(argv[i], NULL, NULL)) != NULL) { - printf("%s: PDF %s, %d objects.\n", argv[i], pdfioFileGetVersion(pdf), (int)pdfioFileGetNumObjects(pdf)); + num_objs = pdfioFileGetNumObjects(pdf); + num_pages = pdfioFileGetNumPages(pdf); + + printf("%s: PDF %s, %d objects, %d pages.\n", argv[i], pdfioFileGetVersion(pdf), (int)num_objs, (int)num_pages); + + for (n = 0; n < num_objs; n ++) + { + if ((obj = pdfioFileGetObject(pdf, n)) == NULL) + { + printf("%s: Unable to get object #%d.\n", argv[i], (int)n); + } + else + { + size_t np; // Number of pairs + _pdfio_pair_t *pair; // Current pair + + dict = pdfioObjGetDict(obj); + + printf("%s: %u %u obj dict=%p(%lu)\n", argv[i], (unsigned)pdfioObjGetNumber(obj), (unsigned)pdfioObjGetGeneration(obj), dict, dict ? (unsigned long)dict->num_pairs : 0UL); + if (dict) + { + for (np = dict->num_pairs, pair = dict->pairs; np > 0; np --, pair ++) + { + switch (pair->value.type) + { + case PDFIO_VALTYPE_INDIRECT : + printf(" /%s %u %u R\n", pair->key, (unsigned)pair->value.value.indirect.number, pair->value.value.indirect.generation); + break; + case PDFIO_VALTYPE_NUMBER : + printf(" /%s %g\n", pair->key, pair->value.value.number); + break; + case PDFIO_VALTYPE_BOOLEAN : + printf(" /%s %s\n", pair->key, pair->value.value.boolean ? 
"true" : "false"); + break; + case PDFIO_VALTYPE_NULL : + printf(" /%s null\n", pair->key); + break; + case PDFIO_VALTYPE_ARRAY : + printf(" /%s [...]\n", pair->key); + break; + case PDFIO_VALTYPE_DICT : + printf(" /%s <<...>>\n", pair->key); + break; + default : + printf(" /%s ...\n", pair->key); + break; + } + } + } + } + } + pdfioFileClose(pdf); } }