From d2a0484d93c1d24e008cd7f3d687f818758bd095 Mon Sep 17 00:00:00 2001
From: Michael R Sweet
Date: Thu, 6 May 2021 09:51:48 -0400
Subject: [PATCH] Rework debug functions, start xref stream support.

---
 pdfio-array.c   |  20 ++++
 pdfio-dict.c    |  33 ++-----
 pdfio-file.c    | 238 +++++++++++++++++++++++++++++-------------------
 pdfio-private.h |  16 +++-
 pdfio-value.c   |  59 ++++++++++++
 5 files changed, 244 insertions(+), 122 deletions(-)

diff --git a/pdfio-array.c b/pdfio-array.c
index 1fde7d5..7e9af25 100644
--- a/pdfio-array.c
+++ b/pdfio-array.c
@@ -305,6 +305,26 @@ pdfioArrayCreate(pdfio_file_t *pdf) // I - PDF file
 }
 
 
+#ifdef DEBUG
+//
+// '_pdfioArrayDebug()' - Print the contents of an array.
+//
+
+void
+_pdfioArrayDebug(pdfio_array_t *a)      // I - Array
+{
+  size_t                i;              // Looping var (counts remaining values down to 0)
+  _pdfio_value_t        *v;             // Current value (walks forward through a->values)
+
+
+  PDFIO_DEBUG("[");                     // Opening bracket; no newline is written by this function
+  for (i = a->num_values, v = a->values; i > 0; i --, v ++)
+    _pdfioValueDebug(v);                // Each element is printed by the generic value printer
+  PDFIO_DEBUG("]");                     // Closing bracket, again without a trailing newline
+}
+#endif // DEBUG
+
+
 //
 // '_pdfioArrayDelete()' - Free the memory used by an array.
// diff --git a/pdfio-dict.c b/pdfio-dict.c index 7be3353..9397ae4 100644 --- a/pdfio-dict.c +++ b/pdfio-dict.c @@ -113,8 +113,7 @@ pdfioDictCreate(pdfio_file_t *pdf) // I - PDF file // void -_pdfioDictDebug(pdfio_dict_t *dict, // I - Dictionary - const char *prefix) // I - Prefix for each line +_pdfioDictDebug(pdfio_dict_t *dict) // I - Dictionary { size_t i; // Looping var _pdfio_pair_t *pair; // Current pair @@ -122,30 +121,8 @@ _pdfioDictDebug(pdfio_dict_t *dict, // I - Dictionary for (i = dict->num_pairs, pair = dict->pairs; i > 0; i --, pair ++) { - switch (pair->value.type) - { - case PDFIO_VALTYPE_INDIRECT : - PDFIO_DEBUG("%s: /%s %u %u R\n", prefix, pair->key, (unsigned)pair->value.value.indirect.number, pair->value.value.indirect.generation); - break; - case PDFIO_VALTYPE_NUMBER : - PDFIO_DEBUG("%s: /%s %g\n", prefix, pair->key, pair->value.value.number); - break; - case PDFIO_VALTYPE_BOOLEAN : - PDFIO_DEBUG("%s: /%s %s\n", prefix, pair->key, pair->value.value.boolean ? "true" : "false"); - break; - case PDFIO_VALTYPE_NULL : - PDFIO_DEBUG("%s: /%s null\n", prefix, pair->key); - break; - case PDFIO_VALTYPE_ARRAY : - PDFIO_DEBUG("%s: /%s [...]\n", prefix, pair->key); - break; - case PDFIO_VALTYPE_DICT : - PDFIO_DEBUG("%s: /%s <<...>>\n", prefix, pair->key); - break; - default : - PDFIO_DEBUG("%s: /%s ...\n", prefix, pair->key); - break; - } + PDFIO_DEBUG("/%s", pair->key); + _pdfioValueDebug(&pair->value); } } #endif // DEBUG @@ -773,7 +750,9 @@ _pdfioDictSetValue( #ifdef DEBUG PDFIO_DEBUG("_pdfioDictSetValue: %lu pairs\n", (unsigned long)dict->num_pairs); - PDFIO_DEBUG_DICT(dict, "_pdfioDictSetValue"); + PDFIO_DEBUG("_pdfioDictSetValue: "); + PDFIO_DEBUG_DICT(dict); + PDFIO_DEBUG("\n"); #endif // DEBUG return (true); diff --git a/pdfio-file.c b/pdfio-file.c index ecd5edd..ab5dc21 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -517,6 +517,10 @@ load_xref(pdfio_file_t *pdf, // I - PDF file char line[1024], // Line from file *ptr; // Pointer into line 
_pdfio_value_t trailer; // Trailer dictionary + intmax_t number, // Object number + num_objects, // Number of objects + offset; // Offset in file + int generation; // Generation number while (!done) @@ -533,103 +537,153 @@ load_xref(pdfio_file_t *pdf, // I - PDF file return (false); } - if (strcmp(line, "xref")) + if (isdigit(line[0] & 255) && strlen(line) > 4 && !strcmp(line + strlen(line) - 4, " obj")) + { + // Cross-reference stream + pdfio_obj_t *obj; // Object + pdfio_array_t *w_array; // W array + size_t w[3]; // Size of each cross-reference field + size_t w_total; // Total length + pdfio_stream_t *st; // Stream with + unsigned char buffer[32]; // Read buffer + + if ((number = strtoimax(line, &ptr, 10)) < 1) + { + _pdfioFileError(pdf, "Bad xref table header '%s'.", line); + return (false); + } + + if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || generation > 65535) + { + _pdfioFileError(pdf, "Bad xref table header '%s'.", line); + return (false); + } + + while (isspace(*ptr & 255)) + ptr ++; + + if (strcmp(ptr, "obj")) + { + _pdfioFileError(pdf, "Bad xref table header '%s'.", line); + return (false); + } + + if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, xref_offset)) == NULL) + { + _pdfioFileError(pdf, "Unable to allocate memory for object."); + return (false); + } + + if (!_pdfioValueRead(pdf, &trailer)) + { + _pdfioFileError(pdf, "Unable to read cross-reference stream dictionary."); + return (false); + } + else if (trailer.type != PDFIO_VALTYPE_DICT) + { + _pdfioFileError(pdf, "Cross-reference stream does not have a dictionary."); + return (false); + } + + obj->value = trailer; + + // TODO: read stream + } + else if (!strcmp(line, "xref")) + { + // Read the xref tables + while (_pdfioFileGets(pdf, line, sizeof(line))) + { + if (!strcmp(line, "trailer")) + break; + + if (sscanf(line, "%jd%jd", &number, &num_objects) != 2) + { + _pdfioFileError(pdf, "Malformed xref table section '%s'.", line); + return (false); + } + + // Read this 
group of objects... + for (; num_objects > 0; num_objects --, number ++) + { + // Read a line from the file and validate it... + if (_pdfioFileRead(pdf, line, 20) != 20) + return (false); + + line[20] = '\0'; + + if (strcmp(line + 18, "\r\n") && strcmp(line + 18, " \n") && strcmp(line + 18, " \r")) + { + _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); + return (false); + } + line[18] = '\0'; + + // Parse the line + if ((offset = strtoimax(line, &ptr, 10)) < 0) + { + _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); + return (false); + } + + if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || generation > 65535) + { + _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); + return (false); + } + + if (*ptr != ' ') + { + _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); + return (false); + } + + ptr ++; + if (*ptr != 'f' && *ptr != 'n') + { + _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); + return (false); + } + + if (*ptr == 'f') + continue; // Don't care about free objects... + + // Create a placeholder for the object in memory... + if (pdfioFileFindObject(pdf, (size_t)number)) + continue; // Don't replace newer object... 
+ + if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset)) + return (false); + } + } + + if (strcmp(line, "trailer")) + { + _pdfioFileError(pdf, "Missing trailer."); + return (false); + } + + if (!_pdfioValueRead(pdf, &trailer)) + { + _pdfioFileError(pdf, "Unable to read trailer dictionary."); + return (false); + } + else if (trailer.type != PDFIO_VALTYPE_DICT) + { + _pdfioFileError(pdf, "Trailer is not a dictionary."); + return (false); + } + } + else { _pdfioFileError(pdf, "Bad xref table header '%s'.", line); return (false); } - // Read the xref tables - while (_pdfioFileGets(pdf, line, sizeof(line))) - { - intmax_t number, // Object number - num_objects; // Number of objects - - if (!strcmp(line, "trailer")) - break; - - if (sscanf(line, "%jd%jd", &number, &num_objects) != 2) - { - _pdfioFileError(pdf, "Malformed xref table section '%s'.", line); - return (false); - } - - // Read this group of objects... - for (; num_objects > 0; num_objects --, number ++) - { - intmax_t offset; // Offset in file - int generation; // Generation number - - // Read a line from the file and validate it... 
- if (_pdfioFileRead(pdf, line, 20) != 20) - return (false); - - line[20] = '\0'; - - if (strcmp(line + 18, "\r\n") && strcmp(line + 18, " \n") && strcmp(line + 18, " \r")) - { - _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); - return (false); - } - line[18] = '\0'; - - // Parse the line - if ((offset = strtoimax(line, &ptr, 10)) < 0) - { - _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); - return (false); - } - - if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || generation > 65535) - { - _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); - return (false); - } - - if (*ptr != ' ') - { - _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); - return (false); - } - - ptr ++; - if (*ptr != 'f' && *ptr != 'n') - { - _pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); - return (false); - } - - if (*ptr == 'f') - continue; // Don't care about free objects... - - // Create a placeholder for the object in memory... - if (pdfioFileFindObject(pdf, (size_t)number)) - continue; // Don't replace newer object... 
- - if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset)) - return (false); - } - } - - if (strcmp(line, "trailer")) - { - _pdfioFileError(pdf, "Missing trailer."); - return (false); - } - - if (!_pdfioValueRead(pdf, &trailer)) - { - _pdfioFileError(pdf, "Unable to read trailer dictionary."); - return (false); - } - else if (trailer.type != PDFIO_VALTYPE_DICT) - { - _pdfioFileError(pdf, "Trailer is not a dictionary."); - return (false); - } - PDFIO_DEBUG("load_xref: Contents of trailer dictionary:\n"); - PDFIO_DEBUG_DICT(trailer.value.dict, "load_xref"); + PDFIO_DEBUG("load_xref: "); + PDFIO_DEBUG_VALUE(&trailer); + PDFIO_DEBUG("\n"); if (!pdf->trailer) { @@ -648,7 +702,7 @@ load_xref(pdfio_file_t *pdf, // I - PDF file pdf->id_array = pdfioDictGetArray(pdf->trailer, "ID"); } - if ((xref_offset = (off_t)pdfioDictGetNumber(pdf->trailer, "Prev")) <= 0) + if ((xref_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev")) <= 0) done = true; } diff --git a/pdfio-private.h b/pdfio-private.h index b279f63..a30de60 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -48,10 +48,14 @@ # ifdef DEBUG # define PDFIO_DEBUG(...) fprintf(stderr, __VA_ARGS__) -# define PDFIO_DEBUG_DICT(dict,prefix) _pdfioDictDebug(dict, prefix) +# define PDFIO_DEBUG_ARRAY(array) _pdfioArrayDebug(array) +# define PDFIO_DEBUG_DICT(dict) _pdfioDictDebug(dict) +# define PDFIO_DEBUG_VALUE(value) _pdfioValueDebug(value) # else # define PDFIO_DEBUG(...) -# define PDFIO_DEBUG_DICT(dict,prefix) +# define PDFIO_DEBUG_ARRAY(array) +# define PDFIO_DEBUG_DICT(dict) +# define PDFIO_DEBUG_VALUE(value) # endif // DEBUG @@ -183,13 +187,16 @@ typedef ssize_t (*_pdfio_tpeek_cb_t)(void *data, void *buffer, size_t bytes); // Functions... 
// +# ifdef DEBUG +extern void _pdfioArrayDebug(pdfio_array_t *a) PDFIO_INTERNAL; +# endif // DEBUG extern void _pdfioArrayDelete(pdfio_array_t *a) PDFIO_INTERNAL; extern _pdfio_value_t *_pdfioArrayGetValue(pdfio_array_t *a, size_t n) PDFIO_INTERNAL; extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf) PDFIO_INTERNAL; extern bool _pdfioArrayWrite(pdfio_array_t *a) PDFIO_INTERNAL; # ifdef DEBUG -extern void _pdfioDictDebug(pdfio_dict_t *dict, const char *prefix) PDFIO_INTERNAL; +extern void _pdfioDictDebug(pdfio_dict_t *dict) PDFIO_INTERNAL; # endif // DEBUG extern void _pdfioDictDelete(pdfio_dict_t *dict) PDFIO_INTERNAL; extern _pdfio_value_t *_pdfioDictGetValue(pdfio_dict_t *dict, const char *key) PDFIO_INTERNAL; @@ -224,6 +231,9 @@ extern bool _pdfioStringIsAllocated(pdfio_file_t *pdf, const char *s) PDFIO_INT extern bool _pdfioTokenRead(pdfio_file_t *pdf, char *buffer, size_t bufsize, _pdfio_tpeek_cb_t peek_cb, _pdfio_tconsume_cb_t consume_cb, void *data); extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) PDFIO_INTERNAL; +# ifdef DEBUG +extern void _pdfioValueDebug(_pdfio_value_t *v) PDFIO_INTERNAL; +# endif // DEBUG extern void _pdfioValueDelete(_pdfio_value_t *v) PDFIO_INTERNAL; extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, _pdfio_value_t *v) PDFIO_INTERNAL; extern bool _pdfioValueWrite(pdfio_file_t *pdf, _pdfio_value_t *v) PDFIO_INTERNAL; diff --git a/pdfio-value.c b/pdfio-value.c index f8a6f1d..f888c0e 100644 --- a/pdfio-value.c +++ b/pdfio-value.c @@ -80,6 +80,65 @@ _pdfioValueCopy(pdfio_file_t *pdfdst, // I - Destination PDF file } +#ifdef DEBUG +// +// '_pdfioValueDebug()' - Print the contents of a value. 
+//
+
+void
+_pdfioValueDebug(_pdfio_value_t *v)     // I - Value
+{
+  switch (v->type)                      // One output form per value type; nothing here writes a newline
+  {
+    case PDFIO_VALTYPE_ARRAY :
+        _pdfioArrayDebug(v->value.array);
+        break;
+    case PDFIO_VALTYPE_BINARY :         // Printed as hex digits between '<' and '>'
+        {
+          size_t        i;              // Looping var
+          unsigned char *ptr;           // Pointer into data
+
+          PDFIO_DEBUG("<");
+          for (i = v->value.binary.datalen, ptr = v->value.binary.data; i > 0; i --, ptr ++)
+            PDFIO_DEBUG("%02X", *ptr);
+          PDFIO_DEBUG(">");
+        }
+        break;
+    case PDFIO_VALTYPE_BOOLEAN :        // Literal format string plus the same leading space as null/number
+        PDFIO_DEBUG(" %s", v->value.boolean ? "true" : "false");
+        break;
+    case PDFIO_VALTYPE_DATE :
+        // TODO: Implement date value support (a fixed placeholder is printed, not the stored date)
+        PDFIO_DEBUG("(D:YYYYMMDDhhmmssZ)");
+        break;
+    case PDFIO_VALTYPE_DICT :           // The caller-visible "<<" and ">>" are added here, not by _pdfioDictDebug
+        PDFIO_DEBUG("<<");
+        _pdfioDictDebug(v->value.dict);
+        PDFIO_DEBUG(">>");
+        break;
+    case PDFIO_VALTYPE_INDIRECT :       // Leading space separates the reference from what precedes it
+        PDFIO_DEBUG(" %lu %u R", (unsigned long)v->value.indirect.number, v->value.indirect.generation);
+        break;
+    case PDFIO_VALTYPE_NAME :
+        PDFIO_DEBUG("/%s", v->value.name);
+        break;
+    case PDFIO_VALTYPE_NULL :
+        PDFIO_DEBUG(" null");
+        break;
+    case PDFIO_VALTYPE_NUMBER :
+        PDFIO_DEBUG(" %g", v->value.number);
+        break;
+    case PDFIO_VALTYPE_STRING :
+        PDFIO_DEBUG("(%s)", v->value.string);
+        break;
+
+    default :                           // Any other type prints nothing
+        break;
+  }
+}
+#endif // DEBUG
+
+
 //
 // '_pdfioValueDelete()' - Free the memory used by a value.
 //