From a3f3bbfe11eeb682377f8bbb85502616aedc6508 Mon Sep 17 00:00:00 2001
From: Michael R Sweet
Date: Tue, 12 Jul 2022 18:36:08 -0400
Subject: [PATCH] Fix pdfioFileGetAuthor, etc. APIs (Issue #33)

---
 CHANGES.md    |  1 +
 pdfio-file.c  | 60 ++++++++++++++++++++++++++++++++-----
 pdfio-value.c | 50 ++++++++++++++++++++++++++++++-
 testpdfio.c   | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 185 insertions(+), 8 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 68c11d0..f312b69 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -11,6 +11,7 @@ v1.1.0 (Month DD, YYYY)
 - Added protection against opening multiple streams in the same file at the
   same time.
 - Fixed "install-shared" target (Issue #32)
+- Fixed `pdfioFileGet...` metadata APIs (Issue #33)
 - Fixed `pdfioContentMatrixRotate` function.
 
 
diff --git a/pdfio-file.c b/pdfio-file.c
index 456a48c..90986d9 100644
--- a/pdfio-file.c
+++ b/pdfio-file.c
@@ -24,6 +24,7 @@
 static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
 static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b);
 static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b);
+static const char *get_info_string(pdfio_file_t *pdf, const char *key);
 static bool load_obj_stream(pdfio_obj_t *obj);
 static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
 static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data);
@@ -892,7 +893,7 @@ pdfioFileFindObj(
 const char * // O - Author or `NULL` for none
 pdfioFileGetAuthor(pdfio_file_t *pdf) // I - PDF file
 {
-  return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Author") : NULL);
+  return (get_info_string(pdf, "Author"));
 }
 
 
@@ -904,7 +905,7 @@ time_t // O - Creation date or `0` for none
 pdfioFileGetCreationDate(
     pdfio_file_t *pdf) // I - PDF file
 {
-  return (pdf && pdf->info_obj ? pdfioDictGetDate(pdf->info_obj->value.value.dict, "CreationDate") : 0);
+  return (pdf && pdf->info_obj ? pdfioDictGetDate(pdfioObjGetDict(pdf->info_obj), "CreationDate") : 0);
 }
 
 
@@ -915,7 +916,7 @@ pdfioFileGetCreationDate(
 const char * // O - Creator string or `NULL` for none
 pdfioFileGetCreator(pdfio_file_t *pdf) // I - PDF file
 {
-  return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Creator") : NULL);
+  return (get_info_string(pdf, "Creator"));
 }
 
 
@@ -937,7 +938,7 @@ pdfioFileGetID(pdfio_file_t *pdf) // I - PDF file
 const char * // O - Keywords string or `NULL` for none
 pdfioFileGetKeywords(pdfio_file_t *pdf) // I - PDF file
 {
-  return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Keywords") : NULL);
+  return (get_info_string(pdf, "Keywords"));
 }
 
 
@@ -1041,7 +1042,7 @@ pdfioFileGetPermissions(
 const char * // O - Producer string or `NULL` for none
 pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file
 {
-  return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Producer") : NULL);
+  return (get_info_string(pdf, "Producer"));
 }
 
 
@@ -1052,7 +1053,7 @@ pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file
 const char * // O - Subject or `NULL` for none
 pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file
 {
-  return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Subject") : NULL);
+  return (get_info_string(pdf, "Subject"));
 }
 
 
@@ -1063,7 +1064,7 @@ pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file
 const char * // O - Title or `NULL` for none
 pdfioFileGetTitle(pdfio_file_t *pdf) // I - PDF file
 {
-  return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Title") : NULL);
+  return (get_info_string(pdf, "Title"));
 }
 
 
@@ -1406,6 +1407,51 @@ compare_objs(pdfio_obj_t **a, // I - First object
 }
 
 
+//
+// 'get_info_string()' - Get a string value from the Info dictionary.
+//
+// This function also handles converting binary strings to C strings, which
+// occur in encrypted PDF files.
+//
+
+static const char * // O - String or `NULL` if not found
+get_info_string(pdfio_file_t *pdf, // I - PDF file
+                const char *key) // I - Dictionary key
+{
+  pdfio_dict_t *dict; // Info dictionary
+  _pdfio_value_t *value; // Value
+
+  // Range check input...
+  if (!pdf || !pdf->info_obj || (dict = pdfioObjGetDict(pdf->info_obj)) == NULL || (value = _pdfioDictGetValue(dict, key)) == NULL)
+    return (NULL);
+
+  // If we already have a value, return it...
+  if (value->type == PDFIO_VALTYPE_NAME || value->type == PDFIO_VALTYPE_STRING)
+  {
+    return (value->value.string);
+  }
+  else if (value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096)
+  {
+    // Convert binary string to regular string...
+    char temp[4096]; // Temporary string
+
+    memcpy(temp, value->value.binary.data, value->value.binary.datalen);
+    temp[value->value.binary.datalen] = '\0';
+
+    free(value->value.binary.data);
+    value->type = PDFIO_VALTYPE_STRING;
+    value->value.string = pdfioStringCreate(pdf, temp);
+
+    return (value->value.string);
+  }
+  else
+  {
+    // Something else that is not a string...
+    return (NULL);
+  }
+}
+
+
 //
 // 'load_obj_stream()' - Load an object stream.
 //
diff --git a/pdfio-value.c b/pdfio-value.c
index 25e293d..7f114e6 100644
--- a/pdfio-value.c
+++ b/pdfio-value.c
@@ -219,7 +219,6 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
 
 
   PDFIO_DEBUG("_pdfioValueRead(pdf=%p, obj=%p, v=%p)\n", pdf, obj, v);
-  (void)obj; // TODO: Implement decryption
 
   if (!_pdfioTokenGet(tb, token, sizeof(token)))
     return (NULL);
@@ -284,6 +283,7 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
           }
         }
       }
+
       if (token[i])
       {
         // Just a string...
@@ -367,6 +367,54 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
 
       *dataptr++ = (unsigned char)d;
     }
+
+    if (obj && pdf->encryption)
+    {
+      // Decrypt the string...
+      _pdfio_crypto_ctx_t ctx; // Decryption context
+      _pdfio_crypto_cb_t cb; // Decryption callback
+      size_t ivlen; // Number of initialization vector bytes
+      uint8_t temp[32768]; // Temporary buffer for decryption
+      size_t templen; // Number of actual data bytes
+
+      if (v->value.binary.datalen > (sizeof(temp) - 32))
+      {
+        _pdfioFileError(pdf, "Unable to read encrypted binary string - too long.");
+        return (NULL);
+      }
+
+      cb = _pdfioCryptoMakeReader(pdf, obj, &ctx, v->value.binary.data, &ivlen);
+
+      // Decryption starts "ivlen" bytes into the data, so a string shorter
+      // than that would make the unsigned length below wrap around...
+      if (ivlen > v->value.binary.datalen)
+      {
+        _pdfioFileError(pdf, "Unable to read encrypted binary string - too short.");
+        return (NULL);
+      }
+
+      templen = (cb)(&ctx, temp, v->value.binary.data + ivlen, v->value.binary.datalen - ivlen);
+
+      // Copy the decrypted string back to the value and adjust the length...
+      memcpy(v->value.binary.data, temp, templen);
+
+      if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128)
+      {
+        // The last decrypted byte is subtracted as the padding length, so it
+        // must exist and must not exceed the decrypted length...
+        if (templen == 0 || temp[templen - 1] > templen)
+        {
+          _pdfioFileError(pdf, "Unable to read encrypted binary string - bad padding.");
+          return (NULL);
+        }
+
+        v->value.binary.datalen = templen - temp[templen - 1];
+      }
+      else
+      {
+        v->value.binary.datalen = templen;
+      }
+    }
   }
   else if (strchr("0123456789-+.", token[0]) != NULL)
   {
diff --git a/testpdfio.c b/testpdfio.c
index 61f39c6..64d0c49 100644
--- a/testpdfio.c
+++ b/testpdfio.c
@@ -1310,6 +1310,7 @@ read_unit_file(const char *filename, // I - File to read
 {
   pdfio_file_t *pdf; // PDF file
   size_t i; // Looping var
+  const char *s; // String
   bool error = false; // Error callback data
 
 
@@ -1320,6 +1321,87 @@ read_unit_file(const char *filename, // I - File to read
   else
     return (1);
 
+  // Verify metadata...
+  fputs("pdfioFileGetAuthor: ", stdout);
+  if ((s = pdfioFileGetAuthor(pdf)) != NULL && !strcmp(s, "Michael R Sweet"))
+  {
+    puts("PASS");
+  }
+  else if (s)
+  {
+    printf("FAIL (got '%s', expected 'Michael R Sweet')\n", s);
+    return (1);
+  }
+  else
+  {
+    puts("FAIL (got NULL, expected 'Michael R Sweet')");
+    return (1);
+  }
+
+  fputs("pdfioFileGetCreator: ", stdout);
+  if ((s = pdfioFileGetCreator(pdf)) != NULL && !strcmp(s, "testpdfio"))
+  {
+    puts("PASS");
+  }
+  else if (s)
+  {
+    printf("FAIL (got '%s', expected 'testpdfio')\n", s);
+    return (1);
+  }
+  else
+  {
+    puts("FAIL (got NULL, expected 'testpdfio')");
+    return (1);
+  }
+
+  fputs("pdfioFileGetKeywords: ", stdout);
+  if ((s = pdfioFileGetKeywords(pdf)) != NULL && !strcmp(s, "one fish,two fish,red fish,blue fish"))
+  {
+    puts("PASS");
+  }
+  else if (s)
+  {
+    printf("FAIL (got '%s', expected 'one fish,two fish,red fish,blue fish')\n", s);
+    return (1);
+  }
+  else
+  {
+    puts("FAIL (got NULL, expected 'one fish,two fish,red fish,blue fish')");
+    return (1);
+  }
+
+  fputs("pdfioFileGetSubject: ", stdout);
+  if ((s = pdfioFileGetSubject(pdf)) != NULL && !strcmp(s, "Unit test document"))
+  {
+    puts("PASS");
+  }
+  else if (s)
+  {
+    printf("FAIL (got '%s', expected 'Unit test document')\n", s);
+    return (1);
+  }
+  else
+  {
+    puts("FAIL (got NULL, expected 'Unit test document')");
+    return (1);
+  }
+
+  fputs("pdfioFileGetTitle: ", stdout);
+  if ((s = pdfioFileGetTitle(pdf)) != NULL && !strcmp(s, "Test Document"))
+  {
+    puts("PASS");
+  }
+  else if (s)
+  {
+    printf("FAIL (got '%s', expected 'Test Document')\n", s);
+    return (1);
+  }
+  else
+  {
+    puts("FAIL (got NULL, expected 'Test Document')");
+    return (1);
+  }
+
   // Verify the number of pages is the same...
   fputs("pdfioFileGetNumPages: ", stdout);
   if (num_pages == pdfioFileGetNumPages(pdf))