From 57a01a73173eaadc37e0dbe206f5e35587cdd6bf Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Thu, 24 Apr 2025 14:00:24 -0400 Subject: [PATCH] Fix object map to use unique file hash instead of pointer values (Issue #125) --- CHANGES.md | 2 ++ examples/pdfiomerge.c | 12 ++++++------ pdfio-file.c | 44 ++++++++++++++++++++++++++++++++++++------- pdfio-object.c | 4 ++++ pdfio-private.h | 3 ++- 5 files changed, 51 insertions(+), 14 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index c90ac76..3ab84ce 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -27,6 +27,8 @@ v1.5.3 - YYYY-MM-DD - Fixed decryption of UTF-16 strings (Issue #42) - Fixed decryption of PDF files with large permission values. - Fixed support for EncryptMetadata key in the encryption dictionary. +- Fixed `pdfioObjCopy` and `pdfioPageCopy` to properly identify the source PDF + file being used (Issue #125) v1.5.2 - 2025-04-12 diff --git a/examples/pdfiomerge.c b/examples/pdfiomerge.c index 70d151b..9019057 100644 --- a/examples/pdfiomerge.c +++ b/examples/pdfiomerge.c @@ -8,8 +8,8 @@ // // Usage: // -// ./pdfmerge [-o OUTPUT.pdf] INPUT.pdf [... INPUT.pdf] -// ./pdfmerge INPUT.pdf [... INPUT.pdf] >OUTPUT.pdf +// ./pdfiomerge [-o OUTPUT.pdf] INPUT.pdf [... INPUT.pdf] +// ./pdfiomerge INPUT.pdf [... 
INPUT.pdf] >OUTPUT.pdf // #include @@ -47,7 +47,7 @@ main(int argc, // I - Number of command-line arguments } else if (!strncmp(argv[i], "--", 2)) { - fprintf(stderr, "pdfmerge: Unknown option '%s'.\n", argv[i]); + fprintf(stderr, "pdfiomerge: Unknown option '%s'.\n", argv[i]); return (usage(stderr)); } else if (argv[i][0] == '-') @@ -59,14 +59,14 @@ main(int argc, // I - Number of command-line arguments case 'o' : // -o OUTPUT.pdf if (outpdf) { - fputs("pdfmerge: Only one output file can be specified.\n", stderr); + fputs("pdfiomerge: Only one output file can be specified.\n", stderr); return (usage(stderr)); } i ++; if (i >= argc) { - fputs("pdfmerge: Missing output filename after '-o'.\n", stderr); + fputs("pdfiomerge: Missing output filename after '-o'.\n", stderr); return (usage(stderr)); } @@ -75,7 +75,7 @@ main(int argc, // I - Number of command-line arguments break; default : - fprintf(stderr, "pdfmerge: Unknown option '-%c'.\n", *opt); + fprintf(stderr, "pdfiomerge: Unknown option '-%c'.\n", *opt); return (usage(stderr)); } } diff --git a/pdfio-file.c b/pdfio-file.c index 7d7f27a..d4a5b36 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -64,9 +64,10 @@ _pdfioFileAddMappedObj( pdf->num_objmaps ++; map->obj = dst_obj; - map->src_pdf = src_obj->pdf; map->src_number = src_obj->number; + memcpy(map->src_id, src_obj->pdf->file_id, sizeof(map->src_id)); + // Sort as needed... if (pdf->num_objmaps > 1 && compare_objmaps(map, pdf->objmaps + pdf->num_objmaps - 2) < 0) qsort(pdf->objmaps, pdf->num_objmaps, sizeof(_pdfio_objmap_t), (int (*)(const void *, const void *))compare_objmaps); @@ -685,7 +686,7 @@ _pdfioFileFindMappedObj( return (NULL); // Otherwise search for a match... 
- key.src_pdf = src_pdf; + memcpy(key.src_id, src_pdf->file_id, sizeof(key.src_id)); key.src_number = src_number; if ((match = (_pdfio_objmap_t *)bsearch(&key, pdf->objmaps, pdf->num_objmaps, sizeof(_pdfio_objmap_t), (int (*)(const void *, const void *))compare_objmaps)) != NULL) @@ -1046,6 +1047,10 @@ pdfioFileOpen( *end; // End of line ssize_t bytes; // Bytes read off_t xref_offset; // Offset to xref table + time_t curtime; // Creation date/time + unsigned char *id_value; // ID value + size_t id_valuelen; // Length of ID value + _pdfio_sha256_t ctx; // Hashing context PDFIO_DEBUG("pdfioFileOpen(filename=\"%s\", password_cb=%p, password_cbdata=%p, error_cb=%p, error_cbdata=%p)\n", filename, (void *)password_cb, (void *)password_cbdata, (void *)error_cb, (void *)error_cbdata); @@ -1146,6 +1151,18 @@ pdfioFileOpen( goto error; } + // Create the unique file identifier string for the object map... + curtime = pdfioFileGetCreationDate(pdf); + + _pdfioCryptoSHA256Init(&ctx); + _pdfioCryptoSHA256Append(&ctx, (uint8_t *)pdf->filename, strlen(pdf->filename)); + _pdfioCryptoSHA256Append(&ctx, (uint8_t *)&curtime, sizeof(curtime)); + if ((id_value = pdfioArrayGetBinary(pdf->id_array, 0, &id_valuelen)) != NULL) + _pdfioCryptoSHA256Append(&ctx, id_value, id_valuelen); + if ((id_value = pdfioArrayGetBinary(pdf->id_array, 1, &id_valuelen)) != NULL) + _pdfioCryptoSHA256Append(&ctx, id_value, id_valuelen); + _pdfioCryptoSHA256Finish(&ctx, pdf->file_id); + return (pdf); @@ -1420,10 +1437,11 @@ static int // O - Result of comparison compare_objmaps(_pdfio_objmap_t *a, // I - First object map _pdfio_objmap_t *b) // I - Second object map { - if (a->src_pdf < b->src_pdf) - return (-1); - else if (a->src_pdf > b->src_pdf) - return (1); + int ret = memcmp(a->src_id, b->src_id, sizeof(a->src_id)); + // Result of comparison + + if (ret) + return (ret); else if (a->src_number < b->src_number) return (-1); else if (a->src_number > b->src_number) @@ -1452,6 +1470,8 @@ create_common( 
pdfio_file_t *pdf; // New PDF file pdfio_dict_t *dict; // Dictionary unsigned char id_value[16]; // File ID value + time_t curtime; // Creation date/time + _pdfio_sha256_t ctx; // Hashing context PDFIO_DEBUG("create_common(filename=\"%s\", fd=%d, output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, fd, (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata); @@ -1542,7 +1562,9 @@ create_common( if ((dict = pdfioDictCreate(pdf)) == NULL) goto error; - pdfioDictSetDate(dict, "CreationDate", time(NULL)); + curtime = time(NULL); + + pdfioDictSetDate(dict, "CreationDate", curtime); pdfioDictSetString(dict, "Producer", "pdfio/" PDFIO_VERSION); if ((pdf->info_obj = pdfioFileCreateObj(pdf, dict)) == NULL) @@ -1567,6 +1589,14 @@ create_common( pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value)); } + // Create the unique file identifier string for the object map... + _pdfioCryptoSHA256Init(&ctx); + _pdfioCryptoSHA256Append(&ctx, (uint8_t *)pdf->filename, strlen(pdf->filename)); + _pdfioCryptoSHA256Append(&ctx, (uint8_t *)&curtime, sizeof(curtime)); + _pdfioCryptoSHA256Append(&ctx, id_value, sizeof(id_value)); + _pdfioCryptoSHA256Append(&ctx, id_value, sizeof(id_value)); + _pdfioCryptoSHA256Finish(&ctx, pdf->file_id); + return (pdf); // Common error handling code... diff --git a/pdfio-object.c b/pdfio-object.c index 58a977e..8f13fbd 100644 --- a/pdfio-object.c +++ b/pdfio-object.c @@ -79,6 +79,10 @@ pdfioObjCopy(pdfio_file_t *pdf, // I - PDF file if (srcobj->value.type == PDFIO_VALTYPE_NONE) _pdfioObjLoad(srcobj); + // See if we have already mapped this object... + if ((dstobj = _pdfioFileFindMappedObj(pdf, srcobj->pdf, srcobj->number)) != NULL) + return (dstobj); // Yes, return that one... + // Create the new object... 
if ((dstobj = _pdfioFileCreateObj(pdf, srcobj->pdf, NULL)) == NULL) return (NULL); diff --git a/pdfio-private.h b/pdfio-private.h index 890c780..4a1d992 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -221,7 +221,7 @@ struct _pdfio_dict_s // Dictionary typedef struct _pdfio_objmap_s // PDF object map { pdfio_obj_t *obj; // Object for this file - pdfio_file_t *src_pdf; // Source PDF file + unsigned char src_id[32]; // Source PDF file identifier size_t src_number; // Source object number } _pdfio_objmap_t; @@ -236,6 +236,7 @@ typedef struct _pdfio_strbuf_s // PDF string buffer struct _pdfio_file_s // PDF file structure { char *filename; // Filename + unsigned char file_id[32]; // File identifier bytes struct lconv *loc; // Locale data char *version; // Version number pdfio_rect_t media_box, // Default MediaBox value