Fix object map to use unique file hash instead of pointer values (Issue #125)

This commit is contained in:
Michael R Sweet 2025-04-24 14:00:24 -04:00
parent 5b08046ece
commit 57a01a7317
No known key found for this signature in database
GPG Key ID: BE67C75EC81F3244
5 changed files with 51 additions and 14 deletions

View File

@ -27,6 +27,8 @@ v1.5.3 - YYYY-MM-DD
- Fixed decryption of UTF-16 strings (Issue #42)
- Fixed decryption of PDF files with large permission values.
- Fixed support for EncryptMetadata key in the encryption dictionary.
- Fixed `pdfioObjCopy` and `pdfioPageCopy` to properly identify the source PDF
file being used (Issue #125)
v1.5.2 - 2025-04-12

View File

@ -8,8 +8,8 @@
//
// Usage:
//
// ./pdfmerge [-o OUTPUT.pdf] INPUT.pdf [... INPUT.pdf]
// ./pdfmerge INPUT.pdf [... INPUT.pdf] >OUTPUT.pdf
// ./pdfiomerge [-o OUTPUT.pdf] INPUT.pdf [... INPUT.pdf]
// ./pdfiomerge INPUT.pdf [... INPUT.pdf] >OUTPUT.pdf
//
#include <pdfio.h>
@ -47,7 +47,7 @@ main(int argc, // I - Number of command-line arguments
}
else if (!strncmp(argv[i], "--", 2))
{
fprintf(stderr, "pdfmerge: Unknown option '%s'.\n", argv[i]);
fprintf(stderr, "pdfiomerge: Unknown option '%s'.\n", argv[i]);
return (usage(stderr));
}
else if (argv[i][0] == '-')
@ -59,14 +59,14 @@ main(int argc, // I - Number of command-line arguments
case 'o' : // -o OUTPUT.pdf
if (outpdf)
{
fputs("pdfmerge: Only one output file can be specified.\n", stderr);
fputs("pdfiomerge: Only one output file can be specified.\n", stderr);
return (usage(stderr));
}
i ++;
if (i >= argc)
{
fputs("pdfmerge: Missing output filename after '-o'.\n", stderr);
fputs("pdfiomerge: Missing output filename after '-o'.\n", stderr);
return (usage(stderr));
}
@ -75,7 +75,7 @@ main(int argc, // I - Number of command-line arguments
break;
default :
fprintf(stderr, "pdfmerge: Unknown option '-%c'.\n", *opt);
fprintf(stderr, "pdfiomerge: Unknown option '-%c'.\n", *opt);
return (usage(stderr));
}
}

View File

@ -64,9 +64,10 @@ _pdfioFileAddMappedObj(
pdf->num_objmaps ++;
map->obj = dst_obj;
map->src_pdf = src_obj->pdf;
map->src_number = src_obj->number;
memcpy(map->src_id, src_obj->pdf->file_id, sizeof(map->src_id));
// Sort as needed...
if (pdf->num_objmaps > 1 && compare_objmaps(map, pdf->objmaps + pdf->num_objmaps - 2) < 0)
qsort(pdf->objmaps, pdf->num_objmaps, sizeof(_pdfio_objmap_t), (int (*)(const void *, const void *))compare_objmaps);
@ -685,7 +686,7 @@ _pdfioFileFindMappedObj(
return (NULL);
// Otherwise search for a match...
key.src_pdf = src_pdf;
memcpy(key.src_id, src_pdf->file_id, sizeof(key.src_id));
key.src_number = src_number;
if ((match = (_pdfio_objmap_t *)bsearch(&key, pdf->objmaps, pdf->num_objmaps, sizeof(_pdfio_objmap_t), (int (*)(const void *, const void *))compare_objmaps)) != NULL)
@ -1046,6 +1047,10 @@ pdfioFileOpen(
*end; // End of line
ssize_t bytes; // Bytes read
off_t xref_offset; // Offset to xref table
time_t curtime; // Creation date/time
unsigned char *id_value; // ID value
size_t id_valuelen; // Length of ID value
_pdfio_sha256_t ctx; // Hashing context
PDFIO_DEBUG("pdfioFileOpen(filename=\"%s\", password_cb=%p, password_cbdata=%p, error_cb=%p, error_cbdata=%p)\n", filename, (void *)password_cb, (void *)password_cbdata, (void *)error_cb, (void *)error_cbdata);
@ -1146,6 +1151,18 @@ pdfioFileOpen(
goto error;
}
// Create the unique file identifier string for the object map...
curtime = pdfioFileGetCreationDate(pdf);
_pdfioCryptoSHA256Init(&ctx);
_pdfioCryptoSHA256Append(&ctx, (uint8_t *)pdf->filename, strlen(pdf->filename));
_pdfioCryptoSHA256Append(&ctx, (uint8_t *)&curtime, sizeof(curtime));
if ((id_value = pdfioArrayGetBinary(pdf->id_array, 0, &id_valuelen)) != NULL)
_pdfioCryptoSHA256Append(&ctx, id_value, id_valuelen);
if ((id_value = pdfioArrayGetBinary(pdf->id_array, 1, &id_valuelen)) != NULL)
_pdfioCryptoSHA256Append(&ctx, id_value, id_valuelen);
_pdfioCryptoSHA256Finish(&ctx, pdf->file_id);
return (pdf);
@ -1420,10 +1437,11 @@ static int // O - Result of comparison
compare_objmaps(_pdfio_objmap_t *a, // I - First object map
_pdfio_objmap_t *b) // I - Second object map
{
if (a->src_pdf < b->src_pdf)
return (-1);
else if (a->src_pdf > b->src_pdf)
return (1);
int ret = memcmp(a->src_id, b->src_id, sizeof(a->src_id));
// Result of comparison
if (ret)
return (ret);
else if (a->src_number < b->src_number)
return (-1);
else if (a->src_number > b->src_number)
@ -1452,6 +1470,8 @@ create_common(
pdfio_file_t *pdf; // New PDF file
pdfio_dict_t *dict; // Dictionary
unsigned char id_value[16]; // File ID value
time_t curtime; // Creation date/time
_pdfio_sha256_t ctx; // Hashing context
PDFIO_DEBUG("create_common(filename=\"%s\", fd=%d, output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, fd, (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
@ -1542,7 +1562,9 @@ create_common(
if ((dict = pdfioDictCreate(pdf)) == NULL)
goto error;
pdfioDictSetDate(dict, "CreationDate", time(NULL));
curtime = time(NULL);
pdfioDictSetDate(dict, "CreationDate", curtime);
pdfioDictSetString(dict, "Producer", "pdfio/" PDFIO_VERSION);
if ((pdf->info_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
@ -1567,6 +1589,14 @@ create_common(
pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value));
}
// Create the unique file identifier string for the object map...
_pdfioCryptoSHA256Init(&ctx);
_pdfioCryptoSHA256Append(&ctx, (uint8_t *)pdf->filename, strlen(pdf->filename));
_pdfioCryptoSHA256Append(&ctx, (uint8_t *)&curtime, sizeof(curtime));
_pdfioCryptoSHA256Append(&ctx, id_value, sizeof(id_value));
_pdfioCryptoSHA256Append(&ctx, id_value, sizeof(id_value));
_pdfioCryptoSHA256Finish(&ctx, pdf->file_id);
return (pdf);
// Common error handling code...

View File

@ -79,6 +79,10 @@ pdfioObjCopy(pdfio_file_t *pdf, // I - PDF file
if (srcobj->value.type == PDFIO_VALTYPE_NONE)
_pdfioObjLoad(srcobj);
// See if we have already mapped this object...
if ((dstobj = _pdfioFileFindMappedObj(pdf, srcobj->pdf, srcobj->number)) != NULL)
return (dstobj); // Yes, return that one...
// Create the new object...
if ((dstobj = _pdfioFileCreateObj(pdf, srcobj->pdf, NULL)) == NULL)
return (NULL);

View File

@ -221,7 +221,7 @@ struct _pdfio_dict_s // Dictionary
// Object map entry: associates an object from a source PDF file with the
// object that stands for it in this file.  Entries are compared by source
// file identifier first and then by source object number.
typedef struct _pdfio_objmap_s // PDF object map
{
pdfio_obj_t *obj; // Object for this file
pdfio_file_t *src_pdf; // Source PDF file
unsigned char src_id[32]; // Source PDF file identifier (copied from the source file's file_id)
size_t src_number; // Source object number
} _pdfio_objmap_t;
@ -236,6 +236,7 @@ typedef struct _pdfio_strbuf_s // PDF string buffer
struct _pdfio_file_s // PDF file structure
{
char *filename; // Filename
unsigned char file_id[32]; // File identifier bytes
struct lconv *loc; // Locale data
char *version; // Version number
pdfio_rect_t media_box, // Default MediaBox value