Fix pdfioFileGetAuthor, etc. APIs (Issue #33)

This commit is contained in:
Michael R Sweet 2022-07-12 18:36:08 -04:00
parent 26d485cfc5
commit a3f3bbfe11
No known key found for this signature in database
GPG Key ID: 999559A027815955
4 changed files with 164 additions and 8 deletions

View File

@ -11,6 +11,7 @@ v1.1.0 (Month DD, YYYY)
- Added protection against opening multiple streams in the same file at the - Added protection against opening multiple streams in the same file at the
same time. same time.
- Fixed "install-shared" target (Issue #32) - Fixed "install-shared" target (Issue #32)
- Fixed `pdfioFileGet...` metadata APIs (Issue #33)
- Fixed `pdfioContentMatrixRotate` function. - Fixed `pdfioContentMatrixRotate` function.

View File

@ -24,6 +24,7 @@
static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset); static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b); static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b);
static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b); static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b);
static const char *get_info_string(pdfio_file_t *pdf, const char *key);
static bool load_obj_stream(pdfio_obj_t *obj); static bool load_obj_stream(pdfio_obj_t *obj);
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth); static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data); static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data);
@ -892,7 +893,7 @@ pdfioFileFindObj(
const char * // O - Author or `NULL` for none const char * // O - Author or `NULL` for none
pdfioFileGetAuthor(pdfio_file_t *pdf) // I - PDF file pdfioFileGetAuthor(pdfio_file_t *pdf) // I - PDF file
{ {
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Author") : NULL); return (get_info_string(pdf, "Author"));
} }
@ -904,7 +905,7 @@ time_t // O - Creation date or `0` for none
pdfioFileGetCreationDate( pdfioFileGetCreationDate(
pdfio_file_t *pdf) // I - PDF file pdfio_file_t *pdf) // I - PDF file
{ {
return (pdf && pdf->info_obj ? pdfioDictGetDate(pdf->info_obj->value.value.dict, "CreationDate") : 0); return (pdf && pdf->info_obj ? pdfioDictGetDate(pdfioObjGetDict(pdf->info_obj), "CreationDate") : 0);
} }
@ -915,7 +916,7 @@ pdfioFileGetCreationDate(
const char * // O - Creator string or `NULL` for none const char * // O - Creator string or `NULL` for none
pdfioFileGetCreator(pdfio_file_t *pdf) // I - PDF file pdfioFileGetCreator(pdfio_file_t *pdf) // I - PDF file
{ {
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Creator") : NULL); return (get_info_string(pdf, "Creator"));
} }
@ -937,7 +938,7 @@ pdfioFileGetID(pdfio_file_t *pdf) // I - PDF file
const char * // O - Keywords string or `NULL` for none const char * // O - Keywords string or `NULL` for none
pdfioFileGetKeywords(pdfio_file_t *pdf) // I - PDF file pdfioFileGetKeywords(pdfio_file_t *pdf) // I - PDF file
{ {
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Keywords") : NULL); return (get_info_string(pdf, "Keywords"));
} }
@ -1041,7 +1042,7 @@ pdfioFileGetPermissions(
const char * // O - Producer string or `NULL` for none const char * // O - Producer string or `NULL` for none
pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file
{ {
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Producer") : NULL); return (get_info_string(pdf, "Producer"));
} }
@ -1052,7 +1053,7 @@ pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file
const char * // O - Subject or `NULL` for none const char * // O - Subject or `NULL` for none
pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file
{ {
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Subject") : NULL); return (get_info_string(pdf, "Subject"));
} }
@ -1063,7 +1064,7 @@ pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file
const char * // O - Title or `NULL` for none const char * // O - Title or `NULL` for none
pdfioFileGetTitle(pdfio_file_t *pdf) // I - PDF file pdfioFileGetTitle(pdfio_file_t *pdf) // I - PDF file
{ {
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Title") : NULL); return (get_info_string(pdf, "Title"));
} }
@ -1406,6 +1407,51 @@ compare_objs(pdfio_obj_t **a, // I - First object
} }
//
// 'get_info_string()' - Get a string value from the Info dictionary.
//
// Name and string values are returned as-is.  Binary strings, which occur in
// encrypted PDF files, are converted to C strings when they are short enough
// to fit in the local conversion buffer.  A successful conversion replaces the
// dictionary value in place (the binary data is freed and the value becomes a
// string), so later lookups of the same key return the converted string
// directly.
//
// The converted data is handed to `pdfioStringCreate` as a nul-terminated C
// string, so any bytes following an embedded nul are not part of the result.
//
// If the string cannot be created, the dictionary value is left untouched as
// a binary string and `NULL` is returned.
//

static const char *			// O - String or `NULL` if not found
get_info_string(pdfio_file_t *pdf,	// I - PDF file
                const char   *key)	// I - Dictionary key
{
  pdfio_dict_t		*dict;		// Info dictionary
  _pdfio_value_t	*value;		// Value
  char			temp[4096];	// Temporary string
  char			*s;		// Converted string
  size_t		datalen;	// Length of binary string


  // Range check input...
  if (!pdf || !pdf->info_obj || (dict = pdfioObjGetDict(pdf->info_obj)) == NULL || (value = _pdfioDictGetValue(dict, key)) == NULL)
    return (NULL);

  // If we already have a string value, return it...
  if (value->type == PDFIO_VALTYPE_NAME || value->type == PDFIO_VALTYPE_STRING)
    return (value->value.string);

  // Anything other than a binary string that fits in the buffer (leaving room
  // for the trailing nul) is not something we can return as a string...
  if (value->type != PDFIO_VALTYPE_BINARY || value->value.binary.datalen >= sizeof(temp))
    return (NULL);

  // Convert binary string to regular string...
  datalen = value->value.binary.datalen;

  if (datalen > 0)
    memcpy(temp, value->value.binary.data, datalen);
					// Skip the copy for empty strings since
					// the data pointer may not be valid

  temp[datalen] = '\0';

  // Create the replacement string *before* touching the value so a failure
  // does not leave the dictionary holding a string value with a NULL pointer
  // and the original data already freed...
  if ((s = pdfioStringCreate(pdf, temp)) == NULL)
    return (NULL);

  free(value->value.binary.data);

  value->type         = PDFIO_VALTYPE_STRING;
  value->value.string = s;

  return (value->value.string);
}
// //
// 'load_obj_stream()' - Load an object stream. // 'load_obj_stream()' - Load an object stream.
// //

View File

@ -219,7 +219,6 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
PDFIO_DEBUG("_pdfioValueRead(pdf=%p, obj=%p, v=%p)\n", pdf, obj, v); PDFIO_DEBUG("_pdfioValueRead(pdf=%p, obj=%p, v=%p)\n", pdf, obj, v);
(void)obj; // TODO: Implement decryption
if (!_pdfioTokenGet(tb, token, sizeof(token))) if (!_pdfioTokenGet(tb, token, sizeof(token)))
return (NULL); return (NULL);
@ -284,6 +283,7 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
} }
} }
} }
if (token[i]) if (token[i])
{ {
// Just a string... // Just a string...
@ -367,6 +367,33 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
*dataptr++ = (unsigned char)d; *dataptr++ = (unsigned char)d;
} }
if (obj && pdf->encryption)
{
// Decrypt the string...
_pdfio_crypto_ctx_t ctx; // Decryption context
_pdfio_crypto_cb_t cb; // Decryption callback
size_t ivlen; // Number of initialization vector bytes
uint8_t temp[32768]; // Temporary buffer for decryption
size_t templen; // Number of actual data bytes
if (v->value.binary.datalen > (sizeof(temp) - 32))
{
_pdfioFileError(pdf, "Unable to read encrypted binary string - too long.");
return (false);
}
cb = _pdfioCryptoMakeReader(pdf, obj, &ctx, v->value.binary.data, &ivlen);
templen = (cb)(&ctx, temp, v->value.binary.data + ivlen, v->value.binary.datalen - ivlen);
// Copy the decrypted string back to the value and adjust the length...
memcpy(v->value.binary.data, temp, templen);
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128)
v->value.binary.datalen = templen - temp[templen - 1];
else
v->value.binary.datalen = templen;
}
} }
else if (strchr("0123456789-+.", token[0]) != NULL) else if (strchr("0123456789-+.", token[0]) != NULL)
{ {

View File

@ -1310,6 +1310,7 @@ read_unit_file(const char *filename, // I - File to read
{ {
pdfio_file_t *pdf; // PDF file pdfio_file_t *pdf; // PDF file
size_t i; // Looping var size_t i; // Looping var
const char *s; // String
bool error = false; // Error callback data bool error = false; // Error callback data
@ -1320,6 +1321,87 @@ read_unit_file(const char *filename, // I - File to read
else else
return (1); return (1);
// Verify metadata...
fputs("pdfioFileGetAuthor: ", stdout);
if ((s = pdfioFileGetAuthor(pdf)) != NULL && !strcmp(s, "Michael R Sweet"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'Michael R Sweet')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'Michael R Sweet')");
return (1);
}
fputs("pdfioFileGetCreator: ", stdout);
if ((s = pdfioFileGetCreator(pdf)) != NULL && !strcmp(s, "testpdfio"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'testpdfio')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'testpdfio')");
return (1);
}
fputs("pdfioFileGetKeywords: ", stdout);
if ((s = pdfioFileGetKeywords(pdf)) != NULL && !strcmp(s, "one fish,two fish,red fish,blue fish"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'one fish,two fish,red fish,blue fish')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'one fish,two fish,red fish,blue fish')");
return (1);
}
fputs("pdfioFileGetSubject: ", stdout);
if ((s = pdfioFileGetSubject(pdf)) != NULL && !strcmp(s, "Unit test document"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'Unit test document')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'Unit test document')");
return (1);
}
fputs("pdfioFileGetTitle: ", stdout);
if ((s = pdfioFileGetTitle(pdf)) != NULL && !strcmp(s, "Test Document"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'Test Document')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'Test Document')");
return (1);
}
// Verify the number of pages is the same... // Verify the number of pages is the same...
fputs("pdfioFileGetNumPages: ", stdout); fputs("pdfioFileGetNumPages: ", stdout);
if (num_pages == pdfioFileGetNumPages(pdf)) if (num_pages == pdfioFileGetNumPages(pdf))