mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2025-04-21 16:06:48 +02:00
Compare commits
3 Commits
4219b8fd77
...
29eea131b9
Author | SHA1 | Date | |
---|---|---|---|
|
29eea131b9 | ||
|
2dcef0936e | ||
|
20dd2a6d28 |
@ -2,6 +2,13 @@ Changes in PDFio
|
||||
================
|
||||
|
||||
|
||||
v1.5.3 - YYYY-MM-DD
|
||||
-------------------
|
||||
|
||||
- Fixed decryption of PDF files "protected" by 40-bit RC4 (Issue #42)
|
||||
- Fixed decryption of UTF-16 strings (Issue #42)
|
||||
|
||||
|
||||
v1.5.2 - 2025-04-12
|
||||
-------------------
|
||||
|
||||
|
@ -409,13 +409,6 @@ _pdfioCryptoMakeReader(
|
||||
uint8_t data[21]; // Key data
|
||||
_pdfio_md5_t md5; // MD5 state
|
||||
uint8_t digest[16]; // MD5 digest value
|
||||
#if PDFIO_OBJ_CRYPT
|
||||
pdfio_array_t *id_array; // Object ID array
|
||||
unsigned char *id_value; // Object ID value
|
||||
size_t id_len; // Length of object ID
|
||||
uint8_t temp_key[16]; // File key for object
|
||||
#endif // PDFIO_OBJ_CRYPT
|
||||
uint8_t *file_key; // Computed file key to use
|
||||
|
||||
|
||||
PDFIO_DEBUG("_pdfioCryptoMakeReader(pdf=%p, obj=%p(%d), ctx=%p, iv=%p, ivlen=%p(%d))\n", pdf, obj, (int)obj->number, ctx, iv, ivlen, (int)*ivlen);
|
||||
@ -427,60 +420,6 @@ _pdfioCryptoMakeReader(
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
#if PDFIO_OBJ_CRYPT
|
||||
if ((id_array = pdfioDictGetArray(pdfioObjGetDict(obj), "ID")) != NULL)
|
||||
{
|
||||
// Object has its own ID that will get used for encryption...
|
||||
_pdfio_md5_t md5; // MD5 context
|
||||
uint8_t file_digest[16]; // MD5 digest of file ID and pad
|
||||
uint8_t user_pad[32], // Padded user password
|
||||
own_user_key[32], // Calculated user key
|
||||
pdf_user_key[32]; // Decrypted user key
|
||||
|
||||
PDFIO_DEBUG("_pdfioCryptoMakeReader: Per-object file ID.\n");
|
||||
|
||||
if ((id_value = pdfioArrayGetBinary(id_array, 0, &id_len)) == NULL)
|
||||
{
|
||||
*ivlen = 0;
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
_pdfioCryptoMD5Init(&md5);
|
||||
_pdfioCryptoMD5Append(&md5, pdf_passpad, 32);
|
||||
_pdfioCryptoMD5Append(&md5, id_value, id_len);
|
||||
_pdfioCryptoMD5Finish(&md5, file_digest);
|
||||
|
||||
make_owner_key(pdf->encryption, pdf->password, pdf->owner_key, user_pad);
|
||||
make_file_key(pdf->encryption, pdf->permissions, id_value, id_len, user_pad, pdf->owner_key, temp_key);
|
||||
make_user_key(id_value, id_len, own_user_key);
|
||||
|
||||
if (memcmp(own_user_key, pdf->user_key, sizeof(own_user_key)))
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioCryptoMakeReader: Not user password, trying owner password.\n");
|
||||
|
||||
make_file_key(pdf->encryption, pdf->permissions, id_value, id_len, pdf->password, pdf->owner_key, temp_key);
|
||||
make_user_key(id_value, id_len, own_user_key);
|
||||
|
||||
memcpy(pdf_user_key, pdf->user_key, sizeof(pdf_user_key));
|
||||
decrypt_user_key(pdf->encryption, temp_key, pdf_user_key);
|
||||
|
||||
if (memcmp(pdf->password, pdf_user_key, 32) && memcmp(own_user_key, pdf_user_key, 16))
|
||||
{
|
||||
_pdfioFileError(pdf, "Unable to unlock file.");
|
||||
*ivlen = 0;
|
||||
return (NULL);
|
||||
}
|
||||
}
|
||||
|
||||
file_key = temp_key;
|
||||
}
|
||||
else
|
||||
#endif // PDFIO_OBJ_CRYPT
|
||||
{
|
||||
// Use the default file key...
|
||||
file_key = pdf->file_key;
|
||||
}
|
||||
|
||||
switch (pdf->encryption)
|
||||
{
|
||||
default :
|
||||
@ -490,20 +429,20 @@ _pdfioCryptoMakeReader(
|
||||
|
||||
case PDFIO_ENCRYPTION_RC4_40 :
|
||||
// Copy the key data for the MD5 hash.
|
||||
memcpy(data, file_key, 16);
|
||||
data[16] = (uint8_t)obj->number;
|
||||
data[17] = (uint8_t)(obj->number >> 8);
|
||||
data[18] = (uint8_t)(obj->number >> 16);
|
||||
data[19] = (uint8_t)obj->generation;
|
||||
data[20] = (uint8_t)(obj->generation >> 8);
|
||||
memcpy(data, pdf->file_key, 5);
|
||||
data[5] = (uint8_t)obj->number;
|
||||
data[6] = (uint8_t)(obj->number >> 8);
|
||||
data[7] = (uint8_t)(obj->number >> 16);
|
||||
data[8] = (uint8_t)obj->generation;
|
||||
data[9] = (uint8_t)(obj->generation >> 8);
|
||||
|
||||
// Hash it...
|
||||
_pdfioCryptoMD5Init(&md5);
|
||||
_pdfioCryptoMD5Append(&md5, data, sizeof(data));
|
||||
_pdfioCryptoMD5Append(&md5, data, 10);
|
||||
_pdfioCryptoMD5Finish(&md5, digest);
|
||||
|
||||
// Initialize the RC4 context using 40 bits of the digest...
|
||||
_pdfioCryptoRC4Init(&ctx->rc4, digest, 5);
|
||||
// Initialize the RC4 context using 80 bits of the digest...
|
||||
_pdfioCryptoRC4Init(&ctx->rc4, digest, 10);
|
||||
*ivlen = 0;
|
||||
return ((_pdfio_crypto_cb_t)_pdfioCryptoRC4Crypt);
|
||||
|
||||
@ -517,7 +456,7 @@ _pdfioCryptoMakeReader(
|
||||
|
||||
case PDFIO_ENCRYPTION_RC4_128 :
|
||||
// Copy the key data for the MD5 hash.
|
||||
memcpy(data, file_key, 16);
|
||||
memcpy(data, pdf->file_key, 16);
|
||||
data[16] = (uint8_t)obj->number;
|
||||
data[17] = (uint8_t)(obj->number >> 8);
|
||||
data[18] = (uint8_t)(obj->number >> 16);
|
||||
|
123
pdfio-dict.c
123
pdfio-dict.c
@ -465,127 +465,12 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary
|
||||
else if (value && value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096)
|
||||
{
|
||||
// Convert binary string to regular string...
|
||||
char temp[4096], // Temporary string
|
||||
*tempptr; // Pointer into temporary string
|
||||
unsigned char *dataptr; // Pointer into the data string
|
||||
char temp[4096]; // Temporary UTF-8 string
|
||||
|
||||
if (!(value->value.binary.datalen & 1) && !memcmp(value->value.binary.data, "\376\377", 2))
|
||||
if (!(value->value.binary.datalen & 1) && (!memcmp(value->value.binary.data, "\376\377", 2) || !memcmp(value->value.binary.data, "\377\376", 2)))
|
||||
{
|
||||
// Copy UTF-16 BE
|
||||
int ch; // Unicode character
|
||||
size_t remaining; // Remaining bytes
|
||||
|
||||
for (dataptr = value->value.binary.data + 2, remaining = value->value.binary.datalen - 2, tempptr = temp; remaining > 1 && tempptr < (temp + sizeof(temp) - 5); dataptr += 2, remaining -= 2)
|
||||
{
|
||||
ch = (dataptr[0] << 8) | dataptr[1];
|
||||
|
||||
if (ch >= 0xd800 && ch <= 0xdbff && remaining > 3)
|
||||
{
|
||||
// Multi-word UTF-16 char...
|
||||
int lch; // Lower bits
|
||||
|
||||
lch = (dataptr[2] << 8) | dataptr[3];
|
||||
|
||||
if (lch < 0xdc00 || lch >= 0xdfff)
|
||||
break;
|
||||
|
||||
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
|
||||
dataptr += 2;
|
||||
remaining -= 2;
|
||||
}
|
||||
else if (ch >= 0xfffe)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch < 128)
|
||||
{
|
||||
// ASCII
|
||||
*tempptr++ = (char)ch;
|
||||
}
|
||||
else if (ch < 4096)
|
||||
{
|
||||
// 2-byte UTF-8
|
||||
*tempptr++ = (char)(0xc0 | (ch >> 6));
|
||||
*tempptr++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
else if (ch < 65536)
|
||||
{
|
||||
// 3-byte UTF-8
|
||||
*tempptr++ = (char)(0xe0 | (ch >> 12));
|
||||
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
||||
*tempptr++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
else
|
||||
{
|
||||
// 4-byte UTF-8
|
||||
*tempptr++ = (char)(0xe0 | (ch >> 18));
|
||||
*tempptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
|
||||
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
||||
*tempptr++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
}
|
||||
|
||||
*tempptr = '\0';
|
||||
}
|
||||
else if (!(value->value.binary.datalen & 1) && !memcmp(value->value.binary.data, "\377\376", 2))
|
||||
{
|
||||
// Copy UTF-16 LE
|
||||
int ch; // Unicode character
|
||||
size_t remaining; // Remaining bytes
|
||||
|
||||
for (dataptr = value->value.binary.data + 2, remaining = value->value.binary.datalen - 2, tempptr = temp; remaining > 1 && tempptr < (temp + sizeof(temp) - 5); dataptr += 2, remaining -= 2)
|
||||
{
|
||||
ch = (dataptr[1] << 8) | dataptr[0];
|
||||
|
||||
if (ch >= 0xd800 && ch <= 0xdbff && remaining > 3)
|
||||
{
|
||||
// Multi-word UTF-16 char...
|
||||
int lch; // Lower bits
|
||||
|
||||
lch = (dataptr[3] << 8) | dataptr[2];
|
||||
|
||||
if (lch < 0xdc00 || lch >= 0xdfff)
|
||||
break;
|
||||
|
||||
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
|
||||
dataptr += 2;
|
||||
remaining -= 2;
|
||||
}
|
||||
else if (ch >= 0xfffe)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch < 128)
|
||||
{
|
||||
// ASCII
|
||||
*tempptr++ = (char)ch;
|
||||
}
|
||||
else if (ch < 4096)
|
||||
{
|
||||
// 2-byte UTF-8
|
||||
*tempptr++ = (char)(0xc0 | (ch >> 6));
|
||||
*tempptr++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
else if (ch < 65536)
|
||||
{
|
||||
// 3-byte UTF-8
|
||||
*tempptr++ = (char)(0xe0 | (ch >> 12));
|
||||
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
||||
*tempptr++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
else
|
||||
{
|
||||
// 4-byte UTF-8
|
||||
*tempptr++ = (char)(0xe0 | (ch >> 18));
|
||||
*tempptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
|
||||
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
||||
*tempptr++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
}
|
||||
|
||||
*tempptr = '\0';
|
||||
// Copy UTF-16...
|
||||
_pdfio_utf16cpy(temp, value->value.binary.data, value->value.binary.datalen, sizeof(temp));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -339,6 +339,7 @@ struct _pdfio_stream_s // Stream
|
||||
|
||||
extern size_t _pdfio_strlcpy(char *dst, const char *src, size_t dstsize) _PDFIO_INTERNAL;
|
||||
extern double _pdfio_strtod(pdfio_file_t *pdf, const char *s) _PDFIO_INTERNAL;
|
||||
extern void _pdfio_utf16cpy(char *dst, const unsigned char *src, size_t srclen, size_t dstsize) _PDFIO_INTERNAL;
|
||||
extern ssize_t _pdfio_vsnprintf(pdfio_file_t *pdf, char *buffer, size_t bufsize, const char *format, va_list ap) _PDFIO_INTERNAL;
|
||||
|
||||
extern bool _pdfioArrayDecrypt(pdfio_file_t *pdf, pdfio_obj_t *obj, pdfio_array_t *a, size_t depth) _PDFIO_INTERNAL;
|
||||
|
@ -158,6 +158,89 @@ _pdfio_strtod(pdfio_file_t *pdf, // I - PDF file
|
||||
}
|
||||
|
||||
|
||||
//
// '_pdfio_utf16cpy()' - Convert UTF-16 to UTF-8.
//
// The first two bytes of "src" are treated as the byte order mark and are
// not copied: "\376\377" selects big-endian, anything else is read as
// little-endian.  Conversion stops when the source is exhausted, when fewer
// than 5 bytes remain in the destination, or when a high surrogate is
// followed by something other than a low surrogate.  U+FFFE and U+FFFF are
// skipped.  The destination is always nul-terminated when "dst" is not NULL
// and "dstsize" is greater than 0.
//

void
_pdfio_utf16cpy(
    char                *dst,           // O - Destination buffer for UTF-8
    const unsigned char *src,           // I - Source UTF-16 (starting with BOM)
    size_t              srclen,         // I - Length of UTF-16 in bytes
    size_t              dstsize)        // I - Destination buffer size in bytes
{
  char  *dstptr,                        // Pointer into buffer
        *dstend;                        // Last position where a character may start
  int   ch;                             // Unicode character
  bool  is_be;                          // Big-endian string?


  // Range check input...
  if (!dst || dstsize == 0)
    return;

  *dst = '\0';

  // Need room for one 4-byte sequence plus the nul, and at least a BOM to
  // read; anything smaller yields an empty string instead of reading or
  // pointing outside the buffers...
  if (dstsize < 5 || !src || srclen < 2)
    return;

  dstptr = dst;
  dstend = dst + dstsize - 5;
  is_be  = memcmp(src, "\376\377", 2) == 0;

  // Loop through the UTF-16 string, converting to Unicode then UTF-8...
  for (src += 2, srclen -= 2; srclen > 1 && dstptr < dstend; src += 2, srclen -= 2)
  {
    // Initial character...
    if (is_be)
      ch = (src[0] << 8) | src[1];
    else
      ch = (src[1] << 8) | src[0];

    if (ch >= 0xd800 && ch <= 0xdbff && srclen > 3)
    {
      // Multi-word UTF-16 char...
      int lch;                          // Lower bits

      if (is_be)
        lch = (src[2] << 8) | src[3];
      else
        lch = (src[3] << 8) | src[2];

      // Low surrogates are 0xDC00 through 0xDFFF inclusive...
      if (lch < 0xdc00 || lch > 0xdfff)
        break;

      ch     = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
      src    += 2;
      srclen -= 2;
    }
    else if (ch >= 0xfffe)
    {
      // Skip U+FFFE and U+FFFF...
      continue;
    }

    // Convert Unicode to UTF-8...
    if (ch < 0x80)
    {
      // ASCII
      *dstptr++ = (char)ch;
    }
    else if (ch < 0x800)
    {
      // 2-byte UTF-8 (11 bits)
      *dstptr++ = (char)(0xc0 | (ch >> 6));
      *dstptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else if (ch < 0x10000)
    {
      // 3-byte UTF-8 (16 bits)
      *dstptr++ = (char)(0xe0 | (ch >> 12));
      *dstptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *dstptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else
    {
      // 4-byte UTF-8 (21 bits) - lead byte is 11110xxx
      *dstptr++ = (char)(0xf0 | (ch >> 18));
      *dstptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
      *dstptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *dstptr++ = (char)(0x80 | (ch & 0x3f));
    }
  }

  // Nul-terminate the UTF-8 string...
  *dstptr = '\0';
}
|
||||
|
||||
|
||||
//
|
||||
// '_pdfio_vsnprintf()' - Format a string.
|
||||
//
|
||||
|
@ -172,7 +172,7 @@ _pdfioValueDecrypt(pdfio_file_t *pdf, // I - PDF file
|
||||
// Copy the decrypted string back to the value and adjust the length...
|
||||
memcpy(v->value.binary.data, temp, templen);
|
||||
|
||||
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128)
|
||||
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128 && temp[templen - 1] <= templen)
|
||||
v->value.binary.datalen = templen - temp[templen - 1];
|
||||
else
|
||||
v->value.binary.datalen = templen;
|
||||
@ -183,20 +183,48 @@ _pdfioValueDecrypt(pdfio_file_t *pdf, // I - PDF file
|
||||
case PDFIO_VALTYPE_STRING :
|
||||
// Decrypt regular string...
|
||||
templen = strlen(v->value.string);
|
||||
if (templen > (sizeof(temp) - 33))
|
||||
if (templen > (PDFIO_MAX_STRING - 1))
|
||||
{
|
||||
_pdfioFileError(pdf, "Unable to read encrypted string - too long.");
|
||||
return (false);
|
||||
}
|
||||
else if ((temp = (uint8_t *)_pdfioStringAllocBuffer(pdf)) == NULL)
|
||||
{
|
||||
_pdfioFileError(pdf, "Unable to read encrypted binary string - out of memory.");
|
||||
return (false);
|
||||
}
|
||||
|
||||
ivlen = templen;
|
||||
if ((cb = _pdfioCryptoMakeReader(pdf, obj, &ctx, (uint8_t *)v->value.string, &ivlen)) == NULL)
|
||||
return (false);
|
||||
|
||||
templen = (cb)(&ctx, temp, (uint8_t *)v->value.string + ivlen, templen - ivlen);
|
||||
|
||||
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128 && temp[templen - 1] <= templen)
|
||||
templen -= temp[templen - 1];
|
||||
|
||||
temp[templen] = '\0';
|
||||
|
||||
if ((timeval = get_date_time((char *)temp)) != 0)
|
||||
if ((templen & 1) == 0 && (!memcmp(temp, "\376\377", 2) || !memcmp(temp, "\377\376", 2)))
|
||||
{
|
||||
// Convert UTF-16 to UTF-8...
|
||||
char utf8[4096]; // Temporary string
|
||||
|
||||
_pdfio_utf16cpy(utf8, temp, templen, sizeof(utf8));
|
||||
|
||||
if ((timeval = get_date_time((char *)utf8)) != 0)
|
||||
{
|
||||
// Change the type to date...
|
||||
v->type = PDFIO_VALTYPE_DATE;
|
||||
v->value.date = timeval;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Copy the decrypted string back to the value...
|
||||
v->value.string = pdfioStringCreate(pdf, utf8);
|
||||
}
|
||||
}
|
||||
else if ((timeval = get_date_time((char *)temp)) != 0)
|
||||
{
|
||||
// Change the type to date...
|
||||
v->type = PDFIO_VALTYPE_DATE;
|
||||
@ -207,6 +235,8 @@ _pdfioValueDecrypt(pdfio_file_t *pdf, // I - PDF file
|
||||
// Copy the decrypted string back to the value...
|
||||
v->value.string = pdfioStringCreate(pdf, (char *)temp);
|
||||
}
|
||||
|
||||
_pdfioStringFreeBuffer(pdf, (char *)temp);
|
||||
break;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user