Compare commits

...

3 Commits

Author SHA1 Message Date
Michael R Sweet
29eea131b9
Remove old per-object crypto stuff we don't need. 2025-04-13 09:10:17 -04:00
Michael R Sweet
2dcef0936e
Fix decryption of UTF-16 strings (Issue #42) 2025-04-13 09:05:02 -04:00
Michael R Sweet
20dd2a6d28
Fix decryption of RC4-40 files. 2025-04-13 08:37:24 -04:00
6 changed files with 138 additions and 193 deletions

View File

@ -2,6 +2,13 @@ Changes in PDFio
================ ================
v1.5.3 - YYYY-MM-DD
-------------------
- Fixed decryption of PDF files "protected" by 40-bit RC4 (Issue #42)
- Fixed decryption of UTF-16 strings (Issue #42)
v1.5.2 - 2025-04-12 v1.5.2 - 2025-04-12
------------------- -------------------

View File

@ -409,13 +409,6 @@ _pdfioCryptoMakeReader(
uint8_t data[21]; // Key data uint8_t data[21]; // Key data
_pdfio_md5_t md5; // MD5 state _pdfio_md5_t md5; // MD5 state
uint8_t digest[16]; // MD5 digest value uint8_t digest[16]; // MD5 digest value
#if PDFIO_OBJ_CRYPT
pdfio_array_t *id_array; // Object ID array
unsigned char *id_value; // Object ID value
size_t id_len; // Length of object ID
uint8_t temp_key[16]; // File key for object
#endif // PDFIO_OBJ_CRYPT
uint8_t *file_key; // Computed file key to use
PDFIO_DEBUG("_pdfioCryptoMakeReader(pdf=%p, obj=%p(%d), ctx=%p, iv=%p, ivlen=%p(%d))\n", pdf, obj, (int)obj->number, ctx, iv, ivlen, (int)*ivlen); PDFIO_DEBUG("_pdfioCryptoMakeReader(pdf=%p, obj=%p(%d), ctx=%p, iv=%p, ivlen=%p(%d))\n", pdf, obj, (int)obj->number, ctx, iv, ivlen, (int)*ivlen);
@ -427,60 +420,6 @@ _pdfioCryptoMakeReader(
return (NULL); return (NULL);
} }
#if PDFIO_OBJ_CRYPT
if ((id_array = pdfioDictGetArray(pdfioObjGetDict(obj), "ID")) != NULL)
{
// Object has its own ID that will get used for encryption...
_pdfio_md5_t md5; // MD5 context
uint8_t file_digest[16]; // MD5 digest of file ID and pad
uint8_t user_pad[32], // Padded user password
own_user_key[32], // Calculated user key
pdf_user_key[32]; // Decrypted user key
PDFIO_DEBUG("_pdfioCryptoMakeReader: Per-object file ID.\n");
if ((id_value = pdfioArrayGetBinary(id_array, 0, &id_len)) == NULL)
{
*ivlen = 0;
return (NULL);
}
_pdfioCryptoMD5Init(&md5);
_pdfioCryptoMD5Append(&md5, pdf_passpad, 32);
_pdfioCryptoMD5Append(&md5, id_value, id_len);
_pdfioCryptoMD5Finish(&md5, file_digest);
make_owner_key(pdf->encryption, pdf->password, pdf->owner_key, user_pad);
make_file_key(pdf->encryption, pdf->permissions, id_value, id_len, user_pad, pdf->owner_key, temp_key);
make_user_key(id_value, id_len, own_user_key);
if (memcmp(own_user_key, pdf->user_key, sizeof(own_user_key)))
{
PDFIO_DEBUG("_pdfioCryptoMakeReader: Not user password, trying owner password.\n");
make_file_key(pdf->encryption, pdf->permissions, id_value, id_len, pdf->password, pdf->owner_key, temp_key);
make_user_key(id_value, id_len, own_user_key);
memcpy(pdf_user_key, pdf->user_key, sizeof(pdf_user_key));
decrypt_user_key(pdf->encryption, temp_key, pdf_user_key);
if (memcmp(pdf->password, pdf_user_key, 32) && memcmp(own_user_key, pdf_user_key, 16))
{
_pdfioFileError(pdf, "Unable to unlock file.");
*ivlen = 0;
return (NULL);
}
}
file_key = temp_key;
}
else
#endif // PDFIO_OBJ_CRYPT
{
// Use the default file key...
file_key = pdf->file_key;
}
switch (pdf->encryption) switch (pdf->encryption)
{ {
default : default :
@ -490,20 +429,20 @@ _pdfioCryptoMakeReader(
case PDFIO_ENCRYPTION_RC4_40 : case PDFIO_ENCRYPTION_RC4_40 :
// Copy the key data for the MD5 hash. // Copy the key data for the MD5 hash.
memcpy(data, file_key, 16); memcpy(data, pdf->file_key, 5);
data[16] = (uint8_t)obj->number; data[5] = (uint8_t)obj->number;
data[17] = (uint8_t)(obj->number >> 8); data[6] = (uint8_t)(obj->number >> 8);
data[18] = (uint8_t)(obj->number >> 16); data[7] = (uint8_t)(obj->number >> 16);
data[19] = (uint8_t)obj->generation; data[8] = (uint8_t)obj->generation;
data[20] = (uint8_t)(obj->generation >> 8); data[9] = (uint8_t)(obj->generation >> 8);
// Hash it... // Hash it...
_pdfioCryptoMD5Init(&md5); _pdfioCryptoMD5Init(&md5);
_pdfioCryptoMD5Append(&md5, data, sizeof(data)); _pdfioCryptoMD5Append(&md5, data, 10);
_pdfioCryptoMD5Finish(&md5, digest); _pdfioCryptoMD5Finish(&md5, digest);
// Initialize the RC4 context using 40 bits of the digest... // Initialize the RC4 context using 80 bits of the digest...
_pdfioCryptoRC4Init(&ctx->rc4, digest, 5); _pdfioCryptoRC4Init(&ctx->rc4, digest, 10);
*ivlen = 0; *ivlen = 0;
return ((_pdfio_crypto_cb_t)_pdfioCryptoRC4Crypt); return ((_pdfio_crypto_cb_t)_pdfioCryptoRC4Crypt);
@ -517,7 +456,7 @@ _pdfioCryptoMakeReader(
case PDFIO_ENCRYPTION_RC4_128 : case PDFIO_ENCRYPTION_RC4_128 :
// Copy the key data for the MD5 hash. // Copy the key data for the MD5 hash.
memcpy(data, file_key, 16); memcpy(data, pdf->file_key, 16);
data[16] = (uint8_t)obj->number; data[16] = (uint8_t)obj->number;
data[17] = (uint8_t)(obj->number >> 8); data[17] = (uint8_t)(obj->number >> 8);
data[18] = (uint8_t)(obj->number >> 16); data[18] = (uint8_t)(obj->number >> 16);

View File

@ -465,127 +465,12 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary
else if (value && value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096) else if (value && value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096)
{ {
// Convert binary string to regular string... // Convert binary string to regular string...
char temp[4096], // Temporary string char temp[4096]; // Temporary UTF-8 string
*tempptr; // Pointer into temporary string
unsigned char *dataptr; // Pointer into the data string
if (!(value->value.binary.datalen & 1) && !memcmp(value->value.binary.data, "\376\377", 2)) if (!(value->value.binary.datalen & 1) && (!memcmp(value->value.binary.data, "\376\377", 2) || !memcmp(value->value.binary.data, "\377\376", 2)))
{ {
// Copy UTF-16 BE // Copy UTF-16...
int ch; // Unicode character _pdfio_utf16cpy(temp, value->value.binary.data, value->value.binary.datalen, sizeof(temp));
size_t remaining; // Remaining bytes
for (dataptr = value->value.binary.data + 2, remaining = value->value.binary.datalen - 2, tempptr = temp; remaining > 1 && tempptr < (temp + sizeof(temp) - 5); dataptr += 2, remaining -= 2)
{
ch = (dataptr[0] << 8) | dataptr[1];
if (ch >= 0xd800 && ch <= 0xdbff && remaining > 3)
{
// Multi-word UTF-16 char...
int lch; // Lower bits
lch = (dataptr[2] << 8) | dataptr[3];
if (lch < 0xdc00 || lch >= 0xdfff)
break;
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
dataptr += 2;
remaining -= 2;
}
else if (ch >= 0xfffe)
{
continue;
}
if (ch < 128)
{
// ASCII
*tempptr++ = (char)ch;
}
else if (ch < 4096)
{
// 2-byte UTF-8
*tempptr++ = (char)(0xc0 | (ch >> 6));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
else if (ch < 65536)
{
// 3-byte UTF-8
*tempptr++ = (char)(0xe0 | (ch >> 12));
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
else
{
// 4-byte UTF-8
*tempptr++ = (char)(0xe0 | (ch >> 18));
*tempptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
}
*tempptr = '\0';
}
else if (!(value->value.binary.datalen & 1) && !memcmp(value->value.binary.data, "\377\376", 2))
{
// Copy UTF-16 LE
int ch; // Unicode character
size_t remaining; // Remaining bytes
for (dataptr = value->value.binary.data + 2, remaining = value->value.binary.datalen - 2, tempptr = temp; remaining > 1 && tempptr < (temp + sizeof(temp) - 5); dataptr += 2, remaining -= 2)
{
ch = (dataptr[1] << 8) | dataptr[0];
if (ch >= 0xd800 && ch <= 0xdbff && remaining > 3)
{
// Multi-word UTF-16 char...
int lch; // Lower bits
lch = (dataptr[3] << 8) | dataptr[2];
if (lch < 0xdc00 || lch >= 0xdfff)
break;
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
dataptr += 2;
remaining -= 2;
}
else if (ch >= 0xfffe)
{
continue;
}
if (ch < 128)
{
// ASCII
*tempptr++ = (char)ch;
}
else if (ch < 4096)
{
// 2-byte UTF-8
*tempptr++ = (char)(0xc0 | (ch >> 6));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
else if (ch < 65536)
{
// 3-byte UTF-8
*tempptr++ = (char)(0xe0 | (ch >> 12));
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
else
{
// 4-byte UTF-8
*tempptr++ = (char)(0xe0 | (ch >> 18));
*tempptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*tempptr++ = (char)(0x80 | (ch & 0x3f));
}
}
*tempptr = '\0';
} }
else else
{ {

View File

@ -339,6 +339,7 @@ struct _pdfio_stream_s // Stream
extern size_t _pdfio_strlcpy(char *dst, const char *src, size_t dstsize) _PDFIO_INTERNAL; extern size_t _pdfio_strlcpy(char *dst, const char *src, size_t dstsize) _PDFIO_INTERNAL;
extern double _pdfio_strtod(pdfio_file_t *pdf, const char *s) _PDFIO_INTERNAL; extern double _pdfio_strtod(pdfio_file_t *pdf, const char *s) _PDFIO_INTERNAL;
extern void _pdfio_utf16cpy(char *dst, const unsigned char *src, size_t srclen, size_t dstsize) _PDFIO_INTERNAL;
extern ssize_t _pdfio_vsnprintf(pdfio_file_t *pdf, char *buffer, size_t bufsize, const char *format, va_list ap) _PDFIO_INTERNAL; extern ssize_t _pdfio_vsnprintf(pdfio_file_t *pdf, char *buffer, size_t bufsize, const char *format, va_list ap) _PDFIO_INTERNAL;
extern bool _pdfioArrayDecrypt(pdfio_file_t *pdf, pdfio_obj_t *obj, pdfio_array_t *a, size_t depth) _PDFIO_INTERNAL; extern bool _pdfioArrayDecrypt(pdfio_file_t *pdf, pdfio_obj_t *obj, pdfio_array_t *a, size_t depth) _PDFIO_INTERNAL;

View File

@ -158,6 +158,89 @@ _pdfio_strtod(pdfio_file_t *pdf, // I - PDF file
} }
//
// '_pdfio_utf16cpy()' - Convert UTF-16 to UTF-8.
//
// The source must start with a 2-byte byte order mark.  A big-endian mark
// (FE FF) selects big-endian decoding, anything else is decoded as
// little-endian.  The mark itself is not copied.
//
// The destination is always nul-terminated when "dstsize" is greater than 0.
// Conversion stops early when the destination cannot hold another complete
// character or when an unpaired/invalid surrogate is encountered.  The
// non-characters U+FFFE and U+FFFF are silently skipped.
//

void
_pdfio_utf16cpy(
    char                *dst,		// O - Destination buffer for UTF-8
    const unsigned char *src,		// I - Source UTF-16 (including BOM)
    size_t              srclen,		// I - Length of UTF-16 in bytes
    size_t              dstsize)	// I - Destination buffer size
{
  char	*dstptr,			// Pointer into buffer
	*dstend;			// Last position where a character may start
  int	ch;				// Unicode character
  bool	is_be;				// Big-endian strings?


  // Range check input so the pointer/length math below cannot underflow...
  if (!dst || dstsize == 0)
    return;

  *dst = '\0';

  if (!src || srclen < 2 || dstsize <= 5)
    return;

  // Reserve room for one 4-byte UTF-8 sequence plus the trailing nul...
  dstptr = dst;
  dstend = dst + dstsize - 5;
  is_be  = !memcmp(src, "\376\377", 2);

  // Loop through the UTF-16 string, converting to Unicode then UTF-8...
  for (src += 2, srclen -= 2; srclen > 1 && dstptr < dstend; src += 2, srclen -= 2)
  {
    // Initial character...
    if (is_be)
      ch = (src[0] << 8) | src[1];
    else
      ch = (src[1] << 8) | src[0];

    if (ch >= 0xd800 && ch <= 0xdbff)
    {
      // Multi-word UTF-16 char...
      int lch;				// Lower bits

      if (srclen < 4)
        break;				// High surrogate without a partner

      if (is_be)
        lch = (src[2] << 8) | src[3];
      else
        lch = (src[3] << 8) | src[2];

      // Low surrogates are 0xDC00 through 0xDFFF inclusive...
      if (lch < 0xdc00 || lch > 0xdfff)
        break;

      ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;

      // Consume the second code unit as well...
      src += 2;
      srclen -= 2;
    }
    else if (ch >= 0xdc00 && ch <= 0xdfff)
    {
      // Low surrogate without a preceding high surrogate - cannot be
      // represented in UTF-8 so stop here...
      break;
    }
    else if (ch >= 0xfffe)
    {
      // Skip non-characters (also covers a byte-swapped BOM)...
      continue;
    }

    // Convert Unicode to UTF-8...
    if (ch < 0x80)
    {
      // ASCII
      *dstptr++ = (char)ch;
    }
    else if (ch < 0x800)
    {
      // 2-byte UTF-8 (11 bits of payload)
      *dstptr++ = (char)(0xc0 | (ch >> 6));
      *dstptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else if (ch < 0x10000)
    {
      // 3-byte UTF-8 (16 bits of payload)
      *dstptr++ = (char)(0xe0 | (ch >> 12));
      *dstptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *dstptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else
    {
      // 4-byte UTF-8 (21 bits of payload, lead byte is 11110xxx)
      *dstptr++ = (char)(0xf0 | (ch >> 18));
      *dstptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
      *dstptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *dstptr++ = (char)(0x80 | (ch & 0x3f));
    }
  }

  // Nul-terminate the UTF-8 string...
  *dstptr = '\0';
}
// //
// '_pdfio_vsnprintf()' - Format a string. // '_pdfio_vsnprintf()' - Format a string.
// //

View File

@ -172,7 +172,7 @@ _pdfioValueDecrypt(pdfio_file_t *pdf, // I - PDF file
// Copy the decrypted string back to the value and adjust the length... // Copy the decrypted string back to the value and adjust the length...
memcpy(v->value.binary.data, temp, templen); memcpy(v->value.binary.data, temp, templen);
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128) if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128 && temp[templen - 1] <= templen)
v->value.binary.datalen = templen - temp[templen - 1]; v->value.binary.datalen = templen - temp[templen - 1];
else else
v->value.binary.datalen = templen; v->value.binary.datalen = templen;
@ -183,20 +183,48 @@ _pdfioValueDecrypt(pdfio_file_t *pdf, // I - PDF file
case PDFIO_VALTYPE_STRING : case PDFIO_VALTYPE_STRING :
// Decrypt regular string... // Decrypt regular string...
templen = strlen(v->value.string); templen = strlen(v->value.string);
if (templen > (sizeof(temp) - 33)) if (templen > (PDFIO_MAX_STRING - 1))
{ {
_pdfioFileError(pdf, "Unable to read encrypted string - too long."); _pdfioFileError(pdf, "Unable to read encrypted string - too long.");
return (false); return (false);
} }
else if ((temp = (uint8_t *)_pdfioStringAllocBuffer(pdf)) == NULL)
{
_pdfioFileError(pdf, "Unable to read encrypted binary string - out of memory.");
return (false);
}
ivlen = templen; ivlen = templen;
if ((cb = _pdfioCryptoMakeReader(pdf, obj, &ctx, (uint8_t *)v->value.string, &ivlen)) == NULL) if ((cb = _pdfioCryptoMakeReader(pdf, obj, &ctx, (uint8_t *)v->value.string, &ivlen)) == NULL)
return (false); return (false);
templen = (cb)(&ctx, temp, (uint8_t *)v->value.string + ivlen, templen - ivlen); templen = (cb)(&ctx, temp, (uint8_t *)v->value.string + ivlen, templen - ivlen);
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128 && temp[templen - 1] <= templen)
templen -= temp[templen - 1];
temp[templen] = '\0'; temp[templen] = '\0';
if ((timeval = get_date_time((char *)temp)) != 0) if ((templen & 1) == 0 && (!memcmp(temp, "\376\377", 2) || !memcmp(temp, "\377\376", 2)))
{
// Convert UTF-16 to UTF-8...
char utf8[4096]; // Temporary string
_pdfio_utf16cpy(utf8, temp, templen, sizeof(utf8));
if ((timeval = get_date_time((char *)utf8)) != 0)
{
// Change the type to date...
v->type = PDFIO_VALTYPE_DATE;
v->value.date = timeval;
}
else
{
// Copy the decrypted string back to the value...
v->value.string = pdfioStringCreate(pdf, utf8);
}
}
else if ((timeval = get_date_time((char *)temp)) != 0)
{ {
// Change the type to date... // Change the type to date...
v->type = PDFIO_VALTYPE_DATE; v->type = PDFIO_VALTYPE_DATE;
@ -207,6 +235,8 @@ _pdfioValueDecrypt(pdfio_file_t *pdf, // I - PDF file
// Copy the decrypted string back to the value... // Copy the decrypted string back to the value...
v->value.string = pdfioStringCreate(pdf, (char *)temp); v->value.string = pdfioStringCreate(pdf, (char *)temp);
} }
_pdfioStringFreeBuffer(pdf, (char *)temp);
break; break;
} }