Compare commits

...

5 Commits

7 changed files with 548 additions and 263 deletions

View File

@ -8,6 +8,7 @@ v1.5.0 - YYYY-MM-DD
- Added `pdfioFileCreateICCObjFromData` API. - Added `pdfioFileCreateICCObjFromData` API.
- Added `pdfioFileGetModDate()` API (Issue #88) - Added `pdfioFileGetModDate()` API (Issue #88)
- Added support for using libpng to embed PNG images in PDF output (Issue #90) - Added support for using libpng to embed PNG images in PDF output (Issue #90)
- Added support for opening damaged PDF files (Issue #45)
- Updated the pdf2txt example to support font encodings. - Updated the pdf2txt example to support font encodings.

View File

@ -15,7 +15,7 @@
.SILENT: .SILENT:
# Version number... # Version numbers...
PDFIO_VERSION = @PDFIO_VERSION@ PDFIO_VERSION = @PDFIO_VERSION@
PDFIO_VERSION_MAJOR = @PDFIO_VERSION_MAJOR@ PDFIO_VERSION_MAJOR = @PDFIO_VERSION_MAJOR@
PDFIO_VERSION_MINOR = @PDFIO_VERSION_MINOR@ PDFIO_VERSION_MINOR = @PDFIO_VERSION_MINOR@

View File

@ -21,40 +21,60 @@ if test $# != 1; then
exit 1 exit 1
fi fi
status=0
version=$1 version=$1
version_major=$(echo $1 | awk -F. '{print $1}')
version_minor=$(echo $1 | awk -F. '{print $2}')
# Check that version number has been updated everywhere... # Check that version number has been updated everywhere...
if test $(grep AC_INIT configure.ac | awk '{print $2}') != "[$version],"; then if test $(grep AC_INIT configure.ac | awk '{print $2}') != "[$version],"; then
echo "Still need to update AC_INIT version in 'configure.ac'." echo "Still need to update AC_INIT version in 'configure.ac'."
exit 1 status=1
fi fi
if test $(head -4 CHANGES.md | tail -1 | awk '{print $1}') != "v$version"; then if test $(head -4 CHANGES.md | tail -1 | awk '{print $1}') != "v$version"; then
echo "Still need to update CHANGES.md version number." echo "Still need to update CHANGES.md version number."
exit 1 status=1
fi fi
if test $(head -4 CHANGES.md | tail -1 | awk '{print $3}') = "YYYY-MM-DD"; then if test $(head -4 CHANGES.md | tail -1 | awk '{print $3}') = "YYYY-MM-DD"; then
echo "Still need to update CHANGES.md release date." echo "Still need to update CHANGES.md release date."
exit 1 status=1
fi fi
if test $(grep PDFIO_VERSION= configure | awk -F \" '{print $2}') != "$version"; then if test $(grep PDFIO_VERSION= configure | awk -F \" '{print $2}') != "$version"; then
echo "Still need to run 'autoconf -f'." echo "Still need to run 'autoconf -f'."
exit 1 status=1
fi fi
if test $(grep '<version>' pdfio_native.nuspec | sed -E -e '1,$s/^.*<version>([0-9.]+).*$/\1/') != "$version"; then if test $(grep '<version>' pdfio_native.nuspec | sed -E -e '1,$s/^.*<version>([0-9.]+).*$/\1/') != "$version"; then
echo "Still need to update version in 'pdfio_native.nuspec'." echo "Still need to update version in 'pdfio_native.nuspec'."
exit 1 status=1
fi fi
if test $(grep '<version>' pdfio_native.redist.nuspec | sed -E -e '1,$s/^.*<version>([0-9.]+).*$/\1/') != "$version"; then if test $(grep '<version>' pdfio_native.redist.nuspec | sed -E -e '1,$s/^.*<version>([0-9.]+).*$/\1/') != "$version"; then
echo "Still need to update version in 'pdfio_native.redist.nuspec'." echo "Still need to update version in 'pdfio_native.redist.nuspec'."
exit 1 status=1
fi fi
if test $(grep PDFIO_VERSION pdfio.h | awk -F \" '{print $2}') != "$version"; then if test $(grep PDFIO_VERSION pdfio.h | awk -F \" '{print $2}') != "$version"; then
echo "Still need to update PDFIO_VERSION in 'pdfio.h'." echo "Still need to update PDFIO_VERSION in 'pdfio.h'."
status=1
fi
if test $(grep PDFIO_VERSION_MAJOR pdfio.h | awk '{print $4}') != "$version_major"; then
echo "Still need to update PDFIO_VERSION_MAJOR in 'pdfio.h'."
status=1
fi
if test $(grep PDFIO_VERSION_MINOR pdfio.h | awk '{print $4}') != "$version_minor"; then
echo "Still need to update PDFIO_VERSION_MINOR in 'pdfio.h'."
status=1
fi
if test $(grep VERSION pdfio1.def | awk '{print $2}') != "$version_major.$version_minor"; then
echo "Still need to update VERSION in 'pdfio1.def'."
status=1
fi
if test $status = 1; then
exit 1 exit 1
fi fi

View File

@ -1,7 +1,7 @@
// //
// AES functions for PDFio. // AES functions for PDFio.
// //
// Copyright © 2021 by Michael R Sweet. // Copyright © 2021-2025 by Michael R Sweet.
// //
// Licensed under Apache License v2.0. See the file "LICENSE" for more // Licensed under Apache License v2.0. See the file "LICENSE" for more
// information. // information.
@ -76,18 +76,18 @@ static const uint8_t Rcon[11] = // Round constants
// Local functions... // Local functions...
// //
static void AddRoundKey(size_t round, state_t *state, const uint8_t *RoundKey); static void add_round_key(size_t round, state_t *state, const uint8_t *round_key);
static void SubBytes(state_t *state); static void sub_bytes(state_t *state);
static void ShiftRows(state_t *state); static void shift_rows(state_t *state);
static uint8_t xtime(uint8_t x); static uint8_t xtime(uint8_t x);
static void MixColumns(state_t *state); static void mix_columns(state_t *state);
static uint8_t Multiply(uint8_t x, uint8_t y); static uint8_t multiply(uint8_t x, uint8_t y);
static void InvMixColumns(state_t *state); static void inv_mix_columns(state_t *state);
static void InvSubBytes(state_t *state); static void inv_sub_bytes(state_t *state);
static void InvShiftRows(state_t *state); static void inv_shift_rows(state_t *state);
static void Cipher(state_t *state, const _pdfio_aes_t *ctx); static void cipher(state_t *state, const _pdfio_aes_t *ctx);
static void InvCipher(state_t *state, const _pdfio_aes_t *ctx); static void inv_cipher(state_t *state, const _pdfio_aes_t *ctx);
static void XorWithIv(uint8_t *buf, const uint8_t *Iv); static void xor_with_iv(uint8_t *buf, const uint8_t *Iv);
// //
@ -106,7 +106,6 @@ _pdfioCryptoAESInit(
*rkptr, // Current round_key values *rkptr, // Current round_key values
*rkend, // End of round_key values *rkend, // End of round_key values
tempa[4]; // Used for the column/row operations tempa[4]; // Used for the column/row operations
// size_t roundlen = keylen + 24; // Length of round_key
size_t nwords = keylen / 4; // Number of 32-bit words in key size_t nwords = keylen / 4; // Number of 32-bit words in key
@ -188,8 +187,8 @@ _pdfioCryptoAESDecrypt(
while (len > 15) while (len > 15)
{ {
memcpy(next_iv, outbuffer, 16); memcpy(next_iv, outbuffer, 16);
InvCipher((state_t *)outbuffer, ctx); inv_cipher((state_t *)outbuffer, ctx);
XorWithIv(outbuffer, ctx->iv); xor_with_iv(outbuffer, ctx->iv);
memcpy(ctx->iv, next_iv, 16); memcpy(ctx->iv, next_iv, 16);
outbuffer += 16; outbuffer += 16;
len -= 16; len -= 16;
@ -231,8 +230,8 @@ _pdfioCryptoAESEncrypt(
while (len > 15) while (len > 15)
{ {
XorWithIv(outbuffer, iv); xor_with_iv(outbuffer, iv);
Cipher((state_t*)outbuffer, ctx); cipher((state_t*)outbuffer, ctx);
iv = outbuffer; iv = outbuffer;
outbuffer += 16; outbuffer += 16;
len -= 16; len -= 16;
@ -244,8 +243,8 @@ _pdfioCryptoAESEncrypt(
// Pad the final buffer with (16 - len)... // Pad the final buffer with (16 - len)...
memset(outbuffer + len, 16 - len, 16 - len); memset(outbuffer + len, 16 - len, 16 - len);
XorWithIv(outbuffer, iv); xor_with_iv(outbuffer, iv);
Cipher((state_t*)outbuffer, ctx); cipher((state_t*)outbuffer, ctx);
iv = outbuffer; iv = outbuffer;
outbytes += 16; outbytes += 16;
} }
@ -257,24 +256,32 @@ _pdfioCryptoAESEncrypt(
} }
// This function adds the round key to state. //
// 'add_round_key()' - Add the round key to state.
//
// The round key is added to the state by an XOR function. // The round key is added to the state by an XOR function.
//
static void static void
AddRoundKey(size_t round, state_t *state, const uint8_t *RoundKey) add_round_key(size_t round, // I - Which round
state_t *state, // I - Current state
const uint8_t *round_key) // I - Key
{ {
unsigned i; // Looping var unsigned i; // Looping var
uint8_t *sptr = (*state)[0]; // Pointer into state uint8_t *sptr = (*state)[0]; // Pointer into state
for (RoundKey += round * 16, i = 16; i > 0; i --, sptr ++, RoundKey ++) for (round_key += round * 16, i = 16; i > 0; i --, sptr ++, round_key ++)
*sptr ^= *RoundKey; *sptr ^= *round_key;
} }
// The SubBytes Function Substitutes the values in the //
// state matrix with values in an S-box. // 'sub_bytes()' - Substitute the values in the state matrix with values in an S-box.
//
static void static void
SubBytes(state_t *state) sub_bytes(state_t *state) // I - Current state
{ {
unsigned i; // Looping var unsigned i; // Looping var
uint8_t *sptr = (*state)[0]; // Pointer into state uint8_t *sptr = (*state)[0]; // Pointer into state
@ -284,11 +291,16 @@ SubBytes(state_t *state)
*sptr = sbox[*sptr]; *sptr = sbox[*sptr];
} }
// The ShiftRows() function shifts the rows in the state to the left.
//
// 'shift_rows()' - Shift the rows in the state to the left.
//
// Each row is shifted with different offset. // Each row is shifted with different offset.
// Offset = Row number. So the first row is not shifted. // Offset = Row number. So the first row is not shifted.
//
static void static void
ShiftRows(state_t *state) shift_rows(state_t *state) // I - Current state
{ {
uint8_t *sptr = (*state)[0]; // Pointer into state uint8_t *sptr = (*state)[0]; // Pointer into state
uint8_t temp; // Temporary value uint8_t temp; // Temporary value
@ -319,21 +331,29 @@ ShiftRows(state_t *state)
} }
static uint8_t //
xtime(uint8_t x) // 'xtime()' - Compute the AES xtime function.
//
static uint8_t // O - xtime(x)
xtime(uint8_t x) // I - Column value
{ {
return ((uint8_t)((x << 1) ^ ((x >> 7) * 0x1b))); return ((uint8_t)((x << 1) ^ ((x >> 7) * 0x1b)));
} }
// MixColumns function mixes the columns of the state matrix //
// 'mix_columns()' - Mix the columns of the state matrix.
//
static void static void
MixColumns(state_t *state) mix_columns(state_t *state) // I - Current state
{ {
unsigned i; // Looping var unsigned i; // Looping var
uint8_t *sptr = (*state)[0]; // Pointer into state uint8_t *sptr = (*state)[0]; // Pointer into state
uint8_t Tmp, Tm, t; // Temporary values uint8_t Tmp, Tm, t; // Temporary values
for (i = 4; i > 0; i --, sptr += 4) for (i = 4; i > 0; i --, sptr += 4)
{ {
t = sptr[0]; t = sptr[0];
@ -357,11 +377,15 @@ MixColumns(state_t *state)
} }
// Multiply is used to multiply numbers in the field GF(2^8) //
// 'multiply()' - Multiply numbers in the field GF(2^8)
//
// Note: The last call to xtime() is unneeded, but often ends up generating a smaller binary // Note: The last call to xtime() is unneeded, but often ends up generating a smaller binary
// The compiler seems to be able to vectorize the operation better this way. // The compiler seems to be able to vectorize the operation better this way.
// See https://github.com/kokke/tiny-AES-c/pull/34 // See https://github.com/kokke/tiny-AES-c/pull/34
static uint8_t Multiply(uint8_t x, uint8_t y) //
static uint8_t multiply(uint8_t x, uint8_t y)
{ {
return (((y & 1) * x) ^ return (((y & 1) * x) ^
((y>>1 & 1) * xtime(x)) ^ ((y>>1 & 1) * xtime(x)) ^
@ -371,11 +395,15 @@ static uint8_t Multiply(uint8_t x, uint8_t y)
} }
// MixColumns function mixes the columns of the state matrix. //
// 'inv_mix_columns()' - Reverse the mixing of the columns of the state matrix.
//
// The method used to multiply may be difficult to understand for the inexperienced. // The method used to multiply may be difficult to understand for the inexperienced.
// Please use the references to gain more information. // Please use the references to gain more information.
//
static void static void
InvMixColumns(state_t *state) inv_mix_columns(state_t *state) // I - Current state
{ {
unsigned i; // Looping var unsigned i; // Looping var
uint8_t *sptr = (*state)[0]; // Pointer into state uint8_t *sptr = (*state)[0]; // Pointer into state
@ -389,18 +417,20 @@ InvMixColumns(state_t *state)
c = sptr[2]; c = sptr[2];
d = sptr[3]; d = sptr[3];
*sptr++ = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09); *sptr++ = multiply(a, 0x0e) ^ multiply(b, 0x0b) ^ multiply(c, 0x0d) ^ multiply(d, 0x09);
*sptr++ = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d); *sptr++ = multiply(a, 0x09) ^ multiply(b, 0x0e) ^ multiply(c, 0x0b) ^ multiply(d, 0x0d);
*sptr++ = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b); *sptr++ = multiply(a, 0x0d) ^ multiply(b, 0x09) ^ multiply(c, 0x0e) ^ multiply(d, 0x0b);
*sptr++ = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e); *sptr++ = multiply(a, 0x0b) ^ multiply(b, 0x0d) ^ multiply(c, 0x09) ^ multiply(d, 0x0e);
} }
} }
// The SubBytes Function Substitutes the values in the //
// state matrix with values in an S-box. // 'inv_sub_bytes()' - Reverse the S-box substitution of the values in the state matrix.
//
static void static void
InvSubBytes(state_t *state) inv_sub_bytes(state_t *state) // I - Current state
{ {
unsigned i; // Looping var unsigned i; // Looping var
uint8_t *sptr = (*state)[0]; // Pointer into state uint8_t *sptr = (*state)[0]; // Pointer into state
@ -411,8 +441,12 @@ InvSubBytes(state_t *state)
} }
//
// 'inv_shift_rows()' - Shift the rows in the state to the right.
//
static void static void
InvShiftRows(state_t *state) inv_shift_rows(state_t *state) // I - Current state
{ {
uint8_t *sptr = (*state)[0]; // Pointer into state uint8_t *sptr = (*state)[0]; // Pointer into state
uint8_t temp; // Temporary value uint8_t temp; // Temporary value
@ -443,40 +477,52 @@ InvShiftRows(state_t *state)
} }
// Cipher is the main function that encrypts the PlainText. //
// 'cipher()' - Encrypt the PlainText.
//
static void static void
Cipher(state_t *state, const _pdfio_aes_t *ctx) cipher(state_t *state, // I - Current state
const _pdfio_aes_t *ctx) // I - AES context
{ {
size_t round = 0; size_t round = 0; // Current round
// Add the First round key to the state before starting the rounds. // Add the First round key to the state before starting the rounds.
AddRoundKey(0, state, ctx->round_key); add_round_key(0, state, ctx->round_key);
// There will be Nr rounds. // There will be Nr rounds.
// The first Nr-1 rounds are identical. // The first Nr-1 rounds are identical.
// These Nr rounds are executed in the loop below. // These Nr rounds are executed in the loop below.
// Last one without MixColumns() // Last one without mix_columns()
for (round = 1; round < ctx->round_size; round ++) for (round = 1; round < ctx->round_size; round ++)
{ {
SubBytes(state); sub_bytes(state);
ShiftRows(state); shift_rows(state);
MixColumns(state); mix_columns(state);
AddRoundKey(round, state, ctx->round_key); add_round_key(round, state, ctx->round_key);
} }
// Add round key to last round // Add round key to last round
SubBytes(state); sub_bytes(state);
ShiftRows(state); shift_rows(state);
AddRoundKey(ctx->round_size, state, ctx->round_key); add_round_key(ctx->round_size, state, ctx->round_key);
} }
//
// 'inv_cipher()' - Decrypt the CipherText.
//
static void static void
InvCipher(state_t *state, const _pdfio_aes_t *ctx) inv_cipher(state_t *state, // I - Current state
const _pdfio_aes_t *ctx) // I - AES context
{ {
size_t round; size_t round; // Current round
// Add the First round key to the state before starting the rounds. // Add the First round key to the state before starting the rounds.
AddRoundKey(ctx->round_size, state, ctx->round_key); add_round_key(ctx->round_size, state, ctx->round_key);
// There will be Nr rounds. // There will be Nr rounds.
// The first Nr-1 rounds are identical. // The first Nr-1 rounds are identical.
@ -484,20 +530,25 @@ InvCipher(state_t *state, const _pdfio_aes_t *ctx)
// Last one without InvMixColumn() // Last one without inv_mix_columns()
for (round = ctx->round_size - 1; ; round --) for (round = ctx->round_size - 1; ; round --)
{ {
InvShiftRows(state); inv_shift_rows(state);
InvSubBytes(state); inv_sub_bytes(state);
AddRoundKey(round, state, ctx->round_key); add_round_key(round, state, ctx->round_key);
if (round == 0) if (round == 0)
break; break;
InvMixColumns(state); inv_mix_columns(state);
} }
} }
//
// 'xor_with_iv()' - XOR a block with the initialization vector.
//
static void static void
XorWithIv(uint8_t *buf, const uint8_t *Iv) xor_with_iv(uint8_t *buf, // I - Block
const uint8_t *Iv) // I - Initialization vector
{ {
// 16-byte block... // 16-byte block...
*buf++ ^= *Iv++; *buf++ ^= *Iv++;

View File

@ -25,6 +25,7 @@ static struct lconv *get_lconv(void);
static bool load_obj_stream(pdfio_obj_t *obj); static bool load_obj_stream(pdfio_obj_t *obj);
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth); static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data); static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data);
static bool repair_xref(pdfio_file_t *pdf, pdfio_password_cb_t password_cb, void *password_data);
static bool write_pages(pdfio_file_t *pdf); static bool write_pages(pdfio_file_t *pdf);
static bool write_trailer(pdfio_file_t *pdf); static bool write_trailer(pdfio_file_t *pdf);
@ -1070,7 +1071,10 @@ pdfioFileOpen(
xref_offset = (off_t)strtol(ptr + 9, NULL, 10); xref_offset = (off_t)strtol(ptr + 9, NULL, 10);
if (!load_xref(pdf, xref_offset, password_cb, password_cbdata)) if (!load_xref(pdf, xref_offset, password_cb, password_cbdata))
goto error; {
if (!repair_xref(pdf, password_cb, password_cbdata))
goto error;
}
return (pdf); return (pdf);
@ -2165,6 +2169,159 @@ load_xref(
} }
//
// 'repair_xref()' - Try to "repair" a PDF file and its cross-references...
//
// Called when the regular cross-reference table could not be loaded.  The
// file is re-read line-by-line from the beginning:
//
// - Every line of the form "N G obj" registers object N (generation G) at
//   that line's file offset and reads the object's value.  If the value is
//   followed by the "stream" keyword, the stream offset is recorded, "ObjStm"
//   objects are queued for loading, and the first "XRef" object's dictionary
//   is used as the trailer dictionary.
// - Every "trailer" line is parsed as a dictionary; the first one seen is
//   used as the trailer dictionary unless one was already set.
//
// After the scan the file is unlocked (if an Encrypt object was found), the
// queued object streams are loaded, and the Info, Root, and page objects are
// looked up from the trailer dictionary.
//

static bool				// O - `true` on success, `false` on failure
repair_xref(
    pdfio_file_t        *pdf,		// I - PDF file
    pdfio_password_cb_t password_cb,	// I - Password callback or `NULL` for none
    void                *password_data)	// I - Password callback data, if any
{
  char		line[16384],		// Line from file
		*ptr;			// Pointer into line
  off_t		line_offset;		// Offset in file
  intmax_t	number;			// Object number
  int		generation;		// Generation number
  size_t	i;			// Looping var
  size_t	num_sobjs = 0;		// Number of object streams
  pdfio_obj_t	*sobjs[16384];		// Object streams to load
					// NOTE(review): together with `line` this
					// is a large stack frame; streams beyond
					// the array size are silently not queued.


  // Read from the beginning of the file, looking for object headers
  // ("N G obj") and trailer dictionaries...
  if ((line_offset = _pdfioFileSeek(pdf, 0, SEEK_SET)) < 0)
    return (false);

  while (_pdfioFileGets(pdf, line, sizeof(line)))
  {
    // See if this is the start of an object...
    if (line[0] >= '1' && line[0] <= '9')
    {
      // Maybe, look some more...
      if ((number = strtoimax(line, &ptr, 10)) >= 1 && (generation = (int)strtol(ptr, &ptr, 10)) >= 0 && generation < 65536)
      {
        while (isspace(*ptr & 255))
          ptr ++;

        if (!strncmp(ptr, "obj", 3))
        {
          // Yes, start of an object...
          pdfio_obj_t		*obj;	// Object
          _pdfio_token_t	tb;	// Token buffer/stack

          PDFIO_DEBUG("OBJECT %ld %d at offset %ld\n", (long)number, generation, (long)line_offset);

          if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, line_offset)) == NULL)
          {
            _pdfioFileError(pdf, "Unable to allocate memory for object.");
            return (false);
          }

          _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);

          if (!_pdfioValueRead(pdf, obj, &tb, &obj->value, 0))
          {
            // NOTE(review): this message is reported for any object whose
            // value cannot be read, not only cross-reference streams.
            _pdfioFileError(pdf, "Unable to read cross-reference stream dictionary.");
            return (false);
          }

          // Only objects whose value is followed by the "stream" keyword
          // have stream data; strcmp() returns 0 on a match, so the result
          // must be negated here.
          if (_pdfioTokenGet(&tb, line, sizeof(line)) && !strcmp(line, "stream"))
          {
            const char *type = pdfioObjGetType(obj);
					// Object type

            // Remember where the stream data starts...
            _pdfioTokenFlush(&tb);
            obj->stream_offset = _pdfioFileTell(pdf);

            if (type && !strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
            {
              // Queue the object stream; it is loaded after the scan, once
              // the file has been unlocked...
              sobjs[num_sobjs] = obj;
              num_sobjs ++;
            }

            if (type && !strcmp(type, "XRef") && !pdf->trailer_dict)
            {
              // Save the trailer dictionary...
              pdf->trailer_dict = pdfioObjGetDict(obj);
              pdf->encrypt_obj  = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
              pdf->id_array     = pdfioDictGetArray(pdf->trailer_dict, "ID");
            }
          }
        }
      }
    }
    else if (!strncmp(line, "trailer", 7) && (!line[7] || isspace(line[7] & 255) || line[7] == '<'))
    {
      // Trailer dictionary
      _pdfio_token_t	tb;		// Token buffer/stack
      _pdfio_value_t	trailer;	// Trailer

      if (line[7])
      {
        // Probably the start of the trailer dictionary, rewind the file so
        // we can read it...
        _pdfioFileSeek(pdf, line_offset + 7, SEEK_SET);
      }

      PDFIO_DEBUG("TRAILER at offset %ld\n", (long)line_offset);

      _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);

      if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0))
      {
        // NOTE(review): the value being read here is a trailer dictionary,
        // not a cross-reference stream dictionary.
        _pdfioFileError(pdf, "Unable to read cross-reference stream dictionary.");
        return (false);
      }
      else if (trailer.type != PDFIO_VALTYPE_DICT)
      {
        _pdfioFileError(pdf, "Trailer is not a dictionary.");
        return (false);
      }

      _pdfioTokenFlush(&tb);

      if (!pdf->trailer_dict)
      {
        // Save the trailer dictionary and grab the encryption object and
        // file ID array from it...
        pdf->trailer_dict = trailer.value.dict;
        pdf->encrypt_obj  = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
        pdf->id_array     = pdfioDictGetArray(pdf->trailer_dict, "ID");
      }
    }

    // Get the offset for the next line...
    line_offset = _pdfioFileTell(pdf);
  }

  // Without a trailer dictionary there is nothing to look the Root object up
  // in, so fail explicitly instead of passing a NULL dictionary around...
  if (!pdf->trailer_dict)
  {
    _pdfioFileError(pdf, "Missing trailer dictionary.");
    return (false);
  }

  // If the trailer contains an Encrypt key, try unlocking the file...
  if (pdf->encrypt_obj && !_pdfioCryptoUnlock(pdf, password_cb, password_data))
    return (false);

  // Load any stream objects...
  for (i = 0; i < num_sobjs; i ++)
  {
    if (!load_obj_stream(sobjs[i]))
      return (false);
  }

  // Once we have all of the objects loaded, get the important objects and
  // build the pages array...
  pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info");

  if ((pdf->root_obj = pdfioDictGetObj(pdf->trailer_dict, "Root")) == NULL)
  {
    _pdfioFileError(pdf, "Missing Root object.");
    return (false);
  }

  PDFIO_DEBUG("repair_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);

  // Load pages...
  return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages"), 0));
}
// //
// 'write_pages()' - Write the PDF pages objects. // 'write_pages()' - Write the PDF pages objects.
// //

View File

@ -1,7 +1,7 @@
// //
// MD5 functions for PDFio. // MD5 functions for PDFio.
// //
// Copyright © 2021 by Michael R Sweet. // Copyright © 2021-2025 by Michael R Sweet.
// Copyright © 1999 Aladdin Enterprises. All rights reserved. // Copyright © 1999 Aladdin Enterprises. All rights reserved.
// //
// This software is provided 'as-is', without any express or implied // This software is provided 'as-is', without any express or implied
@ -108,231 +108,285 @@
#define T63 0x2ad7d2bb #define T63 0x2ad7d2bb
#define T64 0xeb86d391 #define T64 0xeb86d391
//
// Use the unoptimized (big-endian) implementation if we don't know the
// endian-ness of the platform.
//
#ifdef __BYTE_ORDER__
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define ARCH_IS_BIG_ENDIAN 0 // Use little endian optimized version
# else
# define ARCH_IS_BIG_ENDIAN 1 // Use generic version
# endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#elif !defined(ARCH_IS_BIG_ENDIAN)
# define ARCH_IS_BIG_ENDIAN 1 // Use generic version
#endif // !ARCH_IS_BIG_ENDIAN
//
// 'md5_process()' - Hash a block of data.
//
static void static void
md5_process(_pdfio_md5_t *pms, const uint8_t *data /*[64]*/) md5_process(_pdfio_md5_t *pms, // I - MD5 state
const uint8_t *data/*[64]*/)// I - Data
{ {
uint32_t uint32_t a = pms->abcd[0], // First word of state
a = pms->abcd[0], b = pms->abcd[1], b = pms->abcd[1], // Second word of state
c = pms->abcd[2], d = pms->abcd[3]; c = pms->abcd[2], // Third word of state
uint32_t t; d = pms->abcd[3]; // Fourth word of state
uint32_t t; // Temporary state
#ifndef ARCH_IS_BIG_ENDIAN
# define ARCH_IS_BIG_ENDIAN 1 /* slower, default implementation */
#endif
#if ARCH_IS_BIG_ENDIAN #if ARCH_IS_BIG_ENDIAN
// On big-endian machines, we must arrange the bytes in the right
// order. (This also works on machines of unknown byte order.)
uint32_t X[16]; // Little-endian representation
const uint8_t *xp; // Pointer into data
int i; // Looping var
/* for (i = 0, xp = data; i < 16; i ++, xp += 4)
* On big-endian machines, we must arrange the bytes in the right X[i] = xp[0] + (unsigned)(xp[1] << 8) + (unsigned)(xp[2] << 16) + (unsigned)(xp[3] << 24);
* order. (This also works on machines of unknown byte order.)
*/
uint32_t X[16];
const uint8_t *xp = data;
int i;
for (i = 0; i < 16; ++i, xp += 4)
X[i] = xp[0] + (unsigned)(xp[1] << 8) + (unsigned)(xp[2] << 16) + (unsigned)(xp[3] << 24);
#else /* !ARCH_IS_BIG_ENDIAN */ #else /* !ARCH_IS_BIG_ENDIAN */
// On little-endian machines, we can process properly aligned data without copying it.
uint32_t xbuf[16]; // Aligned buffer
const uint32_t *X; // Pointer to little-endian representation
/* if (!((data - (const uint8_t *)0) & 3))
* On little-endian machines, we can process properly aligned data {
* without copying it. // data is properly aligned, use it directly...
*/ X = (const uint32_t *)data;
uint32_t xbuf[16]; }
const uint32_t *X; else
{
if (!((data - (const uint8_t *)0) & 3)) { // data is not aligned, copy to the aligned buffer...
/* data are properly aligned */ memcpy(xbuf, data, 64);
X = (const uint32_t *)data; X = xbuf;
} else { }
/* not aligned */ #endif // ARCH_IS_BIG_ENDIAN
memcpy(xbuf, data, 64);
X = xbuf;
}
#endif
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
/* Round 1. */ // Round 1.
/* Let [abcd k s i] denote the operation // Let [abcd k s i] denote the operation
a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ // a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s).
#define F(x, y, z) (((x) & (y)) | (~(x) & (z))) #define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
#define SET(a, b, c, d, k, s, Ti)\ #define SET(a, b, c, d, k, s, Ti) t = a + F(b,c,d) + X[k] + Ti; a = ROTATE_LEFT(t, s) + b
t = a + F(b,c,d) + X[k] + Ti;\
a = ROTATE_LEFT(t, s) + b // Do the following 16 operations.
/* Do the following 16 operations. */ SET(a, b, c, d, 0, 7, T1);
SET(a, b, c, d, 0, 7, T1); SET(d, a, b, c, 1, 12, T2);
SET(d, a, b, c, 1, 12, T2); SET(c, d, a, b, 2, 17, T3);
SET(c, d, a, b, 2, 17, T3); SET(b, c, d, a, 3, 22, T4);
SET(b, c, d, a, 3, 22, T4); SET(a, b, c, d, 4, 7, T5);
SET(a, b, c, d, 4, 7, T5); SET(d, a, b, c, 5, 12, T6);
SET(d, a, b, c, 5, 12, T6); SET(c, d, a, b, 6, 17, T7);
SET(c, d, a, b, 6, 17, T7); SET(b, c, d, a, 7, 22, T8);
SET(b, c, d, a, 7, 22, T8); SET(a, b, c, d, 8, 7, T9);
SET(a, b, c, d, 8, 7, T9); SET(d, a, b, c, 9, 12, T10);
SET(d, a, b, c, 9, 12, T10); SET(c, d, a, b, 10, 17, T11);
SET(c, d, a, b, 10, 17, T11); SET(b, c, d, a, 11, 22, T12);
SET(b, c, d, a, 11, 22, T12); SET(a, b, c, d, 12, 7, T13);
SET(a, b, c, d, 12, 7, T13); SET(d, a, b, c, 13, 12, T14);
SET(d, a, b, c, 13, 12, T14); SET(c, d, a, b, 14, 17, T15);
SET(c, d, a, b, 14, 17, T15); SET(b, c, d, a, 15, 22, T16);
SET(b, c, d, a, 15, 22, T16);
#undef SET #undef SET
/* Round 2. */ // Round 2.
/* Let [abcd k s i] denote the operation // Let [abcd k s i] denote the operation
a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ // a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s).
#define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) #define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
#define SET(a, b, c, d, k, s, Ti)\ #define SET(a, b, c, d, k, s, Ti) t = a + G(b,c,d) + X[k] + Ti; a = ROTATE_LEFT(t, s) + b
t = a + G(b,c,d) + X[k] + Ti;\
a = ROTATE_LEFT(t, s) + b // Do the following 16 operations.
/* Do the following 16 operations. */ SET(a, b, c, d, 1, 5, T17);
SET(a, b, c, d, 1, 5, T17); SET(d, a, b, c, 6, 9, T18);
SET(d, a, b, c, 6, 9, T18); SET(c, d, a, b, 11, 14, T19);
SET(c, d, a, b, 11, 14, T19); SET(b, c, d, a, 0, 20, T20);
SET(b, c, d, a, 0, 20, T20); SET(a, b, c, d, 5, 5, T21);
SET(a, b, c, d, 5, 5, T21); SET(d, a, b, c, 10, 9, T22);
SET(d, a, b, c, 10, 9, T22); SET(c, d, a, b, 15, 14, T23);
SET(c, d, a, b, 15, 14, T23); SET(b, c, d, a, 4, 20, T24);
SET(b, c, d, a, 4, 20, T24); SET(a, b, c, d, 9, 5, T25);
SET(a, b, c, d, 9, 5, T25); SET(d, a, b, c, 14, 9, T26);
SET(d, a, b, c, 14, 9, T26); SET(c, d, a, b, 3, 14, T27);
SET(c, d, a, b, 3, 14, T27); SET(b, c, d, a, 8, 20, T28);
SET(b, c, d, a, 8, 20, T28); SET(a, b, c, d, 13, 5, T29);
SET(a, b, c, d, 13, 5, T29); SET(d, a, b, c, 2, 9, T30);
SET(d, a, b, c, 2, 9, T30); SET(c, d, a, b, 7, 14, T31);
SET(c, d, a, b, 7, 14, T31); SET(b, c, d, a, 12, 20, T32);
SET(b, c, d, a, 12, 20, T32);
#undef SET #undef SET
/* Round 3. */ // Round 3.
/* Let [abcd k s t] denote the operation // Let [abcd k s t] denote the operation
a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ // a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s).
#define H(x, y, z) ((x) ^ (y) ^ (z)) #define H(x, y, z) ((x) ^ (y) ^ (z))
#define SET(a, b, c, d, k, s, Ti)\ #define SET(a, b, c, d, k, s, Ti) t = a + H(b,c,d) + X[k] + Ti; a = ROTATE_LEFT(t, s) + b
t = a + H(b,c,d) + X[k] + Ti;\
a = ROTATE_LEFT(t, s) + b // Do the following 16 operations.
/* Do the following 16 operations. */ SET(a, b, c, d, 5, 4, T33);
SET(a, b, c, d, 5, 4, T33); SET(d, a, b, c, 8, 11, T34);
SET(d, a, b, c, 8, 11, T34); SET(c, d, a, b, 11, 16, T35);
SET(c, d, a, b, 11, 16, T35); SET(b, c, d, a, 14, 23, T36);
SET(b, c, d, a, 14, 23, T36); SET(a, b, c, d, 1, 4, T37);
SET(a, b, c, d, 1, 4, T37); SET(d, a, b, c, 4, 11, T38);
SET(d, a, b, c, 4, 11, T38); SET(c, d, a, b, 7, 16, T39);
SET(c, d, a, b, 7, 16, T39); SET(b, c, d, a, 10, 23, T40);
SET(b, c, d, a, 10, 23, T40); SET(a, b, c, d, 13, 4, T41);
SET(a, b, c, d, 13, 4, T41); SET(d, a, b, c, 0, 11, T42);
SET(d, a, b, c, 0, 11, T42); SET(c, d, a, b, 3, 16, T43);
SET(c, d, a, b, 3, 16, T43); SET(b, c, d, a, 6, 23, T44);
SET(b, c, d, a, 6, 23, T44); SET(a, b, c, d, 9, 4, T45);
SET(a, b, c, d, 9, 4, T45); SET(d, a, b, c, 12, 11, T46);
SET(d, a, b, c, 12, 11, T46); SET(c, d, a, b, 15, 16, T47);
SET(c, d, a, b, 15, 16, T47); SET(b, c, d, a, 2, 23, T48);
SET(b, c, d, a, 2, 23, T48);
#undef SET #undef SET
/* Round 4. */ // Round 4.
/* Let [abcd k s t] denote the operation // Let [abcd k s t] denote the operation
a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ // a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s).
#define I(x, y, z) ((y) ^ ((x) | ~(z))) #define I(x, y, z) ((y) ^ ((x) | ~(z)))
#define SET(a, b, c, d, k, s, Ti)\ #define SET(a, b, c, d, k, s, Ti) t = a + I(b,c,d) + X[k] + Ti; a = ROTATE_LEFT(t, s) + b
t = a + I(b,c,d) + X[k] + Ti;\
a = ROTATE_LEFT(t, s) + b // Do the following 16 operations.
/* Do the following 16 operations. */ SET(a, b, c, d, 0, 6, T49);
SET(a, b, c, d, 0, 6, T49); SET(d, a, b, c, 7, 10, T50);
SET(d, a, b, c, 7, 10, T50); SET(c, d, a, b, 14, 15, T51);
SET(c, d, a, b, 14, 15, T51); SET(b, c, d, a, 5, 21, T52);
SET(b, c, d, a, 5, 21, T52); SET(a, b, c, d, 12, 6, T53);
SET(a, b, c, d, 12, 6, T53); SET(d, a, b, c, 3, 10, T54);
SET(d, a, b, c, 3, 10, T54); SET(c, d, a, b, 10, 15, T55);
SET(c, d, a, b, 10, 15, T55); SET(b, c, d, a, 1, 21, T56);
SET(b, c, d, a, 1, 21, T56); SET(a, b, c, d, 8, 6, T57);
SET(a, b, c, d, 8, 6, T57); SET(d, a, b, c, 15, 10, T58);
SET(d, a, b, c, 15, 10, T58); SET(c, d, a, b, 6, 15, T59);
SET(c, d, a, b, 6, 15, T59); SET(b, c, d, a, 13, 21, T60);
SET(b, c, d, a, 13, 21, T60); SET(a, b, c, d, 4, 6, T61);
SET(a, b, c, d, 4, 6, T61); SET(d, a, b, c, 11, 10, T62);
SET(d, a, b, c, 11, 10, T62); SET(c, d, a, b, 2, 15, T63);
SET(c, d, a, b, 2, 15, T63); SET(b, c, d, a, 9, 21, T64);
SET(b, c, d, a, 9, 21, T64);
#undef SET #undef SET
/* Then perform the following additions. (That is increment each // Then perform the following additions. (That is increment each of the four
of the four registers by the value it had before this block // registers by the value it had before this block was started.)
was started.) */ pms->abcd[0] += a;
pms->abcd[0] += a; pms->abcd[1] += b;
pms->abcd[1] += b; pms->abcd[2] += c;
pms->abcd[2] += c; pms->abcd[3] += d;
pms->abcd[3] += d;
} }
//
// '_pdfioCryptoMD5Init()' - Initialize an MD5 hash.
//
void void
_pdfioCryptoMD5Init(_pdfio_md5_t *pms) _pdfioCryptoMD5Init(_pdfio_md5_t *pms) // I - MD5 state
{ {
pms->count[0] = pms->count[1] = 0; pms->count[0] = pms->count[1] = 0;
pms->abcd[0] = 0x67452301; pms->abcd[0] = 0x67452301;
pms->abcd[1] = 0xefcdab89; pms->abcd[1] = 0xefcdab89;
pms->abcd[2] = 0x98badcfe; pms->abcd[2] = 0x98badcfe;
pms->abcd[3] = 0x10325476; pms->abcd[3] = 0x10325476;
} }
//
// '_pdfioCryptoMD5Append()' - Append bytes to the MD5 hash.
//
void void
_pdfioCryptoMD5Append(_pdfio_md5_t *pms, const uint8_t *data, size_t nbytes) _pdfioCryptoMD5Append(
_pdfio_md5_t *pms, // I - MD5 state
const uint8_t *data, // I - Data to add
size_t nbytes) // I - Number of bytes
{ {
const uint8_t *p = data; const uint8_t *p = data; // Pointer into data
size_t left = nbytes; size_t left = nbytes; // Remaining bytes
size_t offset = (pms->count[0] >> 3) & 63; size_t offset = (pms->count[0] >> 3) & 63;
uint32_t nbits = (uint32_t)(nbytes << 3); // Offset into state
uint32_t nbits = (uint32_t)(nbytes << 3);
// Number of bits to add
if (nbytes == 0)
return;
/* Update the message length. */ if (nbytes == 0)
pms->count[1] += (unsigned)(nbytes >> 29); return;
pms->count[0] += nbits;
if (pms->count[0] < nbits)
pms->count[1]++;
/* Process an initial partial block. */ // Update the message length.
if (offset) { pms->count[1] += (unsigned)(nbytes >> 29);
size_t copy = (offset + nbytes > 64 ? 64 - offset : nbytes); pms->count[0] += nbits;
if (pms->count[0] < nbits)
pms->count[1] ++;
memcpy(pms->buf + offset, p, copy); // Process an initial partial block.
if (offset + copy < 64) if (offset)
return; {
p += copy; size_t copy = ((offset + nbytes) > 64 ? 64 - offset : nbytes);
left -= copy; // Number of bytes to copy
md5_process(pms, pms->buf);
}
/* Process full blocks. */ memcpy(pms->buf + offset, p, copy);
for (; left >= 64; p += 64, left -= 64)
md5_process(pms, p);
/* Process a final partial block. */ if ((offset + copy) < 64)
if (left) return;
memcpy(pms->buf, p, left);
p += copy;
left -= copy;
md5_process(pms, pms->buf);
}
// Process full blocks.
for (; left >= 64; p += 64, left -= 64)
md5_process(pms, p);
// Copy a final partial block.
if (left)
memcpy(pms->buf, p, left);
} }
//
// '_pdfioCryptoMD5Finish()' - Finalize the MD5 hash.
//
void void
_pdfioCryptoMD5Finish(_pdfio_md5_t *pms, uint8_t digest[16]) _pdfioCryptoMD5Finish(
_pdfio_md5_t *pms, // I - MD5 state
uint8_t digest[16]) // O - Digest value
{ {
static const uint8_t pad[64] = { int i; // Looping var
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, uint8_t data[8]; // Digest length data
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, static const uint8_t pad[64] = // Padding bytes
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}; 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
uint8_t data[8]; 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
int i; 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
/* Save the length before padding. */
for (i = 0; i < 8; ++i) // Save the length before padding.
data[i] = (uint8_t)(pms->count[i >> 2] >> ((i & 3) << 3)); for (i = 0; i < 8; ++i)
/* Pad to 56 bytes mod 64. */ data[i] = (uint8_t)(pms->count[i >> 2] >> ((i & 3) << 3));
_pdfioCryptoMD5Append(pms, pad, ((55 - (pms->count[0] >> 3)) & 63) + 1);
/* Append the length. */ // Pad to 56 bytes mod 64.
_pdfioCryptoMD5Append(pms, data, 8); _pdfioCryptoMD5Append(pms, pad, ((55 - (pms->count[0] >> 3)) & 63) + 1);
for (i = 0; i < 16; ++i)
digest[i] = (uint8_t)(pms->abcd[i >> 2] >> ((i & 3) << 3)); // Append the length.
_pdfioCryptoMD5Append(pms, data, 8);
// Copy the digest from the state...
for (i = 0; i < 16; ++i)
digest[i] = (uint8_t)(pms->abcd[i >> 2] >> ((i & 3) << 3));
} }

View File

@ -20,10 +20,12 @@ extern "C" {
// //
// Version number... // Version numbers...
// //
# define PDFIO_VERSION "1.4.1" # define PDFIO_VERSION "1.5.0"
# define PDFIO_VERSION_MAJOR 1
# define PDFIO_VERSION_MINOR 5
// //