diff --git a/.gitignore b/.gitignore index 222bce1..fa3bb5f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,5 @@ /packages /pdfio.xcodeproj/xcshareddata /testpdfio -/testpdfio-out.pdf -/testpdfio-out2.pdf +/testpdfio-*.pdf /x64 diff --git a/FAQ.md b/FAQ.md deleted file mode 100644 index f595386..0000000 --- a/FAQ.md +++ /dev/null @@ -1,17 +0,0 @@ -Frequently Asked Questions -========================== - -Why Don't You Support Writing a PDF File with Encryption? ---------------------------------------------------------- - -PDF encryption offers very little protection: - -- PDF encryption keys are reused and derived from the user password (padded - with a standard base string) and the object numbers in the file. -- RC4 encryption (40- and 128-bit) was broken years ago. -- AES encryption (128- and 256-bit) is better, but PDF uses Cipher Block - Chaining (CBC) which enables attacks that allow the original encryption key - to be recovered. - -In addition, PDF usage controls (no print, no copy, etc.) are tied to this -encryption, making them trivial to bypass. 
diff --git a/Makefile b/Makefile index 816632d..840443b 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,8 @@ ARFLAGS = cr CC = cc CFLAGS = CODESIGN_IDENTITY = Developer ID -COMMONFLAGS = -Os -g +#COMMONFLAGS = -Os -g +COMMONFLAGS = -O0 -g CPPFLAGS = '-DPDFIO_VERSION="$(VERSION)"' DESTDIR = $(DSTROOT) DSO = cc @@ -40,13 +41,18 @@ PUBHEADERS = \ pdfio.h \ pdfio-content.h PUBOBJS = \ + pdfio-aes.o \ pdfio-array.o \ pdfio-common.o \ pdfio-content.o \ + pdfio-crypto.o \ pdfio-dict.o \ pdfio-file.o \ + pdfio-md5.o \ pdfio-object.o \ pdfio-page.o \ + pdfio-rc4.o \ + pdfio-sha256.o \ pdfio-stream.o \ pdfio-string.o \ pdfio-token.o \ @@ -144,16 +150,8 @@ pdfio1.def: $(LIBOBJS) Makefile echo "LIBRARY pdfio1" >$@ echo "VERSION 1.0" >>$@ echo "EXPORTS" >>$@ - (nm $(LIBOBJS) 2>/dev/null | grep "T _" | awk '{print $$3}' | \ - grep -v '^_ttf' | grep -v '^__' | sed -e '1,$$s/^_//'; \ - echo pdfioAdobeRGBGamma; echo pdfioAdobeRGBMatrix; \ - echo pdfioAdobeRGBWhitePoint; \ - echo pdfioDisplayP3Gamma; echo pdfioDisplayP3Matrix; \ - echo pdfioDisplayP3WhitePoint; \ - echo pdfioSRGBGamma; echo pdfioSRGBMatrix; \ - echo pdfioSRGBWhitePoint; \ - echo _pdfioTokenInit; \ - echo _pdfioValueDebug; echo _pdfioValueRead) | sort >>$@ + nm $(LIBOBJS) 2>/dev/null | grep "T _" | awk '{print $$3}' | \ + grep -v '^_ttf' | sed -e '1,$$s/^_//' | sort >>$@ # pdfio test program diff --git a/README.md b/README.md index e9a99ba..2c0d47e 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,7 @@ goals of PDFio are: - Read and write any version of PDF file - Provide access to pages, objects, and streams within a PDF file -- Support reading encrypted PDF files -- Support writing PDF files with digital signatures +- Support reading and writing of encrypted PDF files - Extract or embed useful metadata (author, creator, page information, etc.) 
- "Filter" PDF files, for example to extract a range of pages or to embed fonts that are missing from a PDF diff --git a/doc/pdfio.md b/doc/pdfio.md index 95ab285..5bea27d 100644 --- a/doc/pdfio.md +++ b/doc/pdfio.md @@ -4,8 +4,9 @@ Introduction PDFio is a simple C library for reading and writing PDF files. The primary goals of pdfio are: -- Read any PDF file with or without encryption or linearization -- Write PDF files without encryption or linearization +- Read and write any version of PDF file +- Provide access to pages, objects, and streams within a PDF file +- Support reading and writing of encrypted PDF files - Extract or embed useful metadata (author, creator, page information, etc.) - "Filter" PDF files, for example to extract a range of pages or to embed fonts that are missing from a PDF diff --git a/pdfio-aes.c b/pdfio-aes.c new file mode 100644 index 0000000..dbc8912 --- /dev/null +++ b/pdfio-aes.c @@ -0,0 +1,520 @@ +// +// AES functions for PDFio. +// +// Copyright © 2021 by Michael R Sweet. +// +// Licensed under Apache License v2.0. See the file "LICENSE" for more +// information. +// +// AES code is adapted from the "tiny-AES-c" project +// () +// + +// +// Include necessary headers... +// + +#include "pdfio-private.h" + + +// +// Local types... +// + +typedef uint8_t state_t[4][4]; // 4x4 AES state table + + +// +// Local globals... 
+// + +static const uint8_t sbox[256] = // S-box lookup table +{ + //0 1 2 3 4 5 6 7 8 9 A B C D E F + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +}; +static const uint8_t rsbox[256] = // Reverse S-box lookup table +{ + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 
0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +}; + +// The round constant word array, Rcon[i], contains the values given by +// x to the power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8) +static const uint8_t Rcon[11] = // Round constants +{ + 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 +}; + + +// +// Local functions... 
+// + +static void AddRoundKey(size_t round, state_t *state, const uint8_t *RoundKey); +static void SubBytes(state_t *state); +static void ShiftRows(state_t *state); +static uint8_t xtime(uint8_t x); +static void MixColumns(state_t *state); +static uint8_t Multiply(uint8_t x, uint8_t y); +static void InvMixColumns(state_t *state); +static void InvSubBytes(state_t *state); +static void InvShiftRows(state_t *state); +static void Cipher(state_t *state, const _pdfio_aes_t *ctx); +static void InvCipher(state_t *state, const _pdfio_aes_t *ctx); +static void XorWithIv(uint8_t *buf, const uint8_t *Iv); + + +// +// '_pdfioCryptoAESInit()' - Initialize an AES context. +// + +void +_pdfioCryptoAESInit( + _pdfio_aes_t *ctx, // I - AES context + const uint8_t *key, // I - Key + size_t keylen, // I - Length of key (must be 16 or 32) + const uint8_t *iv) // I - 16-byte initialization vector +{ + size_t i; // Looping var + uint8_t *rkptr0, // Previous round_key values + *rkptr, // Current round_key values + *rkend, // End of round_key values + tempa[4]; // Used for the column/row operations +// size_t roundlen = keylen + 24; // Length of round_key + size_t nwords = keylen / 4; // Number of 32-bit words in key + + + // Clear context + memset(ctx, 0, sizeof(_pdfio_aes_t)); + ctx->round_size = keylen / 4 + 6; + + // The first round key is the key itself. + memcpy(ctx->round_key, key, keylen); + + // All other round keys are found from the previous round keys. + for (rkptr0 = ctx->round_key, rkptr = rkptr0 + keylen, rkend = rkptr + 16 * ctx->round_size, i = nwords; rkptr < rkend; i ++) + { + if ((i % nwords) == 0) + { + // Shifts word left once - [a0,a1,a2,a3] becomes [a1,a2,a3,a0], then + // apply the S-box to each of the four bytes to produce an output word. 
+ tempa[0] = sbox[rkptr[-3]] ^ Rcon[i / nwords]; + tempa[1] = sbox[rkptr[-2]]; + tempa[2] = sbox[rkptr[-1]]; + tempa[3] = sbox[rkptr[-4]]; + } + else if (keylen == 32 && (i % nwords) == 4) + { + // Apply the S-box to each of the four bytes to produce an output word. + tempa[0] = sbox[rkptr[-4]]; + tempa[1] = sbox[rkptr[-3]]; + tempa[2] = sbox[rkptr[-2]]; + tempa[3] = sbox[rkptr[-1]]; + } + else + { + // Use unshifted values without S-box... + tempa[0] = rkptr[-4]; + tempa[1] = rkptr[-3]; + tempa[2] = rkptr[-2]; + tempa[3] = rkptr[-1]; + } + + // TODO: Optimize to incorporate this into previous steps + *rkptr++ = *rkptr0++ ^ tempa[0]; + *rkptr++ = *rkptr0++ ^ tempa[1]; + *rkptr++ = *rkptr0++ ^ tempa[2]; + *rkptr++ = *rkptr0++ ^ tempa[3]; + } + + // Copy the initialization vector... + if (iv) + memcpy(ctx->iv, iv, sizeof(ctx->iv)); +} + + +// +// '_pdfioCryptoAESDecrypt()' - Decrypt a block of bytes with AES. +// +// "inbuffer" and "outbuffer" can point to the same memory. Length must be a +// multiple of 16 bytes (excess is not decrypted). +// + +size_t // O - Number of bytes in output buffer +_pdfioCryptoAESDecrypt( + _pdfio_aes_t *ctx, // I - AES context + uint8_t *outbuffer, // I - Output buffer + const uint8_t *inbuffer, // I - Input buffer + size_t len) // I - Number of bytes to decrypt +{ + uint8_t next_iv[16]; // Next IV value + size_t outbytes = 0; // Output bytes + + + if (inbuffer != outbuffer) + { + // Not the most efficient, but we can optimize later - the sample AES code + // manipulates the data directly in memory and doesn't support separate + // input and output buffers... + memcpy(outbuffer, inbuffer, len); + } + + while (len > 15) + { + memcpy(next_iv, outbuffer, 16); + InvCipher((state_t *)outbuffer, ctx); + XorWithIv(outbuffer, ctx->iv); + memcpy(ctx->iv, next_iv, 16); + outbuffer += 16; + len -= 16; + outbytes += 16; + } + + return (outbytes); +} + + +// +// '_pdfioCryptoAESEncrypt()' - Encrypt a block of bytes with AES. 
+// +// "inbuffer" and "outbuffer" can point to the same memory. "outbuffer" must +// be a multiple of 16 bytes. +// + +size_t // O - Number of bytes in output buffer +_pdfioCryptoAESEncrypt( + _pdfio_aes_t *ctx, // I - AES context + uint8_t *outbuffer, // I - Output buffer + const uint8_t *inbuffer, // I - Input buffer + size_t len) // I - Number of bytes to decrypt +{ + uint8_t *iv = ctx->iv; // Current IV for CBC + size_t outbytes = 0; // Output bytes + + + if (inbuffer != outbuffer) + { + // Not the most efficient, but we can optimize later - the sample AES code + // manipulates the data directly in memory and doesn't support separate + // input and output buffers... + memcpy(outbuffer, inbuffer, len); + } + + while (len > 15) + { + XorWithIv(outbuffer, iv); + Cipher((state_t*)outbuffer, ctx); + iv = outbuffer; + outbuffer += 16; + len -= 16; + outbytes += 16; + } + + if (len > 0) + { + // Pad the final buffer with (16 - len)... + memset(outbuffer + len, 16 - len, 16 - len); + + XorWithIv(outbuffer, iv); + Cipher((state_t*)outbuffer, ctx); + iv = outbuffer; + outbytes += 16; + } + + /* store Iv in ctx for next call */ + memcpy(ctx->iv, iv, 16); + + return (outbytes); +} + + +// This function adds the round key to state. +// The round key is added to the state by an XOR function. +static void +AddRoundKey(size_t round, state_t *state, const uint8_t *RoundKey) +{ + unsigned i; // Looping var + uint8_t *sptr = (*state)[0]; // Pointer into state + + + for (RoundKey += round * 16, i = 16; i > 0; i --, sptr ++, RoundKey ++) + *sptr ^= *RoundKey; +} + + +// The SubBytes Function Substitutes the values in the +// state matrix with values in an S-box. +static void +SubBytes(state_t *state) +{ + unsigned i; // Looping var + uint8_t *sptr = (*state)[0]; // Pointer into state + + + for (i = 16; i > 0; i --, sptr ++) + *sptr = sbox[*sptr]; +} + +// The ShiftRows() function shifts the rows in the state to the left. +// Each row is shifted with different offset. 
+// Offset = Row number. So the first row is not shifted. +static void +ShiftRows(state_t *state) +{ + uint8_t *sptr = (*state)[0]; // Pointer into state + uint8_t temp; // Temporary value + + + // Rotate first row 1 columns to left + temp = sptr[1]; + sptr[1] = sptr[5]; + sptr[5] = sptr[9]; + sptr[9] = sptr[13]; + sptr[13] = temp; + + // Rotate second row 2 columns to left + temp = sptr[2]; + sptr[2] = sptr[10]; + sptr[10] = temp; + + temp = sptr[6]; + sptr[6] = sptr[14]; + sptr[14] = temp; + + // Rotate third row 3 columns to left + temp = sptr[3]; + sptr[3] = sptr[15]; + sptr[15] = sptr[11]; + sptr[11] = sptr[7]; + sptr[7] = temp; +} + + +static uint8_t +xtime(uint8_t x) +{ + return ((uint8_t)((x << 1) ^ ((x >> 7) * 0x1b))); +} + + +// MixColumns function mixes the columns of the state matrix +static void +MixColumns(state_t *state) +{ + unsigned i; // Looping var + uint8_t *sptr = (*state)[0]; // Pointer into state + uint8_t Tmp, Tm, t; // Temporary values + + for (i = 4; i > 0; i --, sptr += 4) + { + t = sptr[0]; + Tmp = sptr[0] ^ sptr[1] ^ sptr[2] ^ sptr[3]; + Tm = sptr[0] ^ sptr[1]; + Tm = xtime(Tm); + sptr[0] ^= Tm ^ Tmp; + + Tm = sptr[1] ^ sptr[2]; + Tm = xtime(Tm); + sptr[1] ^= Tm ^ Tmp; + + Tm = sptr[2] ^ sptr[3]; + Tm = xtime(Tm); + sptr[2] ^= Tm ^ Tmp; + + Tm = sptr[3] ^ t; + Tm = xtime(Tm); + sptr[3] ^= Tm ^ Tmp; + } +} + + +// Multiply is used to multiply numbers in the field GF(2^8) +// Note: The last call to xtime() is unneeded, but often ends up generating a smaller binary +// The compiler seems to be able to vectorize the operation better this way. +// See https://github.com/kokke/tiny-AES-c/pull/34 +static uint8_t Multiply(uint8_t x, uint8_t y) +{ + return (((y & 1) * x) ^ + ((y>>1 & 1) * xtime(x)) ^ + ((y>>2 & 1) * xtime(xtime(x))) ^ + ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ + ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */ +} + + +// MixColumns function mixes the columns of the state matrix. 
+// The method used to multiply may be difficult to understand for the inexperienced. +// Please use the references to gain more information. +static void +InvMixColumns(state_t *state) +{ + unsigned i; // Looping var + uint8_t *sptr = (*state)[0]; // Pointer into state + uint8_t a, b, c, d; // Temporary values + + + for (i = 4; i > 0; i --) + { + a = sptr[0]; + b = sptr[1]; + c = sptr[2]; + d = sptr[3]; + + *sptr++ = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09); + *sptr++ = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d); + *sptr++ = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b); + *sptr++ = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e); + } +} + + +// The SubBytes Function Substitutes the values in the +// state matrix with values in an S-box. +static void +InvSubBytes(state_t *state) +{ + unsigned i; // Looping var + uint8_t *sptr = (*state)[0]; // Pointer into state + + + for (i = 16; i > 0; i --, sptr ++) + *sptr = rsbox[*sptr]; +} + + +static void +InvShiftRows(state_t *state) +{ + uint8_t *sptr = (*state)[0]; // Pointer into state + uint8_t temp; // Temporary value + + + // Rotate first row 1 columns to right + temp = sptr[13]; + sptr[13] = sptr[9]; + sptr[9] = sptr[5]; + sptr[5] = sptr[1]; + sptr[1] = temp; + + // Rotate second row 2 columns to right + temp = sptr[2]; + sptr[2] = sptr[10]; + sptr[10] = temp; + + temp = sptr[6]; + sptr[6] = sptr[14]; + sptr[14] = temp; + + // Rotate third row 3 columns to right + temp = sptr[3]; + sptr[3] = sptr[7]; + sptr[7] = sptr[11]; + sptr[11] = sptr[15]; + sptr[15] = temp; +} + + +// Cipher is the main function that encrypts the PlainText. +static void +Cipher(state_t *state, const _pdfio_aes_t *ctx) +{ + size_t round = 0; + + // Add the First round key to the state before starting the rounds. + AddRoundKey(0, state, ctx->round_key); + + // There will be Nr rounds. 
+ // The first Nr-1 rounds are identical. + // These Nr rounds are executed in the loop below. + // Last one without MixColumns() + for (round = 1; round < ctx->round_size; round ++) + { + SubBytes(state); + ShiftRows(state); + MixColumns(state); + AddRoundKey(round, state, ctx->round_key); + } + // Add round key to last round + SubBytes(state); + ShiftRows(state); + AddRoundKey(ctx->round_size, state, ctx->round_key); +} + + +static void +InvCipher(state_t *state, const _pdfio_aes_t *ctx) +{ + size_t round; + + // Add the First round key to the state before starting the rounds. + AddRoundKey(ctx->round_size, state, ctx->round_key); + + // There will be Nr rounds. + // The first Nr-1 rounds are identical. + // These Nr rounds are executed in the loop below. + // Last one without InvMixColumn() + for (round = ctx->round_size - 1; ; round --) + { + InvShiftRows(state); + InvSubBytes(state); + AddRoundKey(round, state, ctx->round_key); + + if (round == 0) + break; + + InvMixColumns(state); + } +} + + +static void +XorWithIv(uint8_t *buf, const uint8_t *Iv) +{ + // 16-byte block... 
+ *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; + *buf++ ^= *Iv++; +} diff --git a/pdfio-array.c b/pdfio-array.c index 336a08e..4196ec8 100644 --- a/pdfio-array.c +++ b/pdfio-array.c @@ -398,7 +398,7 @@ pdfioArrayGetBinary( size_t n, // I - Index size_t *length) // O - Length of string { - if (!a || n >= a->num_values || a->values[n].type != PDFIO_VALTYPE_BINARY || !length) + if (!a || n >= a->num_values || a->values[n].type != PDFIO_VALTYPE_BINARY) { if (length) *length = 0; @@ -407,7 +407,9 @@ pdfioArrayGetBinary( } else { - *length = a->values[n].value.binary.datalen; + if (length) + *length = a->values[n].value.binary.datalen; + return (a->values[n].value.binary.data); } } @@ -567,6 +569,7 @@ _pdfioArrayGetValue(pdfio_array_t *a, // I - Array pdfio_array_t * // O - New array _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file + pdfio_obj_t *obj, // I - Object, if any _pdfio_token_t *tb) // I - Token buffer/stack { pdfio_array_t *array; // New array @@ -591,7 +594,7 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file // Push the token and decode the value... _pdfioTokenPush(tb, token); - if (!_pdfioValueRead(pdf, tb, &value)) + if (!_pdfioValueRead(pdf, obj, tb, &value)) break; // PDFIO_DEBUG("_pdfioArrayRead(%p): Appending ", (void *)array); @@ -610,7 +613,8 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file // bool // O - `true` on success, `false` otherwise -_pdfioArrayWrite(pdfio_array_t *a) // I - Array +_pdfioArrayWrite(pdfio_array_t *a, // I - Array + pdfio_obj_t *obj) // I - Object, if any { pdfio_file_t *pdf = a->pdf; // PDF file size_t i; // Looping var @@ -624,7 +628,7 @@ _pdfioArrayWrite(pdfio_array_t *a) // I - Array // Write each value... 
for (i = a->num_values, v = a->values; i > 0; i --, v ++) { - if (!_pdfioValueWrite(pdf, v, NULL)) + if (!_pdfioValueWrite(pdf, obj, v, NULL)) return (false); } diff --git a/pdfio-crypto.c b/pdfio-crypto.c new file mode 100644 index 0000000..0591642 --- /dev/null +++ b/pdfio-crypto.c @@ -0,0 +1,980 @@ +// +// Cryptographic support functions for PDFio. +// +// Copyright © 2021 by Michael R Sweet. +// +// Licensed under Apache License v2.0. See the file "LICENSE" for more +// information. +// + +// +// Include necessary headers... +// + +#include "pdfio-private.h" +#if !_WIN32 +# include +#endif // !_WIN32 +#ifdef __has_include +# if __has_include() +# define HAVE_GETRANDOM 1 +# include +# endif // __has_include() +#endif // __has_include + + +// +// PDF files can use one of several methods to encrypt a PDF file. There is +// an owner password that controls/unlocks full editing/usage permissions and a +// user password that unlocks limited usage of the PDF. Permissions are set +// using bits for copy, print, etc. (see the `pdfio_permission_t` enumeration). +// Passwords can be up to 32 bytes in length, with a well-known padding string +// that is applied if the string is less than 32 bytes or there is no password. +// +// > Note: PDF encryption has several design weaknesses which limit the +// > protection offered. The V2 and V4 security handlers depend on the obsolete +// > MD5 and RC4 algorithms for key generation, and Cipher Block Chaining (CBC) +// > further weakens AES support. Enforcement of usage permissions depends on +// > the consuming software honoring them, so if the password is known or (more +// > commonly) the user password is blank, it is possible to bypass usage +// > permissions completely. 
+// +// PDFio supports the following: +// +// - The original 40-bit RC4 (V2+R2) encryption for reading only +// - 128-bit RC4 (V2+R3) encryption for reading and writing +// - 128-bit AES (V4+R4) encryption for reading and writing +// - TODO: 256-bit AES (V6+R6) encryption for reading and writing +// +// Common values: +// +// - "F" is the file encryption key (40 to 256 bits/5 to 32 bytes) +// - "Fid" is the file ID string (stored in PDF file, 32 bytes) +// - "O" is the owner key (stored in PDF file, 32 bytes) +// - "Opad" is the padded owner password (32 bytes) +// - "P" is the permissions integer (stored in PDF file) +// - "P4" is the permissions integer as a 32-bit little-endian value +// - "U" is the user key (stored in PDF file, 32 bytes) +// - "Upad" is the padded user password (32 bytes) +// +// V2+R2 handler: +// +// F = md5(Upad+O+P4+Fid) +// O = rc4(Upad, md5(Opad)) +// (unlock with owner password) +// Upad = rc4(O, md5(Opad)) +// U = rc4(md5(Upad+Fid)+0[16], F) +// +// V2+R3/V4+R4 handler: +// +// F = md5(md5(Upad+O+P4+Fid))^50 +// O = rc4(Upad, md5(md5(Opad))^50)^20 +// (unlock with owner password) +// Upad = rc4(O, md5(md5(Opad))^50)^20 +// U = rc4(md5(Upad+Fid)+0[16], F)^20 +// +// V6+R6 handler: +// +// TODO: document V6+R6 handler +// + +// +// Local globals... +// + +static uint8_t pdf_passpad[32] = // Padding for passwords +{ + 0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41, + 0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08, + 0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80, + 0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a +}; + + +// +// Local functions... 
+// + +static void decrypt_user_key(pdfio_encryption_t encryption, const uint8_t *file_key, uint8_t user_key[32]); +static void encrypt_user_key(pdfio_encryption_t encryption, const uint8_t *file_key, uint8_t user_key[32]); +static void make_file_key(pdfio_encryption_t encryption, pdfio_permission_t permissions, const unsigned char *file_id, size_t file_idlen, const uint8_t *user_pad, const uint8_t *owner_key, uint8_t file_key[16]); +static void make_owner_key(pdfio_encryption_t encryption, const uint8_t *owner_pad, const uint8_t *user_pad, uint8_t owner_key[32]); +static void make_user_key(pdfio_encryption_t encryption, const unsigned char *file_id, size_t file_idlen, uint8_t user_key[32]); +static void pad_password(const char *password, uint8_t pad[32]); + + +// +// '_pdfioCryptoLock()' - Lock a PDF file by generating the encryption object and keys. +// + +bool // O - `true` on success, `false` otherwise +_pdfioCryptoLock( + pdfio_file_t *pdf, // I - PDF file + pdfio_permission_t permissions, // I - Use permissions + pdfio_encryption_t encryption, // I - Type of encryption to use + const char *owner_password, // I - Owner password, if any + const char *user_password) // I - User password, if any +{ + pdfio_dict_t *dict; // Encryption dictionary + size_t i, j; // Looping vars + _pdfio_md5_t md5; // MD5 context + uint8_t digest[16]; // 128-bit MD5 digest + _pdfio_rc4_t rc4; // RC4 encryption context + size_t len; // Length of password + uint8_t owner_pad[32], // Padded owner password + user_pad[32], // Padded user password + perm_bytes[4], // Permissions bytes + *file_id; // File ID bytes + size_t file_idlen; // Length of file ID + pdfio_dict_t *cf_dict, // CF dictionary + *filter_dict; // CryptFilter dictionary + + + if ((dict = pdfioDictCreate(pdf)) == NULL) + { + _pdfioFileError(pdf, "Unable to create encryption dictionary."); + return (false); + } + + pdfioDictSetName(dict, "Filter", "Standard"); + + switch (encryption) + { + case PDFIO_ENCRYPTION_RC4_128 : + 
case PDFIO_ENCRYPTION_AES_128 : + // Create the 128-bit encryption keys... + pad_password(user_password, user_pad); + + if (!owner_password && user_password && *user_password) + { + // Generate a random owner password... + _pdfioCryptoMakeRandom(owner_pad, sizeof(owner_pad)); + } + else + { + // Use supplied owner password + pad_password(owner_password, owner_pad); + } + + // Compute the owner key... + make_owner_key(encryption, owner_pad, user_pad, pdf->owner_key); + pdf->owner_keylen = 32; + + // Generate the encryption key + file_id = pdfioArrayGetBinary(pdf->id_array, 0, &file_idlen); + + make_file_key(encryption, permissions, file_id, file_idlen, user_pad, pdf->owner_key, pdf->file_key); + pdf->file_keylen = 16; + + // Generate the user key... + make_user_key(encryption, file_id, file_idlen, pdf->user_key); + encrypt_user_key(encryption, pdf->file_key, pdf->user_key); + pdf->user_keylen = 32; + + // Save everything in the dictionary... + pdfioDictSetNumber(dict, "Length", 128); + pdfioDictSetBinary(dict, "O", pdf->owner_key, sizeof(pdf->owner_key)); + pdfioDictSetNumber(dict, "P", (int)permissions); + pdfioDictSetNumber(dict, "R", encryption == PDFIO_ENCRYPTION_RC4_128 ? 3 : 4); + pdfioDictSetNumber(dict, "V", encryption == PDFIO_ENCRYPTION_RC4_128 ? 2 : 4); + pdfioDictSetBinary(dict, "U", pdf->user_key, sizeof(pdf->user_key)); + + if (encryption == PDFIO_ENCRYPTION_AES_128) + { + if ((cf_dict = pdfioDictCreate(pdf)) == NULL) + { + _pdfioFileError(pdf, "Unable to create Encryption CF dictionary."); + return (false); + } + + if ((filter_dict = pdfioDictCreate(pdf)) == NULL) + { + _pdfioFileError(pdf, "Unable to create Encryption CryptFilter dictionary."); + return (false); + } + + pdfioDictSetName(filter_dict, "Type", "CryptFilter"); + pdfioDictSetName(filter_dict, "CFM", encryption == PDFIO_ENCRYPTION_RC4_128 ? 
"V2" : "AESV2"); + pdfioDictSetDict(cf_dict, "PDFio", filter_dict); + pdfioDictSetDict(dict, "CF", cf_dict); + pdfioDictSetName(dict, "StmF", "PDFio"); + pdfioDictSetName(dict, "StrF", "PDFio"); + pdfioDictSetBoolean(dict, "EncryptMetadata", true); + } + break; + + case PDFIO_ENCRYPTION_AES_256 : + // TODO: Implement AES-256 (/V 6 /R 6) + + default : + _pdfioFileError(pdf, "Encryption mode %d not supported for writing.", (int)encryption); + return (false); + } + + if ((pdf->encrypt_obj = pdfioFileCreateObj(pdf, dict)) == NULL) + { + _pdfioFileError(pdf, "Unable to create encryption object."); + return (false); + } + + pdfioObjClose(pdf->encrypt_obj); + + pdf->encryption = encryption; + pdf->permissions = permissions; + + return (true); +} + + +// +// '_pdfioCryptoMakeRandom()' - Fill a buffer with good random numbers. +// + +void +_pdfioCryptoMakeRandom(uint8_t *buffer, // I - Buffer + size_t bytes) // I - Number of bytes +{ +#ifdef __APPLE__ + // macOS/iOS provide the arc4random function which is seeded with entropy + // from the system... + while (bytes > 0) + { + // Just collect 8 bits from each call to fill the buffer... + *buffer++ = (uint8_t)arc4random(); + bytes --; + } + +#else +# if _WIN32 + // Windows provides the CryptGenRandom function... + HCRYPTPROV prov; // Cryptographic provider + + if (CryptAcquireContextA(&prov, NULL, NULL, PROV_RSA_FULL, 0)) + { + // Got the default crypto provider, try to get random data... + BOOL success = CryptGenRandom(prov, (DWORD)bytes, buffer); + + // Release the crypto provider and return on success... + CryptReleaseContext(prov, 0); + + if (success) + return; + } + +# elif HAVE_GETRANDOM + // Linux provides a system call called getrandom that uses system entropy ... 
+ ssize_t rbytes; // Bytes read + + while (bytes > 0) + { + if ((rbytes = getrandom(buffer, bytes, 0)) < 0) + { + if (errno != EINTR && errno != EAGAIN) + break; + } + bytes -= (size_t)rbytes; + buffer += rbytes; + } + + if (bytes == 0) + return; + +# else + // Other UNIX-y systems have /dev/urandom... + int fd; // Random number file + ssize_t rbytes; // Bytes read + + + // Fall back on /dev/urandom... + if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) + { + while (bytes > 0) + { + if ((rbytes = read(fd, buffer, bytes)) < 0) + { + if (errno != EINTR && errno != EAGAIN) + break; + } + bytes -= (size_t)rbytes; + buffer += rbytes; + } + + close(fd); + + if (bytes == 0) + return; + } +# endif // _WIN32 + + // If we get here then we were unable to get enough random data or the local + // system doesn't have enough entropy. Make some up... + uint32_t i, // Looping var + mt_state[624], // Mersenne twister state + mt_index, // Mersenne twister index + temp; // Temporary value +# if _WIN32 + struct _timeb curtime; // Current time + + _ftime(&curtime); + mt_state[0] = (uint32_t)(curtime.time + curtime.millitm); + +# else + struct timeval curtime; // Current time + + gettimeofday(&curtime, NULL); + mt_state[0] = (uint32_t)(curtime.tv_sec + curtime.tv_usec); +# endif // _WIN32 + + // Seed the random number state... + mt_index = 0; + + for (i = 1; i < 624; i ++) + mt_state[i] = (uint32_t)((1812433253 * (mt_state[i - 1] ^ (mt_state[i - 1] >> 30))) + i); + + // Fill the buffer with random numbers... + while (bytes > 0) + { + if (mt_index == 0) + { + // Generate a sequence of random numbers... + uint32_t i1 = 1, i397 = 397; // Looping vars + + for (i = 0; i < 624; i ++) + { + temp = (mt_state[i] & 0x80000000) + (mt_state[i1] & 0x7fffffff); + mt_state[i] = mt_state[i397] ^ (temp >> 1); + + if (temp & 1) + mt_state[i] ^= 2567483615u; + + i1 ++; + i397 ++; + + if (i1 == 624) + i1 = 0; + + if (i397 == 624) + i397 = 0; + } + } + + // Pull 32-bits of random data... 
+    temp = mt_state[mt_index ++];
+    temp ^= temp >> 11;
+    temp ^= (temp << 7) & 2636928640u;
+    temp ^= (temp << 15) & 4022730752u;
+    temp ^= temp >> 18;
+
+    if (mt_index == 624)
+      mt_index = 0;
+
+    // Copy to the buffer...
+    switch (bytes)
+    {
+      case 1 :
+          *buffer++ = (uint8_t)(temp >> 24);
+          bytes --;
+          break;
+      case 2 :
+          *buffer++ = (uint8_t)(temp >> 24);
+          *buffer++ = (uint8_t)(temp >> 16);
+          bytes -= 2;
+          break;
+      case 3 :
+          *buffer++ = (uint8_t)(temp >> 24);
+          *buffer++ = (uint8_t)(temp >> 16);
+          *buffer++ = (uint8_t)(temp >> 8);
+          bytes -= 3;
+          break;
+      default :
+          *buffer++ = (uint8_t)(temp >> 24);
+          *buffer++ = (uint8_t)(temp >> 16);
+          *buffer++ = (uint8_t)(temp >> 8);
+          *buffer++ = (uint8_t)temp;
+          bytes -= 4;
+          break;
+    }
+  }
+#endif // __APPLE__
+}
+
+
+//
+// '_pdfioCryptoMakeReader()' - Setup a cryptographic context and callback for reading.
+//
+// For RC4-128 and AES-128 the per-object key is the MD5 digest of the file
+// key followed by the low 3 bytes of the object number and the low 2 bytes of
+// the generation number (least significant byte first), plus the literal
+// "sAlT" for AES-128.  "*ivlen" is set to 16 for AES-128, in which case the
+// caller-supplied "iv" is passed to the AES context, and to 0 otherwise.
+//
+// Every other encryption mode, and a missing file, object, or context,
+// yields `NULL` (no callback).
+//
+// NOTE(review): PDFIO_ENCRYPTION_RC4_40 lands in the `default` case here even
+// though '_pdfioCryptoUnlock()' accepts it - confirm whether that is intended.
+//
+
+_pdfio_crypto_cb_t                      // O - Decryption callback or `NULL` for none
+_pdfioCryptoMakeReader(
+    pdfio_file_t        *pdf,           // I - PDF file
+    pdfio_obj_t         *obj,           // I - PDF object
+    _pdfio_crypto_ctx_t *ctx,           // I - Pointer to crypto context
+    uint8_t             *iv,            // I - Buffer for initialization vector
+    size_t              *ivlen)         // IO - Size of initialization vector
+{
+  uint8_t      data[21];                // Key data
+  _pdfio_md5_t md5;                     // MD5 state
+  uint8_t      digest[16];              // MD5 digest value
+
+
+  // Range check input...  "obj" and "ctx" are dereferenced below, so a missing
+  // object or context is treated like a missing file: no crypto callback.
+  if (!pdf || !obj || !ctx)
+  {
+    *ivlen = 0;
+    return (NULL);
+  }
+
+  switch (pdf->encryption)
+  {
+    default :
+        *ivlen = 0;
+        return (NULL);
+
+    case PDFIO_ENCRYPTION_RC4_128 :
+    case PDFIO_ENCRYPTION_AES_128 :
+        // Copy the key data for the MD5 hash.
+        memcpy(data, pdf->file_key, sizeof(pdf->file_key));
+        data[16] = (uint8_t)obj->number;
+        data[17] = (uint8_t)(obj->number >> 8);
+        data[18] = (uint8_t)(obj->number >> 16);
+        data[19] = (uint8_t)obj->generation;
+        data[20] = (uint8_t)(obj->generation >> 8);
+
+        // Hash it...
+        _pdfioCryptoMD5Init(&md5);
+        _pdfioCryptoMD5Append(&md5, data, sizeof(data));
+        if (pdf->encryption == PDFIO_ENCRYPTION_AES_128)
+          _pdfioCryptoMD5Append(&md5, (const uint8_t *)"sAlT", 4);
+        _pdfioCryptoMD5Finish(&md5, digest);
+
+        // Initialize the RC4/AES context using the digest...
+        if (pdf->encryption == PDFIO_ENCRYPTION_RC4_128)
+        {
+          *ivlen = 0;
+          _pdfioCryptoRC4Init(&ctx->rc4, digest, sizeof(digest));
+          return ((_pdfio_crypto_cb_t)_pdfioCryptoRC4Crypt);
+        }
+        else
+        {
+          *ivlen = 16;
+          _pdfioCryptoAESInit(&ctx->aes, digest, sizeof(digest), iv);
+          return ((_pdfio_crypto_cb_t)_pdfioCryptoAESDecrypt);
+        }
+  }
+}
+
+
+//
+// '_pdfioCryptoMakeWriter()' - Setup a cryptographic context and callback for writing.
+//
+// The per-object key is derived exactly as in '_pdfioCryptoMakeReader()'.  For
+// AES-128 a fresh 16-byte initialization vector is generated into "iv" with
+// '_pdfioCryptoMakeRandom()' and "*ivlen" is set to 16; otherwise "*ivlen" is
+// set to 0.
+//
+// Every other encryption mode, and a missing file, object, or context,
+// yields `NULL` (no callback).
+//
+
+_pdfio_crypto_cb_t                      // O - Encryption callback or `NULL` for none
+_pdfioCryptoMakeWriter(
+    pdfio_file_t        *pdf,           // I - PDF file
+    pdfio_obj_t         *obj,           // I - PDF object
+    _pdfio_crypto_ctx_t *ctx,           // I - Pointer to crypto context
+    uint8_t             *iv,            // I - Buffer for initialization vector
+    size_t              *ivlen)         // IO - Size of initialization vector
+{
+  uint8_t      data[21];                // Key data
+  _pdfio_md5_t md5;                     // MD5 state
+  uint8_t      digest[16];              // MD5 digest value
+
+
+  // Range check input...  "obj" and "ctx" are dereferenced below, so a missing
+  // object or context is treated like a missing file: no crypto callback.
+  if (!pdf || !obj || !ctx)
+  {
+    *ivlen = 0;
+    return (NULL);
+  }
+
+  switch (pdf->encryption)
+  {
+    default :
+        *ivlen = 0;
+        return (NULL);
+
+    case PDFIO_ENCRYPTION_RC4_128 :
+    case PDFIO_ENCRYPTION_AES_128 :
+        // Copy the key data for the MD5 hash.
+        memcpy(data, pdf->file_key, sizeof(pdf->file_key));
+        data[16] = (uint8_t)obj->number;
+        data[17] = (uint8_t)(obj->number >> 8);
+        data[18] = (uint8_t)(obj->number >> 16);
+        data[19] = (uint8_t)obj->generation;
+        data[20] = (uint8_t)(obj->generation >> 8);
+
+        // Hash it...
+        _pdfioCryptoMD5Init(&md5);
+        _pdfioCryptoMD5Append(&md5, data, sizeof(data));
+        if (pdf->encryption == PDFIO_ENCRYPTION_AES_128)
+          _pdfioCryptoMD5Append(&md5, (const uint8_t *)"sAlT", 4);
+        _pdfioCryptoMD5Finish(&md5, digest);
+
+        // Initialize the RC4/AES context using the digest...
+        if (pdf->encryption == PDFIO_ENCRYPTION_RC4_128)
+        {
+          *ivlen = 0;
+          _pdfioCryptoRC4Init(&ctx->rc4, digest, sizeof(digest));
+          return ((_pdfio_crypto_cb_t)_pdfioCryptoRC4Crypt);
+        }
+        else
+        {
+          // AES needs a new random initialization vector for every stream...
+          *ivlen = 16;
+          _pdfioCryptoMakeRandom(iv, *ivlen);
+          _pdfioCryptoAESInit(&ctx->aes, digest, sizeof(digest), iv);
+          return ((_pdfio_crypto_cb_t)_pdfioCryptoAESEncrypt);
+        }
+  }
+}
+
+
+//
+// '_pdfioCryptoUnlock()' - Unlock an encrypted PDF.
+//
+
+bool                                    // O - `true` on success, `false` otherwise
+_pdfioCryptoUnlock(
+    pdfio_file_t        *pdf,           // I - PDF file
+    pdfio_password_cb_t password_cb,    // I - Password callback or `NULL` for none
+    void                *password_data) // I - Password callback data, if any
+{
+  int           tries;                  // Number of tries
+  const char    *password = NULL;       // Password to try
+  pdfio_dict_t  *encrypt_dict;          // Encrypt object dictionary
+  int           version,                // Version value
+                revision,               // Revision value
+                length;                 // Key length value
+  const char    *handler,               // Security handler name
+                *stream_filter,         // Stream encryption filter
+                *string_filter;         // String encryption filter
+  pdfio_dict_t  *cf_dict;               // CryptFilters dictionary
+  unsigned char *owner_key,             // Owner key
+                *user_key,              // User key
+                *file_id;               // File ID value
+  size_t        owner_keylen,           // Length of owner key
+                user_keylen,            // Length of user key
+                file_idlen;             // Length of file ID
+  _pdfio_md5_t  md5;                    // MD5 context
+  uint8_t       file_digest[16];        // MD5 digest of file ID and pad
+
+
+  // See if we support the type of encryption specified by the Encrypt object
+  // dictionary...
+  if ((encrypt_dict = pdfioObjGetDict(pdf->encrypt_obj)) == NULL)
+  {
+    _pdfioFileError(pdf, "Unable to get encryption dictionary.");
+    return (false);
+  }
+
+  handler       = pdfioDictGetName(encrypt_dict, "Filter");
+  version       = pdfioDictGetNumber(encrypt_dict, "V");
+  revision      = pdfioDictGetNumber(encrypt_dict, "R");
+  length        = pdfioDictGetNumber(encrypt_dict, "Length");
+  stream_filter = pdfioDictGetName(encrypt_dict, "StmF");
+  string_filter = pdfioDictGetName(encrypt_dict, "StrF");
+  cf_dict       = pdfioDictGetDict(encrypt_dict, "CF");
+
+  if (!handler || strcmp(handler, "Standard"))
+  {
+    _pdfioFileError(pdf, "Unsupported security handler '%s'.", handler ? handler : "(null)");
+    return (false);
+  }
+
+  if (version == 4 && revision == 4)
+  {
+    // Lookup crypt filter to see if we support it...
+    pdfio_dict_t *filter;               // Crypt Filter
+    const char   *cfm;                  // Crypt filter method
+
+    if ((filter = pdfioDictGetDict(cf_dict, stream_filter)) != NULL && (cfm = pdfioDictGetName(filter, "CFM")) != NULL)
+    {
+      if (!strcmp(cfm, "V2"))
+      {
+        pdf->encryption = PDFIO_ENCRYPTION_RC4_128;
+        if (length < 40 || length > 128)
+          length = 128;
+      }
+      if (!strcmp(cfm, "AESV2"))
+      {
+        pdf->encryption = PDFIO_ENCRYPTION_AES_128;
+        length = 128;
+      }
+    }
+  }
+  else if (version == 2)
+  {
+    if (revision == 2)
+    {
+      pdf->encryption = PDFIO_ENCRYPTION_RC4_40;
+      length = 40;
+    }
+    else if (revision == 3)
+    {
+      pdf->encryption = PDFIO_ENCRYPTION_RC4_128;
+      if (length < 40 || length > 128)
+        length = 128;
+    }
+  }
+  // TODO: Implement AES-256 - V6 R6
+
+  if (pdf->encryption == PDFIO_ENCRYPTION_NONE)
+  {
+    _pdfioFileError(pdf, "Unsupported encryption V%d R%d.", version, revision);
+    return (false);
+  }
+
+  // Grab the remaining values we need to unlock the PDF...
+  pdf->file_keylen = length / 8;
+  pdf->permissions = pdfioDictGetNumber(encrypt_dict, "P");
+
+  owner_key = pdfioDictGetBinary(encrypt_dict, "O", &owner_keylen);
+  user_key  = pdfioDictGetBinary(encrypt_dict, "U", &user_keylen);
+
+  if (!owner_key || owner_keylen < 32 || owner_keylen > sizeof(pdf->owner_key))
+  {
+    _pdfioFileError(pdf, "Missing or bad owner key, unable to unlock file.");
+    return (false);
+  }
+
+  memcpy(pdf->owner_key, owner_key, owner_keylen);
+  pdf->owner_keylen = owner_keylen;
+
+  if (!user_key || user_keylen < 32 || user_keylen > sizeof(pdf->user_key))
+  {
+    _pdfioFileError(pdf, "Missing or bad user key, unable to unlock file.");
+    return (false);
+  }
+
+  memcpy(pdf->user_key, user_key, user_keylen);
+  pdf->user_keylen = user_keylen;
+
+  if ((file_id = pdfioArrayGetBinary(pdf->id_array, 0, &file_idlen)) == NULL || file_idlen < 16)
+  {
+    _pdfioFileError(pdf, "Missing or bad file ID, unable to unlock file.");
+    return (false);
+  }
+
+  // Generate a base hash from known values...
+  // NOTE(review): file_digest is computed here but not read again in this
+  // function - confirm whether it is still needed.
+  _pdfioCryptoMD5Init(&md5);
+  _pdfioCryptoMD5Append(&md5, pdf_passpad, 32);
+  _pdfioCryptoMD5Append(&md5, file_id, file_idlen);
+  _pdfioCryptoMD5Finish(&md5, file_digest);
+
+  // Now try to unlock the PDF...  The first pass uses no password (`NULL`),
+  // later passes use whatever the password callback returns.
+  for (tries = 0; tries < 4; tries ++)
+  {
+    if (pdf->encryption <= PDFIO_ENCRYPTION_AES_128)
+    {
+      // Local names are kept distinct from the "owner_key"/"user_key"
+      // pointers declared at function scope...
+      uint8_t pad[32],                  // Padded password
+              file_key[16],             // File key
+              user_pad[32],             // Padded user password
+              calc_user_key[32],        // Computed (unencrypted) user key
+              pdf_user_key[32];         // Decrypted user key
+
+      // Pad the supplied password, if any...
+      pad_password(password, pad);
+
+      // Generate keys to see if things match...
+      PDFIO_DEBUG("\nTrying %02X%02X%02X%02X...%02X%02X%02X%02X\n", pad[0], pad[1], pad[2], pad[3], pad[28], pad[29], pad[30], pad[31]);
+      PDFIO_DEBUG("P=%d\n", pdf->permissions);
+      PDFIO_DEBUG("Fid(%d)=%02X%02X%02X%02X...%02X%02X%02X%02X\n", (int)file_idlen, file_id[0], file_id[1], file_id[2], file_id[3], file_id[12], file_id[13], file_id[14], file_id[15]);
+
+      // Treat the password as the owner password: running the owner key
+      // through the same RC4 passes recovers the padded user password...
+      make_owner_key(pdf->encryption, pad, pdf->owner_key, user_pad);
+      PDFIO_DEBUG("Upad=%02X%02X%02X%02X...%02X%02X%02X%02X\n", user_pad[0], user_pad[1], user_pad[2], user_pad[3], user_pad[28], user_pad[29], user_pad[30], user_pad[31]);
+
+      make_file_key(pdf->encryption, pdf->permissions, file_id, file_idlen, user_pad, pdf->owner_key, file_key);
+      PDFIO_DEBUG("Fown=%02X%02X%02X%02X...%02X%02X%02X%02X\n", file_key[0], file_key[1], file_key[2], file_key[3], file_key[12], file_key[13], file_key[14], file_key[15]);
+
+      make_user_key(pdf->encryption, file_id, file_idlen, calc_user_key);
+
+      // The stored user key is encrypted with the file key while
+      // make_user_key() yields the plain hash, so decrypt the stored value
+      // before comparing - exactly as the user password check below does.
+      memcpy(pdf_user_key, pdf->user_key, sizeof(pdf_user_key));
+      decrypt_user_key(pdf->encryption, file_key, pdf_user_key);
+
+      PDFIO_DEBUG("U=%02X%02X%02X%02X...%02X%02X%02X%02X\n", pdf->user_key[0], pdf->user_key[1], pdf->user_key[2], pdf->user_key[3], pdf->user_key[28], pdf->user_key[29], pdf->user_key[30], pdf->user_key[31]);
+      PDFIO_DEBUG("Uown=%02X%02X%02X%02X...%02X%02X%02X%02X\n", calc_user_key[0], calc_user_key[1], calc_user_key[2], calc_user_key[3], calc_user_key[28], calc_user_key[29], calc_user_key[30], calc_user_key[31]);
+
+      if (!memcmp(calc_user_key, pdf_user_key, 16))
+      {
+        // Matches!
+        memcpy(pdf->file_key, file_key, sizeof(pdf->file_key));
+        return (true);
+      }
+
+      /*
+       * Not the owner password, try the user password...
+       */
+
+      make_file_key(pdf->encryption, pdf->permissions, file_id, file_idlen, pad, pdf->owner_key, file_key);
+      PDFIO_DEBUG("Fuse=%02X%02X%02X%02X...%02X%02X%02X%02X\n", file_key[0], file_key[1], file_key[2], file_key[3], file_key[12], file_key[13], file_key[14], file_key[15]);
+
+      make_user_key(pdf->encryption, file_id, file_idlen, calc_user_key);
+
+      memcpy(pdf_user_key, pdf->user_key, sizeof(pdf_user_key));
+      decrypt_user_key(pdf->encryption, file_key, pdf_user_key);
+
+      PDFIO_DEBUG("Uuse=%02X%02X%02X%02X...%02X%02X%02X%02X\n", calc_user_key[0], calc_user_key[1], calc_user_key[2], calc_user_key[3], calc_user_key[28], calc_user_key[29], calc_user_key[30], calc_user_key[31]);
+      PDFIO_DEBUG("Updf=%02X%02X%02X%02X...%02X%02X%02X%02X\n", pdf_user_key[0], pdf_user_key[1], pdf_user_key[2], pdf_user_key[3], pdf_user_key[28], pdf_user_key[29], pdf_user_key[30], pdf_user_key[31]);
+
+      // Only the first 16 bytes carry the hash, the rest is padding...
+      if (!memcmp(calc_user_key, pdf_user_key, 16))
+      {
+        // Matches!
+        memcpy(pdf->file_key, file_key, sizeof(pdf->file_key));
+        return (true);
+      }
+    }
+    else
+    {
+      // TODO: Implement AES-256 security handler
+    }
+
+    // If we get here we need to try another password...
+    if (password_cb)
+      password = (password_cb)(password_data, pdf->filename);
+
+    if (!password)
+      break;
+  }
+
+  _pdfioFileError(pdf, "Unable to unlock PDF file.");
+
+  return (false);
+}
+
+
+//
+// 'decrypt_user_key()' - Decrypt the user key.
+//
+
+static void
+decrypt_user_key(
+    pdfio_encryption_t encryption,      // I - Type of encryption
+    const uint8_t      *file_key,       // I - File encryption key
+    uint8_t            user_key[32])    // IO - User key
+{
+  size_t       i, j;                    // Looping vars
+  _pdfio_rc4_t rc4;                     // RC4 encryption context
+
+
+  if (encryption == PDFIO_ENCRYPTION_RC4_40)
+  {
+    // Encrypt the result once...
+    _pdfioCryptoRC4Init(&rc4, file_key, 5);
+    _pdfioCryptoRC4Crypt(&rc4, user_key, user_key, 32);
+  }
+  else
+  {
+    // Encrypt the result 20 times...
+    uint8_t key[16];                    // Current encryption key
+
+    for (i = 19; i > 0; i --)
+    {
+      // XOR each byte in the key with the loop counter...
+      for (j = 0; j < 16; j ++)
+        key[j] = (uint8_t)(file_key[j] ^ i);
+
+      _pdfioCryptoRC4Init(&rc4, key, 16);
+      _pdfioCryptoRC4Crypt(&rc4, user_key, user_key, 32);
+    }
+
+    _pdfioCryptoRC4Init(&rc4, file_key, 16);
+    _pdfioCryptoRC4Crypt(&rc4, user_key, user_key, 32);
+  }
+}
+
+
+//
+// 'encrypt_user_key()' - Encrypt the user key.
+//
+// The 32-byte key is encrypted in place: one RC4 pass with the first 5 bytes
+// of the file key for 40-bit RC4, otherwise 20 RC4 passes whose 16-byte keys
+// are the file key XORed with the pass number (0 through 19).
+//
+
+static void
+encrypt_user_key(
+    pdfio_encryption_t encryption,      // I - Type of encryption
+    const uint8_t      *file_key,       // I - File encryption key
+    uint8_t            user_key[32])    // IO - User key
+{
+  size_t       i, j;                    // Looping vars
+  _pdfio_rc4_t rc4;                     // RC4 encryption context
+
+
+  if (encryption == PDFIO_ENCRYPTION_RC4_40)
+  {
+    // Encrypt the result once...
+    _pdfioCryptoRC4Init(&rc4, file_key, 5);
+    _pdfioCryptoRC4Crypt(&rc4, user_key, user_key, 32);
+  }
+  else
+  {
+    // Encrypt the result 20 times...
+    uint8_t key[16];                    // Current encryption key
+
+    for (i = 0; i < 20; i ++)
+    {
+      // XOR each byte in the key with the loop counter...
+      for (j = 0; j < 16; j ++)
+        key[j] = (uint8_t)(file_key[j] ^ i);
+
+      _pdfioCryptoRC4Init(&rc4, key, 16);
+      _pdfioCryptoRC4Crypt(&rc4, user_key, user_key, 32);
+    }
+  }
+}
+
+
+//
+// 'make_file_key()' - Make the file encryption key.
+//
+// The key is the MD5 digest of the padded user password, the first 32 bytes
+// of the owner key, the 4 permission bytes (least significant byte first),
+// and the file ID.  For everything except 40-bit RC4 the digest is then
+// re-hashed 50 times.
+//
+
+static void
+make_file_key(
+    pdfio_encryption_t  encryption,     // I - Type of encryption
+    pdfio_permission_t  permissions,    // I - File permissions
+    const unsigned char *file_id,       // I - File ID value
+    size_t              file_idlen,     // I - Length of file ID
+    const uint8_t       *user_pad,      // I - Padded user password
+    const uint8_t       *owner_key,     // I - Owner key
+    uint8_t             file_key[16])   // O - Encryption key
+{
+  size_t       i;                       // Looping var
+  uint32_t     perm_bits;               // Permissions as unsigned bits
+  uint8_t      perm_bytes[4];           // Permissions bytes
+  _pdfio_md5_t md5;                     // MD5 context
+  uint8_t      digest[16];              // 128-bit MD5 digest
+
+
+  // Convert to unsigned before shifting so a negative permissions value is
+  // not right-shifted as a signed quantity...
+  perm_bits     = (uint32_t)permissions;
+  perm_bytes[0] = (uint8_t)perm_bits;
+  perm_bytes[1] = (uint8_t)(perm_bits >> 8);
+  perm_bytes[2] = (uint8_t)(perm_bits >> 16);
+  perm_bytes[3] = (uint8_t)(perm_bits >> 24);
+
+  _pdfioCryptoMD5Init(&md5);
+  _pdfioCryptoMD5Append(&md5, user_pad, 32);
+  _pdfioCryptoMD5Append(&md5, owner_key, 32);
+  _pdfioCryptoMD5Append(&md5, perm_bytes, 4);
+  _pdfioCryptoMD5Append(&md5, file_id, file_idlen);
+  _pdfioCryptoMD5Finish(&md5, digest);
+
+  if (encryption != PDFIO_ENCRYPTION_RC4_40)
+  {
+    // MD5 the result 50 times..
+    for (i = 0; i < 50; i ++)
+    {
+      _pdfioCryptoMD5Init(&md5);
+      _pdfioCryptoMD5Append(&md5, digest, 16);
+      _pdfioCryptoMD5Finish(&md5, digest);
+    }
+  }
+
+  memcpy(file_key, digest, 16);
+}
+
+
+//
+// 'make_owner_key()' - Generate the (encrypted) owner key...
+//
+// The padded owner password is hashed with MD5 (and re-hashed 50 times for
+// everything except 40-bit RC4) to form an RC4 key.  The padded user password
+// is then copied to "owner_key" and RC4-encrypted in place: once with the
+// first 5 digest bytes for 40-bit RC4, otherwise 20 times with the digest
+// XORed with the pass number (0 through 19).
+//
+
+static void
+make_owner_key(
+    pdfio_encryption_t encryption,      // I - Type of encryption
+    const uint8_t      *owner_pad,      // I - Padded owner password
+    const uint8_t      *user_pad,       // I - Padded user password
+    uint8_t            owner_key[32])   // O - Owner key value
+{
+  size_t       i, j;                    // Looping vars
+  _pdfio_md5_t md5;                     // MD5 context
+  uint8_t      digest[16];              // 128-bit MD5 digest
+  _pdfio_rc4_t rc4;                     // RC4 encryption context
+
+
+  // Hash the owner password...
+  _pdfioCryptoMD5Init(&md5);
+  _pdfioCryptoMD5Append(&md5, owner_pad, 32);
+  _pdfioCryptoMD5Finish(&md5, digest);
+
+  if (encryption != PDFIO_ENCRYPTION_RC4_40)
+  {
+    for (i = 0; i < 50; i ++)
+    {
+      _pdfioCryptoMD5Init(&md5);
+      _pdfioCryptoMD5Append(&md5, digest, 16);
+      _pdfioCryptoMD5Finish(&md5, digest);
+    }
+  }
+
+  // Copy and encrypt the padded user password...
+  memcpy(owner_key, user_pad, 32);
+
+  if (encryption == PDFIO_ENCRYPTION_RC4_40)
+  {
+    // Encrypt once...
+    _pdfioCryptoRC4Init(&rc4, digest, 5);
+    _pdfioCryptoRC4Crypt(&rc4, owner_key, owner_key, 32);
+  }
+  else
+  {
+    // Encrypt 20 times...
+    uint8_t encrypt_key[16];            // RC4 encryption key
+
+    for (i = 0; i < 20; i ++)
+    {
+      // XOR each byte in the digest with the loop counter to make a key...
+      for (j = 0; j < sizeof(encrypt_key); j ++)
+        encrypt_key[j] = (uint8_t)(digest[j] ^ i);
+
+      _pdfioCryptoRC4Init(&rc4, encrypt_key, sizeof(encrypt_key));
+      _pdfioCryptoRC4Crypt(&rc4, owner_key, owner_key, 32);
+    }
+  }
+}
+
+
+//
+// 'make_user_key()' - Make the user key.
+//
+// Writes the MD5 digest of the password padding and the file ID to the first
+// 16 bytes of "user_key" and zeroes the remaining 16 bytes.  The result is
+// not encrypted here.
+//
+
+static void
+make_user_key(
+    pdfio_encryption_t  encryption,     // I - Type of encryption
+    const unsigned char *file_id,       // I - File ID value
+    size_t              file_idlen,     // I - Length of file ID
+    uint8_t             user_key[32])   // O - User key
+{
+  _pdfio_md5_t md5;                     // MD5 context
+
+
+  // The hash does not depend on the encryption type...
+  (void)encryption;
+
+  // Generate a base hash from known values...
+  _pdfioCryptoMD5Init(&md5);
+  _pdfioCryptoMD5Append(&md5, pdf_passpad, 32);
+  _pdfioCryptoMD5Append(&md5, file_id, file_idlen);
+  _pdfioCryptoMD5Finish(&md5, user_key);
+
+  memset(user_key + 16, 0, 16);
+}
+
+
+//
+// 'pad_password()' - Generate a padded password.
+//
+// The output is always exactly 32 bytes: up to the first 32 bytes of
+// "password" followed by as many bytes from the start of "pdf_passpad" as are
+// needed to fill the buffer.  A `NULL` password yields "pdf_passpad" alone.
+//
+
+static void
+pad_password(const char *password,     // I - Password string or `NULL`
+             uint8_t    pad[32])        // O - Padded password
+{
+  size_t len;                           // Length of password
+
+
+  if (password)
+  {
+    // Use the specified password, truncated to 32 bytes
+    if ((len = strlen(password)) > 32)
+      len = 32;
+  }
+  else
+  {
+    // No password
+    len = 0;
+  }
+
+  // Password bytes first, then the standard padding for whatever is left...
+  if (len > 0)
+    memcpy(pad, password, len);
+  if (len < 32)
+    memcpy(pad + len, pdf_passpad, 32 - len);
+}
diff --git a/pdfio-dict.c b/pdfio-dict.c
index b436e9f..a678e11 100644
--- a/pdfio-dict.c
+++ b/pdfio-dict.c
@@ -21,6 +21,39 @@ static int compare_pairs(_pdfio_pair_t *a, _pdfio_pair_t *b);
 
 
+//
+// '_pdfioDictClear()' - Remove a key/value pair from a dictionary.
+//
+
+void
+_pdfioDictClear(pdfio_dict_t *dict, // I - Dictionary
+                const char *key) // I - Key
+{
+  size_t idx; // Index into pairs
+  _pdfio_pair_t *pair, // Current pair
+                pkey; // Search key
+
+
+  PDFIO_DEBUG("_pdfioDictClear(dict=%p, key=\"%s\")\n", dict, key);
+
+  // See if the key is already set...
+  if (dict->num_pairs > 0)
+  {
+    pkey.key = key;
+
+    if ((pair = (_pdfio_pair_t *)bsearch(&pkey, dict->pairs, dict->num_pairs, sizeof(_pdfio_pair_t), (int (*)(const void *, const void *))compare_pairs)) != NULL)
+    {
+      // Yes, remove it...
+      idx = (size_t)(pair - dict->pairs);
+      dict->num_pairs --;
+
+      if (idx < dict->num_pairs)
+        memmove(pair, pair + 1, (dict->num_pairs - idx) * sizeof(_pdfio_pair_t));
+    }
+  }
+}
+
+
 //
 // 'pdfioDictCopy()' - Copy a dictionary to a PDF file.
 //
 
@@ -420,6 +453,7 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary
 
 pdfio_dict_t * // O - New dictionary
 _pdfioDictRead(pdfio_file_t *pdf, // I - PDF file
+               pdfio_obj_t *obj, // I - Object, if any
                _pdfio_token_t *tb) // I - Token buffer/stack
 {
   pdfio_dict_t *dict; // New dictionary
@@ -448,7 +482,7 @@ _pdfioDictRead(pdfio_file_t *pdf, // I - PDF file
     }
 
     // Then get the next value...
- if (!_pdfioValueRead(pdf, tb, &value)) + if (!_pdfioValueRead(pdf, obj, tb, &value)) { _pdfioFileError(pdf, "Missing value for dictionary key."); break; @@ -850,6 +884,7 @@ _pdfioDictSetValue( bool // O - `true` on success, `false` on failure _pdfioDictWrite(pdfio_dict_t *dict, // I - Dictionary + pdfio_obj_t *obj, // I - Object, if any off_t *length) // I - Offset to length value { pdfio_file_t *pdf = dict->pdf; // PDF file @@ -877,7 +912,7 @@ _pdfioDictWrite(pdfio_dict_t *dict, // I - Dictionary if (!_pdfioFilePuts(pdf, " 9999999999")) return (false); } - else if (!_pdfioValueWrite(pdf, &pair->value, NULL)) + else if (!_pdfioValueWrite(pdf, obj, &pair->value, NULL)) return (false); } diff --git a/pdfio-file.c b/pdfio-file.c index 0dc9adb..ac9e2dc 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -26,7 +26,7 @@ static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b); static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b); static bool load_obj_stream(pdfio_obj_t *obj); static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj); -static bool load_xref(pdfio_file_t *pdf, off_t xref_offset); +static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data); static bool write_catalog(pdfio_file_t *pdf); static bool write_pages(pdfio_file_t *pdf); static bool write_trailer(pdfio_file_t *pdf); @@ -123,7 +123,7 @@ pdfioFileClose(pdfio_file_t *pdf) // I - PDF file { ret = false; - if (pdfioObjClose(pdf->info)) + if (pdfioObjClose(pdf->info_obj)) if (write_pages(pdf)) if (write_catalog(pdf)) if (write_trailer(pdf)) @@ -193,6 +193,7 @@ pdfioFileCreate( pdfio_file_t *pdf; // PDF file pdfio_dict_t *dict; // Dictionary for pages object pdfio_dict_t *info_dict; // Dictionary for information object + unsigned char id_value[16]; // File ID value // Range check input... 
@@ -220,13 +221,14 @@ pdfioFileCreate( return (NULL); } - pdf->filename = strdup(filename); - pdf->version = strdup(version); - pdf->mode = _PDFIO_MODE_WRITE; - pdf->error_cb = error_cb; - pdf->error_data = error_data; - pdf->bufptr = pdf->buffer; - pdf->bufend = pdf->buffer + sizeof(pdf->buffer); + pdf->filename = strdup(filename); + pdf->version = strdup(version); + pdf->mode = _PDFIO_MODE_WRITE; + pdf->error_cb = error_cb; + pdf->error_data = error_data; + pdf->permissions = PDFIO_PERMISSION_ALL; + pdf->bufptr = pdf->buffer; + pdf->bufend = pdf->buffer + sizeof(pdf->buffer); if (media_box) { @@ -278,7 +280,7 @@ pdfioFileCreate( pdfioDictSetName(dict, "Type", "Pages"); - if ((pdf->pages_root = pdfioFileCreateObj(pdf, dict)) == NULL) + if ((pdf->pages_obj = pdfioFileCreateObj(pdf, dict)) == NULL) { pdfioFileClose(pdf); unlink(filename); @@ -296,13 +298,22 @@ pdfioFileCreate( pdfioDictSetDate(info_dict, "CreationDate", time(NULL)); pdfioDictSetString(info_dict, "Producer", "pdfio/" PDFIO_VERSION); - if ((pdf->info = pdfioFileCreateObj(pdf, info_dict)) == NULL) + if ((pdf->info_obj = pdfioFileCreateObj(pdf, info_dict)) == NULL) { pdfioFileClose(pdf); unlink(filename); return (NULL); } + // Create random file ID values... + _pdfioCryptoMakeRandom(id_value, sizeof(id_value)); + + if ((pdf->id_array = pdfioArrayCreate(pdf)) != NULL) + { + pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value)); + pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value)); + } + return (pdf); } @@ -458,6 +469,7 @@ pdfioFileCreateOutput( pdfio_file_t *pdf; // PDF file pdfio_dict_t *dict; // Dictionary for pages object pdfio_dict_t *info_dict; // Dictionary for information object + unsigned char id_value[16]; // File ID value // Range check input... 
@@ -485,13 +497,14 @@ pdfioFileCreateOutput( return (NULL); } - pdf->filename = strdup("output.pdf"); - pdf->version = strdup(version); - pdf->mode = _PDFIO_MODE_WRITE; - pdf->error_cb = error_cb; - pdf->error_data = error_data; - pdf->bufptr = pdf->buffer; - pdf->bufend = pdf->buffer + sizeof(pdf->buffer); + pdf->filename = strdup("output.pdf"); + pdf->version = strdup(version); + pdf->mode = _PDFIO_MODE_WRITE; + pdf->error_cb = error_cb; + pdf->error_data = error_data; + pdf->permissions = PDFIO_PERMISSION_ALL; + pdf->bufptr = pdf->buffer; + pdf->bufend = pdf->buffer + sizeof(pdf->buffer); if (media_box) { @@ -536,7 +549,7 @@ pdfioFileCreateOutput( pdfioDictSetName(dict, "Type", "Pages"); - if ((pdf->pages_root = pdfioFileCreateObj(pdf, dict)) == NULL) + if ((pdf->pages_obj = pdfioFileCreateObj(pdf, dict)) == NULL) { pdfioFileClose(pdf); return (NULL); @@ -552,12 +565,21 @@ pdfioFileCreateOutput( pdfioDictSetDate(info_dict, "CreationDate", time(NULL)); pdfioDictSetString(info_dict, "Producer", "pdfio/" PDFIO_VERSION); - if ((pdf->info = pdfioFileCreateObj(pdf, info_dict)) == NULL) + if ((pdf->info_obj = pdfioFileCreateObj(pdf, info_dict)) == NULL) { pdfioFileClose(pdf); return (NULL); } + // Create random file ID values... 
+ _pdfioCryptoMakeRandom(id_value, sizeof(id_value)); + + if ((pdf->id_array = pdfioArrayCreate(pdf)) != NULL) + { + pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value)); + pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value)); + } + return (pdf); } @@ -595,7 +617,7 @@ pdfioFileCreatePage(pdfio_file_t *pdf, // I - PDF file if (!_pdfioDictGetValue(dict, "MediaBox")) pdfioDictSetRect(dict, "MediaBox", &pdf->media_box); - pdfioDictSetObj(dict, "Parent", pdf->pages_root); + pdfioDictSetObj(dict, "Parent", pdf->pages_obj); if (!_pdfioDictGetValue(dict, "Resources")) pdfioDictSetDict(dict, "Resources", pdfioDictCreate(pdf)); @@ -701,7 +723,7 @@ pdfioFileFindObj( const char * // O - Author or `NULL` for none pdfioFileGetAuthor(pdfio_file_t *pdf) // I - PDF file { - return (pdf && pdf->info ? pdfioDictGetString(pdf->info->value.value.dict, "Author") : NULL); + return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Author") : NULL); } @@ -713,7 +735,7 @@ time_t // O - Creation date or `0` for none pdfioFileGetCreationDate( pdfio_file_t *pdf) // I - PDF file { - return (pdf && pdf->info ? pdfioDictGetDate(pdf->info->value.value.dict, "CreationDate") : 0); + return (pdf && pdf->info_obj ? pdfioDictGetDate(pdf->info_obj->value.value.dict, "CreationDate") : 0); } @@ -724,7 +746,7 @@ pdfioFileGetCreationDate( const char * // O - Creator string or `NULL` for none pdfioFileGetCreator(pdfio_file_t *pdf) // I - PDF file { - return (pdf && pdf->info ? pdfioDictGetString(pdf->info->value.value.dict, "Creator") : NULL); + return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Creator") : NULL); } @@ -735,7 +757,7 @@ pdfioFileGetCreator(pdfio_file_t *pdf) // I - PDF file pdfio_array_t * // O - Array with binary strings pdfioFileGetID(pdfio_file_t *pdf) // I - PDF file { - return (pdf && pdf->info ? pdfioDictGetArray(pdf->trailer, "ID") : NULL); + return (pdf ? 
pdf->id_array : NULL); } @@ -746,7 +768,7 @@ pdfioFileGetID(pdfio_file_t *pdf) // I - PDF file const char * // O - Keywords string or `NULL` for none pdfioFileGetKeywords(pdfio_file_t *pdf) // I - PDF file { - return (pdf && pdf->info ? pdfioDictGetString(pdf->info->value.value.dict, "Keywords") : NULL); + return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Keywords") : NULL); } @@ -814,6 +836,35 @@ pdfioFileGetPage(pdfio_file_t *pdf, // I - PDF file } +// +// 'pdfioFileGetPermissions()' - Get the access permissions of a PDF file. +// +// This function returns the access permissions of a PDF file and (optionally) +// the type of encryption that has been used. +// + +pdfio_permission_t // O - Permission bits +pdfioFileGetPermissions( + pdfio_file_t *pdf, // I - PDF file + pdfio_encryption_t *encryption) // O - Type of encryption used or `NULL` to ignore +{ + // Range check input... + if (!pdf) + { + if (encryption) + *encryption = PDFIO_ENCRYPTION_NONE; + + return (PDFIO_PERMISSION_ALL); + } + + // Return values... + if (encryption) + *encryption = pdf->encryption; + + return (pdf->permissions); +} + + // // 'pdfioFileGetProducer()' - Get the producer string for a PDF file. // @@ -821,7 +872,7 @@ pdfioFileGetPage(pdfio_file_t *pdf, // I - PDF file const char * // O - Producer string or `NULL` for none pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file { - return (pdf && pdf->info ? pdfioDictGetString(pdf->info->value.value.dict, "Producer") : NULL); + return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Producer") : NULL); } @@ -832,7 +883,7 @@ pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file const char * // O - Subject or `NULL` for none pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file { - return (pdf && pdf->info ? pdfioDictGetString(pdf->info->value.value.dict, "Subject") : NULL); + return (pdf && pdf->info_obj ? 
pdfioDictGetString(pdf->info_obj->value.value.dict, "Subject") : NULL); } @@ -843,7 +894,7 @@ pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file const char * // O - Title or `NULL` for none pdfioFileGetTitle(pdfio_file_t *pdf) // I - PDF file { - return (pdf && pdf->info ? pdfioDictGetString(pdf->info->value.value.dict, "Title") : NULL); + return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Title") : NULL); } @@ -890,9 +941,6 @@ pdfioFileOpen( off_t xref_offset; // Offset to xref table - (void)password_cb; - (void)password_data; - // Range check input... if (!filename) return (NULL); @@ -915,10 +963,11 @@ pdfioFileOpen( return (NULL); } - pdf->filename = strdup(filename); - pdf->mode = _PDFIO_MODE_READ; - pdf->error_cb = error_cb; - pdf->error_data = error_data; + pdf->filename = strdup(filename); + pdf->mode = _PDFIO_MODE_READ; + pdf->error_cb = error_cb; + pdf->error_data = error_data; + pdf->permissions = PDFIO_PERMISSION_ALL; // Open the file... if ((pdf->fd = open(filename, O_RDONLY | O_BINARY)) < 0) @@ -965,7 +1014,7 @@ pdfioFileOpen( xref_offset = (off_t)strtol(ptr + 9, NULL, 10); - if (!load_xref(pdf, xref_offset)) + if (!load_xref(pdf, xref_offset, password_cb, password_data)) goto error; return (pdf); @@ -988,8 +1037,8 @@ void pdfioFileSetAuthor(pdfio_file_t *pdf, // I - PDF file const char *value) // I - Value { - if (pdf && pdf->info) - pdfioDictSetString(pdf->info->value.value.dict, "Author", pdfioStringCreate(pdf, value)); + if (pdf && pdf->info_obj) + pdfioDictSetString(pdf->info_obj->value.value.dict, "Author", pdfioStringCreate(pdf, value)); } @@ -1002,8 +1051,8 @@ pdfioFileSetCreationDate( pdfio_file_t *pdf, // I - PDF file time_t value) // I - Value { - if (pdf && pdf->info) - pdfioDictSetDate(pdf->info->value.value.dict, "CreationDate", value); + if (pdf && pdf->info_obj) + pdfioDictSetDate(pdf->info_obj->value.value.dict, "CreationDate", value); } @@ -1015,8 +1064,8 @@ void pdfioFileSetCreator(pdfio_file_t 
*pdf, // I - PDF file const char *value)// I - Value { - if (pdf && pdf->info) - pdfioDictSetString(pdf->info->value.value.dict, "Creator", pdfioStringCreate(pdf, value)); + if (pdf && pdf->info_obj) + pdfioDictSetString(pdf->info_obj->value.value.dict, "Creator", pdfioStringCreate(pdf, value)); } @@ -1029,8 +1078,44 @@ pdfioFileSetKeywords( pdfio_file_t *pdf, // I - PDF file const char *value) // I - Value { - if (pdf && pdf->info) - pdfioDictSetString(pdf->info->value.value.dict, "Keywords", pdfioStringCreate(pdf, value)); + if (pdf && pdf->info_obj) + pdfioDictSetString(pdf->info_obj->value.value.dict, "Keywords", pdfioStringCreate(pdf, value)); +} + + +// +// 'pdfioFileSetPermissions()' - Set the PDF permissions, encryption mode, and passwords. +// +// This function sets the PDF usage permissions, encryption mode, and +// passwords. +// +// > *Note*: This function must be called before creating or copying any +// > objects. Due to fundamental limitations in the PDF format, PDF encryption +// > offers little protection from disclosure. Permissions are not enforced in +// > any meaningful way. 
+// + +bool // O - `true` on success, `false` otherwise +pdfioFileSetPermissions( + pdfio_file_t *pdf, // I - PDF file + pdfio_permission_t permissions, // I - Use permissions + pdfio_encryption_t encryption, // I - Type of encryption to use + const char *owner_password, // I - Owner password, if any + const char *user_password) // I - User password, if any +{ + if (!pdf) + return (false); + + if (pdf->num_objs > 2) // First two objects are pages and info + { + _pdfioFileError(pdf, "You must call pdfioFileSetPermissions before adding any objects."); + return (false); + } + + if (encryption == PDFIO_ENCRYPTION_NONE) + return (true); + + return (_pdfioCryptoLock(pdf, permissions, encryption, owner_password, user_password)); } @@ -1039,11 +1124,12 @@ pdfioFileSetKeywords( // void -pdfioFileSetSubject(pdfio_file_t *pdf, // I - PDF file - const char *value)// I - Value +pdfioFileSetSubject( + pdfio_file_t *pdf, // I - PDF file + const char *value) // I - Value { - if (pdf && pdf->info) - pdfioDictSetString(pdf->info->value.value.dict, "Subject", pdfioStringCreate(pdf, value)); + if (pdf && pdf->info_obj) + pdfioDictSetString(pdf->info_obj->value.value.dict, "Subject", pdfioStringCreate(pdf, value)); } @@ -1055,8 +1141,8 @@ void pdfioFileSetTitle(pdfio_file_t *pdf, // I - PDF file const char *value) // I - Value { - if (pdf && pdf->info) - pdfioDictSetString(pdf->info->value.value.dict, "Title", pdfioStringCreate(pdf, value)); + if (pdf && pdf->info_obj) + pdfioDictSetString(pdf->info_obj->value.value.dict, "Title", pdfioStringCreate(pdf, value)); } @@ -1226,7 +1312,7 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load // Read the objects themselves... 
for (cur_obj = 0; cur_obj < num_objs; cur_obj ++) { - if (!_pdfioValueRead(obj->pdf, &tb, &(objs[cur_obj]->value))) + if (!_pdfioValueRead(obj->pdf, obj, &tb, &(objs[cur_obj]->value))) { pdfioStreamClose(st); return (false); @@ -1313,8 +1399,11 @@ load_pages(pdfio_file_t *pdf, // I - PDF file // static bool // O - `true` on success, `false` on failure -load_xref(pdfio_file_t *pdf, // I - PDF file - off_t xref_offset) // I - Offset to xref +load_xref( + pdfio_file_t *pdf, // I - PDF file + off_t xref_offset, // I - Offset to xref + pdfio_password_cb_t password_cb, // I - Password callback or `NULL` for none + void *password_data) // I - Password callback data, if any { bool done = false; // Are we done? char line[1024], // Line from file @@ -1404,7 +1493,7 @@ load_xref(pdfio_file_t *pdf, // I - PDF file _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf); - if (!_pdfioValueRead(pdf, &tb, &trailer)) + if (!_pdfioValueRead(pdf, obj, &tb, &trailer)) { _pdfioFileError(pdf, "Unable to read cross-reference stream dictionary."); return (false); @@ -1414,12 +1503,6 @@ load_xref(pdfio_file_t *pdf, // I - PDF file _pdfioFileError(pdf, "Cross-reference stream does not have a dictionary."); return (false); } - else if (_pdfioDictGetValue(pdf->trailer, "Encrypt")) - { - // Encryption not yet supported... - _pdfioFileError(pdf, "Sorry, PDFio currently does not support encrypted PDF files."); - return (false); - } obj->value = trailer; @@ -1554,6 +1637,20 @@ load_xref(pdfio_file_t *pdf, // I - PDF file pdfioStreamClose(st); + if (!pdf->trailer_dict) + { + // Save the trailer dictionary and grab the root (catalog) and info + // objects... 
+ pdf->trailer_dict = trailer.value.dict; + pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info"); + pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt"); + pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID"); + + // If the trailer contains an Encrypt key, try unlocking the file... + if (pdf->encrypt_obj && !_pdfioCryptoUnlock(pdf, password_cb, password_data)) + return (false); + } + // Load any object streams that are left... PDFIO_DEBUG("load_xref: %lu compressed object streams to load.\n", (unsigned long)num_sobjs); @@ -1651,7 +1748,7 @@ load_xref(pdfio_file_t *pdf, // I - PDF file _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf); - if (!_pdfioValueRead(pdf, &tb, &trailer)) + if (!_pdfioValueRead(pdf, NULL, &tb, &trailer)) { _pdfioFileError(pdf, "Unable to read trailer dictionary."); return (false); @@ -1661,14 +1758,22 @@ load_xref(pdfio_file_t *pdf, // I - PDF file _pdfioFileError(pdf, "Trailer is not a dictionary."); return (false); } - else if (_pdfioDictGetValue(pdf->trailer, "Encrypt")) - { - // Encryption not yet supported... - _pdfioFileError(pdf, "Sorry, PDFio currently does not support encrypted PDF files."); - return (false); - } _pdfioTokenFlush(&tb); + + if (!pdf->trailer_dict) + { + // Save the trailer dictionary and grab the root (catalog) and info + // objects... + pdf->trailer_dict = trailer.value.dict; + pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info"); + pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt"); + pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID"); + + // If the trailer contains an Encrypt key, try unlocking the file... 
+ if (pdf->encrypt_obj && !_pdfioCryptoUnlock(pdf, password_cb, password_data)) + return (false); + } } else { @@ -1681,32 +1786,21 @@ load_xref(pdfio_file_t *pdf, // I - PDF file PDFIO_DEBUG_VALUE(&trailer); PDFIO_DEBUG("\n"); - if (!pdf->trailer) - { - // Save the trailer dictionary and grab the root (catalog) and info - // objects... - pdf->trailer = trailer.value.dict; - } - if ((xref_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev")) <= 0) done = true; } // Once we have all of the xref tables loaded, get the important objects and // build the pages array... - if ((pdf->root = pdfioDictGetObj(pdf->trailer, "Root")) == NULL) + if ((pdf->root_obj = pdfioDictGetObj(pdf->trailer_dict, "Root")) == NULL) { _pdfioFileError(pdf, "Missing Root object."); return (false); } - PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", pdf->root, (unsigned long)pdf->root->number); + PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number); - pdf->info = pdfioDictGetObj(pdf->trailer, "Info"); - pdf->encrypt = pdfioDictGetObj(pdf->trailer, "Encrypt"); - pdf->id_array = pdfioDictGetArray(pdf->trailer, "ID"); - - return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root), "Pages"))); + return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages"))); } @@ -1724,13 +1818,13 @@ write_catalog(pdfio_file_t *pdf) // I - PDF file return (false); pdfioDictSetName(dict, "Type", "Catalog"); - pdfioDictSetObj(dict, "Pages", pdf->pages_root); + pdfioDictSetObj(dict, "Pages", pdf->pages_obj); // TODO: Add support for all of the root object dictionary keys - if ((pdf->root = pdfioFileCreateObj(pdf, dict)) == NULL) + if ((pdf->root_obj = pdfioFileCreateObj(pdf, dict)) == NULL) return (false); else - return (pdfioObjClose(pdf->root)); + return (pdfioObjClose(pdf->root_obj)); } @@ -1752,11 +1846,11 @@ write_pages(pdfio_file_t *pdf) // I - PDF file for (i = 0; i < pdf->num_pages; i ++) pdfioArrayAppendObj(kids, pdf->pages[i]); - 
pdfioDictSetNumber(pdf->pages_root->value.value.dict, "Count", pdf->num_pages); - pdfioDictSetArray(pdf->pages_root->value.value.dict, "Kids", kids); + pdfioDictSetNumber(pdf->pages_obj->value.value.dict, "Count", pdf->num_pages); + pdfioDictSetArray(pdf->pages_obj->value.value.dict, "Kids", kids); // Write the Pages object... - return (pdfioObjClose(pdf->pages_root)); + return (pdfioObjClose(pdf->pages_obj)); } @@ -1770,8 +1864,6 @@ write_trailer(pdfio_file_t *pdf) // I - PDF file bool ret = true; // Return value off_t xref_offset; // Offset to xref table size_t i; // Looping var - int fd; // File for /dev/urandom - unsigned char id_values[2][16]; // ID array values // Write the xref table... @@ -1805,34 +1897,22 @@ write_trailer(pdfio_file_t *pdf) // I - PDF file goto done; } - if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) - { - // Load ID array with random values from /dev/urandom... - if (read(fd, id_values[0], sizeof(id_values[0])) == (ssize_t)sizeof(id_values[0]) && read(fd, id_values[1], sizeof(id_values[1])) == (ssize_t)sizeof(id_values[1])) - { - pdf->id_array = pdfioArrayCreate(pdf); - pdfioArrayAppendBinary(pdf->id_array, id_values[0], sizeof(id_values[0])); - pdfioArrayAppendBinary(pdf->id_array, id_values[1], sizeof(id_values[1])); - } - - close(fd); - } - - if ((pdf->trailer = pdfioDictCreate(pdf)) == NULL) + if ((pdf->trailer_dict = pdfioDictCreate(pdf)) == NULL) { + _pdfioFileError(pdf, "Unable to create trailer."); ret = false; goto done; } - if (pdf->encrypt) - pdfioDictSetObj(pdf->trailer, "Encrypt", pdf->encrypt); + if (pdf->encrypt_obj) + pdfioDictSetObj(pdf->trailer_dict, "Encrypt", pdf->encrypt_obj); if (pdf->id_array) - pdfioDictSetArray(pdf->trailer, "ID", pdf->id_array); - pdfioDictSetObj(pdf->trailer, "Info", pdf->info); - pdfioDictSetObj(pdf->trailer, "Root", pdf->root); - pdfioDictSetNumber(pdf->trailer, "Size", pdf->num_objs + 1); + pdfioDictSetArray(pdf->trailer_dict, "ID", pdf->id_array); + pdfioDictSetObj(pdf->trailer_dict, 
"Info", pdf->info_obj); + pdfioDictSetObj(pdf->trailer_dict, "Root", pdf->root_obj); + pdfioDictSetNumber(pdf->trailer_dict, "Size", pdf->num_objs + 1); - if (!_pdfioDictWrite(pdf->trailer, NULL)) + if (!_pdfioDictWrite(pdf->trailer_dict, NULL, NULL)) { _pdfioFileError(pdf, "Unable to write trailer."); ret = false; diff --git a/pdfio-md5.c b/pdfio-md5.c new file mode 100644 index 0000000..45e1511 --- /dev/null +++ b/pdfio-md5.c @@ -0,0 +1,338 @@ +// +// MD5 functions for PDFio. +// +// Copyright © 2021 by Michael R Sweet. +// Copyright © 1999 Aladdin Enterprises. All rights reserved. +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. +// +// L. Peter Deutsch +// ghost@aladdin.com +// + +#include "pdfio-private.h" + + +/* + Independent implementation of MD5 (RFC 1321). + + This code implements the MD5 Algorithm defined in RFC 1321. + It is derived directly from the text of the RFC and not from the + reference implementation. + + The original and principal author of md5.c is L. Peter Deutsch + . Other authors are noted in the change history + that follows (in reverse chronological order): + + 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. 
+ 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5). + 1999-05-03 lpd Original version. + */ + +#define T1 0xd76aa478 +#define T2 0xe8c7b756 +#define T3 0x242070db +#define T4 0xc1bdceee +#define T5 0xf57c0faf +#define T6 0x4787c62a +#define T7 0xa8304613 +#define T8 0xfd469501 +#define T9 0x698098d8 +#define T10 0x8b44f7af +#define T11 0xffff5bb1 +#define T12 0x895cd7be +#define T13 0x6b901122 +#define T14 0xfd987193 +#define T15 0xa679438e +#define T16 0x49b40821 +#define T17 0xf61e2562 +#define T18 0xc040b340 +#define T19 0x265e5a51 +#define T20 0xe9b6c7aa +#define T21 0xd62f105d +#define T22 0x02441453 +#define T23 0xd8a1e681 +#define T24 0xe7d3fbc8 +#define T25 0x21e1cde6 +#define T26 0xc33707d6 +#define T27 0xf4d50d87 +#define T28 0x455a14ed +#define T29 0xa9e3e905 +#define T30 0xfcefa3f8 +#define T31 0x676f02d9 +#define T32 0x8d2a4c8a +#define T33 0xfffa3942 +#define T34 0x8771f681 +#define T35 0x6d9d6122 +#define T36 0xfde5380c +#define T37 0xa4beea44 +#define T38 0x4bdecfa9 +#define T39 0xf6bb4b60 +#define T40 0xbebfbc70 +#define T41 0x289b7ec6 +#define T42 0xeaa127fa +#define T43 0xd4ef3085 +#define T44 0x04881d05 +#define T45 0xd9d4d039 +#define T46 0xe6db99e5 +#define T47 0x1fa27cf8 +#define T48 0xc4ac5665 +#define T49 0xf4292244 +#define T50 0x432aff97 +#define T51 0xab9423a7 +#define T52 0xfc93a039 +#define T53 0x655b59c3 +#define T54 0x8f0ccc92 +#define T55 0xffeff47d +#define T56 0x85845dd1 +#define T57 0x6fa87e4f +#define T58 0xfe2ce6e0 +#define T59 0xa3014314 +#define T60 0x4e0811a1 +#define T61 0xf7537e82 +#define T62 0xbd3af235 +#define T63 0x2ad7d2bb +#define T64 0xeb86d391 + +static void +md5_process(_pdfio_md5_t *pms, const uint8_t *data /*[64]*/) +{ + uint32_t + a = pms->abcd[0], b = pms->abcd[1], + c = pms->abcd[2], d = pms->abcd[3]; + uint32_t t; + +#ifndef ARCH_IS_BIG_ENDIAN +# define ARCH_IS_BIG_ENDIAN 1 /* slower, default implementation */ +#endif +#if ARCH_IS_BIG_ENDIAN + + /* + * On big-endian machines, we must 
arrange the bytes in the right + * order. (This also works on machines of unknown byte order.) + */ + uint32_t X[16]; + const uint8_t *xp = data; + int i; + + for (i = 0; i < 16; ++i, xp += 4) + X[i] = xp[0] + (unsigned)(xp[1] << 8) + (unsigned)(xp[2] << 16) + (unsigned)(xp[3] << 24); + +#else /* !ARCH_IS_BIG_ENDIAN */ + + /* + * On little-endian machines, we can process properly aligned data + * without copying it. + */ + uint32_t xbuf[16]; + const uint32_t *X; + + if (!((data - (const uint8_t *)0) & 3)) { + /* data are properly aligned */ + X = (const uint32_t *)data; + } else { + /* not aligned */ + memcpy(xbuf, data, 64); + X = xbuf; + } +#endif + +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + + /* Round 1. */ + /* Let [abcd k s i] denote the operation + a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ +#define F(x, y, z) (((x) & (y)) | (~(x) & (z))) +#define SET(a, b, c, d, k, s, Ti)\ + t = a + F(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 0, 7, T1); + SET(d, a, b, c, 1, 12, T2); + SET(c, d, a, b, 2, 17, T3); + SET(b, c, d, a, 3, 22, T4); + SET(a, b, c, d, 4, 7, T5); + SET(d, a, b, c, 5, 12, T6); + SET(c, d, a, b, 6, 17, T7); + SET(b, c, d, a, 7, 22, T8); + SET(a, b, c, d, 8, 7, T9); + SET(d, a, b, c, 9, 12, T10); + SET(c, d, a, b, 10, 17, T11); + SET(b, c, d, a, 11, 22, T12); + SET(a, b, c, d, 12, 7, T13); + SET(d, a, b, c, 13, 12, T14); + SET(c, d, a, b, 14, 17, T15); + SET(b, c, d, a, 15, 22, T16); +#undef SET + + /* Round 2. */ + /* Let [abcd k s i] denote the operation + a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ +#define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) +#define SET(a, b, c, d, k, s, Ti)\ + t = a + G(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. 
*/ + SET(a, b, c, d, 1, 5, T17); + SET(d, a, b, c, 6, 9, T18); + SET(c, d, a, b, 11, 14, T19); + SET(b, c, d, a, 0, 20, T20); + SET(a, b, c, d, 5, 5, T21); + SET(d, a, b, c, 10, 9, T22); + SET(c, d, a, b, 15, 14, T23); + SET(b, c, d, a, 4, 20, T24); + SET(a, b, c, d, 9, 5, T25); + SET(d, a, b, c, 14, 9, T26); + SET(c, d, a, b, 3, 14, T27); + SET(b, c, d, a, 8, 20, T28); + SET(a, b, c, d, 13, 5, T29); + SET(d, a, b, c, 2, 9, T30); + SET(c, d, a, b, 7, 14, T31); + SET(b, c, d, a, 12, 20, T32); +#undef SET + + /* Round 3. */ + /* Let [abcd k s t] denote the operation + a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define SET(a, b, c, d, k, s, Ti)\ + t = a + H(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 5, 4, T33); + SET(d, a, b, c, 8, 11, T34); + SET(c, d, a, b, 11, 16, T35); + SET(b, c, d, a, 14, 23, T36); + SET(a, b, c, d, 1, 4, T37); + SET(d, a, b, c, 4, 11, T38); + SET(c, d, a, b, 7, 16, T39); + SET(b, c, d, a, 10, 23, T40); + SET(a, b, c, d, 13, 4, T41); + SET(d, a, b, c, 0, 11, T42); + SET(c, d, a, b, 3, 16, T43); + SET(b, c, d, a, 6, 23, T44); + SET(a, b, c, d, 9, 4, T45); + SET(d, a, b, c, 12, 11, T46); + SET(c, d, a, b, 15, 16, T47); + SET(b, c, d, a, 2, 23, T48); +#undef SET + + /* Round 4. */ + /* Let [abcd k s t] denote the operation + a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ +#define I(x, y, z) ((y) ^ ((x) | ~(z))) +#define SET(a, b, c, d, k, s, Ti)\ + t = a + I(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. 
*/ + SET(a, b, c, d, 0, 6, T49); + SET(d, a, b, c, 7, 10, T50); + SET(c, d, a, b, 14, 15, T51); + SET(b, c, d, a, 5, 21, T52); + SET(a, b, c, d, 12, 6, T53); + SET(d, a, b, c, 3, 10, T54); + SET(c, d, a, b, 10, 15, T55); + SET(b, c, d, a, 1, 21, T56); + SET(a, b, c, d, 8, 6, T57); + SET(d, a, b, c, 15, 10, T58); + SET(c, d, a, b, 6, 15, T59); + SET(b, c, d, a, 13, 21, T60); + SET(a, b, c, d, 4, 6, T61); + SET(d, a, b, c, 11, 10, T62); + SET(c, d, a, b, 2, 15, T63); + SET(b, c, d, a, 9, 21, T64); +#undef SET + + /* Then perform the following additions. (That is increment each + of the four registers by the value it had before this block + was started.) */ + pms->abcd[0] += a; + pms->abcd[1] += b; + pms->abcd[2] += c; + pms->abcd[3] += d; +} + +void +_pdfioCryptoMD5Init(_pdfio_md5_t *pms) +{ + pms->count[0] = pms->count[1] = 0; + pms->abcd[0] = 0x67452301; + pms->abcd[1] = 0xefcdab89; + pms->abcd[2] = 0x98badcfe; + pms->abcd[3] = 0x10325476; +} + +void +_pdfioCryptoMD5Append(_pdfio_md5_t *pms, const uint8_t *data, size_t nbytes) +{ + const uint8_t *p = data; + size_t left = nbytes; + size_t offset = (pms->count[0] >> 3) & 63; + uint32_t nbits = (uint32_t)(nbytes << 3); + + if (nbytes == 0) + return; + + /* Update the message length. */ + pms->count[1] += (unsigned)(nbytes >> 29); + pms->count[0] += nbits; + if (pms->count[0] < nbits) + pms->count[1]++; + + /* Process an initial partial block. */ + if (offset) { + size_t copy = (offset + nbytes > 64 ? 64 - offset : nbytes); + + memcpy(pms->buf + offset, p, copy); + if (offset + copy < 64) + return; + p += copy; + left -= copy; + md5_process(pms, pms->buf); + } + + /* Process full blocks. */ + for (; left >= 64; p += 64, left -= 64) + md5_process(pms, p); + + /* Process a final partial block. 
*/ + if (left) + memcpy(pms->buf, p, left); +} + +void +_pdfioCryptoMD5Finish(_pdfio_md5_t *pms, uint8_t digest[16]) +{ + static const uint8_t pad[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + uint8_t data[8]; + int i; + + /* Save the length before padding. */ + for (i = 0; i < 8; ++i) + data[i] = (uint8_t)(pms->count[i >> 2] >> ((i & 3) << 3)); + /* Pad to 56 bytes mod 64. */ + _pdfioCryptoMD5Append(pms, pad, ((55 - (pms->count[0] >> 3)) & 63) + 1); + /* Append the length. */ + _pdfioCryptoMD5Append(pms, data, 8); + for (i = 0; i < 16; ++i) + digest[i] = (uint8_t)(pms->abcd[i >> 2] >> ((i & 3) << 3)); +} diff --git a/pdfio-object.c b/pdfio-object.c index 9059ff8..f7dab8c 100644 --- a/pdfio-object.c +++ b/pdfio-object.c @@ -96,6 +96,9 @@ pdfioObjCopy(pdfio_file_t *pdf, // I - PDF file if (!_pdfioValueCopy(pdf, &dstobj->value, srcobj->pdf, &srcobj->value)) return (NULL); + if (dstobj->value.type == PDFIO_VALTYPE_DICT) + _pdfioDictClear(dstobj->value.value.dict, "Length"); + if (srcobj->stream_offset) { // Copy stream data... @@ -409,7 +412,7 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object // Then grab the object value... 
_pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, obj->pdf); - if (!_pdfioValueRead(obj->pdf, &tb, &obj->value)) + if (!_pdfioValueRead(obj->pdf, obj, &tb, &obj->value)) { _pdfioFileError(obj->pdf, "Unable to read value for object %lu.", (unsigned long)obj->number); return (false); @@ -479,7 +482,7 @@ write_obj_header(pdfio_obj_t *obj) // I - Object if (!_pdfioFilePrintf(obj->pdf, "%lu %u obj\n", (unsigned long)obj->number, obj->generation)) return (false); - if (!_pdfioValueWrite(obj->pdf, &obj->value, &obj->length_offset)) + if (!_pdfioValueWrite(obj->pdf, obj, &obj->value, &obj->length_offset)) return (false); return (_pdfioFilePuts(obj->pdf, "\n")); diff --git a/pdfio-private.h b/pdfio-private.h index 34d41c3..8140582 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -24,6 +24,7 @@ # include "pdfio.h" # include +# include # include # include # include @@ -174,6 +175,44 @@ typedef struct _pdfio_value_s // Value structure } value; // Value union } _pdfio_value_t; +typedef struct _pdfio_aes_s // AES encryption state +{ + size_t round_size; // Size of round key + uint8_t round_key[240], // Round key + iv[16]; // Initialization vector +} _pdfio_aes_t; + +typedef struct _pdfio_md5_s // MD5 hash state +{ + uint32_t count[2]; // Message length in bits, lsw first + uint32_t abcd[4]; // Digest buffer + uint8_t buf[64]; // Accumulate block +} _pdfio_md5_t; + +typedef struct _pdfio_rc4_s // RC4 encryption state +{ + uint8_t sbox[256]; // S boxes for encryption + uint8_t i, j; // Current indices into S boxes +} _pdfio_rc4_t; + +typedef struct _pdfio_sha265_s // SHA-256 hash state +{ + uint32_t Intermediate_Hash[8]; // Message Digest + uint32_t Length_High; // Message length in bits + uint32_t Length_Low; // Message length in bits + int Message_Block_Index; // Message_Block array index + uint8_t Message_Block[64]; // 512-bit message blocks + int Computed; // Is the hash computed? 
+ int Corrupted; // Cumulative corruption code +} _pdfio_sha256_t; + +typedef union _pdfio_crypto_ctx_u // Cryptographic contexts +{ + _pdfio_aes_t aes; // AES-128/256 context + _pdfio_rc4_t rc4; // RC4-40/128 context +} _pdfio_crypto_ctx_t; +typedef size_t (*_pdfio_crypto_cb_t)(_pdfio_crypto_ctx_t *ctx, uint8_t *outbuffer, const uint8_t *inbuffer, size_t len); + struct _pdfio_array_s { pdfio_file_t *pdf; // PDF file @@ -215,17 +254,26 @@ struct _pdfio_file_s // PDF file structure pdfio_error_cb_t error_cb; // Error callback void *error_data; // Data for error callback + pdfio_encryption_t encryption; // Encryption mode + pdfio_permission_t permissions; // Access permissions (encrypted PDF files) + uint8_t file_key[16], // File encryption key + owner_key[32], // Owner encryption key + user_key[32]; // User encryption key + size_t file_keylen, // Length of file encryption key + owner_keylen, // Length of owner encryption key + user_keylen; // Length of user encryption key + // Active file data int fd; // File descriptor char buffer[8192], // Read/write buffer *bufptr, // Pointer into buffer *bufend; // End of buffer off_t bufpos; // Position in file for start of buffer - pdfio_dict_t *trailer; // Trailer dictionary - pdfio_obj_t *root; // Root object/dictionary - pdfio_obj_t *info; // Information object - pdfio_obj_t *pages_root; // Root pages object - pdfio_obj_t *encrypt; // Encryption object/dictionary + pdfio_dict_t *trailer_dict; // Trailer dictionary + pdfio_obj_t *root_obj; // Root object/dictionary + pdfio_obj_t *info_obj; // Information object + pdfio_obj_t *pages_obj; // Root pages object + pdfio_obj_t *encrypt_obj; // De/Encryption object/dictionary pdfio_obj_t *cp1252_obj, // CP1252 font encoding object *unicode_obj; // Unicode font encoding object pdfio_array_t *id_array; // ID array @@ -281,6 +329,8 @@ struct _pdfio_stream_s // Stream unsigned char cbuffer[4096], // Compressed data buffer *prbuffer, // Raw buffer (previous line), as needed *psbuffer; 
// PNG filter buffer, as needed + _pdfio_crypto_cb_t crypto_cb; // Encryption/decryption callback, if any + _pdfio_crypto_ctx_t crypto_ctx; // Cryptographic context }; @@ -291,15 +341,33 @@ struct _pdfio_stream_s // Stream extern void _pdfioArrayDebug(pdfio_array_t *a, FILE *fp) _PDFIO_INTERNAL; extern void _pdfioArrayDelete(pdfio_array_t *a) _PDFIO_INTERNAL; extern _pdfio_value_t *_pdfioArrayGetValue(pdfio_array_t *a, size_t n) _PDFIO_INTERNAL; -extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf, _pdfio_token_t *ts) _PDFIO_INTERNAL; -extern bool _pdfioArrayWrite(pdfio_array_t *a) _PDFIO_INTERNAL; +extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts) _PDFIO_INTERNAL; +extern bool _pdfioArrayWrite(pdfio_array_t *a, pdfio_obj_t *obj) _PDFIO_INTERNAL; +extern void _pdfioCryptoAESInit(_pdfio_aes_t *ctx, const uint8_t *key, size_t keylen, const uint8_t *iv) _PDFIO_INTERNAL; +extern size_t _pdfioCryptoAESDecrypt(_pdfio_aes_t *ctx, uint8_t *outbuffer, const uint8_t *inbuffer, size_t len) _PDFIO_INTERNAL; +extern size_t _pdfioCryptoAESEncrypt(_pdfio_aes_t *ctx, uint8_t *outbuffer, const uint8_t *inbuffer, size_t len) _PDFIO_INTERNAL; +extern bool _pdfioCryptoLock(pdfio_file_t *pdf, pdfio_permission_t permissions, pdfio_encryption_t encryption, const char *owner_password, const char *user_password) _PDFIO_INTERNAL; +extern void _pdfioCryptoMakeRandom(uint8_t *buffer, size_t bytes) _PDFIO_INTERNAL; +extern _pdfio_crypto_cb_t _pdfioCryptoMakeReader(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_crypto_ctx_t *ctx, uint8_t *iv, size_t *ivlen) _PDFIO_INTERNAL; +extern _pdfio_crypto_cb_t _pdfioCryptoMakeWriter(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_crypto_ctx_t *ctx, uint8_t *iv, size_t *ivlen) _PDFIO_INTERNAL; +extern void _pdfioCryptoMD5Append(_pdfio_md5_t *pms, const uint8_t *data, size_t nbytes) _PDFIO_INTERNAL; +extern void _pdfioCryptoMD5Finish(_pdfio_md5_t *pms, uint8_t digest[16]) _PDFIO_INTERNAL; +extern void
_pdfioCryptoMD5Init(_pdfio_md5_t *pms) _PDFIO_INTERNAL; +extern void _pdfioCryptoRC4Init(_pdfio_rc4_t *ctx, const uint8_t *key, size_t keylen) _PDFIO_INTERNAL; +extern size_t _pdfioCryptoRC4Crypt(_pdfio_rc4_t *ctx, uint8_t *outbuffer, const uint8_t *inbuffer, size_t len) _PDFIO_INTERNAL; +extern void _pdfioCryptoSHA256Append(_pdfio_sha256_t *, const uint8_t *bytes, size_t bytecount) _PDFIO_INTERNAL; +extern void _pdfioCryptoSHA256Init(_pdfio_sha256_t *ctx) _PDFIO_INTERNAL; +extern void _pdfioCryptoSHA256Finish(_pdfio_sha256_t *ctx, uint8_t *Message_Digest) _PDFIO_INTERNAL; +extern bool _pdfioCryptoUnlock(pdfio_file_t *pdf, pdfio_password_cb_t password_cb, void *password_data) _PDFIO_INTERNAL; + +extern void _pdfioDictClear(pdfio_dict_t *dict, const char *key) _PDFIO_INTERNAL; extern void _pdfioDictDebug(pdfio_dict_t *dict, FILE *fp) _PDFIO_INTERNAL; extern void _pdfioDictDelete(pdfio_dict_t *dict) _PDFIO_INTERNAL; extern _pdfio_value_t *_pdfioDictGetValue(pdfio_dict_t *dict, const char *key) _PDFIO_INTERNAL; -extern pdfio_dict_t *_pdfioDictRead(pdfio_file_t *pdf, _pdfio_token_t *ts) _PDFIO_INTERNAL; +extern pdfio_dict_t *_pdfioDictRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts) _PDFIO_INTERNAL; extern bool _pdfioDictSetValue(pdfio_dict_t *dict, const char *key, _pdfio_value_t *value) _PDFIO_INTERNAL; -extern bool _pdfioDictWrite(pdfio_dict_t *dict, off_t *length) _PDFIO_INTERNAL; +extern bool _pdfioDictWrite(pdfio_dict_t *dict, pdfio_obj_t *obj, off_t *length) _PDFIO_INTERNAL; extern bool _pdfioFileAddMappedObj(pdfio_file_t *pdf, pdfio_obj_t *dst_obj, pdfio_obj_t *src_obj) _PDFIO_INTERNAL; extern bool _pdfioFileAddPage(pdfio_file_t *pdf, pdfio_obj_t *obj) _PDFIO_INTERNAL; @@ -337,7 +405,7 @@ extern bool _pdfioTokenRead(_pdfio_token_t *tb, char *buffer, size_t bufsize); extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) _PDFIO_INTERNAL; extern void 
_pdfioValueDebug(_pdfio_value_t *v, FILE *fp) _PDFIO_INTERNAL; extern void _pdfioValueDelete(_pdfio_value_t *v) _PDFIO_INTERNAL; -extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, _pdfio_token_t *ts, _pdfio_value_t *v) _PDFIO_INTERNAL; -extern bool _pdfioValueWrite(pdfio_file_t *pdf, _pdfio_value_t *v, off_t *length) _PDFIO_INTERNAL; +extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts, _pdfio_value_t *v) _PDFIO_INTERNAL; +extern bool _pdfioValueWrite(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_value_t *v, off_t *length) _PDFIO_INTERNAL; #endif // !PDFIO_PRIVATE_H diff --git a/pdfio-rc4.c b/pdfio-rc4.c new file mode 100644 index 0000000..cbaf2dc --- /dev/null +++ b/pdfio-rc4.c @@ -0,0 +1,113 @@ +// +// RC4 functions for PDFio. +// +// Copyright © 2021 by Michael R Sweet. +// +// Original code by Tim Martin +// Copyright © 1999 by Carnegie Mellon University, All Rights Reserved +// +// Permission to use, copy, modify, and distribute this software and its +// documentation for any purpose and without fee is hereby granted, +// provided that the above copyright notice appear in all copies and that +// both that copyright notice and this permission notice appear in +// supporting documentation, and that the name of Carnegie Mellon +// University not be used in advertising or publicity pertaining to +// distribution of the software without specific, written prior +// permission. +// +// CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO +// THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE FOR +// ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +// OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+// + +#include "pdfio-private.h" + + +// +// '_pdfioCryptoRC4Init()' - Initialize an RC4 context with the specified key. +// + +void +_pdfioCryptoRC4Init( + _pdfio_rc4_t *ctx, // IO - Context + const uint8_t *key, // I - Key + size_t keylen) // I - Length of key +{ + size_t i; // Looping var + uint8_t j, // S box counter + tmp; // Temporary variable + + + // Fill in linearly s0=0, s1=1, ... + for (i = 0; i < 256; i ++) + ctx->sbox[i] = (uint8_t)i; + + for (i = 0, j = 0; i < 256; i ++) + { + // j = (j + Si + Ki) mod 256 + j += ctx->sbox[i] + key[i % keylen]; + + // Swap Si and Sj... + tmp = ctx->sbox[i]; + ctx->sbox[i] = ctx->sbox[j]; + ctx->sbox[j] = tmp; + } + + // Initialize counters to 0 and return... + ctx->i = 0; + ctx->j = 0; +} + + +// +// '_pdfioCryptoRC4Crypt()' - De/encrypt the given buffer. +// +// "inbuffer" and "outbuffer" can point to the same memory. +// + +size_t // O - Number of output bytes +_pdfioCryptoRC4Crypt( + _pdfio_rc4_t *ctx, // IO - Context + uint8_t *outbuffer, // O - Output buffer + const uint8_t *inbuffer, // I - Input buffer + size_t len) // I - Size of buffers +{ + uint8_t tmp, // Swap variable + i, j, // Looping vars + t; // Current S box + size_t outbytes = len; // Number of output bytes + + + // Loop through the entire buffer... + i = ctx->i; + j = ctx->j; + + while (len > 0) + { + // Get the next S box indices... + i ++; + j += ctx->sbox[i]; + + // Swap Si and Sj... + tmp = ctx->sbox[i]; + ctx->sbox[i] = ctx->sbox[j]; + ctx->sbox[j] = tmp; + + // Get the S box index for this byte... + t = ctx->sbox[i] + ctx->sbox[j]; + + // Encrypt using the S box... + *outbuffer++ = *inbuffer++ ^ ctx->sbox[t]; + len --; + } + + // Copy current S box indices back to context... + ctx->i = i; + ctx->j = j; + + return (outbytes); +} diff --git a/pdfio-sha256.c b/pdfio-sha256.c new file mode 100644 index 0000000..8bea533 --- /dev/null +++ b/pdfio-sha256.c @@ -0,0 +1,480 @@ +// +// SHA-256 functions for PDFio.
+// +// Copyright © 2021 by Michael R Sweet. +// Copyright © 2011 IETF Trust and the persons identified as authors of the +// code. All rights reserved. +// +// Redistribution and use in source and binary forms, with or +// without modification, are permitted provided that the following +// conditions are met: +// +// - Redistributions of source code must retain the above +// copyright notice, this list of conditions and +// the following disclaimer. +// +// - Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// - Neither the name of Internet Society, IETF or IETF Trust, nor +// the names of specific contributors, may be used to endorse or +// promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +// CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +// INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +/* + * Description: + * This file implements the Secure Hash Algorithms SHA-224 and + * SHA-256 as defined in the U.S. 
National Institute of Standards + * and Technology Federal Information Processing Standards + * Publication (FIPS PUB) 180-3 published in October 2008 + * and formerly defined in its predecessors, FIPS PUB 180-1 + * and FIPS PUB 180-2. + * + * A combined document showing all algorithms is available at + * http://csrc.nist.gov/publications/fips/ + * fips180-3/fips180-3_final.pdf + * + * The SHA-224 and SHA-256 algorithms produce 224-bit and 256-bit + * message digests for a given data stream. It should take about + * 2**n steps to find a message with the same digest as a given + * message and 2**(n/2) to find any two messages with the same + * digest, when n is the digest size in bits. Therefore, this + * algorithm can serve as a means of providing a + * "fingerprint" for a message. + * + * Portability Issues: + * SHA-224 and SHA-256 are defined in terms of 32-bit "words". + * This code uses <stdint.h> (included via "sha.h") to define 32- + * and 8-bit unsigned integer types. If your C compiler does not + * support 32-bit unsigned integers, this code is not + * appropriate. + * + * Caveats: + * SHA-224 and SHA-256 are designed to work with messages less + * than 2^64 bits long. This implementation uses SHA224/256Input() + * to hash the bits that are a multiple of the size of an 8-bit + * octet, and then optionally uses SHA224/256FinalBits() + * to hash the final few bits of the input.
+ */ + +#include "pdfio-private.h" + +/* Constants from sha.h */ +enum { + SHA256_Message_Block_Size = 64, + SHA256HashSize = 32, + SHA256HashSizeBits = 256 +}; + +enum { + shaSuccess = 0, + shaNull, /* Null pointer parameter */ + shaInputTooLong, /* input data too long */ + shaStateError, /* called Input after FinalBits or Result */ + shaBadParam /* passed a bad parameter */ +}; + +/* Macros from sha-private.h */ +#define SHA_Ch(x, y, z) (((x) & ((y) ^ (z))) ^ (z)) +#define SHA_Maj(x, y, z) (((x) & ((y) | (z))) | ((y) & (z))) +#define SHA_Parity(x, y, z) ((x) ^ (y) ^ (z)) + +/* Define the SHA shift, rotate left, and rotate right macros */ +#define SHA256_SHR(bits,word) ((word) >> (bits)) +#define SHA256_ROTL(bits,word) \ + (((word) << (bits)) | ((word) >> (32-(bits)))) +#define SHA256_ROTR(bits,word) \ + (((word) >> (bits)) | ((word) << (32-(bits)))) + +/* Define the SHA SIGMA and sigma macros */ +#define SHA256_SIGMA0(word) \ + (SHA256_ROTR( 2,word) ^ SHA256_ROTR(13,word) ^ SHA256_ROTR(22,word)) +#define SHA256_SIGMA1(word) \ + (SHA256_ROTR( 6,word) ^ SHA256_ROTR(11,word) ^ SHA256_ROTR(25,word)) +#define SHA256_sigma0(word) \ + (SHA256_ROTR( 7,word) ^ SHA256_ROTR(18,word) ^ SHA256_SHR( 3,word)) +#define SHA256_sigma1(word) \ + (SHA256_ROTR(17,word) ^ SHA256_ROTR(19,word) ^ SHA256_SHR(10,word)) + +/* + * Add "length" to the length. + * Set Corrupted when overflow has occurred. + */ +static uint32_t addTemp; +#define SHA224_256AddLength(context, length) \ + (addTemp = (context)->Length_Low, (context)->Corrupted = \ + (((context)->Length_Low += (length)) < addTemp) && \ + (++(context)->Length_High == 0) ? 
shaInputTooLong : \ + (context)->Corrupted ) + +/* Local Function Prototypes */ +static int SHA224_256Reset(_pdfio_sha256_t *context, uint32_t *H0); +static void SHA224_256ProcessMessageBlock(_pdfio_sha256_t *context); +static void SHA224_256Finalize(_pdfio_sha256_t *context, + uint8_t Pad_Byte); +static void SHA224_256PadMessage(_pdfio_sha256_t *context, + uint8_t Pad_Byte); +static int SHA224_256ResultN(_pdfio_sha256_t *context, + uint8_t Message_Digest[ ], int HashSize); + +/* Initial Hash Values: FIPS 180-3 section 5.3.3 */ +static uint32_t SHA256_H0[SHA256HashSize/4] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 +}; + +/* + * _pdfioCryptoSHA256Init + * + * Description: + * This function will initialize the _pdfio_sha256_t in preparation + * for computing a new SHA256 message digest. + * + * Parameters: + * context: [in/out] + * The context to reset. + * + * Returns: + * sha Error Code. + */ +void _pdfioCryptoSHA256Init(_pdfio_sha256_t *context) +{ + SHA224_256Reset(context, SHA256_H0); +} + +/* + * _pdfioCryptoSHA256Append + * + * Description: + * This function accepts an array of octets as the next portion + * of the message. + * + * Parameters: + * context: [in/out] + * The SHA context to update. + * message_array[ ]: [in] + * An array of octets representing the next portion of + * the message. + * length: [in] + * The length of the message in message_array. + * + * Returns: + * sha Error Code. 
+ */ +void +_pdfioCryptoSHA256Append(_pdfio_sha256_t *context, const uint8_t *message_array, + size_t length) +{ + if (!length) return; + + while (length--) { + context->Message_Block[context->Message_Block_Index++] = + *message_array; + + if ((SHA224_256AddLength(context, 8) == shaSuccess) && + (context->Message_Block_Index == SHA256_Message_Block_Size)) + SHA224_256ProcessMessageBlock(context); + + message_array++; + } +} + +/* + * _pdfioCryptoSHA256Finish + * + * Description: + * This function will return the 256-bit message digest + * into the Message_Digest array provided by the caller. + * NOTE: + * The first octet of hash is stored in the element with index 0, + * the last octet of hash in the element with index 31. + * + * Parameters: + * context: [in/out] + * The context to use to calculate the SHA hash. + * Message_Digest[ ]: [out] + * Where the digest is returned. + * + * Returns: + * sha Error Code. + */ +void +_pdfioCryptoSHA256Finish(_pdfio_sha256_t *context, + uint8_t Message_Digest[SHA256HashSize]) +{ + SHA224_256ResultN(context, Message_Digest, SHA256HashSize); +} + +/* + * SHA224_256Reset + * + * Description: + * This helper function will initialize the _pdfio_sha256_t in + * preparation for computing a new SHA-224 or SHA-256 message digest. + * + * Parameters: + * context: [in/out] + * The context to reset. + * H0[ ]: [in] + * The initial hash value array to use. + * + * Returns: + * sha Error Code. 
+ */ +static int SHA224_256Reset(_pdfio_sha256_t *context, uint32_t *H0) +{ + if (!context) return shaNull; + + context->Length_High = context->Length_Low = 0; + context->Message_Block_Index = 0; + + context->Intermediate_Hash[0] = H0[0]; + context->Intermediate_Hash[1] = H0[1]; + context->Intermediate_Hash[2] = H0[2]; + context->Intermediate_Hash[3] = H0[3]; + context->Intermediate_Hash[4] = H0[4]; + context->Intermediate_Hash[5] = H0[5]; + context->Intermediate_Hash[6] = H0[6]; + context->Intermediate_Hash[7] = H0[7]; + + context->Computed = 0; + context->Corrupted = shaSuccess; + + return shaSuccess; +} + +/* + * SHA224_256ProcessMessageBlock + * + * Description: + * This helper function will process the next 512 bits of the + * message stored in the Message_Block array. + * + * Parameters: + * context: [in/out] + * The SHA context to update. + * + * Returns: + * Nothing. + * + * Comments: + * Many of the variable names in this code, especially the + * single character names, were used because those were the + * names used in the Secure Hash Standard. 
+ */ +static void SHA224_256ProcessMessageBlock(_pdfio_sha256_t *context) +{ + /* Constants defined in FIPS 180-3, section 4.2.2 */ + static const uint32_t K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, + 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, + 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, + 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, + 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, + 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, + 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08, + 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, + 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + }; + int t, t4; /* Loop counter */ + uint32_t temp1, temp2; /* Temporary word value */ + uint32_t W[64]; /* Word sequence */ + uint32_t A, B, C, D, E, F, G, H; /* Word buffers */ + + /* + * Initialize the first 16 words in the array W + */ + for (t = t4 = 0; t < 16; t++, t4 += 4) + W[t] = (((uint32_t)context->Message_Block[t4]) << 24) | + (((uint32_t)context->Message_Block[t4 + 1]) << 16) | + (((uint32_t)context->Message_Block[t4 + 2]) << 8) | + (((uint32_t)context->Message_Block[t4 + 3])); + + for (t = 16; t < 64; t++) + W[t] = SHA256_sigma1(W[t-2]) + W[t-7] + + SHA256_sigma0(W[t-15]) + W[t-16]; + + A = context->Intermediate_Hash[0]; + B = context->Intermediate_Hash[1]; + C = context->Intermediate_Hash[2]; + D = context->Intermediate_Hash[3]; + E = context->Intermediate_Hash[4]; + F = context->Intermediate_Hash[5]; + G = context->Intermediate_Hash[6]; + H = context->Intermediate_Hash[7]; + + for (t = 0; t < 64; t++) { + temp1 = H + SHA256_SIGMA1(E) + SHA_Ch(E,F,G) + K[t] + W[t]; + temp2 = SHA256_SIGMA0(A) + 
SHA_Maj(A,B,C); + H = G; + G = F; + F = E; + E = D + temp1; + D = C; + C = B; + B = A; + A = temp1 + temp2; + } + + context->Intermediate_Hash[0] += A; + context->Intermediate_Hash[1] += B; + context->Intermediate_Hash[2] += C; + context->Intermediate_Hash[3] += D; + context->Intermediate_Hash[4] += E; + context->Intermediate_Hash[5] += F; + context->Intermediate_Hash[6] += G; + context->Intermediate_Hash[7] += H; + + context->Message_Block_Index = 0; +} + +/* + * SHA224_256Finalize + * + * Description: + * This helper function finishes off the digest calculations. + * + * Parameters: + * context: [in/out] + * The SHA context to update. + * Pad_Byte: [in] + * The last byte to add to the message block before the 0-padding + * and length. This will contain the last bits of the message + * followed by another single bit. If the message was an + * exact multiple of 8-bits long, Pad_Byte will be 0x80. + * + * Returns: + * sha Error Code. + */ +static void SHA224_256Finalize(_pdfio_sha256_t *context, + uint8_t Pad_Byte) +{ + int i; + SHA224_256PadMessage(context, Pad_Byte); + /* message may be sensitive, so clear it out */ + for (i = 0; i < SHA256_Message_Block_Size; ++i) + context->Message_Block[i] = 0; + context->Length_High = 0; /* and clear length */ + context->Length_Low = 0; + context->Computed = 1; +} + +/* + * SHA224_256PadMessage + * + * Description: + * According to the standard, the message must be padded to the next + * even multiple of 512 bits. The first padding bit must be a '1'. + * The last 64 bits represent the length of the original message. + * All bits in between should be 0. This helper function will pad + * the message according to those rules by filling the + * Message_Block array accordingly. When it returns, it can be + * assumed that the message digest has been computed. + * + * Parameters: + * context: [in/out] + * The context to pad. + * Pad_Byte: [in] + * The last byte to add to the message block before the 0-padding + * and length. 
This will contain the last bits of the message + * followed by another single bit. If the message was an + * exact multiple of 8-bits long, Pad_Byte will be 0x80. + * + * Returns: + * Nothing. + */ +static void SHA224_256PadMessage(_pdfio_sha256_t *context, + uint8_t Pad_Byte) +{ + /* + * Check to see if the current message block is too small to hold + * the initial padding bits and length. If so, we will pad the + * block, process it, and then continue padding into a second + * block. + */ + if (context->Message_Block_Index >= (SHA256_Message_Block_Size-8)) { + context->Message_Block[context->Message_Block_Index++] = Pad_Byte; + while (context->Message_Block_Index < SHA256_Message_Block_Size) + context->Message_Block[context->Message_Block_Index++] = 0; + SHA224_256ProcessMessageBlock(context); + } else + context->Message_Block[context->Message_Block_Index++] = Pad_Byte; + + while (context->Message_Block_Index < (SHA256_Message_Block_Size-8)) + context->Message_Block[context->Message_Block_Index++] = 0; + + /* + * Store the message length as the last 8 octets + */ + context->Message_Block[56] = (uint8_t)(context->Length_High >> 24); + context->Message_Block[57] = (uint8_t)(context->Length_High >> 16); + context->Message_Block[58] = (uint8_t)(context->Length_High >> 8); + context->Message_Block[59] = (uint8_t)(context->Length_High); + context->Message_Block[60] = (uint8_t)(context->Length_Low >> 24); + context->Message_Block[61] = (uint8_t)(context->Length_Low >> 16); + context->Message_Block[62] = (uint8_t)(context->Length_Low >> 8); + context->Message_Block[63] = (uint8_t)(context->Length_Low); + + SHA224_256ProcessMessageBlock(context); +} + +/* + * SHA224_256ResultN + * + * Description: + * This helper function will return the 224-bit or 256-bit message + * digest into the Message_Digest array provided by the caller. + * NOTE: + * The first octet of hash is stored in the element with index 0, + * the last octet of hash in the element with index 27/31. 
+ * + * Parameters: + * context: [in/out] + * The context to use to calculate the SHA hash. + * Message_Digest[ ]: [out] + * Where the digest is returned. + * HashSize: [in] + * The size of the hash, either 28 or 32. + * + * Returns: + * sha Error Code. + */ +static int SHA224_256ResultN(_pdfio_sha256_t *context, + uint8_t Message_Digest[ ], int HashSize) +{ + int i; + + if (!context) return shaNull; + if (!Message_Digest) return shaNull; + if (context->Corrupted) return context->Corrupted; + + if (!context->Computed) + SHA224_256Finalize(context, 0x80); + + for (i = 0; i < HashSize; ++i) + Message_Digest[i] = (uint8_t) + (context->Intermediate_Hash[i>>2] >> 8 * ( 3 - ( i & 0x03 ) )); + + return shaSuccess; +} diff --git a/pdfio-stream.c b/pdfio-stream.c index 395abc7..de77031 100644 --- a/pdfio-stream.c +++ b/pdfio-stream.c @@ -54,6 +54,10 @@ pdfioStreamClose(pdfio_stream_t *st) // I - Stream while ((status = deflate(&st->flate, Z_FINISH)) != Z_STREAM_END) { + size_t bytes = sizeof(st->cbuffer) - st->flate.avail_out, + // Bytes to write + outbytes; // Actual bytes written + if (status < Z_OK && status != Z_BUF_ERROR) { _pdfioFileError(st->pdf, "Flate compression failed: %s", zstrerror(status)); @@ -61,20 +65,50 @@ pdfioStreamClose(pdfio_stream_t *st) // I - Stream goto done; } - if (!_pdfioFileWrite(st->pdf, st->cbuffer, sizeof(st->cbuffer) - st->flate.avail_out)) + if (st->crypto_cb) + { + // Encrypt it first... 
+ outbytes = (st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, bytes & ~15); + } + else + { + // No encryption + outbytes = bytes; + } + + if (!_pdfioFileWrite(st->pdf, st->cbuffer, outbytes)) { ret = false; goto done; } - st->flate.next_out = (Bytef *)st->cbuffer; - st->flate.avail_out = (uInt)sizeof(st->cbuffer); + if (bytes > outbytes) + { + bytes -= outbytes; + memmove(st->cbuffer, st->cbuffer + outbytes, bytes); + } + else + { + bytes = 0; + } + + st->flate.next_out = (Bytef *)st->cbuffer + bytes; + st->flate.avail_out = (uInt)sizeof(st->cbuffer) - bytes; } if (st->flate.avail_out < (uInt)sizeof(st->cbuffer)) { // Write any residuals... - if (!_pdfioFileWrite(st->pdf, st->cbuffer, sizeof(st->cbuffer) - st->flate.avail_out)) + size_t bytes = sizeof(st->cbuffer) - st->flate.avail_out; + // Bytes to write + + if (st->crypto_cb) + { + // Encrypt it first... + bytes = (st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, bytes); + } + + if (!_pdfioFileWrite(st->pdf, st->cbuffer, bytes)) { ret = false; goto done; @@ -83,6 +117,19 @@ pdfioStreamClose(pdfio_stream_t *st) // I - Stream deflateEnd(&st->flate); } + else if (st->crypto_cb && st->bufptr > st->buffer) + { + // Encrypt and flush + uint8_t temp[8192]; // Temporary buffer + size_t outbytes; // Output bytes + + outbytes = (st->crypto_cb)(&st->crypto_ctx, temp, (uint8_t *)st->buffer, (size_t)(st->bufptr - st->buffer)); + if (!_pdfioFileWrite(st->pdf, temp, outbytes)) + { + ret = false; + goto done; + } + } // Save the length of this stream... 
st->obj->stream_length = (size_t)(_pdfioFileTell(st->pdf) - st->obj->stream_offset); @@ -161,6 +208,24 @@ _pdfioStreamCreate( st->obj = obj; st->length_obj = length_obj; st->filter = compression; + st->bufptr = st->buffer; + st->bufend = st->buffer + sizeof(st->buffer); + + if (obj->pdf->encryption) + { + uint8_t iv[64]; // Initialization vector + size_t ivlen = sizeof(iv); // Length of initialization vector, if any + + if ((st->crypto_cb = _pdfioCryptoMakeWriter(st->pdf, obj, &st->crypto_ctx, iv, &ivlen)) == NULL) + { + // TODO: Add error message? + free(st); + return (NULL); + } + + if (ivlen > 0) + _pdfioFileWrite(st->pdf, iv, ivlen); + } if (compression == PDFIO_FILTER_FLATE) { @@ -360,6 +425,24 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object return (NULL); } + if (obj->pdf->encryption) + { + uint8_t iv[64]; // Initialization vector + size_t ivlen; // Length of initialization vector, if any + + ivlen = _pdfioFilePeek(st->pdf, iv, sizeof(iv)); + + if ((st->crypto_cb = _pdfioCryptoMakeReader(st->pdf, obj, &st->crypto_ctx, iv, &ivlen)) == NULL) + { + // TODO: Add error message? + free(st); + return (NULL); + } + + if (ivlen > 0) + _pdfioFileConsume(st->pdf, ivlen); + } + if (decode) { // Try to decode/decompress the contents of this object... @@ -474,6 +557,9 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object return (NULL); } + if (st->crypto_cb) + rbytes = (st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, rbytes); + st->flate.next_in = (Bytef *)st->cbuffer; st->flate.avail_in = (uInt)rbytes; @@ -594,7 +680,7 @@ pdfioStreamPrintf( // -// '()' - Write a single character to a stream. +// 'pdfioStreamPutChar()' - Write a single character to a stream. // bool // O - `true` on success, `false` on failure @@ -724,8 +810,63 @@ pdfioStreamWrite( // Write it... if (st->filter == PDFIO_FILTER_NONE) { - // No filtering so just write it... - return (_pdfioFileWrite(st->pdf, buffer, bytes)); + // No filtering... 
+ if (st->crypto_cb) + { + // Encrypt data before writing... + uint8_t temp[8192]; // Temporary buffer + size_t cbytes, // Current bytes + outbytes; // Output bytes + + bufptr = (const unsigned char *)buffer; + + while (bytes > 0) + { + if (st->bufptr > st->buffer || bytes < 16) + { + // Write through the stream's buffer... + if ((cbytes = bytes) > (st->bufend - st->bufptr)) + cbytes = st->bufend - st->bufptr; + + memcpy(st->bufptr, bufptr, cbytes); + st->bufptr += cbytes; + if (st->bufptr >= st->bufend) + { + // Encrypt and flush + outbytes = (st->crypto_cb)(&st->crypto_ctx, temp, (uint8_t *)st->buffer, sizeof(st->buffer)); + if (!_pdfioFileWrite(st->pdf, temp, outbytes)) + return (false); + + st->bufptr = st->buffer; + } + } + else + { + // Write directly up to sizeof(temp) bytes... + if ((cbytes = bytes) > sizeof(temp)) + cbytes = sizeof(temp); + if (cbytes & 15) + { + // AES has a 16-byte block size, so save the last few bytes... + cbytes &= ~15; + } + + outbytes = (st->crypto_cb)(&st->crypto_ctx, temp, bufptr, cbytes); + if (!_pdfioFileWrite(st->pdf, temp, outbytes)) + return (false); + } + + bytes -= cbytes; + bufptr += cbytes; + } + + return (true); + } + else + { + // Write unencrypted... + return (_pdfioFileWrite(st->pdf, buffer, bytes)); + } } pbline = st->pbsize - 1; @@ -862,8 +1003,13 @@ stream_read(pdfio_stream_t *st, // I - Stream rbytes = _pdfioFileRead(st->pdf, buffer, bytes); if (rbytes > 0) + { st->remaining -= (size_t)rbytes; + if (st->crypto_cb) + (st->crypto_cb)(&st->crypto_ctx, (uint8_t *)buffer, (uint8_t *)buffer, rbytes); + } + return (rbytes); } else if (st->filter == PDFIO_FILTER_FLATE) @@ -887,6 +1033,9 @@ stream_read(pdfio_stream_t *st, // I - Stream if (rbytes <= 0) return (-1); // End of file... 
+ if (st->crypto_cb) + rbytes = (st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, rbytes); + st->remaining -= (size_t)rbytes; st->flate.next_in = (Bytef *)st->cbuffer; st->flate.avail_in = (uInt)rbytes; @@ -940,6 +1089,9 @@ stream_read(pdfio_stream_t *st, // I - Stream if (rbytes <= 0) return (-1); // End of file... + if (st->crypto_cb) + rbytes = (st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, rbytes); + st->remaining -= (size_t)rbytes; st->flate.next_in = (Bytef *)st->cbuffer; st->flate.avail_in = (uInt)rbytes; @@ -1004,6 +1156,9 @@ stream_read(pdfio_stream_t *st, // I - Stream if (rbytes <= 0) return (-1); // End of file... + if (st->crypto_cb) + rbytes = (st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, rbytes); + st->remaining -= (size_t)rbytes; st->flate.next_in = (Bytef *)st->cbuffer; st->flate.avail_in = (uInt)rbytes; @@ -1100,11 +1255,36 @@ stream_write(pdfio_stream_t *st, // I - Stream if (st->flate.avail_out < (sizeof(st->cbuffer) / 8)) { // Flush the compression buffer... - if (!_pdfioFileWrite(st->pdf, st->cbuffer, sizeof(st->cbuffer) - st->flate.avail_out)) + size_t cbytes = sizeof(st->cbuffer) - st->flate.avail_out, + outbytes; + + if (st->crypto_cb) + { + // Encrypt it first... + outbytes = (st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, cbytes & ~15); + } + else + { + outbytes = cbytes; + } + +// fprintf(stderr, "stream_write: bytes=%u, outbytes=%u\n", (unsigned)bytes, (unsigned)outbytes); + + if (!_pdfioFileWrite(st->pdf, st->cbuffer, outbytes)) return (false); - st->flate.next_out = (Bytef *)st->cbuffer; - st->flate.avail_out = sizeof(st->cbuffer); + if (cbytes > outbytes) + { + cbytes -= outbytes; + memmove(st->cbuffer, st->cbuffer + outbytes, cbytes); + } + else + { + cbytes = 0; + } + + st->flate.next_out = (Bytef *)st->cbuffer + cbytes; + st->flate.avail_out = sizeof(st->cbuffer) - cbytes; } // Deflate what we can this time... 
diff --git a/pdfio-token.c b/pdfio-token.c index 35ce9d1..6cb79c8 100644 --- a/pdfio-token.c +++ b/pdfio-token.c @@ -454,7 +454,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack return (false); } - while ((ch = get_char(tb)) != EOF && ch != '>') + do { if (isxdigit(ch)) { @@ -476,6 +476,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack return (false); } } + while ((ch = get_char(tb)) != EOF && ch != '>'); if (ch == EOF) { diff --git a/pdfio-value.c b/pdfio-value.c index 784aa61..a4651be 100644 --- a/pdfio-value.c +++ b/pdfio-value.c @@ -194,6 +194,7 @@ _pdfioValueDelete(_pdfio_value_t *v) // I - Value _pdfio_value_t * // O - Value or `NULL` on error/EOF _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file + pdfio_obj_t *obj, // I - Object, if any _pdfio_token_t *tb, // I - Token buffer/stack _pdfio_value_t *v) // I - Value { @@ -216,7 +217,8 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file #endif // DEBUG - PDFIO_DEBUG("_pdfioValueRead(pdf=%p, v=%p)\n", pdf, v); + PDFIO_DEBUG("_pdfioValueRead(pdf=%p, obj=%p, v=%p)\n", pdf, obj, v); + (void)obj; // TODO: Implement decryption if (!_pdfioTokenGet(tb, token, sizeof(token))) return (NULL); @@ -225,14 +227,14 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file { // Start of array v->type = PDFIO_VALTYPE_ARRAY; - if ((v->value.array = _pdfioArrayRead(pdf, tb)) == NULL) + if ((v->value.array = _pdfioArrayRead(pdf, obj, tb)) == NULL) return (NULL); } else if (!strcmp(token, "<<")) { // Start of dictionary v->type = PDFIO_VALTYPE_DICT; - if ((v->value.dict = _pdfioDictRead(pdf, tb)) == NULL) + if ((v->value.dict = _pdfioDictRead(pdf, obj, tb)) == NULL) return (NULL); } else if (!strncmp(token, "(D:", 3)) @@ -474,6 +476,7 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file bool // O - `true` on success, `false` on failure _pdfioValueWrite(pdfio_file_t *pdf, // I - PDF file + pdfio_obj_t *obj, // I - Object, if any _pdfio_value_t *v, // I - Value off_t *length)// O - Offset to /Length 
value, if any { @@ -483,23 +486,47 @@ _pdfioValueWrite(pdfio_file_t *pdf, // I - PDF file return (false); case PDFIO_VALTYPE_ARRAY : - return (_pdfioArrayWrite(v->value.array)); + return (_pdfioArrayWrite(v->value.array, obj)); case PDFIO_VALTYPE_BINARY : { - size_t i; // Looping var - unsigned char *dataptr; // Pointer into data + size_t databytes; // Bytes to write + uint8_t temp[32768], // Temporary buffer for encryption + *dataptr; // Pointer into data + + if (obj && pdf->encryption) + { + // Write encrypted string... + _pdfio_crypto_ctx_t ctx; // Encryption context + _pdfio_crypto_cb_t cb; // Encryption callback + size_t ivlen; // Number of initialization vector bytes + + if (v->value.binary.datalen > (sizeof(temp) - 32)) + { + _pdfioFileError(pdf, "Unable to write encrypted binary string - too long."); + return (false); + } + + cb = _pdfioCryptoMakeWriter(pdf, obj, &ctx, temp, &ivlen); + databytes = (cb)(&ctx, temp + ivlen, v->value.binary.data, v->value.binary.datalen) + ivlen; + dataptr = temp; + } + else + { + dataptr = v->value.binary.data; + databytes = v->value.binary.datalen; + } if (!_pdfioFilePuts(pdf, "<")) return (false); - for (i = v->value.binary.datalen, dataptr = v->value.binary.data; i > 1; i -= 2, dataptr += 2) + for (; databytes > 1; databytes -= 2, dataptr += 2) { if (!_pdfioFilePrintf(pdf, "%02X%02X", dataptr[0], dataptr[1])) return (false); } - if (i > 0) + if (databytes > 0) return (_pdfioFilePrintf(pdf, "%02X>", dataptr[0])); else return (_pdfioFilePuts(pdf, ">")); @@ -514,6 +541,7 @@ _pdfioValueWrite(pdfio_file_t *pdf, // I - PDF file case PDFIO_VALTYPE_DATE : { struct tm date; // Date values + char datestr[32]; // Formatted date value #ifdef _WIN32 gmtime_s(&date, &v->value.date); @@ -521,11 +549,45 @@ _pdfioValueWrite(pdfio_file_t *pdf, // I - PDF file gmtime_r(&v->value.date, &date); #endif // _WIN32 - return (_pdfioFilePrintf(pdf, "(D:%04d%02d%02d%02d%02d%02dZ)", date.tm_year + 1900, date.tm_mon + 1, date.tm_mday, date.tm_hour, 
date.tm_min, date.tm_sec)); + snprintf(datestr, sizeof(datestr), "D:%04d%02d%02d%02d%02d%02dZ", date.tm_year + 1900, date.tm_mon + 1, date.tm_mday, date.tm_hour, date.tm_min, date.tm_sec); + + if (obj && pdf->encryption) + { + // Write encrypted string... + uint8_t temp[32768], // Encrypted bytes + *tempptr; // Pointer into encrypted bytes + _pdfio_crypto_ctx_t ctx; // Encryption context + _pdfio_crypto_cb_t cb; // Encryption callback + size_t len = strlen(datestr), + // Length of value + ivlen, // Number of initialization vector bytes + tempbytes; // Number of output bytes + + cb = _pdfioCryptoMakeWriter(pdf, obj, &ctx, temp, &ivlen); + tempbytes = (cb)(&ctx, temp + ivlen, (const uint8_t *)datestr, len) + ivlen; + + if (!_pdfioFilePuts(pdf, "<")) + return (false); + + for (tempptr = temp; tempbytes > 1; tempbytes -= 2, tempptr += 2) + { + if (!_pdfioFilePrintf(pdf, "%02X%02X", tempptr[0], tempptr[1])) + return (false); + } + + if (tempbytes > 0) + return (_pdfioFilePrintf(pdf, "%02X>", *tempptr)); + else + return (_pdfioFilePuts(pdf, ">")); + } + else + { + return (_pdfioFilePrintf(pdf, "(%s)", datestr)); + } } case PDFIO_VALTYPE_DICT : - return (_pdfioDictWrite(v->value.dict, length)); + return (_pdfioDictWrite(v->value.dict, obj, length)); case PDFIO_VALTYPE_INDIRECT : return (_pdfioFilePrintf(pdf, " %lu %u R", (unsigned long)v->value.indirect.number, v->value.indirect.generation)); @@ -540,7 +602,44 @@ _pdfioValueWrite(pdfio_file_t *pdf, // I - PDF file return (_pdfioFilePrintf(pdf, " %g", v->value.number)); case PDFIO_VALTYPE_STRING : + if (obj && pdf->encryption) { + // Write encrypted string... 
+ uint8_t temp[32768], // Encrypted bytes + *tempptr; // Pointer into encrypted bytes + _pdfio_crypto_ctx_t ctx; // Encryption context + _pdfio_crypto_cb_t cb; // Encryption callback + size_t len = strlen(v->value.string), + // Length of value + ivlen, // Number of initialization vector bytes + tempbytes; // Number of output bytes + + if (len > (sizeof(temp) - 32)) + { + _pdfioFileError(pdf, "Unable to write encrypted string - too long."); + return (false); + } + + cb = _pdfioCryptoMakeWriter(pdf, obj, &ctx, temp, &ivlen); + tempbytes = (cb)(&ctx, temp + ivlen, (const uint8_t *)v->value.string, len) + ivlen; + + if (!_pdfioFilePuts(pdf, "<")) + return (false); + + for (tempptr = temp; tempbytes > 1; tempbytes -= 2, tempptr += 2) + { + if (!_pdfioFilePrintf(pdf, "%02X%02X", tempptr[0], tempptr[1])) + return (false); + } + + if (tempbytes > 0) + return (_pdfioFilePrintf(pdf, "%02X>", *tempptr)); + else + return (_pdfioFilePuts(pdf, ">")); + } + else + { + // Write unencrypted string... const char *start, // Start of fragment *end; // End of fragment diff --git a/pdfio.h b/pdfio.h index 4bf9082..35951d9 100644 --- a/pdfio.h +++ b/pdfio.h @@ -59,6 +59,14 @@ typedef struct _pdfio_file_s pdfio_file_t; // PDF file typedef bool (*pdfio_error_cb_t)(pdfio_file_t *pdf, const char *message, void *data); // Error callback +typedef enum pdfio_encryption_e // PDF encryption modes +{ + PDFIO_ENCRYPTION_NONE = 0, // No encryption + PDFIO_ENCRYPTION_RC4_40, // 40-bit RC4 encryption (PDF 1.3) + PDFIO_ENCRYPTION_RC4_128, // 128-bit RC4 encryption (PDF 1.4) + PDFIO_ENCRYPTION_AES_128, // 128-bit AES encryption (PDF 1.6) + PDFIO_ENCRYPTION_AES_256 // 256-bit AES encryption (PDF 2.0) +} pdfio_encryption_t; typedef enum pdfio_filter_e // Compression/decompression filters for streams { PDFIO_FILTER_NONE, // No filter @@ -78,6 +86,20 @@ typedef ssize_t (*pdfio_output_cb_t)(void *ctx, const void *data, size_t datalen // Output callback for pdfioFileCreateOutput typedef const char 
*(*pdfio_password_cb_t)(void *data, const char *filename); // Password callback for pdfioFileOpen +enum pdfio_permission_e // PDF permission bits +{ + PDFIO_PERMISSION_NONE = 0, // No permissions + PDFIO_PERMISSION_PRINT = 0x0004, // PDF allows printing + PDFIO_PERMISSION_MODIFY = 0x0008, // PDF allows modification + PDFIO_PERMISSION_COPY = 0x0010, // PDF allows copying + PDFIO_PERMISSION_ANNOTATE = 0x0020, // PDF allows annotation + PDFIO_PERMISSION_FORMS = 0x0100, // PDF allows filling in forms + PDFIO_PERMISSION_READING = 0x0200, // PDF allows screen reading/accessibility (deprecated in PDF 2.0) + PDFIO_PERMISSION_ASSEMBLE = 0x0400, // PDF allows assembly (insert, delete, or rotate pages, add document outlines and thumbnails) + PDFIO_PERMISSION_PRINT_HIGH = 0x0800, // PDF allows high quality printing + PDFIO_PERMISSION_ALL = ~0 // All permissions +}; +typedef int pdfio_permission_t; // PDF permission bitfield typedef struct pdfio_rect_s // PDF rectangle { double x1; // Lower-left X coordinate @@ -174,6 +196,7 @@ extern size_t pdfioFileGetNumObjs(pdfio_file_t *pdf) _PDFIO_PUBLIC; extern size_t pdfioFileGetNumPages(pdfio_file_t *pdf) _PDFIO_PUBLIC; extern pdfio_obj_t *pdfioFileGetObj(pdfio_file_t *pdf, size_t n) _PDFIO_PUBLIC; extern pdfio_obj_t *pdfioFileGetPage(pdfio_file_t *pdf, size_t n) _PDFIO_PUBLIC; +extern pdfio_permission_t pdfioFileGetPermissions(pdfio_file_t *pdf, pdfio_encryption_t *encryption) _PDFIO_PUBLIC; extern const char *pdfioFileGetProducer(pdfio_file_t *pdf) _PDFIO_PUBLIC; extern const char *pdfioFileGetSubject(pdfio_file_t *pdf) _PDFIO_PUBLIC; extern const char *pdfioFileGetTitle(pdfio_file_t *pdf) _PDFIO_PUBLIC; @@ -183,6 +206,7 @@ extern void pdfioFileSetAuthor(pdfio_file_t *pdf, const char *value) _PDFIO_PUB extern void pdfioFileSetCreationDate(pdfio_file_t *pdf, time_t value) _PDFIO_PUBLIC; extern void pdfioFileSetCreator(pdfio_file_t *pdf, const char *value) _PDFIO_PUBLIC; extern void pdfioFileSetKeywords(pdfio_file_t *pdf, const char 
*value) _PDFIO_PUBLIC; +extern bool pdfioFileSetPermissions(pdfio_file_t *pdf, pdfio_permission_t permissions, pdfio_encryption_t encryption, const char *owner_password, const char *user_password) _PDFIO_PUBLIC; extern void pdfioFileSetSubject(pdfio_file_t *pdf, const char *value) _PDFIO_PUBLIC; extern void pdfioFileSetTitle(pdfio_file_t *pdf, const char *value) _PDFIO_PUBLIC; diff --git a/pdfio.xcodeproj/project.pbxproj b/pdfio.xcodeproj/project.pbxproj index a80d998..fa61cf4 100644 --- a/pdfio.xcodeproj/project.pbxproj +++ b/pdfio.xcodeproj/project.pbxproj @@ -25,7 +25,12 @@ 279E1035267D043B00D3A349 /* ttf.h in Headers */ = {isa = PBXBuildFile; fileRef = 279E1033267D043B00D3A349 /* ttf.h */; }; 279E1036267D043B00D3A349 /* ttf.c in Sources */ = {isa = PBXBuildFile; fileRef = 279E1034267D043B00D3A349 /* ttf.c */; }; 279E103B267D04E600D3A349 /* libz.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 279E103A267D04E600D3A349 /* libz.tbd */; }; + 27CF90442711DFFE00E50FE4 /* pdfio-aes.c in Sources */ = {isa = PBXBuildFile; fileRef = 27CF90432711DFFE00E50FE4 /* pdfio-aes.c */; }; 27ECBD8926419DAB0025312A /* libpdfio.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 273440B0263D6FE200FBFD63 /* libpdfio.a */; }; + 27F2F0602710BE92008ECD36 /* pdfio-md5.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05D2710BE92008ECD36 /* pdfio-md5.c */; }; + 27F2F0612710BE92008ECD36 /* pdfio-rc4.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */; }; + 27F2F0622710BE92008ECD36 /* pdfio-crypto.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */; }; + 27F2F0642711243D008ECD36 /* pdfio-sha256.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F0632711243D008ECD36 /* pdfio-sha256.c */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -79,6 +84,11 @@ 279E1033267D043B00D3A349 /* ttf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; 
path = ttf.h; sourceTree = ""; }; 279E1034267D043B00D3A349 /* ttf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ttf.c; sourceTree = ""; }; 279E103A267D04E600D3A349 /* libz.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libz.tbd; path = usr/lib/libz.tbd; sourceTree = SDKROOT; }; + 27CF90432711DFFE00E50FE4 /* pdfio-aes.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-aes.c"; sourceTree = ""; }; + 27F2F05D2710BE92008ECD36 /* pdfio-md5.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-md5.c"; sourceTree = ""; }; + 27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-rc4.c"; sourceTree = ""; }; + 27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-crypto.c"; sourceTree = ""; }; + 27F2F0632711243D008ECD36 /* pdfio-sha256.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-sha256.c"; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -150,19 +160,24 @@ 279E1038267D045C00D3A349 /* Library */ = { isa = PBXGroup; children = ( + 27CF90432711DFFE00E50FE4 /* pdfio-aes.c */, 273440BA263D727800FBFD63 /* pdfio-array.c */, 273440BB263D727800FBFD63 /* pdfio-common.c */, 271EA703265B2B1000ACDD39 /* pdfio-content.c */, + 27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */, 273440BE263D727800FBFD63 /* pdfio-dict.c */, 273440BD263D727800FBFD63 /* pdfio-file.c */, + 27F2F05D2710BE92008ECD36 /* pdfio-md5.c */, 273440BC263D727800FBFD63 /* pdfio-object.c */, 273440C2263D727800FBFD63 /* pdfio-page.c */, + 27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */, + 27F2F0632711243D008ECD36 /* pdfio-sha256.c */, 273440BF263D727800FBFD63 /* 
pdfio-stream.c */, 273440B9263D727800FBFD63 /* pdfio-string.c */, 273440E3263DD7EA00FBFD63 /* pdfio-token.c */, 273440C0263D727800FBFD63 /* pdfio-value.c */, - 279E1033267D043B00D3A349 /* ttf.h */, 279E1034267D043B00D3A349 /* ttf.c */, + 279E1033267D043B00D3A349 /* ttf.h */, ); name = Library; sourceTree = ""; @@ -281,11 +296,16 @@ 273440CB263D727800FBFD63 /* pdfio-value.c in Sources */, 273440CA263D727800FBFD63 /* pdfio-stream.c in Sources */, 273440CD263D727800FBFD63 /* pdfio-page.c in Sources */, + 27F2F0622710BE92008ECD36 /* pdfio-crypto.c in Sources */, + 27F2F0642711243D008ECD36 /* pdfio-sha256.c in Sources */, 273440C5263D727800FBFD63 /* pdfio-array.c in Sources */, 273440E4263DD7EA00FBFD63 /* pdfio-token.c in Sources */, 273440C7263D727800FBFD63 /* pdfio-object.c in Sources */, + 27F2F0602710BE92008ECD36 /* pdfio-md5.c in Sources */, 273440C4263D727800FBFD63 /* pdfio-string.c in Sources */, + 27CF90442711DFFE00E50FE4 /* pdfio-aes.c in Sources */, 271EA705265B2B1000ACDD39 /* pdfio-content.c in Sources */, + 27F2F0612710BE92008ECD36 /* pdfio-rc4.c in Sources */, 273440C6263D727800FBFD63 /* pdfio-common.c in Sources */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/pdfio1.def b/pdfio1.def index 70a1eed..f68192a 100644 --- a/pdfio1.def +++ b/pdfio1.def @@ -1,9 +1,67 @@ LIBRARY pdfio1 VERSION 1.0 EXPORTS +_pdfioArrayDebug +_pdfioArrayDelete +_pdfioArrayGetValue +_pdfioArrayRead +_pdfioArrayWrite +_pdfioCryptoAESDecrypt +_pdfioCryptoAESEncrypt +_pdfioCryptoAESInit +_pdfioCryptoLock +_pdfioCryptoMD5Append +_pdfioCryptoMD5Finish +_pdfioCryptoMD5Init +_pdfioCryptoMakeRandom +_pdfioCryptoMakeReader +_pdfioCryptoMakeWriter +_pdfioCryptoRC4Crypt +_pdfioCryptoRC4Init +_pdfioCryptoSHA256Append +_pdfioCryptoSHA256Finish +_pdfioCryptoSHA256Init +_pdfioCryptoUnlock +_pdfioDictClear +_pdfioDictDebug +_pdfioDictDelete +_pdfioDictGetValue +_pdfioDictRead +_pdfioDictSetValue +_pdfioDictWrite +_pdfioFileAddMappedObj +_pdfioFileAddPage +_pdfioFileConsume 
+_pdfioFileCreateObj +_pdfioFileDefaultError +_pdfioFileError +_pdfioFileFindMappedObj +_pdfioFileFlush +_pdfioFileGetChar +_pdfioFileGets +_pdfioFilePeek +_pdfioFilePrintf +_pdfioFilePuts +_pdfioFileRead +_pdfioFileSeek +_pdfioFileTell +_pdfioFileWrite +_pdfioObjDelete +_pdfioObjLoad +_pdfioStreamCreate +_pdfioStreamOpen +_pdfioStringIsAllocated +_pdfioTokenClear +_pdfioTokenFlush +_pdfioTokenGet _pdfioTokenInit +_pdfioTokenPush +_pdfioTokenRead +_pdfioValueCopy _pdfioValueDebug +_pdfioValueDelete _pdfioValueRead +_pdfioValueWrite pdfioArrayAppendArray pdfioArrayAppendBinary pdfioArrayAppendBoolean @@ -130,6 +188,7 @@ pdfioFileGetNumObjs pdfioFileGetNumPages pdfioFileGetObj pdfioFileGetPage +pdfioFileGetPermissions pdfioFileGetProducer pdfioFileGetSubject pdfioFileGetTitle @@ -139,6 +198,7 @@ pdfioFileSetAuthor pdfioFileSetCreationDate pdfioFileSetCreator pdfioFileSetKeywords +pdfioFileSetPermissions pdfioFileSetSubject pdfioFileSetTitle pdfioImageGetBytesPerLine diff --git a/testpdfio.c b/testpdfio.c index 4569cac..79e9754 100644 --- a/testpdfio.c +++ b/testpdfio.c @@ -29,11 +29,13 @@ // Local functions... // +static int do_crypto_tests(void); static int do_test_file(const char *filename, int objnum, bool verbose); static int do_unit_tests(void); static int draw_image(pdfio_stream_t *st, const char *name, double x, double y, double w, double h, const char *label); static bool error_cb(pdfio_file_t *pdf, const char *message, bool *error); static ssize_t output_cb(int *fd, const void *buffer, size_t bytes); +static const char *password_cb(void *data, const char *filename); static int read_unit_file(const char *filename, size_t num_pages, size_t first_image, bool is_output); static ssize_t token_consume_cb(const char **s, size_t bytes); static ssize_t token_peek_cb(const char **s, char *buffer, size_t bytes); @@ -111,6 +113,249 @@ main(int argc, // I - Number of command-line arguments } +// +// 'do_crypto_tests()' - Test the various cryptographic functions in PDFio. 
+// + +static int // O - Exit status +do_crypto_tests(void) +{ + int ret = 0; // Return value + size_t i; // Looping var + _pdfio_aes_t aes; // AES context + _pdfio_md5_t md5; // MD5 context + _pdfio_rc4_t rc4; // RC4 context + _pdfio_sha256_t sha256; // SHA256 context + uint8_t key[32], // Encryption/decryption key + iv[32], // Initialization vector + buffer[256], // Output buffer + buffer2[256]; // Second output buffer + const char *prefix, *suffix; // Prefix/suffix strings + static const char *text = "Hello, World! Now is the time for all good men to come to the aid of their country.\n"; + // Test text + static uint8_t aes128key[] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }; + static uint8_t aes128rounds[] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c, 0xa0, 0xfa, 0xfe, 0x17, 0x88, 0x54, 0x2c, 0xb1, 0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05, 0xf2, 0xc2, 0x95, 0xf2, 0x7a, 0x96, 0xb9, 0x43, 0x59, 0x35, 0x80, 0x7a, 0x73, 0x59, 0xf6, 0x7f, 0x3d, 0x80, 0x47, 0x7d, 0x47, 0x16, 0xfe, 0x3e, 0x1e, 0x23, 0x7e, 0x44, 0x6d, 0x7a, 0x88, 0x3b, 0xef, 0x44, 0xa5, 0x41, 0xa8, 0x52, 0x5b, 0x7f, 0xb6, 0x71, 0x25, 0x3b, 0xdb, 0x0b, 0xad, 0x00, 0xd4, 0xd1, 0xc6, 0xf8, 0x7c, 0x83, 0x9d, 0x87, 0xca, 0xf2, 0xb8, 0xbc, 0x11, 0xf9, 0x15, 0xbc, 0x6d, 0x88, 0xa3, 0x7a, 0x11, 0x0b, 0x3e, 0xfd, 0xdb, 0xf9, 0x86, 0x41, 0xca, 0x00, 0x93, 0xfd, 0x4e, 0x54, 0xf7, 0x0e, 0x5f, 0x5f, 0xc9, 0xf3, 0x84, 0xa6, 0x4f, 0xb2, 0x4e, 0xa6, 0xdc, 0x4f, 0xea, 0xd2, 0x73, 0x21, 0xb5, 0x8d, 0xba, 0xd2, 0x31, 0x2b, 0xf5, 0x60, 0x7f, 0x8d, 0x29, 0x2f, 0xac, 0x77, 0x66, 0xf3, 0x19, 0xfa, 0xdc, 0x21, 0x28, 0xd1, 0x29, 0x41, 0x57, 0x5c, 0x00, 0x6e, 0xd0, 0x14, 0xf9, 0xa8, 0xc9, 0xee, 0x25, 0x89, 0xe1, 0x3f, 0x0c, 0xc8, 0xb6, 0x63, 0x0c, 0xa6 }; + // FIPS-197 example key expansion + static uint8_t aes128text[] = { 0xfb, 0x77, 0xac, 0xce, 0x3c, 0x95, 0x40, 0xcf, 0xca, 0xc8, 0x26, 0xbf, 0xc0, 0x69, 0x73, 
0x3c, 0x01, 0xfd, 0x72, 0x01, 0xeb, 0x4d, 0x6f, 0xf7, 0xb4, 0x72, 0x6d, 0x84, 0x69, 0x9f, 0x89, 0xab, 0xe6, 0x2b, 0x9a, 0x9a, 0x6e, 0xc1, 0x61, 0xd7, 0x9d, 0x83, 0x2d, 0x58, 0x55, 0xa7, 0x58, 0x50, 0x00, 0xad, 0x19, 0x7b, 0xee, 0x6a, 0x36, 0x6f, 0xd1, 0xa7, 0xa4, 0x6b, 0xc5, 0x78, 0x9a, 0x18, 0x05, 0xf0, 0x2c, 0xd4, 0x60, 0x25, 0xe0, 0xa7, 0xb1, 0x36, 0xdb, 0x18, 0xd3, 0xf7, 0x59, 0x29, 0x22, 0xec, 0x25, 0x77, 0x0d, 0x9e, 0x5a, 0x01, 0xcc, 0xf6, 0x29, 0xc2, 0x08, 0xc2, 0xfc, 0x4f }; + // Expected AES-128 CBC result + static uint8_t aes256text[] = { 0x2b, 0x94, 0x45, 0x9e, 0xed, 0xa0, 0x89, 0x7b, 0x35, 0x4e, 0xde, 0x06, 0x00, 0x4d, 0xda, 0x6b, 0x61, 0x2f, 0xb9, 0x06, 0xd5, 0x0f, 0x22, 0xed, 0xd2, 0xe3, 0x6b, 0x39, 0x5a, 0xa1, 0xe3, 0x7d, 0xa1, 0xcc, 0xd4, 0x0b, 0x6b, 0xa4, 0xff, 0xe9, 0x9c, 0x89, 0x0c, 0xc7, 0x95, 0x47, 0x19, 0x9b, 0x06, 0xdc, 0xc8, 0x7c, 0x5c, 0x5d, 0x56, 0x99, 0x1e, 0x90, 0x7d, 0x99, 0xc5, 0x7b, 0xc4, 0xe4, 0xfb, 0x02, 0x15, 0x50, 0x23, 0x2a, 0xe4, 0xc1, 0x20, 0xfd, 0xf4, 0x03, 0xfe, 0x6f, 0x15, 0x48, 0xd8, 0x62, 0x36, 0x98, 0x2a, 0x62, 0xf5, 0x2c, 0xa6, 0xfa, 0x7a, 0x43, 0x53, 0xcd, 0xad, 0x18 }; + // Expected AES-256 CBC result + static uint8_t md5text[16] = { 0x74, 0x0c, 0x2c, 0xea, 0xe1, 0xab, 0x06, 0x7c, 0xdb, 0x1d, 0x49, 0x1d, 0x2d, 0x66, 0xf2, 0x93 }; + // Expected MD5 hash result + static uint8_t rc4text[] = { 0xd2, 0xa2, 0xa0, 0xf6, 0x0f, 0xb1, 0x3e, 0xa0, 0xdd, 0xe1, 0x44, 0xfd, 0xec, 0xc4, 0x55, 0xf8, 0x25, 0x68, 0xad, 0xe6, 0xb0, 0x60, 0x7a, 0x0f, 0x4e, 0xfe, 0xed, 0x9c, 0x78, 0x3a, 0xf8, 0x73, 0x79, 0xbd, 0x82, 0x88, 0x39, 0x01, 0xc7, 0xd0, 0x34, 0xfe, 0x40, 0x16, 0x93, 0x5a, 0xec, 0x81, 0xda, 0x34, 0xdf, 0x5b, 0xd1, 0x47, 0x2c, 0xfa, 0xe0, 0x13, 0xc5, 0xe2, 0xb0, 0x57, 0x5c, 0x17, 0x62, 0xaa, 0x83, 0x1c, 0x4f, 0xa0, 0x0a, 0xed, 0x6c, 0x42, 0x41, 0x8a, 0x45, 0x03, 0xb8, 0x72, 0xa8, 0x99, 0xd7, 0x06 }; + // Expected RC4 result + static uint8_t sha256text[32] = { 0x19, 0x71, 0x9b, 0xf0, 0xc6, 0xd8, 0x34, 0xc9, 0x6e, 0x8a, 0x56, 0xcc, 
0x34, 0x45, 0xb7, 0x1d, 0x5b, 0x74, 0x9c, 0x52, 0x40, 0xcd, 0x30, 0xa2, 0xc2, 0x84, 0x53, 0x83, 0x16, 0xf8, 0x1a, 0xbb }; + // Expected SHA-256 hash result + + + fputs("_pdfioAESInit(128-bit sample key): ", stdout); + _pdfioCryptoAESInit(&aes, aes128key, sizeof(aes128key), NULL); + if (!memcmp(aes128rounds, aes.round_key, sizeof(aes128rounds))) + { + puts("PASS"); + } + else + { + for (i = 0; i < (sizeof(aes128rounds) - 4); i ++) + { + if (aes.round_key[i] != aes128rounds[i]) + break; + } + + prefix = i > 0 ? "..." : ""; + suffix = i < (sizeof(aes128rounds) - 4) ? "..." : ""; + + printf("FAIL (got '%s%02X%02X%02X%02X%s', expected '%s%02X%02X%02X%02X%s')\n", prefix, aes.round_key[i], aes.round_key[i + 1], aes.round_key[i + 2], aes.round_key[i + 3], suffix, prefix, aes128rounds[i], aes128rounds[i + 1], aes128rounds[i + 2], aes128rounds[i + 3], suffix); + ret = 1; + } + + fputs("_pdfioAESInit/Encrypt(128-bit CBC): ", stdout); + for (i = 0; i < 16; i ++) + { + key[i] = (uint8_t)i + 1; + iv[i] = (uint8_t)(0xff - i); + } + + _pdfioCryptoAESInit(&aes, key, 16, iv); + _pdfioCryptoAESEncrypt(&aes, buffer, (uint8_t *)text, strlen(text)); + + if (!memcmp(aes128text, buffer, sizeof(aes128text))) + { + puts("PASS"); + } + else + { + for (i = 0; i < (sizeof(aes128text) - 4); i ++) + { + if (buffer[i] != aes128text[i]) + break; + } + + prefix = i > 0 ? "..." : ""; + suffix = i < (sizeof(aes128text) - 4) ? "..." 
: ""; + + printf("FAIL (got '%s%02X%02X%02X%02X%s', expected '%s%02X%02X%02X%02X%s')\n", prefix, buffer[i], buffer[i + 1], buffer[i + 2], buffer[i + 3], suffix, prefix, aes128text[i], aes128text[i + 1], aes128text[i + 2], aes128text[i + 3], suffix); + ret = 1; + } + + fputs("_pdfioAESInit/Decrypt(128-bit CBC): ", stdout); + _pdfioCryptoAESInit(&aes, key, 16, iv); + _pdfioCryptoAESDecrypt(&aes, buffer2, buffer, sizeof(aes128text)); + + if (!memcmp(buffer2, text, strlen(text))) + { + puts("PASS"); + } + else + { + for (i = 0; text[i + 4]; i ++) + { + if (buffer2[i] != text[i]) + break; + } + + prefix = i > 0 ? "..." : ""; + suffix = text[i + 4] ? "..." : ""; + + printf("FAIL (got '%s%02X%02X%02X%02X%s', expected '%s%02X%02X%02X%02X%s')\n", prefix, buffer2[i], buffer2[i + 1], buffer2[i + 2], buffer2[i + 3], suffix, prefix, text[i], text[i + 1], text[i + 2], text[i + 3], suffix); + ret = 1; + } + + fputs("_pdfioAESInit/Encrypt(256-bit CBC): ", stdout); + for (i = 0; i < 32; i ++) + { + key[i] = (uint8_t)i + 1; + iv[i] = (uint8_t)(0xff - i); + } + + _pdfioCryptoAESInit(&aes, key, 32, iv); + _pdfioCryptoAESEncrypt(&aes, buffer, (uint8_t *)text, strlen(text)); + + if (!memcmp(aes256text, buffer, sizeof(aes256text))) + { + puts("PASS"); + } + else + { + for (i = 0; i < (sizeof(aes256text) - 4); i ++) + { + if (buffer[i] != aes256text[i]) + break; + } + + prefix = i > 0 ? "..." : ""; + suffix = i < (sizeof(aes256text) - 4) ? "..." 
: ""; + + printf("FAIL (got '%s%02X%02X%02X%02X%s', expected '%s%02X%02X%02X%02X%s')\n", prefix, buffer[i], buffer[i + 1], buffer[i + 2], buffer[i + 3], suffix, prefix, aes256text[i], aes256text[i + 1], aes256text[i + 2], aes256text[i + 3], suffix); + ret = 1; + } + + fputs("_pdfioAESInit/Decrypt(256-bit CBC): ", stdout); + _pdfioCryptoAESInit(&aes, key, 32, iv); + _pdfioCryptoAESDecrypt(&aes, buffer2, buffer, sizeof(aes256text)); + + if (!memcmp(buffer2, text, strlen(text))) + { + puts("PASS"); + } + else + { + for (i = 0; text[i + 4]; i ++) + { + if (buffer2[i] != text[i]) + break; + } + + prefix = i > 0 ? "..." : ""; + suffix = text[i + 4] ? "..." : ""; + + printf("FAIL (got '%s%02X%02X%02X%02X%s', expected '%s%02X%02X%02X%02X%s')\n", prefix, buffer2[i], buffer2[i + 1], buffer2[i + 2], buffer2[i + 3], suffix, prefix, text[i], text[i + 1], text[i + 2], text[i + 3], suffix); + ret = 1; + } + + fputs("_pdfioMD5Init/Append/Finish: ", stdout); + _pdfioCryptoMD5Init(&md5); + _pdfioCryptoMD5Append(&md5, (uint8_t *)text, strlen(text)); + _pdfioCryptoMD5Finish(&md5, buffer); + + if (!memcmp(md5text, buffer, sizeof(md5text))) + { + puts("PASS"); + } + else + { + printf("FAIL (got '%02X%02X%02X%02X...%02X%02X%02X%02X', expected '%02X%02X%02X%02X...%02X%02X%02X%02X')\n", buffer[0], buffer[1], buffer[2], buffer[3], buffer[12], buffer[13], buffer[14], buffer[15], md5text[0], md5text[1], md5text[2], md5text[3], md5text[12], md5text[13], md5text[14], md5text[15]); + ret = 1; + } + + fputs("_pdfioRC4Init/Encrypt(128-bit): ", stdout); + for (i = 0; i < 16; i ++) + key[i] = (uint8_t)i + 1; + + _pdfioCryptoRC4Init(&rc4, key, 16); + _pdfioCryptoRC4Crypt(&rc4, buffer, (uint8_t *)text, strlen(text)); + + if (!memcmp(rc4text, buffer, sizeof(rc4text))) + { + puts("PASS"); + } + else + { + for (i = 0; i < (sizeof(rc4text) - 4); i ++) + { + if (buffer[i] != rc4text[i]) + break; + } + + prefix = i > 0 ? "..." : ""; + suffix = i < (sizeof(rc4text) - 4) ? "..." 
: ""; + + printf("FAIL (got '%s%02X%02X%02X%02X%s', expected '%s%02X%02X%02X%02X%s')\n", prefix, buffer[i], buffer[i + 1], buffer[i + 2], buffer[i + 3], suffix, prefix, rc4text[i], rc4text[i + 1], rc4text[i + 2], rc4text[i + 3], suffix); + ret = 1; + } + + fputs("_pdfioRC4Init/Decrypt(128-bit): ", stdout); + _pdfioCryptoRC4Init(&rc4, key, 16); + _pdfioCryptoRC4Crypt(&rc4, buffer2, buffer, strlen(text)); + + if (!memcmp(buffer2, text, strlen(text))) + { + puts("PASS"); + } + else + { + for (i = 0; text[i + 4]; i ++) + { + if (buffer2[i] != text[i]) + break; + } + + prefix = i > 0 ? "..." : ""; + suffix = text[i + 4] ? "..." : ""; + + printf("FAIL (got '%s%02X%02X%02X%02X%s', expected '%s%02X%02X%02X%02X%s')\n", prefix, buffer2[i], buffer2[i + 1], buffer2[i + 2], buffer2[i + 3], suffix, prefix, text[i], text[i + 1], text[i + 2], text[i + 3], suffix); + ret = 1; + } + + fputs("_pdfioSHA256Init/Append/Finish: ", stdout); + _pdfioCryptoSHA256Init(&sha256); + _pdfioCryptoSHA256Append(&sha256, (uint8_t *)text, strlen(text)); + _pdfioCryptoSHA256Finish(&sha256, buffer); + + if (!memcmp(sha256text, buffer, sizeof(sha256text))) + { + puts("PASS"); + } + else + { + printf("FAIL (got '%02X%02X%02X%02X...%02X%02X%02X%02X', expected '%02X%02X%02X%02X...%02X%02X%02X%02X')\n", buffer[0], buffer[1], buffer[2], buffer[3], buffer[28], buffer[29], buffer[30], buffer[31], sha256text[0], sha256text[1], sha256text[2], sha256text[3], sha256text[28], sha256text[29], sha256text[30], sha256text[31]); + ret = 1; + } + + return (ret); +} + + // // 'do_test_file()' - Try loading a PDF file and listing pages and objects. // @@ -726,7 +971,7 @@ do_unit_tests(void) fputs("_pdfioValueRead(complex_dict): ", stdout); s = complex_dict; _pdfioTokenInit(&tb, inpdf, (_pdfio_tconsume_cb_t)token_consume_cb, (_pdfio_tpeek_cb_t)token_peek_cb, (void *)&s); - if (_pdfioValueRead(inpdf, &tb, &value)) + if (_pdfioValueRead(inpdf, NULL, &tb, &value)) { // TODO: Check value... 
fputs("PASS: ", stdout); @@ -740,7 +985,7 @@ do_unit_tests(void) fputs("_pdfioValueRead(cid_dict): ", stdout); s = cid_dict; _pdfioTokenInit(&tb, inpdf, (_pdfio_tconsume_cb_t)token_consume_cb, (_pdfio_tpeek_cb_t)token_peek_cb, (void *)&s); - if (_pdfioValueRead(inpdf, &tb, &value)) + if (_pdfioValueRead(inpdf, NULL, &tb, &value)) { // TODO: Check value... fputs("PASS: ", stdout); @@ -750,6 +995,10 @@ do_unit_tests(void) else goto fail; + // Do crypto tests... + if (do_crypto_tests()) + return (1); + // Create a new PDF file... fputs("pdfioFileCreate(\"testpdfio-out.pdf\", ...): ", stdout); if ((outpdf = pdfioFileCreate("testpdfio-out.pdf", NULL, NULL, NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL) @@ -763,7 +1012,7 @@ do_unit_tests(void) if (read_unit_file("testpdfio-out.pdf", num_pages, first_image, false)) goto fail; - // Create a new PDF file... + // Stream a new PDF file... if ((outfd = open("testpdfio-out2.pdf", O_CREAT | O_TRUNC | O_WRONLY | O_BINARY, 0666)) < 0) { perror("Unable to open \"testpdfio-out2.pdf\""); @@ -784,6 +1033,80 @@ do_unit_tests(void) if (read_unit_file("testpdfio-out2.pdf", num_pages, first_image, true)) goto fail; + // Create new encrypted PDF files... + fputs("pdfioFileCreate(\"testpdfio-rc4.pdf\", ...): ", stdout); + if ((outpdf = pdfioFileCreate("testpdfio-rc4.pdf", NULL, NULL, NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL) + puts("PASS"); + else + return (1); + + fputs("pdfioFileSetPermissions(all, RC4-128, no passwords): ", stdout); + if (pdfioFileSetPermissions(outpdf, PDFIO_PERMISSION_ALL, PDFIO_ENCRYPTION_RC4_128, NULL, NULL)) + puts("PASS"); + else + return (1); + + if (write_unit_file(inpdf, outpdf, &num_pages, &first_image)) + return (1); + + if (read_unit_file("testpdfio-rc4.pdf", num_pages, first_image, false)) + return (1); + + // Create new encrypted PDF files... 
+ fputs("pdfioFileCreate(\"testpdfio-rc4p.pdf\", ...): ", stdout); + if ((outpdf = pdfioFileCreate("testpdfio-rc4p.pdf", NULL, NULL, NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL) + puts("PASS"); + else + return (1); + + fputs("pdfioFileSetPermissions(no-print, RC4-128, passwords='owner' and 'user'): ", stdout); + if (pdfioFileSetPermissions(outpdf, PDFIO_PERMISSION_ALL ^ PDFIO_PERMISSION_PRINT, PDFIO_ENCRYPTION_RC4_128, "owner", "user")) + puts("PASS"); + else + return (1); + + if (write_unit_file(inpdf, outpdf, &num_pages, &first_image)) + return (1); + + if (read_unit_file("testpdfio-rc4p.pdf", num_pages, first_image, false)) + return (1); + + fputs("pdfioFileCreate(\"testpdfio-aes.pdf\", ...): ", stdout); + if ((outpdf = pdfioFileCreate("testpdfio-aes.pdf", NULL, NULL, NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL) + puts("PASS"); + else + return (1); + + fputs("pdfioFileSetPermissions(all, AES-128, no passwords): ", stdout); + if (pdfioFileSetPermissions(outpdf, PDFIO_PERMISSION_ALL, PDFIO_ENCRYPTION_AES_128, NULL, NULL)) + puts("PASS"); + else + return (1); + + if (write_unit_file(inpdf, outpdf, &num_pages, &first_image)) + return (1); + + if (read_unit_file("testpdfio-aes.pdf", num_pages, first_image, false)) + return (1); + + fputs("pdfioFileCreate(\"testpdfio-aesp.pdf\", ...): ", stdout); + if ((outpdf = pdfioFileCreate("testpdfio-aesp.pdf", NULL, NULL, NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL) + puts("PASS"); + else + return (1); + + fputs("pdfioFileSetPermissions(no-print, AES-128, passwords='owner' and 'user'): ", stdout); + if (pdfioFileSetPermissions(outpdf, PDFIO_PERMISSION_ALL ^ PDFIO_PERMISSION_PRINT, PDFIO_ENCRYPTION_AES_128, "owner", "user")) + puts("PASS"); + else + return (1); + + if (write_unit_file(inpdf, outpdf, &num_pages, &first_image)) + return (1); + + if (read_unit_file("testpdfio-aesp.pdf", num_pages, first_image, false)) + return (1); + pdfioFileClose(inpdf); return (0); @@ -889,6 +1212,20 @@ output_cb(int *fd, // 
I - File descriptor } +// +// 'password_cb()' - Password callback for PDF file. +// + +static const char * // O - Password string +password_cb(void *data, // I - Callback data + const char *filename) // I - Filename (not used) +{ + (void)filename; + + return ((const char *)data); +} + + // // 'read_unit_file()' - Read back a unit test file and confirm its contents. // @@ -906,7 +1243,7 @@ read_unit_file(const char *filename, // I - File to read // Open the new PDF file to read it... printf("pdfioFileOpen(\"%s\", ...): ", filename); - if ((pdf = pdfioFileOpen(filename, /*password_cb*/NULL, /*password_data*/NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL) + if ((pdf = pdfioFileOpen(filename, password_cb, (void *)"user", (pdfio_error_cb_t)error_cb, &error)) != NULL) puts("PASS"); else return (1);