Save work on streams - still need to implement predictors

This commit is contained in:
Michael R Sweet 2021-05-07 08:47:49 -04:00
parent c61d6ad686
commit 44325ce2d9
No known key found for this signature in database
GPG Key ID: 999559A027815955
5 changed files with 355 additions and 24 deletions

View File

@ -10,6 +10,9 @@ To-Do List
one PDF to another, there are a bunch of resources that also need to be one PDF to another, there are a bunch of resources that also need to be
copied. A dictionary with an object reference can't be copied directly as the copied. A dictionary with an object reference can't be copied directly as the
object number in the new PDF will likely be different than the old one. object number in the new PDF will likely be different than the old one.
- Add _pdfio_map_t with original pdfio_file_t * and object numbers
- Add _pdfioObjCopy function
- Add _pdfioFileGetMappedObject function to get the new object number
- Security handlers (RC4 + AES, MD5 + SHA-256) for reading encrypted documents. - Security handlers (RC4 + AES, MD5 + SHA-256) for reading encrypted documents.
- Signature generation/validation code - Signature generation/validation code
- Documentation - Documentation

View File

@ -537,14 +537,19 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
return (false); return (false);
} }
PDFIO_DEBUG("load_xref: xref_offset=%lu, line='%s'\n", (unsigned long)xref_offset, line);
if (isdigit(line[0] & 255) && strlen(line) > 4 && !strcmp(line + strlen(line) - 4, " obj")) if (isdigit(line[0] & 255) && strlen(line) > 4 && !strcmp(line + strlen(line) - 4, " obj"))
{ {
// Cross-reference stream // Cross-reference stream
pdfio_obj_t *obj; // Object pdfio_obj_t *obj; // Object
size_t i; // Looping var
pdfio_array_t *w_array; // W array pdfio_array_t *w_array; // W array
size_t w[3]; // Size of each cross-reference field size_t w[3]; // Size of each cross-reference field
size_t w_2, // Offset to second field
w_3; // Offset to third field
size_t w_total; // Total length size_t w_total; // Total length
pdfio_stream_t *st; // Stream with pdfio_stream_t *st; // Stream
unsigned char buffer[32]; // Read buffer unsigned char buffer[32]; // Read buffer
if ((number = strtoimax(line, &ptr, 10)) < 1) if ((number = strtoimax(line, &ptr, 10)) < 1)
@ -568,6 +573,8 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
return (false); return (false);
} }
PDFIO_DEBUG("load_xref: Loading object %lu %u.\n", (unsigned long)number, (unsigned)generation);
if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, xref_offset)) == NULL) if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, xref_offset)) == NULL)
{ {
_pdfioFileError(pdf, "Unable to allocate memory for object."); _pdfioFileError(pdf, "Unable to allocate memory for object.");
@ -587,7 +594,85 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
obj->value = trailer; obj->value = trailer;
// TODO: read stream if (!_pdfioFileGetToken(pdf, line, sizeof(line)) || strcmp(line, "stream"))
{
_pdfioFileError(pdf, "Unable to get stream after xref dictionary.");
return (false);
}
obj->stream_offset = _pdfioFileTell(pdf);
if ((w_array = pdfioDictGetArray(trailer.value.dict, "W")) == NULL)
{
_pdfioFileError(pdf, "Cross-reference stream does not have required W key.");
return (false);
}
w[0] = (size_t)pdfioArrayGetNumber(w_array, 0);
w[1] = (size_t)pdfioArrayGetNumber(w_array, 1);
w[2] = (size_t)pdfioArrayGetNumber(w_array, 2);
w_total = w[0] + w[1] + w[2];
w_2 = w[0];
w_3 = w[0] + w[1];
if (w[1] == 0 || w[2] > 2 || w_total > sizeof(buffer))
{
_pdfioFileError(pdf, "Cross-reference stream has invalid W key.");
return (false);
}
if ((st = pdfioObjOpenStream(obj, true)) == NULL)
{
_pdfioFileError(pdf, "Unable to open cross-reference stream.");
return (false);
}
while (pdfioStreamRead(st, buffer, w_total) > 0)
{
PDFIO_DEBUG("load_xref: %02X%02X%02X%02X%02X\n", buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]);
// Check whether this is an object definition...
if (w[0] > 0)
{
if (buffer[0] == 0)
{
// Ignore free objects...
continue;
}
else if (buffer[0] == 2)
{
// TODO: Add support for compressed object streams...
// Compressed object...
_pdfioFileError(pdf, "PDF file contains compressed object streams which are not currently supported.");
continue;
}
}
for (i = 1, offset = buffer[w_2]; i < w[1]; i ++)
offset = (offset << 8) | buffer[w_2 + i];
switch (w[2])
{
default :
generation = 0;
break;
case 1 :
generation = buffer[w_3];
break;
case 2 :
generation = (buffer[w_3] << 8) | buffer[w_3 + 1];
break;
}
// Create a placeholder for the object in memory...
if (pdfioFileFindObject(pdf, (size_t)number))
continue; // Don't replace newer object...
if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset))
return (false);
}
pdfioStreamClose(st);
} }
else if (!strcmp(line, "xref")) else if (!strcmp(line, "xref"))
{ {

View File

@ -200,9 +200,21 @@ pdfio_stream_t * // O - Stream or `NULL` on error
pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
bool decode) // I - Decode/decompress data? bool decode) // I - Decode/decompress data?
{ {
// TODO: Implement me // Range check input...
(void)obj; if (!obj)
(void)decode; return (NULL);
return (NULL); // Make sure we've loaded the object dictionary...
if (!obj->value.type)
{
if (!_pdfioObjLoad(obj))
return (NULL);
}
// No stream if there is no dict or offset to a stream...
if (obj->value.type != PDFIO_VALTYPE_DICT || !obj->stream_offset)
return (NULL);
// Open the stream...
return (_pdfioStreamOpen(obj, decode));
} }

View File

@ -69,6 +69,17 @@ typedef enum _pdfio_mode_e // Read/write mode
_PDFIO_MODE_WRITE // Write a PDF file _PDFIO_MODE_WRITE // Write a PDF file
} _pdfio_mode_t; } _pdfio_mode_t;
// Predictor values for the `Predictor` key of Flate/LZW decode parameters.
// The numeric values are the ones written in the PDF file, so they must not
// be renumbered.
typedef enum _pdfio_predictor_e // Stream predictor constants
{
  _PDFIO_PREDICTOR_NONE = 1, // No predictor (default)
  _PDFIO_PREDICTOR_TIFF2 = 2, // TIFF Predictor 2 (horizontal differencing)
  _PDFIO_PREDICTOR_PNG_NONE = 10, // PNG None predictor (same as `_PDFIO_PREDICTOR_NONE`)
  _PDFIO_PREDICTOR_PNG_SUB = 11, // PNG Sub predictor
  _PDFIO_PREDICTOR_PNG_UP = 12, // PNG Up predictor
  _PDFIO_PREDICTOR_PNG_AVERAGE = 13, // PNG Average predictor
  _PDFIO_PREDICTOR_PNG_PAETH = 14, // PNG Paeth predictor
  _PDFIO_PREDICTOR_PNG_AUTO = 15 // PNG optimum (predictor selected per row)
} _pdfio_predictor_t;
typedef struct _pdfio_value_s // Value structure typedef struct _pdfio_value_s // Value structure
{ {
pdfio_valtype_t type; // Type of value pdfio_valtype_t type; // Type of value
@ -174,9 +185,12 @@ struct _pdfio_stream_s // Stream
pdfio_file_t *pdf; // PDF file pdfio_file_t *pdf; // PDF file
pdfio_obj_t *obj; // Object pdfio_obj_t *obj; // Object
pdfio_filter_t filter; // Compression/decompression filter pdfio_filter_t filter; // Compression/decompression filter
char buffer[8192]; // Read/write buffer size_t remaining; // Remaining bytes in stream
size_t bufused; // Number of bytes in buffer char buffer[8192], // Read/write buffer
*bufptr, // Current position in buffer
*bufend; // End of buffer
z_stream flate; // Flate filter state z_stream flate; // Flate filter state
char cbuffer[4096]; // Compressed data buffer
}; };
typedef ssize_t (*_pdfio_tconsume_cb_t)(void *data, size_t bytes); typedef ssize_t (*_pdfio_tconsume_cb_t)(void *data, size_t bytes);

View File

@ -14,6 +14,13 @@
#include "pdfio-private.h" #include "pdfio-private.h"
//
// Local functions...
//
static ssize_t stream_read(pdfio_stream_t *st, char *buffer, size_t bytes);
// //
// 'pdfioStreamClose()' - Close a (data) stream in a PDF file. // 'pdfioStreamClose()' - Close a (data) stream in a PDF file.
// //
@ -54,10 +61,35 @@ bool // O - `true` on success, `false` on EOF
pdfioStreamConsume(pdfio_stream_t *st, // I - Stream pdfioStreamConsume(pdfio_stream_t *st, // I - Stream
size_t bytes)// I - Number of bytes to consume size_t bytes)// I - Number of bytes to consume
{ {
// TODO: Implement me size_t remaining; // Remaining bytes in buffer
(void)st; ssize_t rbytes; // Bytes read
(void)bytes;
return (false);
// Range check input...
if (!st || st->pdf->mode != _PDFIO_MODE_READ || !bytes)
return (false);
// Skip bytes in the stream buffer until we've consumed the requested number
// or get to the end of the stream...
while ((remaining = (size_t)(st->bufend - st->bufptr)) < bytes)
{
bytes -= remaining;
if ((rbytes = stream_read(st, st->buffer, sizeof(st->buffer))) > 0)
{
st->bufptr = st->buffer;
st->bufend = st->buffer + rbytes;
}
else
{
st->bufptr = st->bufend = st->buffer;
return (false);
}
}
st->bufptr += bytes;
return (true);
} }
@ -107,6 +139,9 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
bool decode) // I - Decode/decompress the stream? bool decode) // I - Decode/decompress the stream?
{ {
pdfio_stream_t *st; // Stream pdfio_stream_t *st; // Stream
pdfio_dict_t *dict = pdfioObjGetDict(obj);
// Object dictionary
size_t length; // Length of stream
// Allocate a new stream object... // Allocate a new stream object...
@ -121,11 +156,36 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
_pdfioFileSeek(st->pdf, obj->stream_offset, SEEK_SET); _pdfioFileSeek(st->pdf, obj->stream_offset, SEEK_SET);
if ((length = (size_t)pdfioDictGetNumber(dict, "Length")) == 0)
{
// Length must be an indirect reference...
pdfio_obj_t *lenobj; // Length object
if ((lenobj = pdfioDictGetObject(dict, "Length")) == NULL)
{
_pdfioFileError(obj->pdf, "Unable to get length of stream.");
free(st);
return (NULL);
}
if (lenobj->value.type == PDFIO_VALTYPE_NONE)
_pdfioObjLoad(lenobj);
if (lenobj->value.type != PDFIO_VALTYPE_NUMBER || lenobj->value.value.number <= 0.0f)
{
_pdfioFileError(obj->pdf, "Unable to get length of stream.");
free(st);
return (NULL);
}
length = (size_t)lenobj->value.value.number;
}
st->remaining = length;
if (decode) if (decode)
{ {
// Try to decode/decompress the contents of this object... // Try to decode/decompress the contents of this object...
pdfio_dict_t *dict = pdfioObjGetDict(obj);
// Object dictionary
const char *filter = pdfioDictGetName(dict, "Filter"); const char *filter = pdfioDictGetName(dict, "Filter");
// Filter value // Filter value
@ -146,6 +206,7 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
else if (!strcmp(filter, "FlateDecode")) else if (!strcmp(filter, "FlateDecode"))
{ {
// Flate compression // Flate compression
#if 0 // TODO: Determine whether we need to implement support for predictors
int bpc = (int)pdfioDictGetNumber(dict, "BitsPerComponent"); int bpc = (int)pdfioDictGetNumber(dict, "BitsPerComponent");
// Bits per component // Bits per component
int colors = (int)pdfioDictGetNumber(dict, "Colors"); int colors = (int)pdfioDictGetNumber(dict, "Colors");
@ -154,8 +215,26 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
// Number of columns // Number of columns
int predictor = (int)pdfioDictGetNumber(dict, "Predictor"); int predictor = (int)pdfioDictGetNumber(dict, "Predictor");
// Predictor value, if any // Predictor value, if any
#endif // 0
st->filter = PDFIO_FILTER_FLATE; st->filter = PDFIO_FILTER_FLATE;
st->flate.zalloc = (alloc_func)0;
st->flate.zfree = (free_func)0;
st->flate.opaque = (voidpf)0;
st->flate.next_in = (Bytef *)st->cbuffer;
st->flate.next_out = NULL;
st->flate.avail_in = (uInt)_pdfioFileRead(st->pdf, st->cbuffer, sizeof(st->cbuffer));
st->flate.avail_out = 0;
if (inflateInit(&(st->flate)) != Z_OK)
{
_pdfioFileError(st->pdf, "Unable to start Flate filter.");
free(st);
return (NULL);
}
st->remaining -= st->flate.avail_in;
} }
else if (!strcmp(filter, "LZWDecode")) else if (!strcmp(filter, "LZWDecode"))
{ {
@ -189,12 +268,40 @@ pdfioStreamPeek(pdfio_stream_t *st, // I - Stream
void *buffer, // I - Buffer void *buffer, // I - Buffer
size_t bytes) // I - Size of buffer size_t bytes) // I - Size of buffer
{ {
// TODO: Implement me size_t remaining; // Remaining bytes in buffer
(void)st;
(void)buffer;
(void)bytes;
return (-1);
// Range check input...
if (!st || st->pdf->mode != _PDFIO_MODE_READ || !buffer || !bytes)
return (-1);
// See if we have enough bytes in the buffer...
if ((remaining = (size_t)(st->bufend - st->bufptr)) < bytes)
{
// No, shift the buffer and read more
ssize_t rbytes; // Bytes read
if (remaining > 0)
memmove(st->buffer, st->bufptr, remaining);
st->bufptr = st->buffer;
st->bufend = st->buffer + remaining;
if ((rbytes = stream_read(st, st->bufptr, sizeof(st->buffer) - remaining)) > 0)
{
st->bufend += rbytes;
remaining += (size_t)rbytes;
}
}
// Copy bytes from the buffer...
if (bytes > remaining)
bytes = remaining;
memcpy(buffer, st->bufptr, bytes);
// Return the number of bytes that were copied...
return ((ssize_t)bytes);
} }
@ -247,12 +354,57 @@ pdfioStreamRead(
void *buffer, // I - Buffer void *buffer, // I - Buffer
size_t bytes) // I - Bytes to read size_t bytes) // I - Bytes to read
{ {
// TODO: Implement me char *bufptr = (char *)buffer;
(void)st; // Pointer into buffer
(void)buffer; size_t remaining; // Remaining bytes in buffer
(void)bytes; ssize_t rbytes; // Bytes read
return (-1);
// Range check input...
if (!st || st->pdf->mode != _PDFIO_MODE_READ || !buffer || !bytes)
return (-1);
// Loop until we have the requested bytes or hit the end of the stream...
while ((remaining = (size_t)(st->bufend - st->bufptr)) < bytes)
{
memcpy(bufptr, st->bufptr, remaining);
bufptr += remaining;
bytes -= remaining;
if (bytes >= sizeof(st->buffer))
{
// Read large amounts directly to caller's buffer...
if ((rbytes = stream_read(st, bufptr, bytes)) > 0)
{
bufptr += rbytes;
bytes = 0;
}
st->bufptr = st->bufend = st->buffer;
break;
}
else if ((rbytes = stream_read(st, st->buffer, sizeof(st->buffer))) > 0)
{
st->bufptr = st->buffer;
st->bufend = st->buffer + rbytes;
}
else
{
st->bufptr = st->bufend = st->buffer;
break;
}
}
// Copy any remaining bytes from the stream buffer...
if (bytes > 0)
{
memcpy(bufptr, st->bufptr, bytes);
bufptr += bytes;
st->bufptr += bytes;
}
// Return the number of bytes that were read...
return (bufptr - (char *)buffer);
} }
@ -273,3 +425,68 @@ pdfioStreamWrite(
return (false); return (false);
} }
//
// 'stream_read()' - Read data from a stream, including filters.
//
// Unfiltered streams are read straight from the file, limited to the number
// of bytes remaining in the stream.  Flate streams are inflated from the
// stream's compressed data buffer, which is refilled from the file as needed,
// until the caller's buffer is full, the compressed data ends, or zlib can
// make no further progress.  A return value that is not greater than zero
// means no data was produced.
//

static ssize_t // O - Number of bytes read or `-1` on error
stream_read(pdfio_stream_t *st, // I - Stream
            char *buffer, // I - Buffer
            size_t bytes) // I - Number of bytes to read
{
  ssize_t rbytes; // Bytes read from the file


  if (st->filter == PDFIO_FILTER_NONE)
  {
    // No filtering, but limit reads to the length of the stream...
    if (bytes > st->remaining)
      bytes = st->remaining;

    if ((rbytes = _pdfioFileRead(st->pdf, buffer, bytes)) > 0)
      st->remaining -= (size_t)rbytes;

    return (rbytes);
  }
  else if (st->filter == PDFIO_FILTER_FLATE)
  {
    // Deflate compression...
    int status = Z_OK; // Status of decompression
    bool input_done = false; // No more compressed data to read?
    const uInt max_out = ~(uInt)0; // Largest output size zlib can track
    size_t produced; // Bytes written to the caller's buffer

    // zlib counts output space in a uInt, so clamp oversized requests rather
    // than letting the cast truncate them (possibly to zero)...
    st->flate.next_out = (Bytef *)buffer;
    st->flate.avail_out = bytes > (size_t)max_out ? max_out : (uInt)bytes;

    while (st->flate.avail_out > 0)
    {
      uInt in_before, // Input bytes before inflate
           out_before; // Output space before inflate

      if (st->flate.avail_in == 0 && !input_done)
      {
        // Read more from the file, but never past the end of the stream...
        size_t rsize = sizeof(st->cbuffer);

        if (rsize > st->remaining)
          rsize = st->remaining;

        if (rsize == 0 || (rbytes = _pdfioFileRead(st->pdf, st->cbuffer, rsize)) <= 0)
        {
          // Out of compressed data; still give inflate one more chance below
          // since it may be holding output that did not fit previously...
          input_done = true;
        }
        else
        {
          st->remaining -= (size_t)rbytes;
          st->flate.next_in = (Bytef *)st->cbuffer;
          st->flate.avail_in = (uInt)rbytes;
        }
      }

      // Decompress into the buffer...
      in_before = st->flate.avail_in;
      out_before = st->flate.avail_out;
      status = inflate(&(st->flate), Z_NO_FLUSH);

      // Z_BUF_ERROR only means "no progress possible right now" and is not
      // fatal; a preset dictionary request can never be satisfied here...
      if (status == Z_NEED_DICT || (status < Z_OK && status != Z_BUF_ERROR))
      {
        _pdfioFileError(st->pdf, "Unable to decompress stream data: %d", status);
        return (-1);
      }

      // Stop at the end of the compressed data or when nothing moved,
      // otherwise keep going so a call that only consumed input is not
      // mistaken for end-of-stream by the caller...
      if (status == Z_STREAM_END || (in_before == st->flate.avail_in && out_before == st->flate.avail_out))
        break;
    }

    produced = (size_t)(st->flate.next_out - (Bytef *)buffer);

    if (produced == 0 && input_done)
      return (-1); // End of file...

    return ((ssize_t)produced);
  }

  // If we get here something bad happened...
  return (-1);
}