mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2024-12-26 13:28:22 +01:00
Safe work on streams - still need to implement predictors
This commit is contained in:
parent
c61d6ad686
commit
44325ce2d9
3
TODO.md
3
TODO.md
@ -10,6 +10,9 @@ To-Do List
|
||||
one PDF to another, there are a bunch of resources that also need to be
|
||||
copied. A dictionary with an object reference can't be copied directly as the
|
||||
object number in the new PDF will likely be different than the old one.
|
||||
- Add _pdfio_map_t with original pdfio_file_t * and object numbers
|
||||
- Add _pdfioObjCopy function
|
||||
- Add _pdfioFileGetMappedObject function to get the new object number
|
||||
- Security handlers (RC4 + AES, MD5 + SHA-256) for reading encrypted documents.
|
||||
- Signature generation/validation code
|
||||
- Documentation
|
||||
|
89
pdfio-file.c
89
pdfio-file.c
@ -537,14 +537,19 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
|
||||
return (false);
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("load_xref: xref_offset=%lu, line='%s'\n", (unsigned long)xref_offset, line);
|
||||
|
||||
if (isdigit(line[0] & 255) && strlen(line) > 4 && !strcmp(line + strlen(line) - 4, " obj"))
|
||||
{
|
||||
// Cross-reference stream
|
||||
pdfio_obj_t *obj; // Object
|
||||
size_t i; // Looping var
|
||||
pdfio_array_t *w_array; // W array
|
||||
size_t w[3]; // Size of each cross-reference field
|
||||
size_t w_2, // Offset to second field
|
||||
w_3; // Offset to third field
|
||||
size_t w_total; // Total length
|
||||
pdfio_stream_t *st; // Stream with
|
||||
pdfio_stream_t *st; // Stream
|
||||
unsigned char buffer[32]; // Read buffer
|
||||
|
||||
if ((number = strtoimax(line, &ptr, 10)) < 1)
|
||||
@ -568,6 +573,8 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
|
||||
return (false);
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("load_xref: Loading object %lu %u.\n", (unsigned long)number, (unsigned)generation);
|
||||
|
||||
if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, xref_offset)) == NULL)
|
||||
{
|
||||
_pdfioFileError(pdf, "Unable to allocate memory for object.");
|
||||
@ -587,7 +594,85 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
|
||||
|
||||
obj->value = trailer;
|
||||
|
||||
// TODO: read stream
|
||||
if (!_pdfioFileGetToken(pdf, line, sizeof(line)) || strcmp(line, "stream"))
|
||||
{
|
||||
_pdfioFileError(pdf, "Unable to get stream after xref dictionary.");
|
||||
return (false);
|
||||
}
|
||||
|
||||
obj->stream_offset = _pdfioFileTell(pdf);
|
||||
|
||||
if ((w_array = pdfioDictGetArray(trailer.value.dict, "W")) == NULL)
|
||||
{
|
||||
_pdfioFileError(pdf, "Cross-reference stream does not have required W key.");
|
||||
return (false);
|
||||
}
|
||||
|
||||
w[0] = (size_t)pdfioArrayGetNumber(w_array, 0);
|
||||
w[1] = (size_t)pdfioArrayGetNumber(w_array, 1);
|
||||
w[2] = (size_t)pdfioArrayGetNumber(w_array, 2);
|
||||
w_total = w[0] + w[1] + w[2];
|
||||
w_2 = w[0];
|
||||
w_3 = w[0] + w[1];
|
||||
|
||||
if (w[1] == 0 || w[2] > 2 || w_total > sizeof(buffer))
|
||||
{
|
||||
_pdfioFileError(pdf, "Cross-reference stream has invalid W key.");
|
||||
return (false);
|
||||
}
|
||||
|
||||
if ((st = pdfioObjOpenStream(obj, true)) == NULL)
|
||||
{
|
||||
_pdfioFileError(pdf, "Unable to open cross-reference stream.");
|
||||
return (false);
|
||||
}
|
||||
|
||||
while (pdfioStreamRead(st, buffer, w_total) > 0)
|
||||
{
|
||||
PDFIO_DEBUG("load_xref: %02X%02X%02X%02X%02X\n", buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]);
|
||||
|
||||
// Check whether this is an object definition...
|
||||
if (w[0] > 0)
|
||||
{
|
||||
if (buffer[0] == 0)
|
||||
{
|
||||
// Ignore free objects...
|
||||
continue;
|
||||
}
|
||||
else if (buffer[0] == 2)
|
||||
{
|
||||
// TODO: Add support for compressed object streams...
|
||||
// Compressed object...
|
||||
_pdfioFileError(pdf, "PDF file contains compressed object streams which are not currently supported.");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 1, offset = buffer[w_2]; i < w[1]; i ++)
|
||||
offset = (offset << 8) | buffer[w_2 + i];
|
||||
|
||||
switch (w[2])
|
||||
{
|
||||
default :
|
||||
generation = 0;
|
||||
break;
|
||||
case 1 :
|
||||
generation = buffer[w_3];
|
||||
break;
|
||||
case 2 :
|
||||
generation = (buffer[w_3] << 8) | buffer[w_3 + 1];
|
||||
break;
|
||||
}
|
||||
|
||||
// Create a placeholder for the object in memory...
|
||||
if (pdfioFileFindObject(pdf, (size_t)number))
|
||||
continue; // Don't replace newer object...
|
||||
|
||||
if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset))
|
||||
return (false);
|
||||
}
|
||||
|
||||
pdfioStreamClose(st);
|
||||
}
|
||||
else if (!strcmp(line, "xref"))
|
||||
{
|
||||
|
@ -200,9 +200,21 @@ pdfio_stream_t * // O - Stream or `NULL` on error
|
||||
pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
|
||||
bool decode) // I - Decode/decompress data?
|
||||
{
|
||||
// TODO: Implement me
|
||||
(void)obj;
|
||||
(void)decode;
|
||||
// Range check input...
|
||||
if (!obj)
|
||||
return (NULL);
|
||||
|
||||
return (NULL);
|
||||
// Make sure we've loaded the object dictionary...
|
||||
if (!obj->value.type)
|
||||
{
|
||||
if (!_pdfioObjLoad(obj))
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
// No stream if there is no dict or offset to a stream...
|
||||
if (obj->value.type != PDFIO_VALTYPE_DICT || !obj->stream_offset)
|
||||
return (NULL);
|
||||
|
||||
// Open the stream...
|
||||
return (_pdfioStreamOpen(obj, decode));
|
||||
}
|
||||
|
@ -69,6 +69,17 @@ typedef enum _pdfio_mode_e // Read/write mode
|
||||
_PDFIO_MODE_WRITE // Write a PDF file
|
||||
} _pdfio_mode_t;
|
||||
|
||||
typedef enum _pdfio_predictor_e		// Stream predictor constants
{
  // Values are the integers cast from the "Predictor" dictionary key
  _PDFIO_PREDICTOR_NONE = 1,		// No prediction (default)
  _PDFIO_PREDICTOR_TIFF2 = 2,		// TIFF predictor 2 (horizontal differencing per the PDF spec - TODO confirm)

  // PNG row predictors
  _PDFIO_PREDICTOR_PNG_NONE = 10,	// PNG None (same effect as `_PDFIO_PREDICTOR_NONE`)
  _PDFIO_PREDICTOR_PNG_SUB = 11,	// PNG Sub
  _PDFIO_PREDICTOR_PNG_UP = 12,		// PNG Up
  _PDFIO_PREDICTOR_PNG_AVERAGE = 13,	// PNG Average
  _PDFIO_PREDICTOR_PNG_PAETH = 14	// PNG Paeth
} _pdfio_predictor_t;
|
||||
|
||||
typedef struct _pdfio_value_s // Value structure
|
||||
{
|
||||
pdfio_valtype_t type; // Type of value
|
||||
@ -174,9 +185,12 @@ struct _pdfio_stream_s // Stream
|
||||
pdfio_file_t *pdf; // PDF file
|
||||
pdfio_obj_t *obj; // Object
|
||||
pdfio_filter_t filter; // Compression/decompression filter
|
||||
char buffer[8192]; // Read/write buffer
|
||||
size_t bufused; // Number of bytes in buffer
|
||||
size_t remaining; // Remaining bytes in stream
|
||||
char buffer[8192], // Read/write buffer
|
||||
*bufptr, // Current position in buffer
|
||||
*bufend; // End of buffer
|
||||
z_stream flate; // Flate filter state
|
||||
char cbuffer[4096]; // Compressed data buffer
|
||||
};
|
||||
|
||||
typedef ssize_t (*_pdfio_tconsume_cb_t)(void *data, size_t bytes);
|
||||
|
249
pdfio-stream.c
249
pdfio-stream.c
@ -14,6 +14,13 @@
|
||||
#include "pdfio-private.h"
|
||||
|
||||
|
||||
//
|
||||
// Local functions...
|
||||
//
|
||||
|
||||
static ssize_t stream_read(pdfio_stream_t *st, char *buffer, size_t bytes);
|
||||
|
||||
|
||||
//
|
||||
// 'pdfioStreamClose()' - Close a (data) stream in a PDF file.
|
||||
//
|
||||
@ -54,10 +61,35 @@ bool // O - `true` on success, `false` on EOF
|
||||
pdfioStreamConsume(pdfio_stream_t *st, // I - Stream
|
||||
size_t bytes)// I - Number of bytes to consume
|
||||
{
|
||||
// TODO: Implement me
|
||||
(void)st;
|
||||
(void)bytes;
|
||||
return (false);
|
||||
size_t remaining; // Remaining bytes in buffer
|
||||
ssize_t rbytes; // Bytes read
|
||||
|
||||
|
||||
// Range check input...
|
||||
if (!st || st->pdf->mode != _PDFIO_MODE_READ || !bytes)
|
||||
return (false);
|
||||
|
||||
// Skip bytes in the stream buffer until we've consumed the requested number
|
||||
// or get to the end of the stream...
|
||||
while ((remaining = (size_t)(st->bufend - st->bufptr)) < bytes)
|
||||
{
|
||||
bytes -= remaining;
|
||||
|
||||
if ((rbytes = stream_read(st, st->buffer, sizeof(st->buffer))) > 0)
|
||||
{
|
||||
st->bufptr = st->buffer;
|
||||
st->bufend = st->buffer + rbytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
st->bufptr = st->bufend = st->buffer;
|
||||
return (false);
|
||||
}
|
||||
}
|
||||
|
||||
st->bufptr += bytes;
|
||||
|
||||
return (true);
|
||||
}
|
||||
|
||||
|
||||
@ -107,6 +139,9 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
|
||||
bool decode) // I - Decode/decompress the stream?
|
||||
{
|
||||
pdfio_stream_t *st; // Stream
|
||||
pdfio_dict_t *dict = pdfioObjGetDict(obj);
|
||||
// Object dictionary
|
||||
size_t length; // Length of stream
|
||||
|
||||
|
||||
// Allocate a new stream object...
|
||||
@ -121,11 +156,36 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
|
||||
|
||||
_pdfioFileSeek(st->pdf, obj->stream_offset, SEEK_SET);
|
||||
|
||||
if ((length = (size_t)pdfioDictGetNumber(dict, "Length")) == 0)
|
||||
{
|
||||
// Length must be an indirect reference...
|
||||
pdfio_obj_t *lenobj; // Length object
|
||||
|
||||
if ((lenobj = pdfioDictGetObject(dict, "Length")) == NULL)
|
||||
{
|
||||
_pdfioFileError(obj->pdf, "Unable to get length of stream.");
|
||||
free(st);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
if (lenobj->value.type == PDFIO_VALTYPE_NONE)
|
||||
_pdfioObjLoad(lenobj);
|
||||
|
||||
if (lenobj->value.type != PDFIO_VALTYPE_NUMBER || lenobj->value.value.number <= 0.0f)
|
||||
{
|
||||
_pdfioFileError(obj->pdf, "Unable to get length of stream.");
|
||||
free(st);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
length = (size_t)lenobj->value.value.number;
|
||||
}
|
||||
|
||||
st->remaining = length;
|
||||
|
||||
if (decode)
|
||||
{
|
||||
// Try to decode/decompress the contents of this object...
|
||||
pdfio_dict_t *dict = pdfioObjGetDict(obj);
|
||||
// Object dictionary
|
||||
const char *filter = pdfioDictGetName(dict, "Filter");
|
||||
// Filter value
|
||||
|
||||
@ -146,6 +206,7 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
|
||||
else if (!strcmp(filter, "FlateDecode"))
|
||||
{
|
||||
// Flate compression
|
||||
#if 0 // TODO: Determine whether we need to implement support for predictors
|
||||
int bpc = (int)pdfioDictGetNumber(dict, "BitsPerComponent");
|
||||
// Bits per component
|
||||
int colors = (int)pdfioDictGetNumber(dict, "Colors");
|
||||
@ -154,8 +215,26 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
|
||||
// Number of columns
|
||||
int predictor = (int)pdfioDictGetNumber(dict, "Predictor");
|
||||
// Predictor value, if any
|
||||
#endif // 0
|
||||
|
||||
st->filter = PDFIO_FILTER_FLATE;
|
||||
|
||||
st->flate.zalloc = (alloc_func)0;
|
||||
st->flate.zfree = (free_func)0;
|
||||
st->flate.opaque = (voidpf)0;
|
||||
st->flate.next_in = (Bytef *)st->cbuffer;
|
||||
st->flate.next_out = NULL;
|
||||
st->flate.avail_in = (uInt)_pdfioFileRead(st->pdf, st->cbuffer, sizeof(st->cbuffer));
|
||||
st->flate.avail_out = 0;
|
||||
|
||||
if (inflateInit(&(st->flate)) != Z_OK)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to start Flate filter.");
|
||||
free(st);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
st->remaining -= st->flate.avail_in;
|
||||
}
|
||||
else if (!strcmp(filter, "LZWDecode"))
|
||||
{
|
||||
@ -189,12 +268,40 @@ pdfioStreamPeek(pdfio_stream_t *st, // I - Stream
|
||||
void *buffer, // I - Buffer
|
||||
size_t bytes) // I - Size of buffer
|
||||
{
|
||||
// TODO: Implement me
|
||||
(void)st;
|
||||
(void)buffer;
|
||||
(void)bytes;
|
||||
size_t remaining; // Remaining bytes in buffer
|
||||
|
||||
return (-1);
|
||||
|
||||
// Range check input...
|
||||
if (!st || st->pdf->mode != _PDFIO_MODE_READ || !buffer || !bytes)
|
||||
return (-1);
|
||||
|
||||
// See if we have enough bytes in the buffer...
|
||||
if ((remaining = (size_t)(st->bufend - st->bufptr)) < bytes)
|
||||
{
|
||||
// No, shift the buffer and read more
|
||||
ssize_t rbytes; // Bytes read
|
||||
|
||||
if (remaining > 0)
|
||||
memmove(st->buffer, st->bufptr, remaining);
|
||||
|
||||
st->bufptr = st->buffer;
|
||||
st->bufend = st->buffer + remaining;
|
||||
|
||||
if ((rbytes = stream_read(st, st->bufptr, sizeof(st->buffer) - remaining)) > 0)
|
||||
{
|
||||
st->bufend += rbytes;
|
||||
remaining += (size_t)rbytes;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy bytes from the buffer...
|
||||
if (bytes > remaining)
|
||||
bytes = remaining;
|
||||
|
||||
memcpy(buffer, st->bufptr, bytes);
|
||||
|
||||
// Return the number of bytes that were copied...
|
||||
return ((ssize_t)bytes);
|
||||
}
|
||||
|
||||
|
||||
@ -247,12 +354,57 @@ pdfioStreamRead(
|
||||
void *buffer, // I - Buffer
|
||||
size_t bytes) // I - Bytes to read
|
||||
{
  // Copies up to `bytes` bytes of stream data into `buffer`, draining the
  // stream's internal buffer first and refilling it through stream_read().
  // Returns the number of bytes actually copied (0 once the stream is
  // exhausted) or -1 when the arguments are invalid.
  char		*bufptr = (char *)buffer;
					// Current write position in caller's buffer
  size_t	remaining;		// Unread bytes left in the stream buffer
  ssize_t	rbytes;			// Bytes returned by stream_read()


  // Range check input...
  if (!st || st->pdf->mode != _PDFIO_MODE_READ || !buffer || !bytes)
    return (-1);

  // Loop until we have the requested bytes or hit the end of the stream...
  while ((remaining = (size_t)(st->bufend - st->bufptr)) < bytes)
  {
    // Hand over whatever is still buffered before fetching more...
    if (remaining > 0)
    {
      memcpy(bufptr, st->bufptr, remaining);
      bufptr += remaining;
      bytes  -= remaining;
    }

    if (bytes >= sizeof(st->buffer))
    {
      // Read large amounts directly to caller's buffer...
      if ((rbytes = stream_read(st, bufptr, bytes)) > 0)
        bufptr += rbytes;

      // The stream buffer is empty whether or not the read succeeded, so
      // there is nothing left for the final copy below; clearing `bytes`
      // keeps it from copying stale data and moving bufptr past bufend.
      bytes      = 0;
      st->bufptr = st->bufend = st->buffer;
      break;
    }
    else if ((rbytes = stream_read(st, st->buffer, sizeof(st->buffer))) > 0)
    {
      // Refilled the stream buffer; re-test how much is available...
      st->bufptr = st->buffer;
      st->bufend = st->buffer + rbytes;
    }
    else
    {
      // End of stream or read error: return only what was copied so far
      // (possibly 0) so that callers looping on "> 0" terminate.
      bytes      = 0;
      st->bufptr = st->bufend = st->buffer;
      break;
    }
  }

  // Copy any remaining bytes from the stream buffer...
  if (bytes > 0)
  {
    memcpy(bufptr, st->bufptr, bytes);
    bufptr     += bytes;
    st->bufptr += bytes;
  }

  // Return the number of bytes that were read...
  return (bufptr - (char *)buffer);
}
|
||||
|
||||
|
||||
@ -273,3 +425,68 @@ pdfioStreamWrite(
|
||||
|
||||
return (false);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'stream_read()' - Read data from a stream, including filters.
|
||||
//
|
||||
|
||||
static ssize_t // O - Number of bytes read or `-1` on error
|
||||
stream_read(pdfio_stream_t *st, // I - Stream
|
||||
char *buffer, // I - Buffer
|
||||
size_t bytes) // I - Number of bytes to read
|
||||
{
|
||||
ssize_t rbytes; // Bytes read
|
||||
|
||||
|
||||
if (st->filter == PDFIO_FILTER_NONE)
|
||||
{
|
||||
// No filtering, but limit reads to the length of the stream...
|
||||
if (bytes > st->remaining)
|
||||
rbytes = _pdfioFileRead(st->pdf, buffer, st->remaining);
|
||||
else
|
||||
rbytes = _pdfioFileRead(st->pdf, buffer, bytes);
|
||||
|
||||
if (rbytes > 0)
|
||||
st->remaining -= (size_t)rbytes;
|
||||
|
||||
return (rbytes);
|
||||
}
|
||||
else if (st->filter == PDFIO_FILTER_FLATE)
|
||||
{
|
||||
// Deflate compression...
|
||||
int status; // Status of decompression
|
||||
|
||||
if (st->flate.avail_in == 0)
|
||||
{
|
||||
// Read more from the file...
|
||||
if (sizeof(st->cbuffer) > st->remaining)
|
||||
rbytes = _pdfioFileRead(st->pdf, st->cbuffer, st->remaining);
|
||||
else
|
||||
rbytes = _pdfioFileRead(st->pdf, st->cbuffer, sizeof(st->cbuffer));
|
||||
|
||||
if (rbytes <= 0)
|
||||
return (-1); // End of file...
|
||||
|
||||
st->remaining -= (size_t)rbytes;
|
||||
st->flate.next_in = (Bytef *)st->cbuffer;
|
||||
st->flate.avail_in = (uInt)rbytes;
|
||||
}
|
||||
|
||||
// Decompress into the buffer...
|
||||
st->flate.next_out = (Bytef *)buffer;
|
||||
st->flate.avail_out = (uInt)bytes;
|
||||
|
||||
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to decompress stream data: %d", status);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
return (st->flate.next_out - (Bytef *)buffer);
|
||||
}
|
||||
|
||||
// If we get here something bad happened...
|
||||
return (-1);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user