mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2024-12-27 05:48:20 +01:00
Fix pdfioFileFindObject, fix cross-reference streams, implement support for
compressed object streams.
This commit is contained in:
parent
9a919d478f
commit
979ccf19ff
164
pdfio-file.c
164
pdfio-file.c
@ -23,6 +23,7 @@
|
|||||||
|
|
||||||
static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
|
static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
|
||||||
static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b);
|
static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b);
|
||||||
|
static bool load_obj_stream(pdfio_obj_t *obj);
|
||||||
static bool load_xref(pdfio_file_t *pdf, off_t xref_offset);
|
static bool load_xref(pdfio_file_t *pdf, off_t xref_offset);
|
||||||
static bool write_trailer(pdfio_file_t *pdf);
|
static bool write_trailer(pdfio_file_t *pdf);
|
||||||
|
|
||||||
@ -236,15 +237,17 @@ pdfioFileFindObject(
|
|||||||
size_t number) // I - Object number (1 to N)
|
size_t number) // I - Object number (1 to N)
|
||||||
{
|
{
|
||||||
pdfio_obj_t key, // Search key
|
pdfio_obj_t key, // Search key
|
||||||
*keyptr; // Pointer to key
|
*keyptr, // Pointer to key
|
||||||
|
**match; // Pointer to match
|
||||||
|
|
||||||
|
|
||||||
if (pdf->num_objs > 0)
|
if (pdf->num_objs > 0)
|
||||||
{
|
{
|
||||||
key.number = number;
|
key.number = number;
|
||||||
keyptr = &key;
|
keyptr = &key;
|
||||||
|
match = (pdfio_obj_t **)bsearch(&keyptr, pdf->objs, pdf->num_objs, sizeof(pdfio_obj_t *), (int (*)(const void *, const void *))compare_objs);
|
||||||
|
|
||||||
return ((pdfio_obj_t *)bsearch(&keyptr, pdf->objs, pdf->num_objs, sizeof(pdfio_obj_t *), (int (*)(const void *, const void *))compare_objs));
|
return (match ? *match : NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
return (NULL);
|
return (NULL);
|
||||||
@ -479,12 +482,13 @@ add_obj(pdfio_file_t *pdf, // I - PDF file
|
|||||||
obj->generation = generation;
|
obj->generation = generation;
|
||||||
obj->offset = offset;
|
obj->offset = offset;
|
||||||
|
|
||||||
|
PDFIO_DEBUG("add_obj: obj=%p, ->pdf=%p, ->number=%lu\n", obj, pdf, (unsigned long)obj->number);
|
||||||
|
|
||||||
// Re-sort object array as needed...
|
// Re-sort object array as needed...
|
||||||
if (pdf->num_objs > 1 && pdf->objs[pdf->num_objs - 2]->number > number)
|
if (pdf->num_objs > 1 && pdf->objs[pdf->num_objs - 2]->number > number)
|
||||||
qsort(pdf->objs, pdf->num_objs, sizeof(pdfio_obj_t *), (int (*)(const void *, const void *))compare_objs);
|
qsort(pdf->objs, pdf->num_objs, sizeof(pdfio_obj_t *), (int (*)(const void *, const void *))compare_objs);
|
||||||
|
|
||||||
return (obj);
|
return (obj);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -505,6 +509,87 @@ compare_objs(pdfio_obj_t **a, // I - First object
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// 'load_obj_stream()' - Load an object stream.
|
||||||
|
//
|
||||||
|
// Object streams are Adobe's complicated solution for saving a few
|
||||||
|
// kilobytes in an average PDF file at the expense of massively more
|
||||||
|
// complicated reader applications.
|
||||||
|
//
|
||||||
|
// Each object stream starts with pairs of object numbers and offsets,
|
||||||
|
// followed by the object values (typically dictionaries). For
|
||||||
|
// simplicity pdfio loads all of these values into memory so that we
|
||||||
|
// don't later have to randomly access compressed stream data to get
|
||||||
|
// a dictionary.
|
||||||
|
//
|
||||||
|
|
||||||
|
static bool // O - `true` on success, `false` on error
|
||||||
|
load_obj_stream(pdfio_obj_t *obj) // I - Object to load
|
||||||
|
{
|
||||||
|
pdfio_stream_t *st; // Stream
|
||||||
|
_pdfio_token_t tb; // Token buffer/stack
|
||||||
|
char buffer[32]; // Token
|
||||||
|
size_t cur_obj, // Current object
|
||||||
|
num_objs = 0; // Number of objects
|
||||||
|
pdfio_obj_t *objs[1000]; // Objects
|
||||||
|
|
||||||
|
|
||||||
|
// Open the object stream...
|
||||||
|
if ((st = pdfioObjOpenStream(obj, true)) == NULL)
|
||||||
|
{
|
||||||
|
_pdfioFileError(obj->pdf, "Unable to open compressed object stream %lu.", (unsigned long)obj->number);
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
|
||||||
|
_pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
|
||||||
|
|
||||||
|
// Read the object numbers from the beginning of the stream...
|
||||||
|
while (_pdfioTokenGet(&tb, buffer, sizeof(buffer)))
|
||||||
|
{
|
||||||
|
// Stop if this isn't an object number...
|
||||||
|
if (!isdigit(buffer[0] & 255))
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Stop if we have too many objects...
|
||||||
|
if (num_objs >= (sizeof(objs) / sizeof(objs[0])))
|
||||||
|
{
|
||||||
|
_pdfioFileError(obj->pdf, "Too many compressed objects in one stream.");
|
||||||
|
pdfioStreamClose(st);
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the object in memory...
|
||||||
|
objs[num_objs ++] = add_obj(obj->pdf, (size_t)strtoimax(buffer, NULL, 10), 0, 0);
|
||||||
|
|
||||||
|
// Skip offset
|
||||||
|
_pdfioTokenGet(&tb, buffer, sizeof(buffer));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!buffer[0])
|
||||||
|
{
|
||||||
|
pdfioStreamClose(st);
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
|
||||||
|
_pdfioTokenPush(&tb, buffer);
|
||||||
|
|
||||||
|
// Read the objects themselves...
|
||||||
|
for (cur_obj = 0; cur_obj < num_objs; cur_obj ++)
|
||||||
|
{
|
||||||
|
if (!_pdfioValueRead(obj->pdf, &tb, &(objs[cur_obj]->value)))
|
||||||
|
{
|
||||||
|
pdfioStreamClose(st);
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the stream and return
|
||||||
|
pdfioStreamClose(st);
|
||||||
|
|
||||||
|
return (true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// 'load_xref()' - Load an XREF table...
|
// 'load_xref()' - Load an XREF table...
|
||||||
//
|
//
|
||||||
@ -545,6 +630,7 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
|
|||||||
// Cross-reference stream
|
// Cross-reference stream
|
||||||
pdfio_obj_t *obj; // Object
|
pdfio_obj_t *obj; // Object
|
||||||
size_t i; // Looping var
|
size_t i; // Looping var
|
||||||
|
pdfio_array_t *index_array; // Index array
|
||||||
pdfio_array_t *w_array; // W array
|
pdfio_array_t *w_array; // W array
|
||||||
size_t w[3]; // Size of each cross-reference field
|
size_t w[3]; // Size of each cross-reference field
|
||||||
size_t w_2, // Offset to second field
|
size_t w_2, // Offset to second field
|
||||||
@ -552,6 +638,8 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
|
|||||||
size_t w_total; // Total length
|
size_t w_total; // Total length
|
||||||
pdfio_stream_t *st; // Stream
|
pdfio_stream_t *st; // Stream
|
||||||
unsigned char buffer[32]; // Read buffer
|
unsigned char buffer[32]; // Read buffer
|
||||||
|
size_t num_sobjs = 0, // Number of object streams
|
||||||
|
sobjs[1000]; // Object streams to load
|
||||||
|
|
||||||
if ((number = strtoimax(line, &ptr, 10)) < 1)
|
if ((number = strtoimax(line, &ptr, 10)) < 1)
|
||||||
{
|
{
|
||||||
@ -605,6 +693,20 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
|
|||||||
|
|
||||||
obj->stream_offset = _pdfioFileTell(pdf);
|
obj->stream_offset = _pdfioFileTell(pdf);
|
||||||
|
|
||||||
|
if ((index_array = pdfioDictGetArray(trailer.value.dict, "Index")) != NULL)
|
||||||
|
{
|
||||||
|
if (index_array->num_values > 2)
|
||||||
|
{
|
||||||
|
// TODO: Support Index array with multiple values in xref streams
|
||||||
|
_pdfioFileError(pdf, "Multiple indices not supported in cross-reference stream.");
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
|
||||||
|
number = (intmax_t)pdfioArrayGetNumber(index_array, 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
number = 0;
|
||||||
|
|
||||||
if ((w_array = pdfioDictGetArray(trailer.value.dict, "W")) == NULL)
|
if ((w_array = pdfioDictGetArray(trailer.value.dict, "W")) == NULL)
|
||||||
{
|
{
|
||||||
_pdfioFileError(pdf, "Cross-reference stream does not have required W key.");
|
_pdfioFileError(pdf, "Cross-reference stream does not have required W key.");
|
||||||
@ -640,15 +742,9 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
|
|||||||
if (buffer[0] == 0)
|
if (buffer[0] == 0)
|
||||||
{
|
{
|
||||||
// Ignore free objects...
|
// Ignore free objects...
|
||||||
|
number ++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else if (buffer[0] == 2)
|
|
||||||
{
|
|
||||||
// TODO: Add support for compressed object streams...
|
|
||||||
// Compressed object...
|
|
||||||
_pdfioFileError(pdf, "PDF file contains compressed object streams which are not currently supported.");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 1, offset = buffer[w_2]; i < w[1]; i ++)
|
for (i = 1, offset = buffer[w_2]; i < w[1]; i ++)
|
||||||
@ -669,13 +765,59 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
|
|||||||
|
|
||||||
// Create a placeholder for the object in memory...
|
// Create a placeholder for the object in memory...
|
||||||
if (pdfioFileFindObject(pdf, (size_t)number))
|
if (pdfioFileFindObject(pdf, (size_t)number))
|
||||||
|
{
|
||||||
|
number ++;
|
||||||
continue; // Don't replace newer object...
|
continue; // Don't replace newer object...
|
||||||
|
}
|
||||||
|
|
||||||
if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset))
|
if (w[0] > 0 && buffer[0] == 2)
|
||||||
|
{
|
||||||
|
// Object streams need to be loaded into memory...
|
||||||
|
if ((obj = pdfioFileFindObject(pdf, (size_t)offset)) != NULL)
|
||||||
|
{
|
||||||
|
// Load it now...
|
||||||
|
if (!load_obj_stream(obj))
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Add it to the list of objects to load later...
|
||||||
|
for (i = 0; i < num_sobjs; i ++)
|
||||||
|
{
|
||||||
|
if (sobjs[i] == (size_t)offset)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i >= num_sobjs && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
|
||||||
|
sobjs[num_sobjs ++] = (size_t)offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset))
|
||||||
return (false);
|
return (false);
|
||||||
|
|
||||||
|
number ++;
|
||||||
}
|
}
|
||||||
|
|
||||||
pdfioStreamClose(st);
|
pdfioStreamClose(st);
|
||||||
|
|
||||||
|
// Load any object streams that are left...
|
||||||
|
PDFIO_DEBUG("load_xref: %lu compressed object streams to load.\n", (unsigned long)num_sobjs);
|
||||||
|
|
||||||
|
for (i = 0; i < num_sobjs; i ++)
|
||||||
|
{
|
||||||
|
if ((obj = pdfioFileFindObject(pdf, sobjs[i])) != NULL)
|
||||||
|
{
|
||||||
|
PDFIO_DEBUG("load_xref: Loading compressed object stream %lu (pdf=%p, obj->pdf=%p).\n", (unsigned long)sobjs[i], pdf, obj->pdf);
|
||||||
|
|
||||||
|
if (!load_obj_stream(obj))
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_pdfioFileError(pdf, "Unable to find compressed object stream %lu.", (unsigned long)sobjs[i]);
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (!strcmp(line, "xref"))
|
else if (!strcmp(line, "xref"))
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user