mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2024-12-27 05:48:20 +01:00
Increase the maximum number of object streams in a file (Issue #58) - most files
only contain 1 or 2... Change the implementation of add/find object to use a custom binary insertion sort algorithm rather than doing a qsort after every addition. This results in a significant improvement in open speed - from 2371 seconds (about 39.5 minutes) to 3.1 seconds for one large test file (an ESRI standard).
This commit is contained in:
parent
2b92044504
commit
2a85baaf81
@ -5,16 +5,20 @@ Changes in PDFio
|
|||||||
v1.2.0 (Month DD, YYYY)
|
v1.2.0 (Month DD, YYYY)
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
|
- Now use autoconf to configure the PDFio sources (Issue #54)
|
||||||
- Added `pdfioFileCreateNumberObj` and `pdfioFileCreateStringObj` functions
|
- Added `pdfioFileCreateNumberObj` and `pdfioFileCreateStringObj` functions
|
||||||
(Issue #14)
|
(Issue #14)
|
||||||
- Added `pdfioContentTextMeasure` function (Issue #17)
|
- Added `pdfioContentTextMeasure` function (Issue #17)
|
||||||
- Added `pdfioContentTextNewLineShow` and `pdfioContentTextNewLineShowf`
|
- Added `pdfioContentTextNewLineShow` and `pdfioContentTextNewLineShowf`
|
||||||
functions (Issue #24)
|
functions (Issue #24)
|
||||||
- Renamed `pdfioContentTextNextLine` to `pdfioContentTextNewLine`.
|
- Renamed `pdfioContentTextNextLine` to `pdfioContentTextNewLine`.
|
||||||
- Now use autoconf to configure the PDFio sources (Issue #54)
|
- Updated the maximum number of object streams in a single file from 4096 to
|
||||||
|
8192 (Issue #58)
|
||||||
- Updated the token reading code to protect against some obvious abuses of the
|
- Updated the token reading code to protect against some obvious abuses of the
|
||||||
PDF format.
|
PDF format.
|
||||||
- Updated the xref reading code to protect against loops.
|
- Updated the xref reading code to protect against loops.
|
||||||
|
- Updated the object handling code to use a binary insertion algorithm -
|
||||||
|
provides a significant (~800x) improvement in open times.
|
||||||
- Fixed handling of encrypted PDFs with per-object file IDs (Issue #42)
|
- Fixed handling of encrypted PDFs with per-object file IDs (Issue #42)
|
||||||
- Fixed handling of trailer dictionaries that started immediately after the
|
- Fixed handling of trailer dictionaries that started immediately after the
|
||||||
"trailer" keyword (Issue #58)
|
"trailer" keyword (Issue #58)
|
||||||
|
132
pdfio-file.c
132
pdfio-file.c
@ -19,7 +19,6 @@
|
|||||||
|
|
||||||
static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
|
static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
|
||||||
static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b);
|
static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b);
|
||||||
static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b);
|
|
||||||
static const char *get_info_string(pdfio_file_t *pdf, const char *key);
|
static const char *get_info_string(pdfio_file_t *pdf, const char *key);
|
||||||
static bool load_obj_stream(pdfio_obj_t *obj);
|
static bool load_obj_stream(pdfio_obj_t *obj);
|
||||||
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
|
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
|
||||||
@ -916,20 +915,56 @@ pdfioFileFindObj(
|
|||||||
pdfio_file_t *pdf, // I - PDF file
|
pdfio_file_t *pdf, // I - PDF file
|
||||||
size_t number) // I - Object number (1 to N)
|
size_t number) // I - Object number (1 to N)
|
||||||
{
|
{
|
||||||
pdfio_obj_t key, // Search key
|
size_t left, // Left object
|
||||||
*keyptr, // Pointer to key
|
right, // Right object
|
||||||
**match; // Pointer to match
|
current; // Current object
|
||||||
|
|
||||||
|
|
||||||
if (pdf->num_objs > 0)
|
PDFIO_DEBUG("pdfioFileFindObj(pdf=%p, number=%lu) alloc_objs=%lu, num_objs=%lu, objs=%p\n", (void *)pdf, (unsigned long)number, (unsigned long)(pdf ? pdf->alloc_objs : 0), (unsigned long)(pdf ? pdf->num_objs : 0), (void *)(pdf ? pdf->objs : NULL));
|
||||||
|
|
||||||
|
// Range check input...
|
||||||
|
if (!pdf || pdf->num_objs == 0 || number < 1)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
// Do a binary search for the object...
|
||||||
|
if ((current = number - 1) >= pdf->num_objs)
|
||||||
|
current = pdf->num_objs / 2;
|
||||||
|
|
||||||
|
PDFIO_DEBUG("pdfioFileFindObj: objs[current=%lu]=%p\n", (unsigned long)current, (void *)pdf->objs[current]);
|
||||||
|
|
||||||
|
if (number == pdf->objs[current]->number)
|
||||||
{
|
{
|
||||||
key.number = number;
|
// Fast match...
|
||||||
keyptr = &key;
|
return (pdf->objs[current]);
|
||||||
match = (pdfio_obj_t **)bsearch(&keyptr, pdf->objs, pdf->num_objs, sizeof(pdfio_obj_t *), (int (*)(const void *, const void *))compare_objs);
|
}
|
||||||
|
else if (number < pdf->objs[current]->number)
|
||||||
return (match ? *match : NULL);
|
{
|
||||||
|
left = 0;
|
||||||
|
right = current;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
left = current;
|
||||||
|
right = pdf->num_objs - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while ((right - left) > 1)
|
||||||
|
{
|
||||||
|
current = (left + right) / 2;
|
||||||
|
|
||||||
|
if (number == pdf->objs[current]->number)
|
||||||
|
return (pdf->objs[current]);
|
||||||
|
else if (number < pdf->objs[current]->number)
|
||||||
|
right = current;
|
||||||
|
else
|
||||||
|
left = current;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (number == pdf->objs[left]->number)
|
||||||
|
return (pdf->objs[left]);
|
||||||
|
else if (number == pdf->objs[right]->number)
|
||||||
|
return (pdf->objs[right]);
|
||||||
|
else
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1385,6 +1420,9 @@ add_obj(pdfio_file_t *pdf, // I - PDF file
|
|||||||
off_t offset) // I - Offset in file
|
off_t offset) // I - Offset in file
|
||||||
{
|
{
|
||||||
pdfio_obj_t *obj; // Object
|
pdfio_obj_t *obj; // Object
|
||||||
|
size_t left, // Left object
|
||||||
|
right, // Right object
|
||||||
|
current; // Current object (center)
|
||||||
|
|
||||||
|
|
||||||
// Allocate memory for the object...
|
// Allocate memory for the object...
|
||||||
@ -1410,8 +1448,6 @@ add_obj(pdfio_file_t *pdf, // I - PDF file
|
|||||||
pdf->alloc_objs += 32;
|
pdf->alloc_objs += 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
pdf->objs[pdf->num_objs ++] = obj;
|
|
||||||
|
|
||||||
obj->pdf = pdf;
|
obj->pdf = pdf;
|
||||||
obj->number = number;
|
obj->number = number;
|
||||||
obj->generation = generation;
|
obj->generation = generation;
|
||||||
@ -1419,9 +1455,56 @@ add_obj(pdfio_file_t *pdf, // I - PDF file
|
|||||||
|
|
||||||
PDFIO_DEBUG("add_obj: obj=%p, ->pdf=%p, ->number=%lu, ->offset=%lu\n", obj, pdf, (unsigned long)obj->number, (unsigned long)offset);
|
PDFIO_DEBUG("add_obj: obj=%p, ->pdf=%p, ->number=%lu, ->offset=%lu\n", obj, pdf, (unsigned long)obj->number, (unsigned long)offset);
|
||||||
|
|
||||||
// Re-sort object array as needed...
|
// Insert object into array as needed...
|
||||||
if (pdf->num_objs > 1 && pdf->objs[pdf->num_objs - 2]->number > number)
|
if (pdf->num_objs == 0 || obj->number > pdf->objs[pdf->num_objs - 1]->number)
|
||||||
qsort(pdf->objs, pdf->num_objs, sizeof(pdfio_obj_t *), (int (*)(const void *, const void *))compare_objs);
|
{
|
||||||
|
// Append object...
|
||||||
|
PDFIO_DEBUG("add_obj: Appending at %lu\n", (unsigned long)pdf->num_objs);
|
||||||
|
|
||||||
|
pdf->objs[pdf->num_objs] = obj;
|
||||||
|
pdf->last_obj = pdf->num_objs;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Insert object...
|
||||||
|
if (obj->number < pdf->objs[pdf->last_obj]->number)
|
||||||
|
{
|
||||||
|
left = 0;
|
||||||
|
right = pdf->last_obj;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
left = pdf->last_obj;
|
||||||
|
right = pdf->num_objs - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
while ((right - left) > 1)
|
||||||
|
{
|
||||||
|
current = (left + right) / 2;
|
||||||
|
|
||||||
|
if (obj->number < pdf->objs[current]->number)
|
||||||
|
right = current;
|
||||||
|
else
|
||||||
|
left = current;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (obj->number < pdf->objs[left]->number)
|
||||||
|
current = left;
|
||||||
|
else if (obj->number < pdf->objs[right]->number)
|
||||||
|
current = right;
|
||||||
|
else
|
||||||
|
current = right;
|
||||||
|
|
||||||
|
PDFIO_DEBUG("add_obj: Inserting at %lu\n", (unsigned long)current);
|
||||||
|
|
||||||
|
if (current < pdf->num_objs)
|
||||||
|
memmove(pdf->objs + current + 1, pdf->objs + current, (pdf->num_objs - current) * sizeof(pdfio_obj_t *));
|
||||||
|
|
||||||
|
pdf->objs[current] = obj;
|
||||||
|
pdf->last_obj = current;
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf->num_objs ++;
|
||||||
|
|
||||||
return (obj);
|
return (obj);
|
||||||
}
|
}
|
||||||
@ -1448,23 +1531,6 @@ compare_objmaps(_pdfio_objmap_t *a, // I - First object map
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// 'compare_objs()' - Compare the object numbers of two objects.
|
|
||||||
//
|
|
||||||
|
|
||||||
static int // O - Result of comparison
|
|
||||||
compare_objs(pdfio_obj_t **a, // I - First object
|
|
||||||
pdfio_obj_t **b) // I - Second object
|
|
||||||
{
|
|
||||||
if ((*a)->number < (*b)->number)
|
|
||||||
return (-1);
|
|
||||||
else if ((*a)->number == (*b)->number)
|
|
||||||
return (0);
|
|
||||||
else
|
|
||||||
return (1);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// 'get_info_string()' - Get a string value from the Info dictionary.
|
// 'get_info_string()' - Get a string value from the Info dictionary.
|
||||||
//
|
//
|
||||||
@ -1737,7 +1803,7 @@ load_xref(
|
|||||||
pdfio_stream_t *st; // Stream
|
pdfio_stream_t *st; // Stream
|
||||||
unsigned char buffer[32]; // Read buffer
|
unsigned char buffer[32]; // Read buffer
|
||||||
size_t num_sobjs = 0, // Number of object streams
|
size_t num_sobjs = 0, // Number of object streams
|
||||||
sobjs[4096]; // Object streams to load
|
sobjs[8192]; // Object streams to load
|
||||||
pdfio_obj_t *current; // Current object
|
pdfio_obj_t *current; // Current object
|
||||||
|
|
||||||
if ((number = strtoimax(line, &ptr, 10)) < 1)
|
if ((number = strtoimax(line, &ptr, 10)) < 1)
|
||||||
|
@ -266,7 +266,8 @@ struct _pdfio_file_s // PDF file structure
|
|||||||
alloc_dicts; // Allocated dictionaries
|
alloc_dicts; // Allocated dictionaries
|
||||||
pdfio_dict_t **dicts; // Dictionaries
|
pdfio_dict_t **dicts; // Dictionaries
|
||||||
size_t num_objs, // Number of objects
|
size_t num_objs, // Number of objects
|
||||||
alloc_objs; // Allocated objects
|
alloc_objs, // Allocated objects
|
||||||
|
last_obj; // Last object added
|
||||||
pdfio_obj_t **objs, // Objects
|
pdfio_obj_t **objs, // Objects
|
||||||
*current_obj; // Current object being written/read
|
*current_obj; // Current object being written/read
|
||||||
size_t num_objmaps, // Number of object maps
|
size_t num_objmaps, // Number of object maps
|
||||||
|
Loading…
Reference in New Issue
Block a user