Initial support for reading pages dictionary.

This commit is contained in:
Michael R Sweet 2021-05-08 22:50:18 -04:00
parent e1c145b10a
commit 5f43761537
No known key found for this signature in database
GPG Key ID: 999559A027815955
5 changed files with 104 additions and 6 deletions

View File

@ -450,7 +450,7 @@ pdfioArrayGetObject(pdfio_array_t *a, // I - Array
if (!a || n >= a->num_values || a->values[n].type != PDFIO_VALTYPE_INDIRECT) if (!a || n >= a->num_values || a->values[n].type != PDFIO_VALTYPE_INDIRECT)
return (NULL); return (NULL);
else else
return (pdfioFileGetObject(a->pdf, a->values[n].value.indirect.number)); return (pdfioFileFindObject(a->pdf, a->values[n].value.indirect.number));
} }

View File

@ -265,7 +265,7 @@ pdfioDictGetObject(pdfio_dict_t *dict, // I - Dictionary
_pdfio_value_t *value = _pdfioDictGetValue(dict, key); _pdfio_value_t *value = _pdfioDictGetValue(dict, key);
if (value && value->type == PDFIO_VALTYPE_INDIRECT) if (value && value->type == PDFIO_VALTYPE_INDIRECT)
return (pdfioFileGetObject(dict->pdf, value->value.indirect.number)); return (pdfioFileFindObject(dict->pdf, value->value.indirect.number));
else else
return (NULL); return (NULL);
} }

View File

@ -24,6 +24,7 @@
static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset); static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b); static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b);
static bool load_obj_stream(pdfio_obj_t *obj); static bool load_obj_stream(pdfio_obj_t *obj);
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj);
static bool load_xref(pdfio_file_t *pdf, off_t xref_offset); static bool load_xref(pdfio_file_t *pdf, off_t xref_offset);
static bool write_trailer(pdfio_file_t *pdf); static bool write_trailer(pdfio_file_t *pdf);
@ -590,6 +591,74 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
} }
//
// 'load_pages_node()' - Load one node of the page tree.
//
// A node whose dictionary has a "Kids" array is treated as a parent and each
// kid is loaded recursively; any other node is appended to the file's page
// list.  The page tree is read from the file, so the nesting depth is bounded:
// this keeps a "Kids" array that refers back to one of its ancestors from
// recursing until the stack is exhausted.
//

static bool                             // O - `true` on success, `false` on error
load_pages_node(pdfio_file_t *pdf,      // I - PDF file
                pdfio_obj_t  *obj,      // I - Page tree node object
                size_t       depth)     // I - Nesting depth of this node (root is 0)
{
  enum { MAX_PAGES_DEPTH = 32 };        // Deepest nesting of "Kids" arrays followed

  pdfio_dict_t  *dict;                  // Node dictionary
  const char    *type;                  // Node type
  pdfio_array_t *kids;                  // Kids array


  // Range check input...
  if (!obj)
  {
    _pdfioFileError(pdf, "Unable to find pages object.");
    return (false);
  }

  // Get the object dictionary and make sure this is a Pages or Page object...
  if ((dict = pdfioObjGetDict(obj)) == NULL)
  {
    _pdfioFileError(pdf, "No dictionary for pages object.");
    return (false);
  }

  if ((type = pdfioDictGetName(dict, "Type")) == NULL || (strcmp(type, "Pages") && strcmp(type, "Page")))
  {
    // Report the failure so the caller does not fail without any message...
    _pdfioFileError(pdf, "Pages object has a missing or invalid Type.");
    return (false);
  }

  // If there is a Kids array, then this is a parent node and we have to look
  // at the child objects...
  if ((kids = pdfioDictGetArray(dict, "Kids")) != NULL)
  {
    size_t      i,                      // Looping var
                num_kids;               // Number of elements in array

    // Refuse to descend forever through a cyclic or absurdly deep tree...
    if (depth >= MAX_PAGES_DEPTH)
    {
      _pdfioFileError(pdf, "Depth of pages objects too great to load.");
      return (false);
    }

    // Load the child objects...
    for (i = 0, num_kids = pdfioArrayGetSize(kids); i < num_kids; i ++)
    {
      if (!load_pages_node(pdf, pdfioArrayGetObject(kids, i), depth + 1))
        return (false);
    }
  }
  else
  {
    // Add this page, growing the array 32 entries at a time...
    if (pdf->num_pages >= pdf->alloc_pages)
    {
      pdfio_obj_t **temp = (pdfio_obj_t **)realloc(pdf->pages, (pdf->alloc_pages + 32) * sizeof(pdfio_obj_t *));

      if (!temp)
      {
        // The existing array is still owned by `pdf->pages`...
        _pdfioFileError(pdf, "Unable to allocate memory for pages.");
        return (false);
      }

      pdf->alloc_pages += 32;
      pdf->pages       = temp;
    }

    pdf->pages[pdf->num_pages ++] = obj;
  }

  return (true);
}


//
// 'load_pages()' - Load pages in the document.
//
// Walks the page tree starting at "obj" and appends every node that has no
// "Kids" array to `pdf->pages` in the order visited.  An error is reported
// through `_pdfioFileError()` and `false` is returned if a node is missing,
// has no dictionary, is not a "Pages" or "Page" object, is nested too deeply,
// or if the page array cannot be grown.
//

static bool                             // O - `true` on success, `false` on error
load_pages(pdfio_file_t *pdf,           // I - PDF file
           pdfio_obj_t  *obj)           // I - Page object
{
  return (load_pages_node(pdf, obj, 0));
}
// //
// 'load_xref()' - Load an XREF table... // 'load_xref()' - Load an XREF table...
// //
@ -936,12 +1005,13 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
return (false); return (false);
} }
PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", pdf->root, (unsigned long)pdf->root->number);
pdf->info = pdfioDictGetObject(pdf->trailer, "Info"); pdf->info = pdfioDictGetObject(pdf->trailer, "Info");
pdf->encrypt = pdfioDictGetObject(pdf->trailer, "Encrypt"); pdf->encrypt = pdfioDictGetObject(pdf->trailer, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer, "ID"); pdf->id_array = pdfioDictGetArray(pdf->trailer, "ID");
// If we get this far, we successfully loaded everything... return (load_pages(pdf, pdfioDictGetObject(pdfioObjGetDict(pdf->root), "Pages")));
return (true);
} }

View File

@ -169,7 +169,9 @@ extern size_t pdfioObjGetNumber(pdfio_obj_t *obj) PDFIO_PUBLIC;
extern const char *pdfioObjGetType(pdfio_obj_t *obj) PDFIO_PUBLIC; extern const char *pdfioObjGetType(pdfio_obj_t *obj) PDFIO_PUBLIC;
extern pdfio_stream_t *pdfioObjOpenStream(pdfio_obj_t *obj, bool decode) PDFIO_PUBLIC; extern pdfio_stream_t *pdfioObjOpenStream(pdfio_obj_t *obj, bool decode) PDFIO_PUBLIC;
extern pdfio_obj_t *pdfioPageCopy(pdfio_file_t *pdf, pdfio_obj_t *src) PDFIO_PUBLIC; extern pdfio_obj_t *pdfioPageCopy(pdfio_file_t *pdf, pdfio_obj_t *srcpage) PDFIO_PUBLIC;
extern size_t pdfioPageGetNumStreams(pdfio_obj_t *page) PDFIO_PUBLIC;
extern pdfio_stream_t *pdfioPageOpenStream(pdfio_obj_t *page, size_t n, bool decode) PDFIO_PUBLIC;
extern bool pdfioStreamClose(pdfio_stream_t *st) PDFIO_PUBLIC; extern bool pdfioStreamClose(pdfio_stream_t *st) PDFIO_PUBLIC;
extern bool pdfioStreamConsume(pdfio_stream_t *st, size_t bytes) PDFIO_PUBLIC; extern bool pdfioStreamConsume(pdfio_stream_t *st, size_t bytes) PDFIO_PUBLIC;

View File

@ -40,7 +40,33 @@ main(int argc, // I - Number of command-line arguments
num_objs = pdfioFileGetNumObjects(pdf); num_objs = pdfioFileGetNumObjects(pdf);
num_pages = pdfioFileGetNumPages(pdf); num_pages = pdfioFileGetNumPages(pdf);
printf("%s: PDF %s, %d objects, %d pages.\n", argv[i], pdfioFileGetVersion(pdf), (int)num_objs, (int)num_pages); printf("%s: PDF %s, %d pages, %d objects.\n", argv[i], pdfioFileGetVersion(pdf), (int)num_pages, (int)num_objs);
for (n = 0; n < num_pages; n ++)
{
if ((obj = pdfioFileGetPage(pdf, n)) == NULL)
{
printf("%s: Unable to get page #%d.\n", argv[i], (int)n + 1);
}
else
{
pdfio_rect_t media_box; // MediaBox value
memset(&media_box, 0, sizeof(media_box));
dict = pdfioObjGetDict(obj);
if (!pdfioDictGetRect(dict, "MediaBox", &media_box))
{
if ((obj = pdfioDictGetObject(dict, "Parent")) != NULL)
{
dict = pdfioObjGetDict(obj);
pdfioDictGetRect(dict, "MediaBox", &media_box);
}
}
printf("%s: Page #%d is %gx%g.\n", argv[i], (int)n + 1, media_box.x2, media_box.y2);
}
}
for (n = 0; n < num_objs; n ++) for (n = 0; n < num_objs; n ++)
{ {