Load the page objects of a PDF file when its cross-reference data is loaded.

- pdfioArrayGetObject() and pdfioDictGetObject() resolve indirect references
  with pdfioFileFindObject() instead of pdfioFileGetObject().
- load_xref() now finishes by calling the new load_pages(), which walks the
  "Pages" object named by the root dictionary and fills the "pages" array.
  The walk is depth-limited so that a "Kids" array that refers back to one of
  its ancestors is reported as an error instead of recursing without end.
- pdfio.h renames the second pdfioPageCopy() parameter and declares
  pdfioPageGetNumStreams() and pdfioPageOpenStream().
- testpdfio reports the page count first and prints the MediaBox x2/y2 of each
  page, looking in the "Parent" object when the page itself has no MediaBox.

NOTE(review): whitespace inside context lines was reconstructed and the
post-image blob id on the pdfio-file.c "index" line predates the load_pages()
changes - apply with whitespace-insensitive matching and confirm.

diff --git a/pdfio-array.c b/pdfio-array.c
index 7d13bfd..98d0a94 100644
--- a/pdfio-array.c
+++ b/pdfio-array.c
@@ -450,7 +450,7 @@ pdfioArrayGetObject(pdfio_array_t *a, // I - Array
   if (!a || n >= a->num_values || a->values[n].type != PDFIO_VALTYPE_INDIRECT)
     return (NULL);
   else
-    return (pdfioFileGetObject(a->pdf, a->values[n].value.indirect.number));
+    return (pdfioFileFindObject(a->pdf, a->values[n].value.indirect.number));
 }
 
 
diff --git a/pdfio-dict.c b/pdfio-dict.c
index ec35b8b..828eeca 100644
--- a/pdfio-dict.c
+++ b/pdfio-dict.c
@@ -265,7 +265,7 @@ pdfioDictGetObject(pdfio_dict_t *dict, // I - Dictionary
   _pdfio_value_t *value = _pdfioDictGetValue(dict, key);
 
   if (value && value->type == PDFIO_VALTYPE_INDIRECT)
-    return (pdfioFileGetObject(dict->pdf, value->value.indirect.number));
+    return (pdfioFileFindObject(dict->pdf, value->value.indirect.number));
   else
     return (NULL);
 }
diff --git a/pdfio-file.c b/pdfio-file.c
index d21510b..cfec7a8 100644
--- a/pdfio-file.c
+++ b/pdfio-file.c
@@ -24,6 +24,7 @@
 static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
 static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b);
 static bool load_obj_stream(pdfio_obj_t *obj);
+static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
 static bool load_xref(pdfio_file_t *pdf, off_t xref_offset);
 static bool write_trailer(pdfio_file_t *pdf);
 
@@ -590,6 +591,98 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
 }
 
 
+//
+// Maximum depth of the page tree that 'load_pages()' will descend.
+//
+
+#define LOAD_PAGES_MAX_DEPTH 32
+
+
+//
+// 'load_pages()' - Load pages in the document.
+//
+// Walks the page tree depth-first starting at "obj".  A node with a "Kids"
+// array is an interior node whose kids are visited in order; any other node
+// is appended to the "pages" array of "pdf".  Recursion is limited to
+// LOAD_PAGES_MAX_DEPTH levels so that a "Kids" array that refers back to one
+// of its ancestors cannot recurse until the stack is exhausted.
+//
+
+static bool                             // O - `true` on success, `false` on error
+load_pages(pdfio_file_t *pdf,           // I - PDF file
+           pdfio_obj_t  *obj,           // I - Page object
+           size_t       depth)          // I - Depth of page object in the tree
+{
+  pdfio_dict_t  *dict;                  // Page object dictionary
+  const char    *type;                  // Node type
+  pdfio_array_t *kids;                  // Kids array
+
+
+  // Range check input...
+  if (!obj)
+  {
+    _pdfioFileError(pdf, "Unable to find pages object.");
+    return (false);
+  }
+
+  // Stop before a looping or absurdly deep page tree overflows the stack...
+  if (depth >= LOAD_PAGES_MAX_DEPTH)
+  {
+    _pdfioFileError(pdf, "Page tree is too deep or contains a loop.");
+    return (false);
+  }
+
+  // Get the object dictionary and make sure this is a Pages or Page object...
+  if ((dict = pdfioObjGetDict(obj)) == NULL)
+  {
+    _pdfioFileError(pdf, "No dictionary for pages object.");
+    return (false);
+  }
+
+  if ((type = pdfioDictGetName(dict, "Type")) == NULL || (strcmp(type, "Pages") && strcmp(type, "Page")))
+  {
+    _pdfioFileError(pdf, "Pages object is not a Pages or Page node.");
+    return (false);
+  }
+
+  // If there is a Kids array, then this is a parent node and we have to look
+  // at the child objects...
+  if ((kids = pdfioDictGetArray(dict, "Kids")) != NULL)
+  {
+    // Load the child objects...
+    size_t      i,                      // Looping var
+                num_kids;               // Number of elements in array
+
+    for (i = 0, num_kids = pdfioArrayGetSize(kids); i < num_kids; i ++)
+    {
+      if (!load_pages(pdf, pdfioArrayGetObject(kids, i), depth + 1))
+        return (false);
+    }
+  }
+  else
+  {
+    // Add this page, growing the array 32 entries at a time...
+    if (pdf->num_pages >= pdf->alloc_pages)
+    {
+      pdfio_obj_t **temp = (pdfio_obj_t **)realloc(pdf->pages, (pdf->alloc_pages + 32) * sizeof(pdfio_obj_t *));
+
+      if (!temp)
+      {
+        _pdfioFileError(pdf, "Unable to allocate memory for pages.");
+        return (false);
+      }
+
+      pdf->alloc_pages += 32;
+      pdf->pages = temp;
+    }
+
+    pdf->pages[pdf->num_pages ++] = obj;
+  }
+
+  return (true);
+}
+
+
 //
 // 'load_xref()' - Load an XREF table...
 //
@@ -936,12 +1029,14 @@ load_xref(pdfio_file_t *pdf, // I - PDF file
     return (false);
   }
 
+  PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", (void *)pdf->root, (unsigned long)pdf->root->number);
+
   pdf->info = pdfioDictGetObject(pdf->trailer, "Info");
   pdf->encrypt = pdfioDictGetObject(pdf->trailer, "Encrypt");
   pdf->id_array = pdfioDictGetArray(pdf->trailer, "ID");
 
-  // If we get this far, we successfully loaded everything...
-  return (true);
+  // Finish by collecting the pages under the root dictionary's Pages object...
+  return (load_pages(pdf, pdfioDictGetObject(pdfioObjGetDict(pdf->root), "Pages"), 0));
 }
 
 
diff --git a/pdfio.h b/pdfio.h
index dc155be..53084a4 100644
--- a/pdfio.h
+++ b/pdfio.h
@@ -169,7 +169,9 @@ extern size_t pdfioObjGetNumber(pdfio_obj_t *obj) PDFIO_PUBLIC;
 extern const char *pdfioObjGetType(pdfio_obj_t *obj) PDFIO_PUBLIC;
 extern pdfio_stream_t *pdfioObjOpenStream(pdfio_obj_t *obj, bool decode) PDFIO_PUBLIC;
 
-extern pdfio_obj_t *pdfioPageCopy(pdfio_file_t *pdf, pdfio_obj_t *src) PDFIO_PUBLIC;
+extern pdfio_obj_t *pdfioPageCopy(pdfio_file_t *pdf, pdfio_obj_t *srcpage) PDFIO_PUBLIC;
+extern size_t pdfioPageGetNumStreams(pdfio_obj_t *page) PDFIO_PUBLIC;
+extern pdfio_stream_t *pdfioPageOpenStream(pdfio_obj_t *page, size_t n, bool decode) PDFIO_PUBLIC;
 
 extern bool pdfioStreamClose(pdfio_stream_t *st) PDFIO_PUBLIC;
 extern bool pdfioStreamConsume(pdfio_stream_t *st, size_t bytes) PDFIO_PUBLIC;
diff --git a/testpdfio.c b/testpdfio.c
index 36f8162..66137f1 100644
--- a/testpdfio.c
+++ b/testpdfio.c
@@ -40,7 +40,33 @@ main(int argc, // I - Number of command-line arguments
       num_objs = pdfioFileGetNumObjects(pdf);
       num_pages = pdfioFileGetNumPages(pdf);
 
-      printf("%s: PDF %s, %d objects, %d pages.\n", argv[i], pdfioFileGetVersion(pdf), (int)num_objs, (int)num_pages);
+      printf("%s: PDF %s, %d pages, %d objects.\n", argv[i], pdfioFileGetVersion(pdf), (int)num_pages, (int)num_objs);
+
+      for (n = 0; n < num_pages; n ++)
+      {
+        if ((obj = pdfioFileGetPage(pdf, n)) == NULL)
+        {
+          printf("%s: Unable to get page #%d.\n", argv[i], (int)n + 1);
+        }
+        else
+        {
+          pdfio_rect_t media_box; // MediaBox value
+
+          memset(&media_box, 0, sizeof(media_box));
+          dict = pdfioObjGetDict(obj);
+
+          if (!pdfioDictGetRect(dict, "MediaBox", &media_box))
+          {
+            if ((obj = pdfioDictGetObject(dict, "Parent")) != NULL)
+            {
+              dict = pdfioObjGetDict(obj);
+              pdfioDictGetRect(dict, "MediaBox", &media_box);
+            }
+          }
+
+          printf("%s: Page #%d is %gx%g.\n", argv[i], (int)n + 1, media_box.x2, media_box.y2);
+        }
+      }
 
       for (n = 0; n < num_objs; n ++)
       {