From 35d22705fa674fc3af684400b1ec68305b70d99e Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Sat, 29 May 2021 20:00:48 -0400 Subject: [PATCH] Implement pdfioObjCopy and pdfioPageCopy, with tests. --- Makefile | 2 +- pdfio-array.c | 2 ++ pdfio-dict.c | 21 +++++++++++- pdfio-file.c | 86 ++++++++++++++++++++++++++++++++----------------- pdfio-object.c | 82 ++++++++++++++++++++++++++++++++++++++++------ pdfio-page.c | 31 ++++++++++++++---- pdfio-private.h | 2 ++ pdfio-stream.c | 6 ++-- pdfio-value.c | 18 +++++++++++ pdfio.h | 2 +- testpdfio.c | 60 +++++++++++++++++++++++++--------- 11 files changed, 244 insertions(+), 68 deletions(-) diff --git a/Makefile b/Makefile index de4e74a..a95c3a7 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ ARFLAGS = cr CC = cc CFLAGS = CODESIGN_IDENTITY = Developer ID -COMMONFLAGS = -g +COMMONFLAGS = -Os -g CPPFLAGS = DESTDIR = $(DSTROOT) DSO = cc diff --git a/pdfio-array.c b/pdfio-array.c index d63bde9..45d240e 100644 --- a/pdfio-array.c +++ b/pdfio-array.c @@ -242,6 +242,8 @@ pdfioArrayCopy(pdfio_file_t *pdf, // I - PDF file vdst; // Current destination value + PDFIO_DEBUG("pdfioArrayCopy(pdf=%p, a=%p(%p))\n", pdf, a, a ? a->pdf : NULL); + // Create the new array... if ((na = pdfioArrayCreate(pdf)) == NULL) return (NULL); diff --git a/pdfio-dict.c b/pdfio-dict.c index 6556b8b..73ef5ff 100644 --- a/pdfio-dict.c +++ b/pdfio-dict.c @@ -36,6 +36,8 @@ pdfioDictCopy(pdfio_file_t *pdf, // I - PDF file _pdfio_value_t v; // Current destination value + PDFIO_DEBUG("pdfioDictCopy(pdf=%p, dict=%p(%p))\n", pdf, dict, dict ? dict->pdf : NULL); + // Create the new dictionary... if ((ndict = pdfioDictCreate(pdf)) == NULL) return (NULL); @@ -49,7 +51,24 @@ pdfioDictCopy(pdfio_file_t *pdf, // I - PDF file // Copy and add each of the source dictionary's key/value pairs... 
for (i = dict->num_pairs, p = dict->pairs; i > 0; i --, p ++) { - if (!_pdfioValueCopy(pdf, &v, dict->pdf, &p->value)) + if (!strcmp(p->key, "Length") && p->value.type == PDFIO_VALTYPE_INDIRECT && dict->pdf != pdf) + { + // Don't use indirect stream lengths for copied objects... + pdfio_obj_t *lenobj = pdfioFileFindObject(dict->pdf, p->value.value.indirect.number); + // Length object + + v.type = PDFIO_VALTYPE_NUMBER; + if (lenobj) + { + if (lenobj->value.type == PDFIO_VALTYPE_NONE) + _pdfioObjLoad(lenobj); + + v.value.number = lenobj->value.value.number; + } + else + v.value.number = 0.0f; + } + else if (!_pdfioValueCopy(pdf, &v, dict->pdf, &p->value)) return (NULL); // Let pdfioFileClose do the cleanup... if (_pdfioStringIsAllocated(dict->pdf, p->key)) diff --git a/pdfio-file.c b/pdfio-file.c index 350a6f5..82a4457 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -74,6 +74,35 @@ _pdfioFileAddMappedObject( } +// +// '_pdfioFileAddPage()' - Add a page to a PDF file. +// + +bool // O - `true` on success and `false` on failure +_pdfioFileAddPage(pdfio_file_t *pdf, // I - PDF file + pdfio_obj_t *obj) // I - Page object +{ + // Add the page to the array of pages... + if (pdf->num_pages >= pdf->alloc_pages) + { + pdfio_obj_t **temp = (pdfio_obj_t **)realloc(pdf->pages, (pdf->alloc_pages + 16) * sizeof(pdfio_obj_t *)); + + if (!temp) + { + _pdfioFileError(pdf, "Unable to allocate memory for pages."); + return (false); + } + + pdf->alloc_pages += 16; + pdf->pages = temp; + } + + pdf->pages[pdf->num_pages ++] = obj; + + return (true); +} + + // // 'pdfioFileClose()' - Close a PDF file and free all memory used for it. 
// @@ -252,25 +281,37 @@ pdfio_obj_t * // O - New object pdfioFileCreateObject( pdfio_file_t *pdf, // I - PDF file pdfio_dict_t *dict) // I - Object dictionary +{ + _pdfio_value_t value; // Object value + + + value.type = PDFIO_VALTYPE_DICT; + value.value.dict = dict; + + return (_pdfioFileCreateObject(pdf, dict->pdf, &value)); +} + + +// +// '_pdfioFileCreateObject()' - Create a new object in a PDF file with a value. +// + +pdfio_obj_t * // O - New object +_pdfioFileCreateObject( + pdfio_file_t *pdf, // I - PDF file + pdfio_file_t *srcpdf, // I - Source PDF file, if any + _pdfio_value_t *value) // I - Object dictionary { pdfio_obj_t *obj; // New object // Range check input... - if (!pdf || !dict) - { - if (pdf) - _pdfioFileError(pdf, "Missing object dictionary."); - + if (!pdf) return (NULL); - } if (pdf->mode != _PDFIO_MODE_WRITE) return (NULL); - if (dict->pdf != pdf) - dict = pdfioDictCopy(pdf, dict); // Copy dictionary to new PDF - // Allocate memory for the object... if ((obj = (pdfio_obj_t *)calloc(1, sizeof(pdfio_obj_t))) == NULL) { @@ -297,10 +338,11 @@ pdfioFileCreateObject( pdf->objs[pdf->num_objs ++] = obj; // Initialize the object... - obj->pdf = pdf; - obj->number = pdf->num_objs; - obj->value.type = PDFIO_VALTYPE_DICT; - obj->value.value.dict = dict; + obj->pdf = pdf; + obj->number = pdf->num_objs; + + if (value) + _pdfioValueCopy(pdf, &obj->value, srcpdf, value); // Don't write anything just yet... return (obj); @@ -359,22 +401,8 @@ pdfioFileCreatePage(pdfio_file_t *pdf, // I - PDF file if (!pdfioObjClose(page)) return (NULL); - // Add the page to the array of pages... 
- if (pdf->num_pages >= pdf->alloc_pages) - { - pdfio_obj_t **temp = (pdfio_obj_t **)realloc(pdf->pages, (pdf->alloc_pages + 16) * sizeof(pdfio_obj_t *)); - - if (!temp) - { - _pdfioFileError(pdf, "Unable to allocate memory for pages."); - return (NULL); - } - - pdf->alloc_pages += 16; - pdf->pages = temp; - } - - pdf->pages[pdf->num_pages ++] = page; + if (!_pdfioFileAddPage(pdf, page)) + return (NULL); // Create the contents stream... return (pdfioObjCreateStream(contents, PDFIO_FILTER_FLATE)); diff --git a/pdfio-object.c b/pdfio-object.c index 0c805e9..3ca1b6e 100644 --- a/pdfio-object.c +++ b/pdfio-object.c @@ -42,22 +42,20 @@ pdfioObjClose(pdfio_obj_t *obj) // I - Object // Write the object value if (!write_obj_header(obj)) return (false); + + // Write the "endobj" line... + return (_pdfioFilePuts(obj->pdf, "endobj\n")); } else if (obj->stream) { // Close the stream... - if (!pdfioStreamClose(obj->stream)) - return (false); + return (pdfioStreamClose(obj->stream)); } else { // Already closed return (true); } - - // If we get here we wrote the object header or closed the stream and still - // need to write the "endobj" line... - return (_pdfioFilePuts(obj->pdf, "endobj\n")); } @@ -69,11 +67,70 @@ pdfio_obj_t * // O - New object or `NULL` on error pdfioObjCopy(pdfio_file_t *pdf, // I - PDF file pdfio_obj_t *srcobj) // I - Object to copy { - // TODO: Implement pdfioObjCopy - (void)pdf; - (void)srcobj; + pdfio_obj_t *dstobj; // Destination object + pdfio_stream_t *srcst, // Source stream + *dstst; // Destination stream + char buffer[32768]; // Copy buffer + ssize_t bytes; // Bytes read - return (NULL); + + PDFIO_DEBUG("pdfioObjCopy(pdf=%p, srcobj=%p(%p))\n", pdf, srcobj, srcobj ? srcobj->pdf : NULL); + + // Range check input + if (!pdf || !srcobj) + return (NULL); + + // Load the object value if needed... + if (srcobj->value.type == PDFIO_VALTYPE_NONE) + _pdfioObjLoad(srcobj); + + // Create the new object... 
+ if ((dstobj = _pdfioFileCreateObject(pdf, srcobj->pdf, NULL)) == NULL) + return (NULL); + + // Add new object to the cache of copied objects... + if (!_pdfioFileAddMappedObject(pdf, dstobj, srcobj)) + return (NULL); + + // Copy the object's value... + if (!_pdfioValueCopy(pdf, &dstobj->value, srcobj->pdf, &srcobj->value)) + return (NULL); + + if (srcobj->stream_offset) + { + // Copy stream data... + if ((srcst = pdfioObjOpenStream(srcobj, false)) == NULL) + { + pdfioObjClose(dstobj); + return (NULL); + } + + if ((dstst = pdfioObjCreateStream(dstobj, PDFIO_FILTER_NONE)) == NULL) + { + pdfioStreamClose(srcst); + pdfioObjClose(dstobj); + return (NULL); + } + + while ((bytes = pdfioStreamRead(srcst, buffer, sizeof(buffer))) > 0) + { + if (!pdfioStreamWrite(dstst, buffer, (size_t)bytes)) + { + bytes = -1; + break; + } + } + + pdfioStreamClose(srcst); + pdfioStreamClose(dstst); + + if (bytes < 0) + return (NULL); + } + else + pdfioObjClose(dstobj); + + return (dstobj); } @@ -205,7 +262,10 @@ pdfioObjGetLength(pdfio_obj_t *obj) // I - Object // Try getting the length, directly or indirectly if ((length = (size_t)pdfioDictGetNumber(obj->value.value.dict, "Length")) > 0) + { + PDFIO_DEBUG("pdfioObjGetLength(obj=%p) returning %lu.\n", obj, (unsigned long)length); return (length); + } if ((lenobj = pdfioDictGetObject(obj->value.value.dict, "Length")) == NULL) { @@ -222,6 +282,8 @@ pdfioObjGetLength(pdfio_obj_t *obj) // I - Object return (0); } + PDFIO_DEBUG("pdfioObjGetLength(obj=%p) returning %lu.\n", obj, (unsigned long)lenobj->value.value.number); + return ((size_t)lenobj->value.value.number); } diff --git a/pdfio-page.c b/pdfio-page.c index e55f12d..6ab30eb 100644 --- a/pdfio-page.c +++ b/pdfio-page.c @@ -18,13 +18,32 @@ // 'pdfioPageCopy()' - Copy a page to a PDF file. 
// -pdfio_obj_t * // O - Destination page +bool // O - `true` on success, `false` on failure pdfioPageCopy(pdfio_file_t *pdf, // I - PDF file - pdfio_obj_t *src) // I - Source page + pdfio_obj_t *srcpage) // I - Source page { - // TODO: Implement pdfioPageCopy - (void)pdf; - (void)src; + pdfio_obj_t *dstpage; // Destination page object - return (NULL); + + PDFIO_DEBUG("pdfioPageCopy(pdf=%p, srcpage=%p(%p))\n", pdf, srcpage, srcpage ? srcpage->pdf : NULL); + + // Range check input + if (!pdf || !srcpage || srcpage->value.type != PDFIO_VALTYPE_DICT) + { + if (pdf) + { + if (!srcpage) + _pdfioFileError(pdf, "NULL page object specified."); + else + _pdfioFileError(pdf, "Object is not a page."); + } + + return (false); + } + + // Copy the page object and add it to the pages array... + if ((dstpage = pdfioObjCopy(pdf, srcpage)) == NULL) + return (false); + else + return (_pdfioFileAddPage(pdf, dstpage)); } diff --git a/pdfio-private.h b/pdfio-private.h index 8b6a5ec..0e177a2 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -288,7 +288,9 @@ extern bool _pdfioDictSetValue(pdfio_dict_t *dict, const char *key, _pdfio_valu extern bool _pdfioDictWrite(pdfio_dict_t *dict, off_t *length) PDFIO_INTERNAL; extern bool _pdfioFileAddMappedObject(pdfio_file_t *pdf, pdfio_obj_t *dst_obj, pdfio_obj_t *src_obj) PDFIO_INTERNAL; +extern bool _pdfioFileAddPage(pdfio_file_t *pdf, pdfio_obj_t *obj) PDFIO_INTERNAL; extern bool _pdfioFileConsume(pdfio_file_t *pdf, size_t bytes) PDFIO_INTERNAL; +extern pdfio_obj_t *_pdfioFileCreateObject(pdfio_file_t *pdf, pdfio_file_t *srcpdf, _pdfio_value_t *value) PDFIO_INTERNAL; extern bool _pdfioFileDefaultError(pdfio_file_t *pdf, const char *message, void *data) PDFIO_INTERNAL; extern bool _pdfioFileError(pdfio_file_t *pdf, const char *format, ...) 
PDFIO_FORMAT(2,3) PDFIO_INTERNAL; extern pdfio_obj_t *_pdfioFileFindMappedObject(pdfio_file_t *pdf, pdfio_file_t *src_pdf, size_t src_number) PDFIO_INTERNAL; diff --git a/pdfio-stream.c b/pdfio-stream.c index e2f6201..c69cc20 100644 --- a/pdfio-stream.c +++ b/pdfio-stream.c @@ -85,7 +85,7 @@ pdfioStreamClose(pdfio_stream_t *st) // I - Stream st->obj->stream_length = (size_t)(_pdfioFileTell(st->pdf) - st->obj->stream_offset); // End of stream marker... - if (!_pdfioFilePuts(st->pdf, "\nendstream\n")) + if (!_pdfioFilePuts(st->pdf, "\nendstream\nendobj\n")) { ret = false; goto done; @@ -598,11 +598,9 @@ pdfioStreamRead( { // Read large amounts directly to caller's buffer... if ((rbytes = stream_read(st, bufptr, bytes)) > 0) - { bufptr += rbytes; - bytes = 0; - } + bytes = 0; st->bufptr = st->bufend = st->buffer; break; } diff --git a/pdfio-value.c b/pdfio-value.c index 9d6e47d..0ac5723 100644 --- a/pdfio-value.c +++ b/pdfio-value.c @@ -25,8 +25,26 @@ _pdfioValueCopy(pdfio_file_t *pdfdst, // I - Destination PDF file _pdfio_value_t *vsrc) // I - Source value { pdfio_obj_t *obj; // Object reference +#ifdef DEBUG + static const char * const types[] = // Type strings for debug + { + "PDFIO_VALTYPE_NONE", + "PDFIO_VALTYPE_ARRAY", + "PDFIO_VALTYPE_BINARY", + "PDFIO_VALTYPE_BOOLEAN", + "PDFIO_VALTYPE_DATE", + "PDFIO_VALTYPE_DICT", + "PDFIO_VALTYPE_INDIRECT", + "PDFIO_VALTYPE_NAME", + "PDFIO_VALTYPE_NULL", + "PDFIO_VALTYPE_NUMBER", + "PDFIO_VALTYPE_STRING" + }; +#endif // DEBUG + PDFIO_DEBUG("_pdfioValueCopy(pdfdst=%p, vdst=%p, pdfsrc=%p, vsrc=%p(%s))\n", pdfdst, vdst, pdfsrc, vsrc, types[vsrc->type]); + if (pdfdst == pdfsrc && vsrc->type != PDFIO_VALTYPE_BINARY) { // For the same document we can copy the values without any other effort diff --git a/pdfio.h b/pdfio.h index b822da7..68fa9eb 100644 --- a/pdfio.h +++ b/pdfio.h @@ -172,7 +172,7 @@ extern size_t pdfioObjGetNumber(pdfio_obj_t *obj) PDFIO_PUBLIC; extern const char *pdfioObjGetType(pdfio_obj_t *obj) PDFIO_PUBLIC; 
extern pdfio_stream_t *pdfioObjOpenStream(pdfio_obj_t *obj, bool decode) PDFIO_PUBLIC; -extern pdfio_obj_t *pdfioPageCopy(pdfio_file_t *pdf, pdfio_obj_t *srcpage) PDFIO_PUBLIC; +extern bool pdfioPageCopy(pdfio_file_t *pdf, pdfio_obj_t *srcpage) PDFIO_PUBLIC; extern size_t pdfioPageGetNumStreams(pdfio_obj_t *page) PDFIO_PUBLIC; extern pdfio_stream_t *pdfioPageOpenStream(pdfio_obj_t *page, size_t n, bool decode) PDFIO_PUBLIC; diff --git a/testpdfio.c b/testpdfio.c index 7d226ed..ffe472d 100644 --- a/testpdfio.c +++ b/testpdfio.c @@ -149,13 +149,15 @@ do_unit_tests(void) { int i; // Looping var char filename[256]; // PDF filename - pdfio_file_t *pdf; // PDF file + pdfio_file_t *pdf, // Test PDF file + *outpdf; // Output PDF file bool error = false; // Error callback data _pdfio_token_t tb; // Token buffer const char *s; // String buffer _pdfio_value_t value; // Value pdfio_obj_t *color_jpg, // color.jpg image - *gray_jpg; // gray.jpg image + *gray_jpg, // gray.jpg image + *page; // Page from test PDF file static const char *complex_dict = // Complex dictionary value "<