mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2025-02-26 22:02:49 +01:00
When exploring a PDF, it may be convenient to have the typical informations delivered by some "Document Properties"---and some more about the MediaBox(es). So just add the function to get the ModDate and extend the pdfioinfo example as an example of what the library do have and pdfioinfo as a debugging tool also. Signed-off-by: Thierry LARONDE <tlaronde@kergis.com>
2251 lines
60 KiB
C
2251 lines
60 KiB
C
//
|
||
// PDF file functions for PDFio.
|
||
//
|
||
// Copyright © 2021-2025 by Michael R Sweet.
|
||
//
|
||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||
// information.
|
||
//
|
||
|
||
#include "pdfio-private.h"
|
||
#ifndef O_BINARY
|
||
# define O_BINARY 0
|
||
#endif // !O_BINARY
|
||
|
||
|
||
//
|
||
// Local functions...
|
||
//
|
||
|
||
static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
|
||
static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b);
|
||
static pdfio_file_t *create_common(const char *filename, int fd, pdfio_output_cb_t output_cb, void *output_cbdata, const char *version, pdfio_rect_t *media_box, pdfio_rect_t *crop_box, pdfio_error_cb_t error_cb, void *error_cbdata);
|
||
static const char *get_info_string(pdfio_file_t *pdf, const char *key);
|
||
static struct lconv *get_lconv(void);
|
||
static bool load_obj_stream(pdfio_obj_t *obj);
|
||
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
|
||
static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data);
|
||
static bool write_pages(pdfio_file_t *pdf);
|
||
static bool write_trailer(pdfio_file_t *pdf);
|
||
|
||
|
||
//
|
||
// '_pdfioFileAddMappedObj()' - Add a mapped object.
|
||
//
|
||
|
||
bool // O - `true` on success, `false` on failure
|
||
_pdfioFileAddMappedObj(
|
||
pdfio_file_t *pdf, // I - Destination PDF file
|
||
pdfio_obj_t *dst_obj, // I - Destination object
|
||
pdfio_obj_t *src_obj) // I - Source object
|
||
{
|
||
_pdfio_objmap_t *map; // Object map
|
||
|
||
|
||
// Allocate memory as needed...
|
||
if (pdf->num_objmaps >= pdf->alloc_objmaps)
|
||
{
|
||
if ((map = realloc(pdf->objmaps, (pdf->alloc_objmaps + 16) * sizeof(_pdfio_objmap_t))) == NULL)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to allocate memory for object map.");
|
||
return (false);
|
||
}
|
||
|
||
pdf->alloc_objmaps += 16;
|
||
pdf->objmaps = map;
|
||
}
|
||
|
||
// Add an object to the end...
|
||
map = pdf->objmaps + pdf->num_objmaps;
|
||
pdf->num_objmaps ++;
|
||
|
||
map->obj = dst_obj;
|
||
map->src_pdf = src_obj->pdf;
|
||
map->src_number = src_obj->number;
|
||
|
||
// Sort as needed...
|
||
if (pdf->num_objmaps > 1 && compare_objmaps(map, pdf->objmaps + pdf->num_objmaps - 2) < 0)
|
||
qsort(pdf->objmaps, pdf->num_objmaps, sizeof(_pdfio_objmap_t), (int (*)(const void *, const void *))compare_objmaps);
|
||
|
||
return (true);
|
||
}
|
||
|
||
|
||
//
|
||
// '_pdfioFileAddPage()' - Add a page to a PDF file.
|
||
//
|
||
|
||
bool // O - `true` on success and `false` on failure
|
||
_pdfioFileAddPage(pdfio_file_t *pdf, // I - PDF file
|
||
pdfio_obj_t *obj) // I - Page object
|
||
{
|
||
// Add the page to the array of pages...
|
||
if (pdf->num_pages >= pdf->alloc_pages)
|
||
{
|
||
pdfio_obj_t **temp = (pdfio_obj_t **)realloc(pdf->pages, (pdf->alloc_pages + 16) * sizeof(pdfio_obj_t *));
|
||
|
||
if (!temp)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to allocate memory for pages.");
|
||
return (false);
|
||
}
|
||
|
||
pdf->alloc_pages += 16;
|
||
pdf->pages = temp;
|
||
}
|
||
|
||
pdf->pages[pdf->num_pages ++] = obj;
|
||
|
||
return (true);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileClose()' - Close a PDF file and free all memory used for it.
|
||
//
|
||
|
||
bool // O - `true` on success and `false` on failure
|
||
pdfioFileClose(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
bool ret = true; // Return value
|
||
size_t i; // Looping var
|
||
|
||
|
||
// Range check input
|
||
if (!pdf)
|
||
return (false);
|
||
|
||
// Close the file itself...
|
||
if (pdf->mode == _PDFIO_MODE_WRITE)
|
||
{
|
||
ret = false;
|
||
|
||
if (pdfioObjClose(pdf->info_obj) && write_pages(pdf) && pdfioObjClose(pdf->root_obj) && write_trailer(pdf))
|
||
ret = _pdfioFileFlush(pdf);
|
||
}
|
||
|
||
if (pdf->fd >= 0 && close(pdf->fd) < 0)
|
||
ret = false;
|
||
|
||
// Free all data...
|
||
free(pdf->filename);
|
||
free(pdf->version);
|
||
|
||
for (i = 0; i < pdf->num_arrays; i ++)
|
||
_pdfioArrayDelete(pdf->arrays[i]);
|
||
free(pdf->arrays);
|
||
|
||
for (i = 0; i < pdf->num_dicts; i ++)
|
||
_pdfioDictDelete(pdf->dicts[i]);
|
||
free(pdf->dicts);
|
||
|
||
for (i = 0; i < pdf->num_objs; i ++)
|
||
_pdfioObjDelete(pdf->objs[i]);
|
||
free(pdf->objs);
|
||
|
||
free(pdf->objmaps);
|
||
|
||
free(pdf->pages);
|
||
|
||
for (i = 0; i < pdf->num_strings; i ++)
|
||
free(pdf->strings[i]);
|
||
free(pdf->strings);
|
||
|
||
free(pdf);
|
||
|
||
return (ret);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileCreate()' - Create a PDF file.
|
||
//
|
||
// This function creates a new PDF file. The "filename" argument specifies the
|
||
// name of the PDF file to create.
|
||
//
|
||
// The "version" argument specifies the PDF version number for the file or
|
||
// `NULL` for the default ("2.0").
|
||
//
|
||
// The "media_box" and "crop_box" arguments specify the default MediaBox and
|
||
// CropBox for pages in the PDF file - if `NULL` then a default "Universal" size
|
||
// of 8.27x11in (the intersection of US Letter and ISO A4) is used.
|
||
//
|
||
// The "error_cb" and "error_cbdata" arguments specify an error handler callback
|
||
// and its data pointer - if `NULL` the default error handler is used that
|
||
// writes error messages to `stderr`.
|
||
//
|
||
|
||
pdfio_file_t * // O - PDF file or `NULL` on error
|
||
pdfioFileCreate(
|
||
const char *filename, // I - Filename
|
||
const char *version, // I - PDF version number or `NULL` for default (2.0)
|
||
pdfio_rect_t *media_box, // I - Default MediaBox for pages
|
||
pdfio_rect_t *crop_box, // I - Default CropBox for pages
|
||
pdfio_error_cb_t error_cb, // I - Error callback or `NULL` for default
|
||
void *error_cbdata) // I - Error callback data, if any
|
||
{
|
||
pdfio_file_t *pdf; // PDF file
|
||
int fd; // File descriptor
|
||
|
||
|
||
PDFIO_DEBUG("pdfioFileCreate(filename=\"%s\", version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
|
||
|
||
// Range check input...
|
||
if (!filename)
|
||
return (NULL);
|
||
|
||
// Create the file...
|
||
if ((fd = open(filename, O_WRONLY | O_BINARY | O_CREAT | O_TRUNC, 0666)) < 0)
|
||
{
|
||
pdfio_file_t temp; // Dummy file
|
||
char message[8192]; // Message string
|
||
|
||
temp.filename = (char *)filename;
|
||
snprintf(message, sizeof(message), "Unable to create '%s': %s", filename, strerror(errno));
|
||
(error_cb)(&temp, message, error_cbdata);
|
||
|
||
return (NULL);
|
||
}
|
||
|
||
if ((pdf = create_common(filename, fd, /*output_cb*/NULL, /*output_cbdata*/NULL, version, media_box, crop_box, error_cb, error_cbdata)) == NULL)
|
||
{
|
||
// Remove the newly created file if we can't create the PDF file object...
|
||
close(fd);
|
||
unlink(filename);
|
||
}
|
||
|
||
return (pdf);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileCreateArrayObj()' - Create a new object in a PDF file containing an array.
|
||
//
|
||
// This function creates a new object with an array value in a PDF file.
|
||
// You must call @link pdfioObjClose@ to write the object to the file.
|
||
//
|
||
|
||
pdfio_obj_t * // O - New object
|
||
pdfioFileCreateArrayObj(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
pdfio_array_t *array) // I - Object array
|
||
{
|
||
_pdfio_value_t value; // Object value
|
||
|
||
|
||
// Range check input...
|
||
if (!pdf || !array)
|
||
return (NULL);
|
||
|
||
value.type = PDFIO_VALTYPE_ARRAY;
|
||
value.value.array = array;
|
||
|
||
return (_pdfioFileCreateObj(pdf, array->pdf, &value));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileCreateNameObj()' - Create a new object in a PDF file containing a name.
|
||
//
|
||
// This function creates a new object with a name value in a PDF file.
|
||
// You must call @link pdfioObjClose@ to write the object to the file.
|
||
//
|
||
|
||
pdfio_obj_t * // O - New object
|
||
pdfioFileCreateNameObj(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
const char *name) // I - Name value
|
||
{
|
||
_pdfio_value_t value; // Object value
|
||
|
||
|
||
// Range check input...
|
||
if (!pdf || !name)
|
||
return (NULL);
|
||
|
||
value.type = PDFIO_VALTYPE_NAME;
|
||
value.value.name = pdfioStringCreate(pdf, name);
|
||
|
||
if (!value.value.name)
|
||
return (NULL);
|
||
|
||
return (_pdfioFileCreateObj(pdf, NULL, &value));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileCreateNumberObj()' - Create a new object in a PDF file containing a number.
|
||
//
|
||
// This function creates a new object with a number value in a PDF file.
|
||
// You must call @link pdfioObjClose@ to write the object to the file.
|
||
//
|
||
|
||
pdfio_obj_t * // O - New object
|
||
pdfioFileCreateNumberObj(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
double number) // I - Number value
|
||
{
|
||
_pdfio_value_t value; // Object value
|
||
|
||
|
||
// Range check input...
|
||
if (!pdf)
|
||
return (NULL);
|
||
|
||
value.type = PDFIO_VALTYPE_NUMBER;
|
||
value.value.number = number;
|
||
|
||
return (_pdfioFileCreateObj(pdf, NULL, &value));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileCreateObj()' - Create a new object in a PDF file.
|
||
//
|
||
|
||
pdfio_obj_t * // O - New object
|
||
pdfioFileCreateObj(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
pdfio_dict_t *dict) // I - Object dictionary
|
||
{
|
||
_pdfio_value_t value; // Object value
|
||
|
||
|
||
// Range check input...
|
||
if (!pdf || !dict)
|
||
return (NULL);
|
||
|
||
value.type = PDFIO_VALTYPE_DICT;
|
||
value.value.dict = dict;
|
||
|
||
return (_pdfioFileCreateObj(pdf, dict->pdf, &value));
|
||
}
|
||
|
||
|
||
//
|
||
// '_pdfioFileCreateObj()' - Create a new object in a PDF file with a value.
|
||
//
|
||
|
||
pdfio_obj_t * // O - New object
|
||
_pdfioFileCreateObj(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
pdfio_file_t *srcpdf, // I - Source PDF file, if any
|
||
_pdfio_value_t *value) // I - Object dictionary
|
||
{
|
||
pdfio_obj_t *obj; // New object
|
||
|
||
|
||
// Range check input...
|
||
if (!pdf)
|
||
return (NULL);
|
||
|
||
if (pdf->mode != _PDFIO_MODE_WRITE)
|
||
return (NULL);
|
||
|
||
// Allocate memory for the object...
|
||
if ((obj = (pdfio_obj_t *)calloc(1, sizeof(pdfio_obj_t))) == NULL)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to allocate memory for object - %s", strerror(errno));
|
||
return (NULL);
|
||
}
|
||
|
||
// Expand the objects array as needed
|
||
if (pdf->num_objs >= pdf->alloc_objs)
|
||
{
|
||
pdfio_obj_t **temp = (pdfio_obj_t **)realloc(pdf->objs, (pdf->alloc_objs + 32) * sizeof(pdfio_obj_t *));
|
||
|
||
if (!temp)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to allocate memory for object - %s", strerror(errno));
|
||
free(obj);
|
||
return (NULL);
|
||
}
|
||
|
||
pdf->objs = temp;
|
||
pdf->alloc_objs += 32;
|
||
}
|
||
|
||
pdf->objs[pdf->num_objs ++] = obj;
|
||
|
||
// Initialize the object...
|
||
obj->pdf = pdf;
|
||
obj->number = pdf->num_objs;
|
||
|
||
if (value)
|
||
_pdfioValueCopy(pdf, &obj->value, srcpdf, value);
|
||
|
||
// Don't write anything just yet...
|
||
return (obj);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileCreateOutput()' - Create a PDF file through an output callback.
|
||
//
|
||
// This function creates a new PDF file that is streamed though an output
|
||
// callback. The "output_cb" and "output_cbdata" arguments specify the output
|
||
// callback and its data pointer which is called whenever data needs to be
|
||
// written:
|
||
//
|
||
// ```
|
||
// ssize_t
|
||
// output_cb(void *output_cbdata, const void *buffer, size_t bytes)
|
||
// {
|
||
// // Write buffer to output and return the number of bytes written
|
||
// }
|
||
// ```
|
||
//
|
||
// The "version" argument specifies the PDF version number for the file or
|
||
// `NULL` for the default ("2.0").
|
||
//
|
||
// The "media_box" and "crop_box" arguments specify the default MediaBox and
|
||
// CropBox for pages in the PDF file - if `NULL` then a default "Universal" size
|
||
// of 8.27x11in (the intersection of US Letter and ISO A4) is used.
|
||
//
|
||
// The "error_cb" and "error_cbdata" arguments specify an error handler callback
|
||
// and its data pointer - if `NULL` the default error handler is used that
|
||
// writes error messages to `stderr`.
|
||
//
|
||
// > *Note*: Files created using this API are slightly larger than those
|
||
// > created using the @link pdfioFileCreate@ function since stream lengths are
|
||
// > stored as indirect object references.
|
||
//
|
||
|
||
pdfio_file_t * // O - PDF file or `NULL` on error
|
||
pdfioFileCreateOutput(
|
||
pdfio_output_cb_t output_cb, // I - Output callback function
|
||
void *output_cbdata, // I - Output callback data
|
||
const char *version, // I - PDF version number or `NULL` for default (2.0)
|
||
pdfio_rect_t *media_box, // I - Default MediaBox for pages
|
||
pdfio_rect_t *crop_box, // I - Default CropBox for pages
|
||
pdfio_error_cb_t error_cb, // I - Error callback or `NULL` for default
|
||
void *error_cbdata) // I - Error callback data, if any
|
||
{
|
||
PDFIO_DEBUG("pdfioFileCreate(output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
|
||
|
||
return (create_common("output.pdf", /*fd*/-1, output_cb, output_cbdata, version, media_box, crop_box, error_cb, error_cbdata));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileCreatePage()' - Create a page in a PDF file.
|
||
//
|
||
|
||
pdfio_stream_t * // O - Contents stream
|
||
pdfioFileCreatePage(pdfio_file_t *pdf, // I - PDF file
|
||
pdfio_dict_t *dict) // I - Page dictionary
|
||
{
|
||
pdfio_obj_t *page, // Page object
|
||
*contents; // Contents object
|
||
pdfio_dict_t *contents_dict; // Dictionary for Contents object
|
||
|
||
|
||
// Range check input...
|
||
if (!pdf)
|
||
return (NULL);
|
||
|
||
// Copy the page dictionary...
|
||
if (dict)
|
||
dict = pdfioDictCopy(pdf, dict);
|
||
else
|
||
dict = pdfioDictCreate(pdf);
|
||
|
||
if (!dict)
|
||
return (NULL);
|
||
|
||
// Make sure the page dictionary has all of the required keys...
|
||
if (!_pdfioDictGetValue(dict, "CropBox"))
|
||
pdfioDictSetRect(dict, "CropBox", &pdf->crop_box);
|
||
|
||
if (!_pdfioDictGetValue(dict, "MediaBox"))
|
||
pdfioDictSetRect(dict, "MediaBox", &pdf->media_box);
|
||
|
||
pdfioDictSetObj(dict, "Parent", pdf->pages_obj);
|
||
|
||
if (!_pdfioDictGetValue(dict, "Resources"))
|
||
pdfioDictSetDict(dict, "Resources", pdfioDictCreate(pdf));
|
||
|
||
if (!_pdfioDictGetValue(dict, "Type"))
|
||
pdfioDictSetName(dict, "Type", "Page");
|
||
|
||
// Create the page object...
|
||
if ((page = pdfioFileCreateObj(pdf, dict)) == NULL)
|
||
return (NULL);
|
||
|
||
// Create a contents object to hold the contents of the page...
|
||
if ((contents_dict = pdfioDictCreate(pdf)) == NULL)
|
||
return (NULL);
|
||
|
||
#ifndef DEBUG
|
||
pdfioDictSetName(contents_dict, "Filter", "FlateDecode");
|
||
#endif // !DEBUG
|
||
|
||
if ((contents = pdfioFileCreateObj(pdf, contents_dict)) == NULL)
|
||
return (NULL);
|
||
|
||
// Add the contents stream to the pages object and write it...
|
||
pdfioDictSetObj(dict, "Contents", contents);
|
||
if (!pdfioObjClose(page))
|
||
return (NULL);
|
||
|
||
if (!_pdfioFileAddPage(pdf, page))
|
||
return (NULL);
|
||
|
||
// Create the contents stream...
|
||
#ifdef DEBUG
|
||
return (pdfioObjCreateStream(contents, PDFIO_FILTER_NONE));
|
||
#else
|
||
return (pdfioObjCreateStream(contents, PDFIO_FILTER_FLATE));
|
||
#endif // DEBUG
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileCreateStringObj()' - Create a new object in a PDF file containing a string.
|
||
//
|
||
// This function creates a new object with a string value in a PDF file.
|
||
// You must call @link pdfioObjClose@ to write the object to the file.
|
||
//
|
||
|
||
pdfio_obj_t * // O - New object
|
||
pdfioFileCreateStringObj(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
const char *string) // I - String
|
||
{
|
||
_pdfio_value_t value; // Object value
|
||
|
||
|
||
// Range check input...
|
||
if (!pdf)
|
||
return (NULL);
|
||
|
||
value.type = PDFIO_VALTYPE_STRING;
|
||
value.value.string = string;
|
||
|
||
return (_pdfioFileCreateObj(pdf, NULL, &value));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileCreateTemporary()' - Create a temporary PDF file.
|
||
//
|
||
// This function creates a PDF file with a unique filename in the current
|
||
// temporary directory. The temporary file is stored in the string "buffer" an
|
||
// will have a ".pdf" extension. Otherwise, this function works the same as
|
||
// the @link pdfioFileCreate@ function.
|
||
//
|
||
// @since PDFio v1.1@
|
||
//
|
||
|
||
pdfio_file_t *
|
||
pdfioFileCreateTemporary(
|
||
char *buffer, // I - Filename buffer
|
||
size_t bufsize, // I - Size of filename buffer
|
||
const char *version, // I - PDF version number or `NULL` for default (2.0)
|
||
pdfio_rect_t *media_box, // I - Default MediaBox for pages
|
||
pdfio_rect_t *crop_box, // I - Default CropBox for pages
|
||
pdfio_error_cb_t error_cb, // I - Error callback or `NULL` for default
|
||
void *error_cbdata) // I - Error callback data, if any
|
||
{
|
||
pdfio_file_t *pdf; // PDF file
|
||
int i, // Looping var
|
||
fd; // File descriptor
|
||
const char *tmpdir; // Temporary directory
|
||
#if _WIN32 || defined(__APPLE__)
|
||
char tmppath[256]; // Temporary directory path
|
||
#endif // _WIN32 || __APPLE__
|
||
unsigned tmpnum; // Temporary filename number
|
||
|
||
|
||
PDFIO_DEBUG("pdfioFileCreate(buffer=%p, bufsize=%lu, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", (void *)buffer, (unsigned long)bufsize, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
|
||
|
||
// Range check input...
|
||
if (!buffer || bufsize < 32)
|
||
{
|
||
if (buffer)
|
||
*buffer = '\0';
|
||
return (NULL);
|
||
}
|
||
|
||
// Create the temporary PDF file...
|
||
#if _WIN32
|
||
if ((tmpdir = getenv("TEMP")) == NULL)
|
||
{
|
||
GetTempPathA(sizeof(tmppath), tmppath);
|
||
tmpdir = tmppath;
|
||
}
|
||
|
||
#elif defined(__APPLE__)
|
||
if ((tmpdir = getenv("TMPDIR")) != NULL && access(tmpdir, W_OK))
|
||
tmpdir = NULL;
|
||
|
||
if (!tmpdir)
|
||
{
|
||
// Grab the per-process temporary directory for sandboxed apps...
|
||
# ifdef _CS_DARWIN_USER_TEMP_DIR
|
||
if (confstr(_CS_DARWIN_USER_TEMP_DIR, tmppath, sizeof(tmppath)))
|
||
tmpdir = tmppath;
|
||
else
|
||
# endif // _CS_DARWIN_USER_TEMP_DIR
|
||
tmpdir = "/private/tmp";
|
||
}
|
||
|
||
#else
|
||
if ((tmpdir = getenv("TMPDIR")) == NULL || access(tmpdir, W_OK))
|
||
tmpdir = "/tmp";
|
||
#endif // _WIN32
|
||
|
||
for (i = 0, fd = -1; i < 1000; i ++)
|
||
{
|
||
_pdfioCryptoMakeRandom((uint8_t *)&tmpnum, sizeof(tmpnum));
|
||
snprintf(buffer, bufsize, "%s/%08x.pdf", tmpdir, tmpnum);
|
||
if ((fd = open(buffer, O_WRONLY | O_BINARY | O_CREAT | O_TRUNC | O_EXCL, 0666)) >= 0)
|
||
break;
|
||
}
|
||
|
||
if (fd < 0)
|
||
{
|
||
pdfio_file_t temp; // Dummy file
|
||
char message[8192]; // Message string
|
||
|
||
temp.filename = (char *)"<temporary>";
|
||
snprintf(message, sizeof(message), "Unable to create temporary PDF file: %s", strerror(errno));
|
||
(error_cb)(&temp, message, error_cbdata);
|
||
|
||
return (NULL);
|
||
}
|
||
|
||
if ((pdf = create_common(buffer, fd, /*output_cb*/NULL, /*output_cbdata*/NULL, version, media_box, crop_box, error_cb, error_cbdata)) == NULL)
|
||
{
|
||
// Remove the temporary file if we can't create the PDF file object...
|
||
close(fd);
|
||
unlink(buffer);
|
||
*buffer = '\0';
|
||
}
|
||
|
||
return (pdf);
|
||
}
|
||
|
||
|
||
//
|
||
// '_pdfioFileFindMappedObj()' - Find a mapped object.
|
||
//
|
||
|
||
pdfio_obj_t * // O - Match object or `NULL` if none
|
||
_pdfioFileFindMappedObj(
|
||
pdfio_file_t *pdf, // I - Destination PDF file
|
||
pdfio_file_t *src_pdf, // I - Source PDF file
|
||
size_t src_number) // I - Source object number
|
||
{
|
||
_pdfio_objmap_t key, // Search key
|
||
*match; // Matching object map
|
||
|
||
|
||
// If we have no mapped objects, return NULL immediately...
|
||
if (pdf->num_objmaps == 0)
|
||
return (NULL);
|
||
|
||
// Otherwise search for a match...
|
||
key.src_pdf = src_pdf;
|
||
key.src_number = src_number;
|
||
|
||
if ((match = (_pdfio_objmap_t *)bsearch(&key, pdf->objmaps, pdf->num_objmaps, sizeof(_pdfio_objmap_t), (int (*)(const void *, const void *))compare_objmaps)) != NULL)
|
||
return (match->obj);
|
||
else
|
||
return (NULL);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileFindObj()' - Find an object using its object number.
|
||
//
|
||
// This differs from @link pdfioFileGetObj@ which takes an index into the
|
||
// list of objects while this function takes the object number.
|
||
//
|
||
|
||
pdfio_obj_t * // O - Object or `NULL` if not found
|
||
pdfioFileFindObj(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
size_t number) // I - Object number (1 to N)
|
||
{
|
||
size_t left, // Left object
|
||
right, // Right object
|
||
current; // Current object
|
||
|
||
|
||
PDFIO_DEBUG("pdfioFileFindObj(pdf=%p, number=%lu) alloc_objs=%lu, num_objs=%lu, objs=%p\n", (void *)pdf, (unsigned long)number, (unsigned long)(pdf ? pdf->alloc_objs : 0), (unsigned long)(pdf ? pdf->num_objs : 0), (void *)(pdf ? pdf->objs : NULL));
|
||
|
||
// Range check input...
|
||
if (!pdf || pdf->num_objs == 0 || number < 1)
|
||
return (NULL);
|
||
|
||
// Do a binary search for the object...
|
||
if ((current = number - 1) >= pdf->num_objs)
|
||
current = pdf->num_objs / 2;
|
||
|
||
PDFIO_DEBUG("pdfioFileFindObj: objs[current=%lu]=%p(%lu)\n", (unsigned long)current, (void *)pdf->objs[current], (unsigned long)(pdf->objs[current] ? pdf->objs[current]->number : 0));
|
||
|
||
if (number == pdf->objs[current]->number)
|
||
{
|
||
// Fast match...
|
||
PDFIO_DEBUG("pdfioFileFindObj: Returning %lu (%p)\n", (unsigned long)current, pdf->objs[current]);
|
||
return (pdf->objs[current]);
|
||
}
|
||
else if (number < pdf->objs[current]->number)
|
||
{
|
||
left = 0;
|
||
right = current;
|
||
}
|
||
else
|
||
{
|
||
left = current;
|
||
right = pdf->num_objs - 1;
|
||
}
|
||
|
||
while ((right - left) > 1)
|
||
{
|
||
current = (left + right) / 2;
|
||
|
||
if (number == pdf->objs[current]->number)
|
||
return (pdf->objs[current]);
|
||
else if (number < pdf->objs[current]->number)
|
||
right = current;
|
||
else
|
||
left = current;
|
||
}
|
||
|
||
if (number == pdf->objs[left]->number)
|
||
{
|
||
PDFIO_DEBUG("pdfioFileFindObj: Returning %lu (%p)\n", (unsigned long)left, pdf->objs[left]);
|
||
return (pdf->objs[left]);
|
||
}
|
||
else if (number == pdf->objs[right]->number)
|
||
{
|
||
PDFIO_DEBUG("pdfioFileFindObj: Returning %lu (%p)\n", (unsigned long)right, pdf->objs[right]);
|
||
return (pdf->objs[right]);
|
||
}
|
||
else
|
||
{
|
||
PDFIO_DEBUG("pdfioFileFindObj: Returning NULL\n");
|
||
return (NULL);
|
||
}
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetAuthor()' - Get the author for a PDF file.
|
||
//
|
||
|
||
const char * // O - Author or `NULL` for none
|
||
pdfioFileGetAuthor(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (get_info_string(pdf, "Author"));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetCatalog()' - Get the document catalog dictionary.
|
||
//
|
||
// @since PDFio 1.3@
|
||
//
|
||
|
||
pdfio_dict_t * // O - Catalog dictionary
|
||
pdfioFileGetCatalog(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (pdf ? pdfioObjGetDict(pdf->root_obj) : NULL);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetCreationDate()' - Get the creation date for a PDF file.
|
||
//
|
||
|
||
time_t // O - Creation date or `0` for none
|
||
pdfioFileGetCreationDate(
|
||
pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (pdf && pdf->info_obj ? pdfioDictGetDate(pdfioObjGetDict(pdf->info_obj), "CreationDate") : 0);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetCreator()' - Get the creator string for a PDF file.
|
||
//
|
||
|
||
const char * // O - Creator string or `NULL` for none
|
||
pdfioFileGetCreator(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (get_info_string(pdf, "Creator"));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetID()' - Get the PDF file's ID strings.
|
||
//
|
||
|
||
pdfio_array_t * // O - Array with binary strings
|
||
pdfioFileGetID(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (pdf ? pdf->id_array : NULL);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetKeywords()' - Get the keywords for a PDF file.
|
||
//
|
||
|
||
const char * // O - Keywords string or `NULL` for none
|
||
pdfioFileGetKeywords(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (get_info_string(pdf, "Keywords"));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetModDate()' - Get the most recent modification date for a PDF file.
|
||
//
|
||
|
||
time_t // O - Modification date or `0` for none
|
||
pdfioFileGetModDate(
|
||
pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (pdf && pdf->info_obj ? pdfioDictGetDate(pdfioObjGetDict(pdf->info_obj), "ModDate") : 0);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetName()' - Get a PDF's filename.
|
||
//
|
||
|
||
const char * // O - Filename
|
||
pdfioFileGetName(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (pdf ? pdf->filename : NULL);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetNumObjs()' - Get the number of objects in a PDF file.
|
||
//
|
||
|
||
size_t // O - Number of objects
|
||
pdfioFileGetNumObjs(
|
||
pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (pdf ? pdf->num_objs : 0);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetNumPages()' - Get the number of pages in a PDF file.
|
||
//
|
||
|
||
size_t // O - Number of pages
|
||
pdfioFileGetNumPages(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (pdf ? pdf->num_pages : 0);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetObj()' - Get an object from a PDF file.
|
||
//
|
||
|
||
pdfio_obj_t * // O - Object
|
||
pdfioFileGetObj(pdfio_file_t *pdf, // I - PDF file
|
||
size_t n) // I - Object index (starting at 0)
|
||
{
|
||
if (!pdf || n >= pdf->num_objs)
|
||
return (NULL);
|
||
else
|
||
return (pdf->objs[n]);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetPage()' - Get a page object from a PDF file.
|
||
//
|
||
|
||
pdfio_obj_t * // O - Object
|
||
pdfioFileGetPage(pdfio_file_t *pdf, // I - PDF file
|
||
size_t n) // I - Page index (starting at 0)
|
||
{
|
||
if (!pdf || n >= pdf->num_pages)
|
||
return (NULL);
|
||
else
|
||
return (pdf->pages[n]);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetPermissions()' - Get the access permissions of a PDF file.
|
||
//
|
||
// This function returns the access permissions of a PDF file and (optionally)
|
||
// the type of encryption that has been used.
|
||
//
|
||
|
||
pdfio_permission_t // O - Permission bits
|
||
pdfioFileGetPermissions(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
pdfio_encryption_t *encryption) // O - Type of encryption used or `NULL` to ignore
|
||
{
|
||
// Range check input...
|
||
if (!pdf)
|
||
{
|
||
if (encryption)
|
||
*encryption = PDFIO_ENCRYPTION_NONE;
|
||
|
||
return (PDFIO_PERMISSION_ALL);
|
||
}
|
||
|
||
// Return values...
|
||
if (encryption)
|
||
*encryption = pdf->encryption;
|
||
|
||
return (pdf->permissions);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetProducer()' - Get the producer string for a PDF file.
|
||
//
|
||
|
||
const char * // O - Producer string or `NULL` for none
|
||
pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (get_info_string(pdf, "Producer"));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetSubject()' - Get the subject for a PDF file.
|
||
//
|
||
|
||
const char * // O - Subject or `NULL` for none
|
||
pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (get_info_string(pdf, "Subject"));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetTitle()' - Get the title for a PDF file.
|
||
//
|
||
|
||
const char * // O - Title or `NULL` for none
|
||
pdfioFileGetTitle(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (get_info_string(pdf, "Title"));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileGetVersion()' - Get the PDF version number for a PDF file.
|
||
//
|
||
|
||
const char * // O - Version number or `NULL`
|
||
pdfioFileGetVersion(
|
||
pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
return (pdf ? pdf->version : NULL);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileOpen()' - Open a PDF file for reading.
|
||
//
|
||
// This function opens an existing PDF file. The "filename" argument specifies
|
||
// the name of the PDF file to create.
|
||
//
|
||
// The "password_cb" and "password_cbdata" arguments specify a password callback
|
||
// and its data pointer for PDF files that use one of the standard Adobe
|
||
// "security" handlers. The callback returns a password string or `NULL` to
|
||
// cancel the open. If `NULL` is specified for the callback function and the
|
||
// PDF file requires a password, the open will always fail.
|
||
//
|
||
// The "error_cb" and "error_cbdata" arguments specify an error handler callback
|
||
// and its data pointer - if `NULL` the default error handler is used that
|
||
// writes error messages to `stderr`.
|
||
//
|
||
|
||
pdfio_file_t * // O - PDF file
|
||
pdfioFileOpen(
|
||
const char *filename, // I - Filename
|
||
pdfio_password_cb_t password_cb, // I - Password callback or `NULL` for none
|
||
void *password_cbdata,
|
||
// I - Password callback data, if any
|
||
pdfio_error_cb_t error_cb, // I - Error callback or `NULL` for default
|
||
void *error_cbdata) // I - Error callback data, if any
|
||
{
|
||
pdfio_file_t *pdf; // PDF file
|
||
char line[1025], // Line from file
|
||
*ptr, // Pointer into line
|
||
*end; // End of line
|
||
ssize_t bytes; // Bytes read
|
||
off_t xref_offset; // Offset to xref table
|
||
|
||
|
||
PDFIO_DEBUG("pdfioFileOpen(filename=\"%s\", password_cb=%p, password_cbdata=%p, error_cb=%p, error_cbdata=%p)\n", filename, (void *)password_cb, (void *)password_cbdata, (void *)error_cb, (void *)error_cbdata);
|
||
|
||
// Range check input...
|
||
if (!filename)
|
||
return (NULL);
|
||
|
||
if (!error_cb)
|
||
{
|
||
error_cb = _pdfioFileDefaultError;
|
||
error_cbdata = NULL;
|
||
}
|
||
|
||
// Allocate a PDF file structure...
|
||
if ((pdf = (pdfio_file_t *)calloc(1, sizeof(pdfio_file_t))) == NULL)
|
||
{
|
||
pdfio_file_t temp; // Dummy file
|
||
char message[8192]; // Message string
|
||
|
||
temp.filename = (char *)filename;
|
||
snprintf(message, sizeof(message), "Unable to allocate memory for PDF file - %s", strerror(errno));
|
||
(error_cb)(&temp, message, error_cbdata);
|
||
return (NULL);
|
||
}
|
||
|
||
pdf->loc = get_lconv();
|
||
pdf->filename = strdup(filename);
|
||
pdf->mode = _PDFIO_MODE_READ;
|
||
pdf->error_cb = error_cb;
|
||
pdf->error_data = error_cbdata;
|
||
pdf->permissions = PDFIO_PERMISSION_ALL;
|
||
|
||
// Open the file...
|
||
if ((pdf->fd = open(filename, O_RDONLY | O_BINARY)) < 0)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to open file - %s", strerror(errno));
|
||
free(pdf->filename);
|
||
free(pdf);
|
||
return (NULL);
|
||
}
|
||
|
||
// Read the header from the first line...
|
||
if (!_pdfioFileGets(pdf, line, sizeof(line)))
|
||
goto error;
|
||
|
||
if ((strncmp(line, "%PDF-1.", 7) && strncmp(line, "%PDF-2.", 7)) || !isdigit(line[7] & 255))
|
||
{
|
||
// Bad header
|
||
_pdfioFileError(pdf, "Bad header '%s'.", line);
|
||
goto error;
|
||
}
|
||
|
||
// Copy the version number...
|
||
pdf->version = strdup(line + 5);
|
||
|
||
// Grab the last 1k of the file to find the start of the xref table...
|
||
if (_pdfioFileSeek(pdf, -1024, SEEK_END) < 0)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to read startxref data.");
|
||
goto error;
|
||
}
|
||
|
||
if ((bytes = _pdfioFileRead(pdf, line, sizeof(line) - 1)) < 1)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to read startxref data.");
|
||
goto error;
|
||
}
|
||
|
||
line[bytes] = '\0';
|
||
end = line + bytes - 9;
|
||
|
||
for (ptr = line; ptr < end; ptr ++)
|
||
{
|
||
if (!memcmp(ptr, "startxref", 9))
|
||
break;
|
||
}
|
||
|
||
if (ptr >= end)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to find start of xref table.");
|
||
goto error;
|
||
}
|
||
|
||
xref_offset = (off_t)strtol(ptr + 9, NULL, 10);
|
||
|
||
if (!load_xref(pdf, xref_offset, password_cb, password_cbdata))
|
||
goto error;
|
||
|
||
return (pdf);
|
||
|
||
|
||
// If we get here we had a fatal read error...
|
||
error:
|
||
|
||
pdfioFileClose(pdf);
|
||
|
||
return (NULL);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileSetAuthor()' - Set the author for a PDF file.
|
||
//
|
||
|
||
void
|
||
pdfioFileSetAuthor(pdfio_file_t *pdf, // I - PDF file
|
||
const char *value) // I - Value
|
||
{
|
||
if (pdf && pdf->info_obj)
|
||
pdfioDictSetString(pdf->info_obj->value.value.dict, "Author", pdfioStringCreate(pdf, value));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileSetCreationDate()' - Set the creation date for a PDF file.
|
||
//
|
||
|
||
void
|
||
pdfioFileSetCreationDate(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
time_t value) // I - Value
|
||
{
|
||
if (pdf && pdf->info_obj)
|
||
pdfioDictSetDate(pdf->info_obj->value.value.dict, "CreationDate", value);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileSetCreator()' - Set the creator string for a PDF file.
|
||
//
|
||
|
||
void
|
||
pdfioFileSetCreator(pdfio_file_t *pdf, // I - PDF file
|
||
const char *value)// I - Value
|
||
{
|
||
if (pdf && pdf->info_obj)
|
||
pdfioDictSetString(pdf->info_obj->value.value.dict, "Creator", pdfioStringCreate(pdf, value));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileSetKeywords()' - Set the keywords string for a PDF file.
|
||
//
|
||
|
||
void
|
||
pdfioFileSetKeywords(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
const char *value) // I - Value
|
||
{
|
||
if (pdf && pdf->info_obj)
|
||
pdfioDictSetString(pdf->info_obj->value.value.dict, "Keywords", pdfioStringCreate(pdf, value));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileSetPermissions()' - Set the PDF permissions, encryption mode, and passwords.
|
||
//
|
||
// This function sets the PDF usage permissions, encryption mode, and
|
||
// passwords.
|
||
//
|
||
// > *Note*: This function must be called before creating or copying any
|
||
// > objects. Due to fundamental limitations in the PDF format, PDF encryption
|
||
// > offers little protection from disclosure. Permissions are not enforced in
|
||
// > any meaningful way.
|
||
//
|
||
|
||
bool // O - `true` on success, `false` otherwise
|
||
pdfioFileSetPermissions(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
pdfio_permission_t permissions, // I - Use permissions
|
||
pdfio_encryption_t encryption, // I - Type of encryption to use
|
||
const char *owner_password, // I - Owner password, if any
|
||
const char *user_password) // I - User password, if any
|
||
{
|
||
if (!pdf)
|
||
return (false);
|
||
|
||
if (pdf->num_objs > 3) // First three objects are pages, info, and root
|
||
{
|
||
_pdfioFileError(pdf, "You must call pdfioFileSetPermissions before adding any objects.");
|
||
return (false);
|
||
}
|
||
|
||
if (encryption == PDFIO_ENCRYPTION_NONE)
|
||
return (true);
|
||
|
||
return (_pdfioCryptoLock(pdf, permissions, encryption, owner_password, user_password));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileSetSubject()' - Set the subject for a PDF file.
|
||
//
|
||
|
||
void
|
||
pdfioFileSetSubject(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
const char *value) // I - Value
|
||
{
|
||
if (pdf && pdf->info_obj)
|
||
pdfioDictSetString(pdf->info_obj->value.value.dict, "Subject", pdfioStringCreate(pdf, value));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioFileSetTitle()' - Set the title for a PDF file.
|
||
//
|
||
|
||
void
|
||
pdfioFileSetTitle(pdfio_file_t *pdf, // I - PDF file
|
||
const char *value) // I - Value
|
||
{
|
||
if (pdf && pdf->info_obj)
|
||
pdfioDictSetString(pdf->info_obj->value.value.dict, "Title", pdfioStringCreate(pdf, value));
|
||
}
|
||
|
||
|
||
//
|
||
// '_pdfioObjAdd()' - Add an object to a file.
|
||
//
|
||
|
||
static pdfio_obj_t * // O - Object
|
||
add_obj(pdfio_file_t *pdf, // I - PDF file
|
||
size_t number, // I - Object number
|
||
unsigned short generation, // I - Object generation
|
||
off_t offset) // I - Offset in file
|
||
{
|
||
pdfio_obj_t *obj; // Object
|
||
size_t left, // Left object
|
||
right, // Right object
|
||
current; // Current object (center)
|
||
|
||
|
||
// Allocate memory for the object...
|
||
if ((obj = (pdfio_obj_t *)calloc(1, sizeof(pdfio_obj_t))) == NULL)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to allocate memory for object - %s", strerror(errno));
|
||
return (NULL);
|
||
}
|
||
|
||
// Expand the objects array as needed
|
||
if (pdf->num_objs >= pdf->alloc_objs)
|
||
{
|
||
pdfio_obj_t **temp = (pdfio_obj_t **)realloc(pdf->objs, (pdf->alloc_objs + 32) * sizeof(pdfio_obj_t *));
|
||
|
||
if (!temp)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to allocate memory for object - %s", strerror(errno));
|
||
free(obj);
|
||
return (NULL);
|
||
}
|
||
|
||
pdf->objs = temp;
|
||
pdf->alloc_objs += 32;
|
||
}
|
||
|
||
obj->pdf = pdf;
|
||
obj->number = number;
|
||
obj->generation = generation;
|
||
obj->offset = offset;
|
||
|
||
PDFIO_DEBUG("add_obj: obj=%p, ->pdf=%p, ->number=%lu, ->offset=%lu\n", obj, pdf, (unsigned long)obj->number, (unsigned long)offset);
|
||
|
||
// Insert object into array as needed...
|
||
if (pdf->num_objs == 0 || obj->number > pdf->objs[pdf->num_objs - 1]->number)
|
||
{
|
||
// Append object...
|
||
PDFIO_DEBUG("add_obj: Appending at %lu\n", (unsigned long)pdf->num_objs);
|
||
|
||
pdf->objs[pdf->num_objs] = obj;
|
||
pdf->last_obj = pdf->num_objs;
|
||
}
|
||
else
|
||
{
|
||
// Insert object...
|
||
if (obj->number < pdf->objs[pdf->last_obj]->number)
|
||
{
|
||
left = 0;
|
||
right = pdf->last_obj;
|
||
}
|
||
else
|
||
{
|
||
left = pdf->last_obj;
|
||
right = pdf->num_objs - 1;
|
||
}
|
||
|
||
while ((right - left) > 1)
|
||
{
|
||
current = (left + right) / 2;
|
||
|
||
if (obj->number < pdf->objs[current]->number)
|
||
right = current;
|
||
else
|
||
left = current;
|
||
}
|
||
|
||
if (obj->number < pdf->objs[left]->number)
|
||
current = left;
|
||
else if (obj->number < pdf->objs[right]->number)
|
||
current = right;
|
||
else
|
||
current = right;
|
||
|
||
PDFIO_DEBUG("add_obj: Inserting at %lu\n", (unsigned long)current);
|
||
|
||
if (current < pdf->num_objs)
|
||
memmove(pdf->objs + current + 1, pdf->objs + current, (pdf->num_objs - current) * sizeof(pdfio_obj_t *));
|
||
|
||
pdf->objs[current] = obj;
|
||
pdf->last_obj = current;
|
||
}
|
||
|
||
pdf->num_objs ++;
|
||
|
||
return (obj);
|
||
}
|
||
|
||
|
||
//
|
||
// 'compare_objmaps()' - Compare two object maps...
|
||
//
|
||
|
||
static int // O - Result of comparison
|
||
compare_objmaps(_pdfio_objmap_t *a, // I - First object map
|
||
_pdfio_objmap_t *b) // I - Second object map
|
||
{
|
||
if (a->src_pdf < b->src_pdf)
|
||
return (-1);
|
||
else if (a->src_pdf > b->src_pdf)
|
||
return (1);
|
||
else if (a->src_number < b->src_number)
|
||
return (-1);
|
||
else if (a->src_number > b->src_number)
|
||
return (1);
|
||
else
|
||
return (0);
|
||
}
|
||
|
||
|
||
//
|
||
// 'create_common()' - Allocate and initialize a pdfio_file_t object for writing.
|
||
//
|
||
|
||
static pdfio_file_t * // O - New PDF file
|
||
create_common(
|
||
const char *filename, // I - Filename
|
||
int fd, // I - File descriptor, if any
|
||
pdfio_output_cb_t output_cb, // I - Output callback function, if any
|
||
void *output_cbdata, // I - Output callback data, if any
|
||
const char *version, // I - PDF version
|
||
pdfio_rect_t *media_box, // I - Media box or `NULL` for default
|
||
pdfio_rect_t *crop_box, // I - Crop box of `NULL` for default
|
||
pdfio_error_cb_t error_cb, // I - Error callback function
|
||
void *error_cbdata) // I - Error callback data
|
||
{
|
||
pdfio_file_t *pdf; // New PDF file
|
||
pdfio_dict_t *dict; // Dictionary
|
||
unsigned char id_value[16]; // File ID value
|
||
|
||
|
||
PDFIO_DEBUG("create_common(filename=\"%s\", fd=%d, output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, fd, (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
|
||
|
||
// Range check input...
|
||
if (!filename || (fd < 0 && !output_cb))
|
||
return (NULL);
|
||
|
||
if (!version)
|
||
version = "2.0";
|
||
|
||
if (!error_cb)
|
||
{
|
||
error_cb = _pdfioFileDefaultError;
|
||
error_cbdata = NULL;
|
||
}
|
||
|
||
// Allocate a PDF file structure...
|
||
if ((pdf = (pdfio_file_t *)calloc(1, sizeof(pdfio_file_t))) == NULL)
|
||
{
|
||
pdfio_file_t temp; // Dummy file
|
||
char message[8192]; // Message string
|
||
|
||
temp.filename = (char *)filename;
|
||
snprintf(message, sizeof(message), "Unable to allocate memory for PDF file: %s", strerror(errno));
|
||
(error_cb)(&temp, message, error_cbdata);
|
||
|
||
return (NULL);
|
||
}
|
||
|
||
// Initialize PDF object...
|
||
pdf->loc = get_lconv();
|
||
pdf->fd = fd;
|
||
pdf->output_cb = output_cb;
|
||
pdf->output_ctx = output_cbdata;
|
||
pdf->filename = strdup(filename);
|
||
pdf->version = strdup(version);
|
||
pdf->mode = _PDFIO_MODE_WRITE;
|
||
pdf->error_cb = error_cb;
|
||
pdf->error_data = error_cbdata;
|
||
pdf->permissions = PDFIO_PERMISSION_ALL;
|
||
pdf->bufptr = pdf->buffer;
|
||
pdf->bufend = pdf->buffer + sizeof(pdf->buffer);
|
||
|
||
if (media_box)
|
||
{
|
||
pdf->media_box = *media_box;
|
||
}
|
||
else
|
||
{
|
||
// Default to "universal" size (intersection of A4 and US Letter)
|
||
pdf->media_box.x2 = 210.0 * 72.0f / 25.4f;
|
||
pdf->media_box.y2 = 11.0f * 72.0f;
|
||
}
|
||
|
||
if (crop_box)
|
||
{
|
||
pdf->crop_box = *crop_box;
|
||
}
|
||
else
|
||
{
|
||
// Default to "universal" size (intersection of A4 and US Letter)
|
||
pdf->crop_box.x2 = 210.0 * 72.0f / 25.4f;
|
||
pdf->crop_box.y2 = 11.0f * 72.0f;
|
||
}
|
||
|
||
// Write a standard PDF header...
|
||
if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version))
|
||
goto error;
|
||
|
||
// Create the pages object...
|
||
if ((dict = pdfioDictCreate(pdf)) == NULL)
|
||
goto error;
|
||
|
||
pdfioDictSetName(dict, "Type", "Pages");
|
||
|
||
if ((pdf->pages_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
|
||
goto error;
|
||
|
||
// Create the info object...
|
||
if ((dict = pdfioDictCreate(pdf)) == NULL)
|
||
goto error;
|
||
|
||
pdfioDictSetDate(dict, "CreationDate", time(NULL));
|
||
pdfioDictSetString(dict, "Producer", "pdfio/" PDFIO_VERSION);
|
||
|
||
if ((pdf->info_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
|
||
goto error;
|
||
|
||
// Create the root object...
|
||
if ((dict = pdfioDictCreate(pdf)) == NULL)
|
||
goto error;
|
||
|
||
pdfioDictSetName(dict, "Type", "Catalog");
|
||
pdfioDictSetObj(dict, "Pages", pdf->pages_obj);
|
||
|
||
if ((pdf->root_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
|
||
goto error;
|
||
|
||
// Create random file ID values...
|
||
_pdfioCryptoMakeRandom(id_value, sizeof(id_value));
|
||
|
||
if ((pdf->id_array = pdfioArrayCreate(pdf)) != NULL)
|
||
{
|
||
pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value));
|
||
pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value));
|
||
}
|
||
|
||
return (pdf);
|
||
|
||
// Common error handling code...
|
||
error:
|
||
|
||
pdfioFileClose(pdf);
|
||
|
||
return (NULL);
|
||
}
|
||
|
||
|
||
//
|
||
// 'get_info_string()' - Get a string value from the Info dictionary.
|
||
//
|
||
// This function also handles converting binary strings to C strings, which
|
||
// occur in encrypted PDF files.
|
||
//
|
||
|
||
static const char * // O - String or `NULL` if not found
|
||
get_info_string(pdfio_file_t *pdf, // I - PDF file
|
||
const char *key) // I - Dictionary key
|
||
{
|
||
pdfio_dict_t *dict; // Info dictionary
|
||
|
||
|
||
// Range check input...
|
||
if (!pdf || !pdf->info_obj || (dict = pdfioObjGetDict(pdf->info_obj)) == NULL)
|
||
return (NULL);
|
||
else
|
||
return (pdfioDictGetString(dict, key));
|
||
}
|
||
|
||
|
||
//
|
||
// 'get_lconv()' - Get any locale-specific numeric information.
|
||
//
|
||
|
||
static struct lconv * // O - Locale information or `NULL`
|
||
get_lconv(void)
|
||
{
|
||
struct lconv *loc; // Locale information
|
||
|
||
|
||
if ((loc = localeconv()) != NULL)
|
||
{
|
||
PDFIO_DEBUG("get_lconv: loc=%p, loc->decimal_point=\"%s\"\n", loc, loc->decimal_point);
|
||
|
||
if (!loc->decimal_point || !strcmp(loc->decimal_point, "."))
|
||
loc = NULL;
|
||
}
|
||
|
||
return (loc);
|
||
}
|
||
|
||
|
||
//
|
||
// 'load_obj_stream()' - Load an object stream.
|
||
//
|
||
// Object streams are Adobe's complicated solution for saving a few
|
||
// kilobytes in an average PDF file at the expense of massively more
|
||
// complicated reader applications.
|
||
//
|
||
// Each object stream starts with pairs of object numbers and offsets,
|
||
// followed by the object values (typically dictionaries). For
|
||
// simplicity pdfio loads all of these values into memory so that we
|
||
// don't later have to randomly access compressed stream data to get
|
||
// a dictionary.
|
||
//
|
||
|
||
static bool // O - `true` on success, `false` on error
|
||
load_obj_stream(pdfio_obj_t *obj) // I - Object to load
|
||
{
|
||
pdfio_stream_t *st; // Stream
|
||
_pdfio_token_t tb; // Token buffer/stack
|
||
char buffer[32]; // Token
|
||
size_t number, // Object number
|
||
cur_obj, // Current object
|
||
num_objs = 0; // Number of objects
|
||
pdfio_obj_t *objs[16384]; // Objects
|
||
|
||
|
||
PDFIO_DEBUG("load_obj_stream(obj=%p(%d))\n", obj, (int)obj->number);
|
||
|
||
// Open the object stream...
|
||
if ((st = pdfioObjOpenStream(obj, true)) == NULL)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Unable to open compressed object stream %lu.", (unsigned long)obj->number);
|
||
return (false);
|
||
}
|
||
|
||
_pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
|
||
|
||
// Read the object numbers from the beginning of the stream...
|
||
while (_pdfioTokenGet(&tb, buffer, sizeof(buffer)))
|
||
{
|
||
// Stop if this isn't an object number...
|
||
if (!isdigit(buffer[0] & 255))
|
||
break;
|
||
|
||
// Stop if we have too many objects...
|
||
if (num_objs >= (sizeof(objs) / sizeof(objs[0])))
|
||
{
|
||
_pdfioFileError(obj->pdf, "Too many compressed objects in one stream.");
|
||
pdfioStreamClose(st);
|
||
return (false);
|
||
}
|
||
|
||
// Add the object in memory...
|
||
number = (size_t)strtoimax(buffer, NULL, 10);
|
||
|
||
if ((objs[num_objs] = pdfioFileFindObj(obj->pdf, number)) == NULL)
|
||
objs[num_objs] = add_obj(obj->pdf, number, 0, 0);
|
||
|
||
num_objs ++;
|
||
|
||
// Skip offset
|
||
_pdfioTokenGet(&tb, buffer, sizeof(buffer));
|
||
PDFIO_DEBUG("load_obj_stream: %ld at offset %s\n", (long)number, buffer);
|
||
}
|
||
|
||
if (!buffer[0])
|
||
{
|
||
pdfioStreamClose(st);
|
||
return (false);
|
||
}
|
||
|
||
_pdfioTokenPush(&tb, buffer);
|
||
|
||
// Read the objects themselves...
|
||
for (cur_obj = 0; cur_obj < num_objs; cur_obj ++)
|
||
{
|
||
if (!_pdfioValueRead(obj->pdf, obj, &tb, &(objs[cur_obj]->value), 0))
|
||
{
|
||
pdfioStreamClose(st);
|
||
return (false);
|
||
}
|
||
}
|
||
|
||
// Close the stream and return
|
||
pdfioStreamClose(st);
|
||
|
||
return (true);
|
||
}
|
||
|
||
|
||
//
|
||
// 'load_pages()' - Load pages in the document.
|
||
//
|
||
|
||
static bool // O - `true` on success, `false` on error
|
||
load_pages(pdfio_file_t *pdf, // I - PDF file
|
||
pdfio_obj_t *obj, // I - Page object
|
||
size_t depth) // I - Depth of page tree
|
||
{
|
||
pdfio_dict_t *dict; // Page object dictionary
|
||
const char *type; // Node type
|
||
pdfio_array_t *kids; // Kids array
|
||
|
||
|
||
// Range check input...
|
||
if (!obj)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to find pages object.");
|
||
return (false);
|
||
}
|
||
|
||
// Get the object dictionary and make sure this is a Pages or Page object...
|
||
if ((dict = pdfioObjGetDict(obj)) == NULL)
|
||
{
|
||
_pdfioFileError(pdf, "No dictionary for pages object.");
|
||
return (false);
|
||
}
|
||
|
||
if ((type = pdfioDictGetName(dict, "Type")) == NULL || (strcmp(type, "Pages") && strcmp(type, "Page")))
|
||
return (false);
|
||
|
||
// If there is a Kids array, then this is a parent node and we have to look
|
||
// at the child objects...
|
||
if ((kids = pdfioDictGetArray(dict, "Kids")) != NULL)
|
||
{
|
||
// Load the child objects...
|
||
size_t i, // Looping var
|
||
num_kids; // Number of elements in array
|
||
|
||
if (depth >= PDFIO_MAX_DEPTH)
|
||
{
|
||
_pdfioFileError(pdf, "Depth of pages objects too great to load.");
|
||
return (false);
|
||
}
|
||
|
||
for (i = 0, num_kids = pdfioArrayGetSize(kids); i < num_kids; i ++)
|
||
{
|
||
if (!load_pages(pdf, pdfioArrayGetObj(kids, i), depth + 1))
|
||
return (false);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
// Add this page...
|
||
if (pdf->num_pages >= pdf->alloc_pages)
|
||
{
|
||
pdfio_obj_t **temp = (pdfio_obj_t **)realloc(pdf->pages, (pdf->alloc_pages + 32) * sizeof(pdfio_obj_t *));
|
||
|
||
if (!temp)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to allocate memory for pages.");
|
||
return (false);
|
||
}
|
||
|
||
pdf->alloc_pages += 32;
|
||
pdf->pages = temp;
|
||
}
|
||
|
||
pdf->pages[pdf->num_pages ++] = obj;
|
||
}
|
||
|
||
return (true);
|
||
}
|
||
|
||
|
||
//
|
||
// 'load_xref()' - Load an XREF table...
|
||
//
|
||
|
||
static bool // O - `true` on success, `false` on failure
|
||
load_xref(
|
||
pdfio_file_t *pdf, // I - PDF file
|
||
off_t xref_offset, // I - Offset to xref
|
||
pdfio_password_cb_t password_cb, // I - Password callback or `NULL` for none
|
||
void *password_data) // I - Password callback data, if any
|
||
{
|
||
bool done = false; // Are we done?
|
||
char line[1024], // Line from file
|
||
*ptr; // Pointer into line
|
||
_pdfio_value_t trailer; // Trailer dictionary
|
||
intmax_t number, // Object number
|
||
num_objects, // Number of objects
|
||
offset; // Offset in file
|
||
int generation; // Generation number
|
||
_pdfio_token_t tb; // Token buffer/stack
|
||
off_t line_offset; // Offset to start of line
|
||
|
||
|
||
while (!done)
|
||
{
|
||
if (_pdfioFileSeek(pdf, xref_offset, SEEK_SET) != xref_offset)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to seek to start of xref table.");
|
||
return (false);
|
||
}
|
||
|
||
do
|
||
{
|
||
line_offset = _pdfioFileTell(pdf);
|
||
|
||
if (!_pdfioFileGets(pdf, line, sizeof(line)))
|
||
{
|
||
_pdfioFileError(pdf, "Unable to read start of xref table.");
|
||
return (false);
|
||
}
|
||
}
|
||
while (!line[0]);
|
||
|
||
PDFIO_DEBUG("load_xref: line_offset=%lu, line='%s'\n", (unsigned long)line_offset, line);
|
||
|
||
if (isdigit(line[0] & 255) && strlen(line) > 4 && (!strcmp(line + strlen(line) - 4, " obj") || ((ptr = strstr(line, " obj")) != NULL && ptr[4] == '<')))
|
||
{
|
||
// Cross-reference stream
|
||
pdfio_obj_t *obj; // Object
|
||
size_t i; // Looping var
|
||
pdfio_array_t *index_array; // Index array
|
||
size_t index_n, // Current element in array
|
||
index_count, // Number of values in index array
|
||
count; // Number of objects in current pairing
|
||
pdfio_array_t *w_array; // W array
|
||
size_t w[3]; // Size of each cross-reference field
|
||
size_t w_2, // Offset to second field
|
||
w_3; // Offset to third field
|
||
size_t w_total; // Total length
|
||
pdfio_stream_t *st; // Stream
|
||
unsigned char buffer[32]; // Read buffer
|
||
size_t num_sobjs = 0, // Number of object streams
|
||
sobjs[8192]; // Object streams to load
|
||
pdfio_obj_t *current; // Current object
|
||
|
||
if ((number = strtoimax(line, &ptr, 10)) < 1)
|
||
{
|
||
_pdfioFileError(pdf, "Bad xref table header '%s'.", line);
|
||
return (false);
|
||
}
|
||
|
||
if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || (generation > 65535 && number != 0))
|
||
{
|
||
_pdfioFileError(pdf, "Bad xref table header '%s'.", line);
|
||
return (false);
|
||
}
|
||
|
||
while (isspace(*ptr & 255))
|
||
ptr ++;
|
||
|
||
if (strncmp(ptr, "obj", 3))
|
||
{
|
||
_pdfioFileError(pdf, "Bad xref table header '%s'.", line);
|
||
return (false);
|
||
}
|
||
|
||
if (_pdfioFileSeek(pdf, line_offset + ptr + 3 - line, SEEK_SET) < 0)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to seek to xref object %lu %u.", (unsigned long)number, (unsigned)generation);
|
||
return (false);
|
||
}
|
||
|
||
PDFIO_DEBUG("load_xref: Loading object %lu %u.\n", (unsigned long)number, (unsigned)generation);
|
||
|
||
if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, xref_offset)) == NULL)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to allocate memory for object.");
|
||
return (false);
|
||
}
|
||
|
||
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
|
||
|
||
if (!_pdfioValueRead(pdf, obj, &tb, &trailer, 0))
|
||
{
|
||
_pdfioFileError(pdf, "Unable to read cross-reference stream dictionary.");
|
||
return (false);
|
||
}
|
||
else if (trailer.type != PDFIO_VALTYPE_DICT)
|
||
{
|
||
_pdfioFileError(pdf, "Cross-reference stream does not have a dictionary.");
|
||
return (false);
|
||
}
|
||
|
||
obj->value = trailer;
|
||
|
||
if (!_pdfioTokenGet(&tb, line, sizeof(line)) || strcmp(line, "stream"))
|
||
{
|
||
_pdfioFileError(pdf, "Unable to get stream after xref dictionary.");
|
||
return (false);
|
||
}
|
||
|
||
PDFIO_DEBUG("load_xref: tb.bufptr=%p, tb.bufend=%p, tb.bufptr[0]=0x%02x, tb.bufptr[0]=0x%02x\n", tb.bufptr, tb.bufend, tb.bufptr[0], tb.bufptr[1]);
|
||
if (tb.bufptr && tb.bufptr < tb.bufend && (tb.bufptr[0] == 0x0d || tb.bufptr[0] == 0x0a))
|
||
tb.bufptr ++; // Skip trailing CR or LF after token
|
||
|
||
_pdfioTokenFlush(&tb);
|
||
|
||
obj->stream_offset = _pdfioFileTell(pdf);
|
||
|
||
if ((index_array = pdfioDictGetArray(trailer.value.dict, "Index")) != NULL)
|
||
index_count = index_array->num_values;
|
||
else
|
||
index_count = 1;
|
||
|
||
if ((w_array = pdfioDictGetArray(trailer.value.dict, "W")) == NULL)
|
||
{
|
||
_pdfioFileError(pdf, "Cross-reference stream does not have required W key.");
|
||
return (false);
|
||
}
|
||
|
||
w[0] = (size_t)pdfioArrayGetNumber(w_array, 0);
|
||
w[1] = (size_t)pdfioArrayGetNumber(w_array, 1);
|
||
w[2] = (size_t)pdfioArrayGetNumber(w_array, 2);
|
||
w_total = w[0] + w[1] + w[2];
|
||
w_2 = w[0];
|
||
w_3 = w[0] + w[1];
|
||
|
||
if (w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
|
||
{
|
||
_pdfioFileError(pdf, "Cross-reference stream has invalid W key [%u %u %u].", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2]);
|
||
return (false);
|
||
}
|
||
|
||
if ((st = pdfioObjOpenStream(obj, true)) == NULL)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to open cross-reference stream.");
|
||
return (false);
|
||
}
|
||
|
||
for (index_n = 0; index_n < index_count; index_n += 2)
|
||
{
|
||
if (index_count == 1)
|
||
{
|
||
number = 0;
|
||
count = 999999999;
|
||
}
|
||
else
|
||
{
|
||
number = (intmax_t)pdfioArrayGetNumber(index_array, index_n);
|
||
count = (size_t)pdfioArrayGetNumber(index_array, index_n + 1);
|
||
}
|
||
|
||
while (count > 0 && pdfioStreamRead(st, buffer, w_total) > 0)
|
||
{
|
||
count --;
|
||
|
||
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]);
|
||
|
||
// Check whether this is an object definition...
|
||
if (w[0] > 0)
|
||
{
|
||
if (buffer[0] == 0)
|
||
{
|
||
// Ignore free objects...
|
||
number ++;
|
||
continue;
|
||
}
|
||
}
|
||
|
||
// Offset
|
||
for (i = 1, offset = buffer[w_2]; i < w[1]; i ++)
|
||
offset = (offset << 8) | buffer[w_2 + i];
|
||
|
||
// Generation number
|
||
switch (w[2])
|
||
{
|
||
default :
|
||
generation = 0;
|
||
break;
|
||
case 1 :
|
||
generation = buffer[w_3];
|
||
break;
|
||
case 2 :
|
||
generation = (buffer[w_3] << 8) | buffer[w_3 + 1];
|
||
break;
|
||
case 3 :
|
||
// Issue #46: Stupid Microsoft PDF generator using 3 bytes to
|
||
// encode 16-bit generation numbers == 0 (probably a lazy coder
|
||
// stuffing things into an array of 64-bit unsigned integers)
|
||
generation = (buffer[w_3] << 16) | (buffer[w_3 + 1] << 8) | buffer[w_3 + 2];
|
||
if (generation > 65535)
|
||
generation = 65535;
|
||
break;
|
||
case 4 : // Even stupider :)
|
||
generation = (buffer[w_3] << 24) | (buffer[w_3 + 1] << 16) | (buffer[w_3 + 2] << 8) | buffer[w_3 + 3];
|
||
if (generation > 65535)
|
||
generation = 65535;
|
||
break;
|
||
}
|
||
|
||
// Create a placeholder for the object in memory...
|
||
if ((current = pdfioFileFindObj(pdf, (size_t)number)) != NULL)
|
||
{
|
||
PDFIO_DEBUG("load_xref: existing object, prev offset=%u\n", (unsigned)current->offset);
|
||
|
||
if (w[0] == 0 || buffer[0] == 1)
|
||
{
|
||
// Location of object...
|
||
current->offset = offset;
|
||
}
|
||
else if (number != offset)
|
||
{
|
||
// Object is part of a stream, offset is the object number...
|
||
current->offset = 0;
|
||
}
|
||
|
||
PDFIO_DEBUG("load_xref: new offset=%u\n", (unsigned)current->offset);
|
||
}
|
||
|
||
if (w[0] > 0 && buffer[0] == 2)
|
||
{
|
||
// Object streams need to be loaded into memory, so add them
|
||
// to the list of objects to load later as needed...
|
||
for (i = 0; i < num_sobjs; i ++)
|
||
{
|
||
if (sobjs[i] == (size_t)offset)
|
||
break;
|
||
}
|
||
|
||
if (i >= num_sobjs)
|
||
{
|
||
if (num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
|
||
{
|
||
sobjs[num_sobjs ++] = (size_t)offset;
|
||
}
|
||
else
|
||
{
|
||
_pdfioFileError(pdf, "Too many object streams.");
|
||
return (false);
|
||
}
|
||
}
|
||
}
|
||
else if (!current)
|
||
{
|
||
// Add this object...
|
||
if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset))
|
||
return (false);
|
||
}
|
||
|
||
number ++;
|
||
}
|
||
}
|
||
|
||
pdfioStreamClose(st);
|
||
|
||
if (!pdf->trailer_dict)
|
||
{
|
||
// Save the trailer dictionary and grab the root (catalog) and info
|
||
// objects...
|
||
pdf->trailer_dict = trailer.value.dict;
|
||
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
|
||
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
|
||
|
||
// If the trailer contains an Encrypt key, try unlocking the file...
|
||
if (pdf->encrypt_obj && !_pdfioCryptoUnlock(pdf, password_cb, password_data))
|
||
return (false);
|
||
}
|
||
|
||
// Load any object streams that are left...
|
||
PDFIO_DEBUG("load_xref: %lu compressed object streams to load.\n", (unsigned long)num_sobjs);
|
||
|
||
for (i = 0; i < num_sobjs; i ++)
|
||
{
|
||
if ((obj = pdfioFileFindObj(pdf, sobjs[i])) != NULL)
|
||
{
|
||
PDFIO_DEBUG("load_xref: Loading compressed object stream %lu (pdf=%p, obj->pdf=%p).\n", (unsigned long)sobjs[i], pdf, obj->pdf);
|
||
|
||
if (!load_obj_stream(obj))
|
||
return (false);
|
||
}
|
||
else
|
||
{
|
||
_pdfioFileError(pdf, "Unable to find compressed object stream %lu.", (unsigned long)sobjs[i]);
|
||
return (false);
|
||
}
|
||
}
|
||
}
|
||
else if (!strncmp(line, "xref", 4) && (!line[4] || isspace(line[4] & 255)))
|
||
{
|
||
// Read the xref tables
|
||
off_t trailer_offset = _pdfioFileTell(pdf);
|
||
// Offset of current line
|
||
|
||
PDFIO_DEBUG("load_xref: Reading xref table starting at offset %lu\n", (unsigned long)trailer_offset);
|
||
while (_pdfioFileGets(pdf, line, sizeof(line)))
|
||
{
|
||
PDFIO_DEBUG("load_xref: '%s' at offset %lu\n", line, (unsigned long)trailer_offset);
|
||
|
||
if (!strncmp(line, "trailer", 7) && (!line[7] || isspace(line[7] & 255) || line[7] == '<'))
|
||
{
|
||
if (line[7])
|
||
{
|
||
// Probably the start of the trailer dictionary, rewind the file so
|
||
// we can read it...
|
||
_pdfioFileSeek(pdf, trailer_offset + 7, SEEK_SET);
|
||
}
|
||
|
||
break;
|
||
}
|
||
else
|
||
{
|
||
trailer_offset = _pdfioFileTell(pdf);
|
||
|
||
if (!line[0])
|
||
continue;
|
||
}
|
||
|
||
if (sscanf(line, "%jd%jd", &number, &num_objects) != 2)
|
||
{
|
||
_pdfioFileError(pdf, "Malformed xref table section '%s'.", line);
|
||
return (false);
|
||
}
|
||
|
||
// Read this group of objects...
|
||
for (; num_objects > 0; num_objects --, number ++)
|
||
{
|
||
// Read a line from the file and validate it...
|
||
if (_pdfioFileRead(pdf, line, 20) != 20)
|
||
return (false);
|
||
|
||
line[20] = '\0';
|
||
|
||
if (strcmp(line + 18, "\r\n") && strcmp(line + 18, " \n") && strcmp(line + 18, " \r"))
|
||
{
|
||
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
|
||
return (false);
|
||
}
|
||
line[18] = '\0';
|
||
|
||
// Parse the line
|
||
if ((offset = strtoimax(line, &ptr, 10)) < 0)
|
||
{
|
||
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
|
||
return (false);
|
||
}
|
||
|
||
if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || (generation > 65535 && offset != 0))
|
||
{
|
||
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
|
||
return (false);
|
||
}
|
||
|
||
if (*ptr != ' ')
|
||
{
|
||
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
|
||
return (false);
|
||
}
|
||
|
||
ptr ++;
|
||
if (*ptr != 'f' && *ptr != 'n')
|
||
{
|
||
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line);
|
||
return (false);
|
||
}
|
||
|
||
if (*ptr == 'f')
|
||
continue; // Don't care about free objects...
|
||
|
||
// Create a placeholder for the object in memory...
|
||
if (pdfioFileFindObj(pdf, (size_t)number))
|
||
continue; // Don't replace newer object...
|
||
|
||
if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset))
|
||
return (false);
|
||
}
|
||
|
||
trailer_offset = _pdfioFileTell(pdf);
|
||
}
|
||
|
||
if (strncmp(line, "trailer", 7))
|
||
{
|
||
_pdfioFileError(pdf, "Missing trailer.");
|
||
return (false);
|
||
}
|
||
|
||
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
|
||
|
||
if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0))
|
||
{
|
||
_pdfioFileError(pdf, "Unable to read trailer dictionary.");
|
||
return (false);
|
||
}
|
||
else if (trailer.type != PDFIO_VALTYPE_DICT)
|
||
{
|
||
_pdfioFileError(pdf, "Trailer is not a dictionary.");
|
||
return (false);
|
||
}
|
||
|
||
PDFIO_DEBUG("load_xref: Got trailer dict.\n");
|
||
|
||
_pdfioTokenFlush(&tb);
|
||
|
||
if (!pdf->trailer_dict)
|
||
{
|
||
// Save the trailer dictionary and grab the root (catalog) and info
|
||
// objects...
|
||
pdf->trailer_dict = trailer.value.dict;
|
||
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
|
||
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
|
||
|
||
// If the trailer contains an Encrypt key, try unlocking the file...
|
||
if (pdf->encrypt_obj && !_pdfioCryptoUnlock(pdf, password_cb, password_data))
|
||
return (false);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
_pdfioFileError(pdf, "Bad xref table header '%s'.", line);
|
||
return (false);
|
||
}
|
||
|
||
PDFIO_DEBUG("load_xref: Contents of trailer dictionary:\n");
|
||
PDFIO_DEBUG("load_xref: ");
|
||
PDFIO_DEBUG_VALUE(&trailer);
|
||
PDFIO_DEBUG("\n");
|
||
|
||
off_t new_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev");
|
||
|
||
if (new_offset <= 0)
|
||
{
|
||
done = true;
|
||
}
|
||
else if (new_offset == xref_offset)
|
||
{
|
||
_pdfioFileError(pdf, "Recursive xref table.");
|
||
return (false);
|
||
}
|
||
|
||
xref_offset = new_offset;
|
||
}
|
||
|
||
// Once we have all of the xref tables loaded, get the important objects and
|
||
// build the pages array...
|
||
pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info");
|
||
|
||
if ((pdf->root_obj = pdfioDictGetObj(pdf->trailer_dict, "Root")) == NULL)
|
||
{
|
||
_pdfioFileError(pdf, "Missing Root object.");
|
||
return (false);
|
||
}
|
||
|
||
PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);
|
||
|
||
return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages"), 0));
|
||
}
|
||
|
||
|
||
//
|
||
// 'write_pages()' - Write the PDF pages objects.
|
||
//
|
||
|
||
static bool // O - `true` on success, `false` on failure
|
||
write_pages(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
pdfio_array_t *kids; // Pages array
|
||
size_t i; // Looping var
|
||
|
||
|
||
// Build the "Kids" array pointing to each page...
|
||
if ((kids = pdfioArrayCreate(pdf)) == NULL)
|
||
return (false);
|
||
|
||
for (i = 0; i < pdf->num_pages; i ++)
|
||
pdfioArrayAppendObj(kids, pdf->pages[i]);
|
||
|
||
pdfioDictSetNumber(pdf->pages_obj->value.value.dict, "Count", pdf->num_pages);
|
||
pdfioDictSetArray(pdf->pages_obj->value.value.dict, "Kids", kids);
|
||
|
||
// Write the Pages object...
|
||
return (pdfioObjClose(pdf->pages_obj));
|
||
}
|
||
|
||
|
||
//
|
||
// 'write_trailer()' - Write the PDF catalog object, xref table, and trailer.
|
||
//
|
||
|
||
static bool // O - `true` on success, `false` on failure
|
||
write_trailer(pdfio_file_t *pdf) // I - PDF file
|
||
{
|
||
bool ret = true; // Return value
|
||
off_t xref_offset; // Offset to xref table
|
||
size_t i; // Looping var
|
||
|
||
|
||
// Write the xref table...
|
||
// TODO: Look at adding support for xref streams...
|
||
xref_offset = _pdfioFileTell(pdf);
|
||
|
||
if (!_pdfioFilePrintf(pdf, "xref\n0 %lu \n0000000000 65535 f \n", (unsigned long)pdf->num_objs + 1))
|
||
{
|
||
_pdfioFileError(pdf, "Unable to write cross-reference table.");
|
||
ret = false;
|
||
goto done;
|
||
}
|
||
|
||
for (i = 0; i < pdf->num_objs; i ++)
|
||
{
|
||
pdfio_obj_t *obj = pdf->objs[i]; // Current object
|
||
|
||
if (!_pdfioFilePrintf(pdf, "%010lu %05u n \n", (unsigned long)obj->offset, obj->generation))
|
||
{
|
||
_pdfioFileError(pdf, "Unable to write cross-reference table.");
|
||
ret = false;
|
||
goto done;
|
||
}
|
||
}
|
||
|
||
// Write the trailer...
|
||
if (!_pdfioFilePuts(pdf, "trailer\n"))
|
||
{
|
||
_pdfioFileError(pdf, "Unable to write trailer.");
|
||
ret = false;
|
||
goto done;
|
||
}
|
||
|
||
if ((pdf->trailer_dict = pdfioDictCreate(pdf)) == NULL)
|
||
{
|
||
_pdfioFileError(pdf, "Unable to create trailer.");
|
||
ret = false;
|
||
goto done;
|
||
}
|
||
|
||
if (pdf->encrypt_obj)
|
||
pdfioDictSetObj(pdf->trailer_dict, "Encrypt", pdf->encrypt_obj);
|
||
if (pdf->id_array)
|
||
pdfioDictSetArray(pdf->trailer_dict, "ID", pdf->id_array);
|
||
pdfioDictSetObj(pdf->trailer_dict, "Info", pdf->info_obj);
|
||
pdfioDictSetObj(pdf->trailer_dict, "Root", pdf->root_obj);
|
||
pdfioDictSetNumber(pdf->trailer_dict, "Size", pdf->num_objs + 1);
|
||
|
||
if (!_pdfioDictWrite(pdf->trailer_dict, NULL, NULL))
|
||
{
|
||
_pdfioFileError(pdf, "Unable to write trailer.");
|
||
ret = false;
|
||
goto done;
|
||
}
|
||
|
||
if (!_pdfioFilePrintf(pdf, "\nstartxref\n%lu\n%%EOF\n", (unsigned long)xref_offset))
|
||
{
|
||
_pdfioFileError(pdf, "Unable to write xref offset.");
|
||
ret = false;
|
||
}
|
||
|
||
done:
|
||
|
||
return (ret);
|
||
}
|