pdfio/pdfio-object.c

601 lines
14 KiB
C
Raw Normal View History

2021-04-10 14:00:52 +02:00
//
// PDF object functions for PDFio.
//
// Copyright © 2021-2024 by Michael R Sweet.
//
// Licensed under Apache License v2.0.  See the file "LICENSE" for more
// information.
//
#include "pdfio-private.h"
2021-04-17 02:41:46 +02:00
2021-05-16 17:39:05 +02:00
//
// Local functions...
//
// write_obj_header() records the object's file offset and writes the
// "<number> <generation> obj" line followed by the object's value; it is
// defined at the end of this file.
static bool write_obj_header(pdfio_obj_t *obj);
2021-04-17 03:09:43 +02:00
//
2021-04-26 16:42:01 +02:00
// 'pdfioObjClose()' - Close an object, writing any data as needed to the PDF
// file.
2021-04-17 03:09:43 +02:00
//
2021-04-26 16:42:01 +02:00
bool // O - `true` on success, `false` on failure
pdfioObjClose(pdfio_obj_t *obj) // I - Object
2021-04-17 03:09:43 +02:00
{
2021-05-16 17:39:05 +02:00
// Range check input
if (!obj)
return (false);
// Clear the current object pointer...
obj->pdf->current_obj = NULL;
2021-05-16 17:39:05 +02:00
if (obj->pdf->mode != _PDFIO_MODE_WRITE)
{
// Nothing to do when reading
return (true);
}
2021-04-28 03:22:34 +02:00
2021-05-16 17:39:05 +02:00
// Write what remains for the object...
if (!obj->offset)
{
// Write the object value
if (!write_obj_header(obj))
return (false);
// Write the "endobj" line...
return (_pdfioFilePuts(obj->pdf, "endobj\n"));
}
2021-05-16 17:39:05 +02:00
else if (obj->stream)
{
// Close the stream...
return (pdfioStreamClose(obj->stream));
}
2021-05-16 17:39:05 +02:00
else
{
// Already closed
return (true);
}
2021-04-17 03:09:43 +02:00
}
//
// 'pdfioObjCopy()' - Copy an object to another PDF file.
//
// The source object's value is loaded if it has not been read yet and is then
// copied into a newly created object in the destination file.  Any "Length"
// key is cleared from a copied dictionary (pdfioObjCreateStream() adds a
// placeholder when none is present).  Stream data, when present, is copied
// without decoding into an unfiltered destination stream.
//
// `NULL` is returned when any step fails, including a failure to finish
// writing the destination object or its stream.
//

pdfio_obj_t *                           // O - New object or `NULL` on error
pdfioObjCopy(pdfio_file_t *pdf,         // I - PDF file
             pdfio_obj_t  *srcobj)      // I - Object to copy
{
  pdfio_obj_t    *dstobj;               // Destination object
  pdfio_stream_t *srcst,                // Source stream
                 *dstst;                // Destination stream
  char           buffer[32768];         // Copy buffer
  ssize_t        bytes;                 // Bytes read


  PDFIO_DEBUG("pdfioObjCopy(pdf=%p, srcobj=%p(%p))\n", pdf, srcobj, srcobj ? srcobj->pdf : NULL);

  // Range check input
  if (!pdf || !srcobj)
    return (NULL);

  // Load the object value if needed...
  // NOTE(review): the load result is not checked here, unlike in
  // pdfioObjOpenStream() - confirm whether a failed load should abort the copy.
  if (srcobj->value.type == PDFIO_VALTYPE_NONE)
    _pdfioObjLoad(srcobj);

  // Create the new object...
  if ((dstobj = _pdfioFileCreateObj(pdf, srcobj->pdf, NULL)) == NULL)
    return (NULL);

  // Add new object to the cache of copied objects...
  if (!_pdfioFileAddMappedObj(pdf, dstobj, srcobj))
    return (NULL);

  // Copy the object's value...
  if (!_pdfioValueCopy(pdf, &dstobj->value, srcobj->pdf, &srcobj->value))
    return (NULL);

  if (dstobj->value.type == PDFIO_VALTYPE_DICT)
    pdfioDictClear(dstobj->value.value.dict, "Length");

  if (!srcobj->stream_offset)
  {
    // No stream data, so just write the object; a failed write means the
    // destination file does not contain a usable copy...
    if (!pdfioObjClose(dstobj))
      return (NULL);

    return (dstobj);
  }

  // Copy stream data...
  if ((srcst = pdfioObjOpenStream(srcobj, false)) == NULL)
  {
    pdfioObjClose(dstobj);
    return (NULL);
  }

  if ((dstst = pdfioObjCreateStream(dstobj, PDFIO_FILTER_NONE)) == NULL)
  {
    pdfioStreamClose(srcst);
    pdfioObjClose(dstobj);
    return (NULL);
  }

  while ((bytes = pdfioStreamRead(srcst, buffer, sizeof(buffer))) > 0)
  {
    if (!pdfioStreamWrite(dstst, buffer, (size_t)bytes))
    {
      bytes = -1;
      break;
    }
  }

  // Both streams are always closed; the copy only succeeded if the read/write
  // loop ended cleanly AND the destination stream could be finished...
  pdfioStreamClose(srcst);

  if (!pdfioStreamClose(dstst) || bytes < 0)
    return (NULL);

  return (dstobj);
}
2021-04-17 03:09:43 +02:00
//
2021-04-26 16:42:01 +02:00
// 'pdfioObjCreateStream()' - Create an object (data) stream for writing.
2021-04-17 03:09:43 +02:00
//
2021-04-26 16:42:01 +02:00
pdfio_stream_t * // O - Stream or `NULL` on error
pdfioObjCreateStream(
pdfio_obj_t *obj, // I - Object
pdfio_filter_t filter) // I - Type of compression to apply
2021-04-17 03:09:43 +02:00
{
pdfio_obj_t *length_obj = NULL; // Length object, if any
2021-05-16 17:39:05 +02:00
// Range check input
if (!obj || obj->pdf->mode != _PDFIO_MODE_WRITE || obj->value.type != PDFIO_VALTYPE_DICT)
return (NULL);
2021-04-28 03:22:34 +02:00
2021-05-16 17:39:05 +02:00
if (obj->offset)
{
_pdfioFileError(obj->pdf, "Object has already been written.");
return (NULL);
}
if (filter != PDFIO_FILTER_NONE && filter != PDFIO_FILTER_FLATE)
{
_pdfioFileError(obj->pdf, "Unsupported filter value for PDFioObjCreateStream.");
2021-05-16 17:39:05 +02:00
return (NULL);
}
if (obj->pdf->current_obj)
{
_pdfioFileError(obj->pdf, "Another object (%u) is already open.", (unsigned)obj->pdf->current_obj->number);
return (NULL);
}
2021-05-16 17:39:05 +02:00
// Write the header...
if (!_pdfioDictGetValue(obj->value.value.dict, "Length"))
{
if (obj->pdf->output_cb)
{
// Streaming via an output callback, so add a placeholder length object
_pdfio_value_t length_value; // Length value
length_value.type = PDFIO_VALTYPE_NUMBER;
length_value.value.number = 0.0f;
length_obj = _pdfioFileCreateObj(obj->pdf, obj->pdf, &length_value);
pdfioDictSetObj(obj->value.value.dict, "Length", length_obj);
}
else
{
// Need a Length key for the stream, add a placeholder that we can fill in
// later...
pdfioDictSetNumber(obj->value.value.dict, "Length", 0.0);
}
2021-05-16 17:39:05 +02:00
}
if (!write_obj_header(obj))
return (NULL);
if (!_pdfioFilePuts(obj->pdf, "stream\n"))
return (NULL);
obj->stream_offset = _pdfioFileTell(obj->pdf);
obj->pdf->current_obj = obj;
2021-05-16 17:39:05 +02:00
// Return the new stream...
return (_pdfioStreamCreate(obj, length_obj, filter));
2021-04-17 03:09:43 +02:00
}
//
2021-04-26 16:42:01 +02:00
// '_pdfioObjDelete()' - Free memory used by an object.
2021-04-17 03:09:43 +02:00
//
2021-04-26 16:42:01 +02:00
void
2021-04-28 03:22:34 +02:00
_pdfioObjDelete(pdfio_obj_t *obj) // I - Object
2021-04-17 03:09:43 +02:00
{
2021-04-26 16:42:01 +02:00
if (obj)
{
2021-04-26 16:42:01 +02:00
pdfioStreamClose(obj->stream);
if (obj->datafree)
(obj->datafree)(obj->data);
}
2021-04-26 16:42:01 +02:00
free(obj);
2021-04-17 03:09:43 +02:00
}
2021-05-11 03:01:02 +02:00
//
// 'pdfioObjGetArray()' - Get the array associated with an object.
//
// The object's value is loaded first if it has not been read yet.
//

pdfio_array_t *                         // O - Array or `NULL` on error
pdfioObjGetArray(pdfio_obj_t *obj)      // I - Object
{
  if (!obj)
    return (NULL);

  // Values are read on demand; the load result is not checked because the
  // type test below only accepts an array...
  if (obj->value.type == PDFIO_VALTYPE_NONE)
    _pdfioObjLoad(obj);

  return (obj->value.type == PDFIO_VALTYPE_ARRAY ? obj->value.value.array : NULL);
}
2021-04-17 03:09:43 +02:00
//
2021-04-26 16:42:01 +02:00
// 'pdfioObjGetDict()' - Get the dictionary associated with an object.
2021-04-17 03:09:43 +02:00
//
2021-04-26 16:42:01 +02:00
pdfio_dict_t * // O - Dictionary or `NULL` on error
pdfioObjGetDict(pdfio_obj_t *obj) // I - Object
2021-04-17 03:09:43 +02:00
{
if (!obj)
return (NULL);
2021-04-26 16:42:01 +02:00
if (obj->value.type == PDFIO_VALTYPE_NONE)
_pdfioObjLoad(obj);
if (obj->value.type == PDFIO_VALTYPE_DICT)
return (obj->value.value.dict);
else
return (NULL);
2021-04-17 03:09:43 +02:00
}
//
// '_pdfioObjGetExtension()' - Get the extension pointer for an object.
//
// The object pointer is dereferenced without a check, so it must not be `NULL`.
//

void *                                  // O - Extension data
_pdfioObjGetExtension(pdfio_obj_t *obj) // I - Object
{
  void *extension = obj->data;          // Pointer stored by _pdfioObjSetExtension()

  return (extension);
}
2021-04-17 03:09:43 +02:00
//
2021-04-28 03:22:34 +02:00
// 'pdfioObjGetGeneration()' - Get the object's generation number.
2021-04-17 03:09:43 +02:00
//
unsigned short // O - Generation number (0 to 65535)
2021-04-28 03:22:34 +02:00
pdfioObjGetGeneration(pdfio_obj_t *obj) // I - Object
2021-04-17 03:09:43 +02:00
{
2021-04-28 03:22:34 +02:00
return (obj ? obj->generation : 0);
2021-04-17 03:09:43 +02:00
}
2021-05-09 01:16:27 +02:00
//
// 'pdfioObjGetLength()' - Get the length of the object's (data) stream.
//
// The "Length" key of the object's dictionary is used, either as a direct
// number or as a reference to a number object (which is loaded if needed).
// 0 is returned when the object has no stream, and an error is reported and 0
// returned when no usable positive length can be found.
//

size_t                                  // O - Length in bytes or `0` for none
pdfioObjGetLength(pdfio_obj_t *obj)     // I - Object
{
  double      number;                   // Length as stored in the file
  size_t      length;                   // Length of stream
  pdfio_obj_t *lenobj;                  // Length object


  // Range check input...
  if (!obj || !obj->stream_offset || obj->value.type != PDFIO_VALTYPE_DICT)
    return (0);

  // Try getting the length, directly or indirectly.  The number comes from the
  // file, and converting a negative, NaN, or too-large double to size_t is
  // undefined, so the range is validated before the cast...
  number = pdfioDictGetNumber(obj->value.value.dict, "Length");

  if (number >= 1.0 && number < (double)SIZE_MAX)
  {
    length = (size_t)number;

    PDFIO_DEBUG("pdfioObjGetLength(obj=%p) returning %lu.\n", obj, (unsigned long)length);

    return (length);
  }

  if ((lenobj = pdfioDictGetObj(obj->value.value.dict, "Length")) == NULL)
  {
    _pdfioFileError(obj->pdf, "Unable to get length of stream.");
    return (0);
  }

  if (lenobj->value.type == PDFIO_VALTYPE_NONE)
    _pdfioObjLoad(lenobj);

  // The negated form also rejects NaN, which fails every comparison...
  if (lenobj->value.type != PDFIO_VALTYPE_NUMBER || !(lenobj->value.value.number > 0.0 && lenobj->value.value.number < (double)SIZE_MAX))
  {
    _pdfioFileError(obj->pdf, "Unable to get length of stream.");
    return (0);
  }

  length = (size_t)lenobj->value.value.number;

  PDFIO_DEBUG("pdfioObjGetLength(obj=%p) returning %lu.\n", obj, (unsigned long)length);

  return (length);
}
2024-09-27 19:38:15 +02:00
//
// 'pdfioObjGetName()' - Get the name value associated with an object.
//
// The object's value is loaded first if it has not been read yet.
//

const char *                            // O - Name or `NULL` on error
pdfioObjGetName(pdfio_obj_t *obj)       // I - Object
{
  if (!obj)
    return (NULL);

  // Read the value on demand...
  if (obj->value.type == PDFIO_VALTYPE_NONE)
    _pdfioObjLoad(obj);

  // Anything other than a name value is reported as `NULL`...
  if (obj->value.type != PDFIO_VALTYPE_NAME)
    return (NULL);

  return (obj->value.value.name);
}
2021-04-17 03:09:43 +02:00
//
2021-04-28 03:22:34 +02:00
// 'pdfioObjGetNumber()' - Get the object's number.
2021-04-17 03:09:43 +02:00
//
2021-04-28 03:22:34 +02:00
size_t // O - Object number (1 to 9999999999)
pdfioObjGetNumber(pdfio_obj_t *obj) // I - Object
2021-04-17 03:09:43 +02:00
{
2021-04-28 03:22:34 +02:00
return (obj ? obj->number : 0);
2021-04-17 03:09:43 +02:00
}
2021-06-02 21:46:39 +02:00
//
// 'pdfioObjGetSubtype()' - Get an object's subtype.
//
// This function returns an object's PDF subtype name, if any. Common subtype
// names include:
//
// - "CIDFontType0": A CID Type0 font
// - "CIDFontType2": A CID TrueType font
// - "Image": An image or image mask
// - "Form": A fillable form
// - "OpenType": An OpenType font
// - "Type0": A composite font
// - "Type1": A PostScript Type1 font
// - "Type3": A PDF Type3 font
// - "TrueType": A TrueType font
//
2021-06-02 21:46:39 +02:00
const char * // O - Object subtype name or `NULL` for none
2021-06-02 21:46:39 +02:00
pdfioObjGetSubtype(pdfio_obj_t *obj) // I - Object
{
pdfio_dict_t *dict; // Object dictionary
if ((dict = pdfioObjGetDict(obj)) == NULL)
return (NULL);
else
return (pdfioDictGetName(dict, "Subtype"));
}
2021-04-17 03:09:43 +02:00
//
2021-04-28 03:22:34 +02:00
// 'pdfioObjGetType()' - Get an object's type.
2021-04-17 03:09:43 +02:00
//
// This function returns an object's PDF type name, if any. Common type names
// include:
//
// - "CMap": A character map for composite fonts
// - "Font": An embedded font (@link pdfioObjGetSubtype@ will tell you the
// font format)
// - "FontDescriptor": A font descriptor
// - "Page": A (visible) page
// - "Pages": A page tree node
// - "Template": An invisible template page
// - "XObject": An image, image mask, or form (@link pdfioObjGetSubtype@ will
// tell you which)
//
2021-04-17 03:09:43 +02:00
const char * // O - Object type name or `NULL` for none
2021-04-28 03:22:34 +02:00
pdfioObjGetType(pdfio_obj_t *obj) // I - Object
2021-04-17 03:09:43 +02:00
{
pdfio_dict_t *dict; // Object dictionary
2021-04-17 03:09:43 +02:00
if ((dict = pdfioObjGetDict(obj)) == NULL)
return (NULL);
else
return (pdfioDictGetName(dict, "Type"));
2021-04-17 03:09:43 +02:00
}
2021-05-04 23:04:09 +02:00
//
// '_pdfioObjLoad()' - Load an object dictionary/value.
2021-05-04 23:04:09 +02:00
//
bool // O - `true` on success, `false` otherwise
_pdfioObjLoad(pdfio_obj_t *obj) // I - Object
{
char line[64], // Line from file
2021-05-04 23:04:09 +02:00
*ptr; // Pointer into line
ssize_t bytes; // Bytes read
_pdfio_token_t tb; // Token buffer/stack
2021-05-04 23:04:09 +02:00
PDFIO_DEBUG("_pdfioObjLoad(obj=%p(%lu)), offset=%lu\n", obj, (unsigned long)obj->number, (unsigned long)obj->offset);
2021-05-04 23:04:09 +02:00
// Seek to the start of the object and read its header...
if (_pdfioFileSeek(obj->pdf, obj->offset, SEEK_SET) != obj->offset)
{
_pdfioFileError(obj->pdf, "Unable to seek to object %lu.", (unsigned long)obj->number);
return (false);
}
if ((bytes = _pdfioFilePeek(obj->pdf, line, sizeof(line) - 1)) < 0)
2021-05-04 23:04:09 +02:00
{
_pdfioFileError(obj->pdf, "Unable to read header for object %lu.", (unsigned long)obj->number);
return (false);
}
line[bytes] = '\0';
PDFIO_DEBUG("_pdfioObjLoad: Header is '%s'.\n", line);
2021-05-04 23:04:09 +02:00
if (strtoimax(line, &ptr, 10) != (intmax_t)obj->number)
{
_pdfioFileError(obj->pdf, "Bad header for object %lu.", (unsigned long)obj->number);
return (false);
}
if (strtol(ptr, &ptr, 10) != (long)obj->generation)
{
_pdfioFileError(obj->pdf, "Bad header for object %lu.", (unsigned long)obj->number);
return (false);
}
while (isspace(*ptr & 255))
ptr ++;
if (strncmp(ptr, "obj", 3) || (ptr[3] && ptr[3] != '<' && ptr[3] != '[' && !isspace(ptr[3] & 255)))
2021-05-04 23:04:09 +02:00
{
_pdfioFileError(obj->pdf, "Bad header for object %lu.", (unsigned long)obj->number);
return (false);
}
ptr += 3;
while (*ptr && isspace(*ptr & 255))
ptr ++;
_pdfioFileConsume(obj->pdf, (size_t)(ptr - line));
2021-05-04 23:04:09 +02:00
// Then grab the object value...
_pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, obj->pdf);
if (!_pdfioValueRead(obj->pdf, obj, &tb, &obj->value, 0))
2021-05-04 23:04:09 +02:00
{
_pdfioFileError(obj->pdf, "Unable to read value for object %lu.", (unsigned long)obj->number);
return (false);
}
// Now see if there is an associated stream...
if (!_pdfioTokenGet(&tb, line, sizeof(line)))
2021-05-04 23:04:09 +02:00
{
_pdfioFileError(obj->pdf, "Early end-of-file for object %lu.", (unsigned long)obj->number);
return (false);
}
PDFIO_DEBUG("_pdfioObjLoad: tb.bufptr=%p, tb.bufend=%p, tb.bufptr[0]=0x%02x, tb.bufptr[1]=0x%02x\n", tb.bufptr, tb.bufend, tb.bufptr[0], tb.bufptr[1]);
_pdfioTokenFlush(&tb);
2021-05-04 23:04:09 +02:00
if (!strcmp(line, "stream"))
{
// Yes, this is an embedded stream so save its location...
2021-05-04 23:04:09 +02:00
obj->stream_offset = _pdfioFileTell(obj->pdf);
PDFIO_DEBUG("_pdfioObjLoad: stream_offset=%lu.\n", (unsigned long)obj->stream_offset);
2021-05-04 23:04:09 +02:00
}
// Decrypt as needed...
if (obj->pdf->encryption)
{
2023-12-14 23:05:10 +01:00
PDFIO_DEBUG("_pdfioObjLoad: Decrypting value...\n");
if (!_pdfioValueDecrypt(obj->pdf, obj, &obj->value, 0))
2023-12-14 23:05:10 +01:00
{
PDFIO_DEBUG("_pdfioObjLoad: Failed to decrypt.\n");
return (false);
2023-12-14 23:05:10 +01:00
}
}
PDFIO_DEBUG("_pdfioObjLoad: ");
PDFIO_DEBUG_VALUE(&obj->value);
PDFIO_DEBUG("\n");
2021-05-04 23:04:09 +02:00
return (true);
}
//
// 'pdfioObjOpenStream()' - Open an object's (data) stream for reading.
//
pdfio_stream_t * // O - Stream or `NULL` on error
2021-05-06 22:07:58 +02:00
pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
bool decode) // I - Decode/decompress data?
{
// Range check input...
if (!obj)
return (NULL);
if (obj->pdf->current_obj)
{
_pdfioFileError(obj->pdf, "Another object (%u) is already open.", (unsigned)obj->pdf->current_obj->number);
return (NULL);
}
// Make sure we've loaded the object dictionary...
if (!obj->value.type)
{
if (!_pdfioObjLoad(obj))
return (NULL);
}
// No stream if there is no dict or offset to a stream...
if (obj->value.type != PDFIO_VALTYPE_DICT || !obj->stream_offset)
return (NULL);
// Open the stream...
obj->pdf->current_obj = obj;
return (_pdfioStreamOpen(obj, decode));
}
2021-05-16 17:39:05 +02:00
//
// '_pdfioObjSetExtension()' - Set extension data for an object.
//
// Stores the data pointer together with the function that
// _pdfioObjDelete() calls to release it.  The object pointer is dereferenced
// without a check, so it must not be `NULL`.
//

void
_pdfioObjSetExtension(
    pdfio_obj_t      *obj,              // I - Object
    void             *data,             // I - Data
    _pdfio_extfree_t datafree)          // I - Free function
{
  // The two fields are independent; both are simply overwritten...
  obj->datafree = datafree;
  obj->data     = data;
}
2021-05-16 17:39:05 +02:00
//
// 'write_obj_header()' - Write the object header...
//
// Records the current file position as the object's offset, then writes the
// "<number> <generation> obj" line, the object's value, and a newline.
//

static bool                             // O - `true` on success, `false` on failure
write_obj_header(pdfio_obj_t *obj)      // I - Object
{
  // Remember where the object starts before anything is written...
  obj->offset = _pdfioFileTell(obj->pdf);

  // Each step only runs if the previous one succeeded...
  return (_pdfioFilePrintf(obj->pdf, "%lu %u obj\n", (unsigned long)obj->number, obj->generation) &&
          _pdfioValueWrite(obj->pdf, obj, &obj->value, &obj->length_offset) &&
          _pdfioFilePuts(obj->pdf, "\n"));
}