mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2024-12-27 05:48:20 +01:00
74dfefdcc1
- Explain pdfioObjGetSubtype and pdfioObjGetType values - Provide example code and documentation for accessing common page object values
581 lines
14 KiB
C
581 lines
14 KiB
C
//
|
||
// PDF object functions for PDFio.
|
||
//
|
||
// Copyright © 2021-2024 by Michael R Sweet.
|
||
//
|
||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||
// information.
|
||
//
|
||
|
||
#include "pdfio-private.h"
|
||
|
||
|
||
//
|
||
// Local functions...
|
||
//
|
||
|
||
static bool write_obj_header(pdfio_obj_t *obj);
|
||
|
||
|
||
//
|
||
// 'pdfioObjClose()' - Close an object, writing any data as needed to the PDF
|
||
// file.
|
||
//
|
||
|
||
bool // O - `true` on success, `false` on failure
|
||
pdfioObjClose(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
// Range check input
|
||
if (!obj)
|
||
return (false);
|
||
|
||
// Clear the current object pointer...
|
||
obj->pdf->current_obj = NULL;
|
||
|
||
if (obj->pdf->mode != _PDFIO_MODE_WRITE)
|
||
{
|
||
// Nothing to do when reading
|
||
return (true);
|
||
}
|
||
|
||
// Write what remains for the object...
|
||
if (!obj->offset)
|
||
{
|
||
// Write the object value
|
||
if (!write_obj_header(obj))
|
||
return (false);
|
||
|
||
// Write the "endobj" line...
|
||
return (_pdfioFilePuts(obj->pdf, "endobj\n"));
|
||
}
|
||
else if (obj->stream)
|
||
{
|
||
// Close the stream...
|
||
return (pdfioStreamClose(obj->stream));
|
||
}
|
||
else
|
||
{
|
||
// Already closed
|
||
return (true);
|
||
}
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjCopy()' - Copy an object to another PDF file.
|
||
//
|
||
|
||
pdfio_obj_t * // O - New object or `NULL` on error
|
||
pdfioObjCopy(pdfio_file_t *pdf, // I - PDF file
|
||
pdfio_obj_t *srcobj) // I - Object to copy
|
||
{
|
||
pdfio_obj_t *dstobj; // Destination object
|
||
pdfio_stream_t *srcst, // Source stream
|
||
*dstst; // Destination stream
|
||
char buffer[32768]; // Copy buffer
|
||
ssize_t bytes; // Bytes read
|
||
|
||
|
||
PDFIO_DEBUG("pdfioObjCopy(pdf=%p, srcobj=%p(%p))\n", pdf, srcobj, srcobj ? srcobj->pdf : NULL);
|
||
|
||
// Range check input
|
||
if (!pdf || !srcobj)
|
||
return (NULL);
|
||
|
||
// Load the object value if needed...
|
||
if (srcobj->value.type == PDFIO_VALTYPE_NONE)
|
||
_pdfioObjLoad(srcobj);
|
||
|
||
// Create the new object...
|
||
if ((dstobj = _pdfioFileCreateObj(pdf, srcobj->pdf, NULL)) == NULL)
|
||
return (NULL);
|
||
|
||
// Add new object to the cache of copied objects...
|
||
if (!_pdfioFileAddMappedObj(pdf, dstobj, srcobj))
|
||
return (NULL);
|
||
|
||
// Copy the object's value...
|
||
if (!_pdfioValueCopy(pdf, &dstobj->value, srcobj->pdf, &srcobj->value))
|
||
return (NULL);
|
||
|
||
if (dstobj->value.type == PDFIO_VALTYPE_DICT)
|
||
_pdfioDictClear(dstobj->value.value.dict, "Length");
|
||
|
||
if (srcobj->stream_offset)
|
||
{
|
||
// Copy stream data...
|
||
if ((srcst = pdfioObjOpenStream(srcobj, false)) == NULL)
|
||
{
|
||
pdfioObjClose(dstobj);
|
||
return (NULL);
|
||
}
|
||
|
||
if ((dstst = pdfioObjCreateStream(dstobj, PDFIO_FILTER_NONE)) == NULL)
|
||
{
|
||
pdfioStreamClose(srcst);
|
||
pdfioObjClose(dstobj);
|
||
return (NULL);
|
||
}
|
||
|
||
while ((bytes = pdfioStreamRead(srcst, buffer, sizeof(buffer))) > 0)
|
||
{
|
||
if (!pdfioStreamWrite(dstst, buffer, (size_t)bytes))
|
||
{
|
||
bytes = -1;
|
||
break;
|
||
}
|
||
}
|
||
|
||
pdfioStreamClose(srcst);
|
||
pdfioStreamClose(dstst);
|
||
|
||
if (bytes < 0)
|
||
return (NULL);
|
||
}
|
||
else
|
||
pdfioObjClose(dstobj);
|
||
|
||
return (dstobj);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjCreateStream()' - Create an object (data) stream for writing.
|
||
//
|
||
|
||
pdfio_stream_t * // O - Stream or `NULL` on error
|
||
pdfioObjCreateStream(
|
||
pdfio_obj_t *obj, // I - Object
|
||
pdfio_filter_t filter) // I - Type of compression to apply
|
||
{
|
||
pdfio_obj_t *length_obj = NULL; // Length object, if any
|
||
|
||
|
||
// Range check input
|
||
if (!obj || obj->pdf->mode != _PDFIO_MODE_WRITE || obj->value.type != PDFIO_VALTYPE_DICT)
|
||
return (NULL);
|
||
|
||
if (obj->offset)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Object has already been written.");
|
||
return (NULL);
|
||
}
|
||
|
||
if (filter != PDFIO_FILTER_NONE && filter != PDFIO_FILTER_FLATE)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Unsupported filter value for PDFioObjCreateStream.");
|
||
return (NULL);
|
||
}
|
||
|
||
if (obj->pdf->current_obj)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Another object (%u) is already open.", (unsigned)obj->pdf->current_obj->number);
|
||
return (NULL);
|
||
}
|
||
|
||
// Write the header...
|
||
if (!_pdfioDictGetValue(obj->value.value.dict, "Length"))
|
||
{
|
||
if (obj->pdf->output_cb)
|
||
{
|
||
// Streaming via an output callback, so add a placeholder length object
|
||
_pdfio_value_t length_value; // Length value
|
||
|
||
length_value.type = PDFIO_VALTYPE_NUMBER;
|
||
length_value.value.number = 0.0f;
|
||
|
||
length_obj = _pdfioFileCreateObj(obj->pdf, obj->pdf, &length_value);
|
||
pdfioDictSetObj(obj->value.value.dict, "Length", length_obj);
|
||
}
|
||
else
|
||
{
|
||
// Need a Length key for the stream, add a placeholder that we can fill in
|
||
// later...
|
||
pdfioDictSetNumber(obj->value.value.dict, "Length", 0.0);
|
||
}
|
||
}
|
||
|
||
if (!write_obj_header(obj))
|
||
return (NULL);
|
||
|
||
if (!_pdfioFilePuts(obj->pdf, "stream\n"))
|
||
return (NULL);
|
||
|
||
obj->stream_offset = _pdfioFileTell(obj->pdf);
|
||
obj->pdf->current_obj = obj;
|
||
|
||
// Return the new stream...
|
||
return (_pdfioStreamCreate(obj, length_obj, filter));
|
||
}
|
||
|
||
|
||
//
|
||
// '_pdfioObjDelete()' - Free memory used by an object.
|
||
//
|
||
|
||
void
|
||
_pdfioObjDelete(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
if (obj)
|
||
{
|
||
pdfioStreamClose(obj->stream);
|
||
|
||
if (obj->datafree)
|
||
(obj->datafree)(obj->data);
|
||
}
|
||
|
||
free(obj);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjGetArray()' - Get the array associated with an object.
|
||
//
|
||
|
||
pdfio_array_t * // O - Array or `NULL` on error
|
||
pdfioObjGetArray(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
if (!obj)
|
||
return (NULL);
|
||
|
||
if (obj->value.type == PDFIO_VALTYPE_NONE)
|
||
_pdfioObjLoad(obj);
|
||
|
||
if (obj->value.type == PDFIO_VALTYPE_ARRAY)
|
||
return (obj->value.value.array);
|
||
else
|
||
return (NULL);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjGetDict()' - Get the dictionary associated with an object.
|
||
//
|
||
|
||
pdfio_dict_t * // O - Dictionary or `NULL` on error
|
||
pdfioObjGetDict(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
if (!obj)
|
||
return (NULL);
|
||
|
||
if (obj->value.type == PDFIO_VALTYPE_NONE)
|
||
_pdfioObjLoad(obj);
|
||
|
||
if (obj->value.type == PDFIO_VALTYPE_DICT)
|
||
return (obj->value.value.dict);
|
||
else
|
||
return (NULL);
|
||
}
|
||
|
||
|
||
//
|
||
// '_pdfioObjGetExtension()' - Get the extension pointer for an object.
|
||
//
|
||
|
||
void * // O - Extension data
|
||
_pdfioObjGetExtension(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
return (obj->data);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjGetGeneration()' - Get the object's generation number.
|
||
//
|
||
|
||
unsigned short // O - Generation number (0 to 65535)
|
||
pdfioObjGetGeneration(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
return (obj ? obj->generation : 0);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjGetLength()' - Get the length of the object's (data) stream.
|
||
//
|
||
|
||
size_t // O - Length in bytes or `0` for none
|
||
pdfioObjGetLength(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
size_t length; // Length of stream
|
||
pdfio_obj_t *lenobj; // Length object
|
||
|
||
|
||
// Range check input...
|
||
if (!obj || !obj->stream_offset || obj->value.type != PDFIO_VALTYPE_DICT)
|
||
return (0);
|
||
|
||
// Try getting the length, directly or indirectly
|
||
if ((length = (size_t)pdfioDictGetNumber(obj->value.value.dict, "Length")) > 0)
|
||
{
|
||
PDFIO_DEBUG("pdfioObjGetLength(obj=%p) returning %lu.\n", obj, (unsigned long)length);
|
||
return (length);
|
||
}
|
||
|
||
if ((lenobj = pdfioDictGetObj(obj->value.value.dict, "Length")) == NULL)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Unable to get length of stream.");
|
||
return (0);
|
||
}
|
||
|
||
if (lenobj->value.type == PDFIO_VALTYPE_NONE)
|
||
_pdfioObjLoad(lenobj);
|
||
|
||
if (lenobj->value.type != PDFIO_VALTYPE_NUMBER || lenobj->value.value.number <= 0.0)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Unable to get length of stream.");
|
||
return (0);
|
||
}
|
||
|
||
PDFIO_DEBUG("pdfioObjGetLength(obj=%p) returning %lu.\n", obj, (unsigned long)lenobj->value.value.number);
|
||
|
||
return ((size_t)lenobj->value.value.number);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjGetNumber()' - Get the object's number.
|
||
//
|
||
|
||
size_t // O - Object number (1 to 9999999999)
|
||
pdfioObjGetNumber(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
return (obj ? obj->number : 0);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjGetSubtype()' - Get an object's subtype.
|
||
//
|
||
// This function returns an object's PDF subtype name, if any. Common subtype
|
||
// names include:
|
||
//
|
||
// - "CIDFontType0": A CID Type0 font
|
||
// - "CIDFontType2": A CID TrueType font
|
||
// - "Image": An image or image mask
|
||
// - "Form": A fillable form
|
||
// - "OpenType": An OpenType font
|
||
// - "Type0": A composite font
|
||
// - "Type1": A PostScript Type1 font
|
||
// - "Type3": A PDF Type3 font
|
||
// - "TrueType": A TrueType font
|
||
//
|
||
|
||
const char * // O - Object subtype name or `NULL` for none
|
||
pdfioObjGetSubtype(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
pdfio_dict_t *dict; // Object dictionary
|
||
|
||
|
||
if ((dict = pdfioObjGetDict(obj)) == NULL)
|
||
return (NULL);
|
||
else
|
||
return (pdfioDictGetName(dict, "Subtype"));
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjGetType()' - Get an object's type.
|
||
//
|
||
// This function returns an object's PDF type name, if any. Common type names
|
||
// include:
|
||
//
|
||
// - "CMap": A character map for composite fonts
|
||
// - "Font": An embedded font (@link pdfioObjGetSubtype@ will tell you the
|
||
// font format)
|
||
// - "FontDescriptor": A font descriptor
|
||
// - "Page": A (visible) page
|
||
// - "Pages": A page tree node
|
||
// - "Template": An invisible template page
|
||
// - "XObject": An image, image mask, or form (@link pdfioObjGetSubtype@ will
|
||
// tell you which)
|
||
//
|
||
|
||
const char * // O - Object type name or `NULL` for none
|
||
pdfioObjGetType(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
pdfio_dict_t *dict; // Object dictionary
|
||
|
||
|
||
if ((dict = pdfioObjGetDict(obj)) == NULL)
|
||
return (NULL);
|
||
else
|
||
return (pdfioDictGetName(dict, "Type"));
|
||
}
|
||
|
||
|
||
//
|
||
// '_pdfioObjLoad()' - Load an object dictionary/value.
|
||
//
|
||
|
||
bool // O - `true` on success, `false` otherwise
|
||
_pdfioObjLoad(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
char line[64], // Line from file
|
||
*ptr; // Pointer into line
|
||
ssize_t bytes; // Bytes read
|
||
_pdfio_token_t tb; // Token buffer/stack
|
||
|
||
|
||
PDFIO_DEBUG("_pdfioObjLoad(obj=%p(%lu)), offset=%lu\n", obj, (unsigned long)obj->number, (unsigned long)obj->offset);
|
||
|
||
// Seek to the start of the object and read its header...
|
||
if (_pdfioFileSeek(obj->pdf, obj->offset, SEEK_SET) != obj->offset)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Unable to seek to object %lu.", (unsigned long)obj->number);
|
||
return (false);
|
||
}
|
||
|
||
if ((bytes = _pdfioFilePeek(obj->pdf, line, sizeof(line) - 1)) < 0)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Unable to read header for object %lu.", (unsigned long)obj->number);
|
||
return (false);
|
||
}
|
||
|
||
line[bytes] = '\0';
|
||
|
||
PDFIO_DEBUG("_pdfioObjLoad: Header is '%s'.\n", line);
|
||
|
||
if (strtoimax(line, &ptr, 10) != (intmax_t)obj->number)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Bad header for object %lu.", (unsigned long)obj->number);
|
||
return (false);
|
||
}
|
||
|
||
if (strtol(ptr, &ptr, 10) != (long)obj->generation)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Bad header for object %lu.", (unsigned long)obj->number);
|
||
return (false);
|
||
}
|
||
|
||
while (isspace(*ptr & 255))
|
||
ptr ++;
|
||
|
||
if (strncmp(ptr, "obj", 3) || (ptr[3] && ptr[3] != '<' && ptr[3] != '[' && !isspace(ptr[3] & 255)))
|
||
{
|
||
_pdfioFileError(obj->pdf, "Bad header for object %lu.", (unsigned long)obj->number);
|
||
return (false);
|
||
}
|
||
|
||
ptr += 3;
|
||
while (*ptr && isspace(*ptr & 255))
|
||
ptr ++;
|
||
|
||
_pdfioFileConsume(obj->pdf, (size_t)(ptr - line));
|
||
|
||
// Then grab the object value...
|
||
_pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, obj->pdf);
|
||
|
||
if (!_pdfioValueRead(obj->pdf, obj, &tb, &obj->value, 0))
|
||
{
|
||
_pdfioFileError(obj->pdf, "Unable to read value for object %lu.", (unsigned long)obj->number);
|
||
return (false);
|
||
}
|
||
|
||
// Now see if there is an associated stream...
|
||
if (!_pdfioTokenGet(&tb, line, sizeof(line)))
|
||
{
|
||
_pdfioFileError(obj->pdf, "Early end-of-file for object %lu.", (unsigned long)obj->number);
|
||
return (false);
|
||
}
|
||
|
||
PDFIO_DEBUG("_pdfioObjLoad: tb.bufptr=%p, tb.bufend=%p, tb.bufptr[0]=0x%02x, tb.bufptr[1]=0x%02x\n", tb.bufptr, tb.bufend, tb.bufptr[0], tb.bufptr[1]);
|
||
|
||
_pdfioTokenFlush(&tb);
|
||
|
||
if (!strcmp(line, "stream"))
|
||
{
|
||
// Yes, this is an embedded stream so save its location...
|
||
obj->stream_offset = _pdfioFileTell(obj->pdf);
|
||
PDFIO_DEBUG("_pdfioObjLoad: stream_offset=%lu.\n", (unsigned long)obj->stream_offset);
|
||
}
|
||
|
||
// Decrypt as needed...
|
||
if (obj->pdf->encryption)
|
||
{
|
||
PDFIO_DEBUG("_pdfioObjLoad: Decrypting value...\n");
|
||
|
||
if (!_pdfioValueDecrypt(obj->pdf, obj, &obj->value, 0))
|
||
{
|
||
PDFIO_DEBUG("_pdfioObjLoad: Failed to decrypt.\n");
|
||
return (false);
|
||
}
|
||
}
|
||
|
||
PDFIO_DEBUG("_pdfioObjLoad: ");
|
||
PDFIO_DEBUG_VALUE(&obj->value);
|
||
PDFIO_DEBUG("\n");
|
||
|
||
return (true);
|
||
}
|
||
|
||
|
||
//
|
||
// 'pdfioObjOpenStream()' - Open an object's (data) stream for reading.
|
||
//
|
||
|
||
pdfio_stream_t * // O - Stream or `NULL` on error
|
||
pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
|
||
bool decode) // I - Decode/decompress data?
|
||
{
|
||
// Range check input...
|
||
if (!obj)
|
||
return (NULL);
|
||
|
||
if (obj->pdf->current_obj)
|
||
{
|
||
_pdfioFileError(obj->pdf, "Another object (%u) is already open.", (unsigned)obj->pdf->current_obj->number);
|
||
return (NULL);
|
||
}
|
||
|
||
// Make sure we've loaded the object dictionary...
|
||
if (!obj->value.type)
|
||
{
|
||
if (!_pdfioObjLoad(obj))
|
||
return (NULL);
|
||
}
|
||
|
||
// No stream if there is no dict or offset to a stream...
|
||
if (obj->value.type != PDFIO_VALTYPE_DICT || !obj->stream_offset)
|
||
return (NULL);
|
||
|
||
// Open the stream...
|
||
obj->pdf->current_obj = obj;
|
||
|
||
return (_pdfioStreamOpen(obj, decode));
|
||
}
|
||
|
||
|
||
//
|
||
// '_pdfioObjSetExtension()' - Set extension data for an object.
|
||
//
|
||
|
||
void
|
||
_pdfioObjSetExtension(
|
||
pdfio_obj_t *obj, // I - Object
|
||
void *data, // I - Data
|
||
_pdfio_extfree_t datafree) // I - Free function
|
||
{
|
||
obj->data = data;
|
||
obj->datafree = datafree;
|
||
}
|
||
|
||
|
||
//
|
||
// 'write_obj_header()' - Write the object header...
|
||
//
|
||
|
||
static bool // O - `true` on success, `false` on failure
|
||
write_obj_header(pdfio_obj_t *obj) // I - Object
|
||
{
|
||
obj->offset = _pdfioFileTell(obj->pdf);
|
||
|
||
if (!_pdfioFilePrintf(obj->pdf, "%lu %u obj\n", (unsigned long)obj->number, obj->generation))
|
||
return (false);
|
||
|
||
if (!_pdfioValueWrite(obj->pdf, obj, &obj->value, &obj->length_offset))
|
||
return (false);
|
||
|
||
return (_pdfioFilePuts(obj->pdf, "\n"));
|
||
}
|