mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2026-04-09 13:32:31 +02:00
Add support for LZWDecode filter, needs more testing (Issue #11)
This commit is contained in:
@@ -8,6 +8,7 @@ v1.7.0 - YYYY-MM-DD
|
||||
- Now use TTF 1.1 or later for font support.
|
||||
- Added support for basic compound stream filters for ASCII85Decode support
|
||||
(Issue #11)
|
||||
- Added support for LZWDecode filters (Issue #11)
|
||||
- Fixed a buffer overflow in the (still not enabled) AES-256 code.
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# Makefile for PDFio.
|
||||
#
|
||||
# Copyright © 2021-2025 by Michael R Sweet.
|
||||
# Copyright © 2021-2026 by Michael R Sweet.
|
||||
#
|
||||
# Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
# information.
|
||||
@@ -91,6 +91,7 @@ PUBOBJS = \
|
||||
pdfio-crypto.o \
|
||||
pdfio-dict.o \
|
||||
pdfio-file.o \
|
||||
pdfio-lzw.o \
|
||||
pdfio-md5.o \
|
||||
pdfio-object.o \
|
||||
pdfio-page.o \
|
||||
|
||||
309
pdfio-lzw.c
Normal file
309
pdfio-lzw.c
Normal file
@@ -0,0 +1,309 @@
|
||||
//
|
||||
// LZW decoding functions for PDFio.
|
||||
//
|
||||
// This code is used to support (legacy) PDF object streams using the LZWDecode
|
||||
// filter as well as when embedding (legacy) GIF images. None of this is public
|
||||
// API and we only support reading (decoding) since FlateDecode is superior in
|
||||
// every way.
|
||||
//
|
||||
// Copyright © 2026 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
//
|
||||
|
||||
#include "pdfio-private.h"
|
||||
|
||||
|
||||
//
|
||||
// Local functions...
|
||||
//
|
||||
|
||||
static void lzw_clear(_pdfio_lzw_t *lzw);
|
||||
static int lzw_get_code(_pdfio_lzw_t *lzw);
|
||||
|
||||
|
||||
//
|
||||
// '_pdfioLZWCreate()' - Create a LZW decompressor.
|
||||
//
|
||||
|
||||
_pdfio_lzw_t * // O - LZW state
|
||||
_pdfioLZWCreate(int code_size) // I - Data code size in bits (typically 8 for PDF, 2-8 for GIF)
|
||||
{
|
||||
_pdfio_lzw_t *lzw; // LZW state
|
||||
|
||||
|
||||
if ((lzw = (_pdfio_lzw_t *)calloc(1, sizeof(_pdfio_lzw_t))) != NULL)
|
||||
{
|
||||
lzw->def_code_size = code_size + 1;
|
||||
lzw->clear_code = (short)(1 << code_size);
|
||||
lzw->eod_code = lzw->clear_code + 1;
|
||||
|
||||
lzw_clear(lzw);
|
||||
}
|
||||
|
||||
return (lzw);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// '_pdfioLZWDelete()' - Delete a LZW decompressor.
|
||||
//
|
||||
|
||||
void
|
||||
_pdfioLZWDelete(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
{
|
||||
free(lzw);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// '_pdfioLZWInflate()' - Decompress pending input data.
|
||||
//
|
||||
|
||||
bool // O - `true` on success, `false` on error
|
||||
_pdfioLZWInflate(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
{
|
||||
int cur_code, // Current code
|
||||
in_code; // Input code
|
||||
|
||||
|
||||
// Stop if we already saw the "end of data" code...
|
||||
if (lzw->saw_eod)
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: EOD, returning false.\n");
|
||||
lzw->error = "End of data.";
|
||||
return (false);
|
||||
}
|
||||
|
||||
// Copy pending compressed data to the output buffer...
|
||||
while (lzw->stptr > lzw->stack && lzw->avail_out > 0)
|
||||
{
|
||||
*(lzw->next_out++) = *(--lzw->stptr);
|
||||
lzw->avail_out --;
|
||||
}
|
||||
|
||||
// Loop as long as we have room in the output buffer and data in the input
|
||||
// buffer...
|
||||
while (lzw->avail_out > 0)
|
||||
{
|
||||
if ((in_code = lzw_get_code(lzw)) < 0)
|
||||
{
|
||||
// Out of data, stop now...
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Out of data.\n");
|
||||
break;
|
||||
}
|
||||
else if (in_code == lzw->clear_code)
|
||||
{
|
||||
// Clear the compression tables and reset...
|
||||
lzw_clear(lzw);
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Clear.\n");
|
||||
continue;
|
||||
}
|
||||
else if (in_code == lzw->eod_code)
|
||||
{
|
||||
// End of data...
|
||||
lzw->saw_eod = true;
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: EOD.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
// If we get this far we have something to write to the output buffer and/or
|
||||
// stack...
|
||||
if (lzw->first_code == 0xffff)
|
||||
{
|
||||
// First code...
|
||||
lzw->first_code = lzw->old_code = in_code;
|
||||
*(lzw->next_out++) = in_code;
|
||||
lzw->avail_out --;
|
||||
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: first_code=%d.\n", in_code);
|
||||
continue;
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: in_code=%d.\n", in_code);
|
||||
|
||||
cur_code = in_code;
|
||||
|
||||
if (cur_code >= lzw->next_code)
|
||||
{
|
||||
*(lzw->stptr++) = lzw->first_code;
|
||||
cur_code = lzw->old_code;
|
||||
}
|
||||
|
||||
while (cur_code >= lzw->clear_code)
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: cur_code=%d\n", cur_code);
|
||||
|
||||
// Protect against overflow/loops...
|
||||
if (lzw->stptr >= (lzw->stack + sizeof(lzw->stack) / sizeof(lzw->stack[0])))
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Stack overflow, returning false.\n");
|
||||
lzw->error = "Output overflow.";
|
||||
return (false);
|
||||
}
|
||||
|
||||
// Add this character to the output stack and move to the next character
|
||||
// in the sequence...
|
||||
*(lzw->stptr++) = lzw->table[cur_code].suffix;
|
||||
|
||||
if (cur_code == lzw->table[cur_code].prefix_code)
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Table loop on code %d, returning false.\n", cur_code);
|
||||
lzw->error = "Table loop detected.";
|
||||
return (false);
|
||||
}
|
||||
|
||||
cur_code = lzw->table[cur_code].prefix_code;
|
||||
}
|
||||
|
||||
if (lzw->stptr >= (lzw->stack + sizeof(lzw->stack) / sizeof(lzw->stack[0])))
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Stack overflow, returning false.\n");
|
||||
lzw->error = "Output overflow.";
|
||||
return (false);
|
||||
}
|
||||
|
||||
*(lzw->stptr++) = lzw->first_code = lzw->table[cur_code].suffix;
|
||||
|
||||
if ((cur_code = lzw->next_code) < 4096)
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Adding code %d (%d,%d)\n", cur_code, lzw->old_code, lzw->first_code);
|
||||
|
||||
lzw->table[cur_code].prefix_code = lzw->old_code;
|
||||
lzw->table[cur_code].suffix = lzw->first_code;
|
||||
lzw->next_code ++;
|
||||
|
||||
if (lzw->next_code >= lzw->next_size_code && lzw->next_size_code < 4096)
|
||||
{
|
||||
lzw->next_size_code *= 2;
|
||||
lzw->cur_code_size ++;
|
||||
}
|
||||
}
|
||||
|
||||
lzw->old_code = (uint16_t)in_code;
|
||||
|
||||
while (lzw->stptr > lzw->stack && lzw->avail_out > 0)
|
||||
{
|
||||
*(lzw->next_out++) = *(--lzw->stptr);
|
||||
lzw->avail_out --;
|
||||
}
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Returning true, avail_in=%u, avail_out=%u.\n", (unsigned)lzw->avail_in, (unsigned)lzw->avail_out);
|
||||
|
||||
return (true);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'lzw_clear()' - Clear the compression table.
|
||||
//
|
||||
|
||||
static void
|
||||
lzw_clear(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
{
|
||||
uint16_t i; // Looping var
|
||||
|
||||
|
||||
lzw->cur_code_size = lzw->def_code_size;
|
||||
lzw->next_code = lzw->clear_code + 2;
|
||||
lzw->next_size_code = 2 * lzw->clear_code;
|
||||
lzw->first_code = 0xffff;
|
||||
lzw->old_code = 0xffff;
|
||||
|
||||
memset(lzw->table, 0, sizeof(lzw->table));
|
||||
|
||||
for (i = 0; i < lzw->clear_code; i ++)
|
||||
lzw->table[i].suffix = i;
|
||||
|
||||
lzw->stptr = lzw->stack;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'lzw_get_code()' - Get a code from the input buffer.
|
||||
//
|
||||
|
||||
static int // O - Code or -1 if there is not enough data available
|
||||
lzw_get_code(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
{
|
||||
uint16_t code, // Code
|
||||
in_bit; // Bit offset in buffer
|
||||
uint8_t bits, // Bits in current byte
|
||||
boff, // Bit offset in current byte
|
||||
byte, // Current byte
|
||||
remaining; // Remaining bits for code
|
||||
static uint8_t mask[8] = // Value mask
|
||||
{
|
||||
0xff, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f
|
||||
};
|
||||
|
||||
|
||||
// Fill input bytes as needed...
|
||||
if ((lzw->in_bit + lzw->cur_code_size) > lzw->in_bits)
|
||||
{
|
||||
uint16_t in_used = lzw->in_bits / 8,
|
||||
// Number of input bytes
|
||||
in_offset = lzw->in_bit / 8,
|
||||
// Offset to current input
|
||||
in_add; // Number of bytes to "read"
|
||||
|
||||
|
||||
if (lzw->avail_in == 0)
|
||||
{
|
||||
// No more data
|
||||
PDFIO_DEBUG("lzw_get_code: No data, returning -1.\n");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (in_offset > 0)
|
||||
{
|
||||
// Make room in the input buffer
|
||||
memmove(lzw->in_bytes, lzw->in_bytes + in_offset, in_used - in_offset);
|
||||
in_used -= in_offset;
|
||||
lzw->in_bit &= 7;
|
||||
}
|
||||
|
||||
if ((in_add = sizeof(lzw->in_bytes) - in_used) > lzw->avail_in)
|
||||
in_add = lzw->avail_in;
|
||||
|
||||
memcpy(lzw->in_bytes + in_used, lzw->next_in, in_add);
|
||||
lzw->next_in += in_add;
|
||||
lzw->avail_in -= in_add;
|
||||
lzw->in_bits = 8 * (in_used + in_add);
|
||||
|
||||
if ((lzw->in_bit + lzw->cur_code_size) > lzw->in_bits)
|
||||
{
|
||||
// Not enough data
|
||||
PDFIO_DEBUG("lzw_get_code: Not enough data, returning -1.\n");
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("lzw_get_code: in_bit=%u, in_bits=%u, in_bytes=<...%02X%02X...>, cur_code_size=%u\n", lzw->in_bit, lzw->in_bits, lzw->in_bytes[lzw->in_bit / 8], lzw->in_bytes[lzw->in_bit / 8 + 1], lzw->cur_code_size);
|
||||
|
||||
// Now extract the code from the buffer...
|
||||
for (code = 0, in_bit = lzw->in_bit, remaining = lzw->cur_code_size; remaining > 0; in_bit += bits, remaining -= bits)
|
||||
{
|
||||
// See how many bits we can extract from the current byte...
|
||||
boff = (in_bit & 7);
|
||||
byte = lzw->in_bytes[in_bit / 8];
|
||||
bits = 8 - boff;
|
||||
if (bits > remaining)
|
||||
bits = remaining;
|
||||
|
||||
// Get those bits
|
||||
if (bits == 8) // Full byte from buffer
|
||||
code = (code << 8) | byte;
|
||||
else // Partial byte from buffer
|
||||
code = (code << bits) | ((byte >> (8 - bits - boff)) & mask[bits]);
|
||||
}
|
||||
|
||||
// Save the updated position in the input buffer and return the code...
|
||||
lzw->in_bit = in_bit;
|
||||
|
||||
PDFIO_DEBUG("lzw_get_code: Returning %u.\n", code);
|
||||
|
||||
return ((int)code);
|
||||
}
|
||||
@@ -211,6 +211,36 @@ typedef union _pdfio_crypto_ctx_u // Cryptographic contexts
|
||||
} _pdfio_crypto_ctx_t;
|
||||
typedef size_t (*_pdfio_crypto_cb_t)(_pdfio_crypto_ctx_t *ctx, uint8_t *outbuffer, const uint8_t *inbuffer, size_t len);
|
||||
|
||||
typedef struct _pdfio_lzws_s		// LZW string table entry
{
  uint16_t	prefix_code;		// Code of the string this entry extends
  uint16_t	suffix;			// Character appended to the prefix string
} _pdfio_lzws_t;
|
||||
|
||||
typedef struct _pdfio_lzw_s // LZW state
|
||||
{
|
||||
uint8_t *next_in; // Next input byte
|
||||
size_t avail_in; // Available input bytes
|
||||
uint8_t in_bytes[256]; // Current input bytes
|
||||
uint16_t in_bit, // Current input bit
|
||||
in_bits; // Total input bits
|
||||
uint8_t *next_out; // Next output byte
|
||||
size_t avail_out; // Available output bytes
|
||||
uint8_t cur_code_size, // Current code size
|
||||
def_code_size; // Initial/default code size
|
||||
uint16_t clear_code, // Clear code
|
||||
eod_code, // End code
|
||||
next_code, // Next code to be used
|
||||
next_size_code, // Code where we need to increase the code size
|
||||
first_code, // First code in sequence
|
||||
old_code, // Previous code in sequence
|
||||
stack[8192], // Output stack
|
||||
*stptr; // Current stack pointer
|
||||
_pdfio_lzws_t table[4096]; // String table
|
||||
bool saw_eod; // Saw end-of-data code?
|
||||
const char *error; // Error, if any
|
||||
} _pdfio_lzw_t;
|
||||
|
||||
struct _pdfio_array_s
|
||||
{
|
||||
pdfio_file_t *pdf; // PDF file
|
||||
@@ -349,11 +379,12 @@ struct _pdfio_stream_s // Stream
|
||||
*a85decptr, // Pointer into decoded characters
|
||||
*a85decend; // Last decoded character
|
||||
z_stream flate; // Flate filter state
|
||||
_pdfio_lzw_t *lzw; // LZW filter state
|
||||
_pdfio_predictor_t predictor; // Predictor function, if any
|
||||
size_t pbpixel, // Size of a pixel in bytes
|
||||
pbsize, // Predictor buffer size, if any
|
||||
cbsize; // Compressed data buffer size
|
||||
unsigned char *cbuffer, // Compressed data buffer
|
||||
uint8_t *cbuffer, // Compressed data buffer
|
||||
*prbuffer, // Raw buffer (previous line), as needed
|
||||
*psbuffer; // PNG filter buffer, as needed
|
||||
_pdfio_crypto_cb_t crypto_cb; // Encryption/descryption callback, if any
|
||||
@@ -420,6 +451,10 @@ extern off_t _pdfioFileSeek(pdfio_file_t *pdf, off_t offset, int whence) _PDFIO
|
||||
extern off_t _pdfioFileTell(pdfio_file_t *pdf) _PDFIO_INTERNAL;
|
||||
extern bool _pdfioFileWrite(pdfio_file_t *pdf, const void *buffer, size_t bytes) _PDFIO_INTERNAL;
|
||||
|
||||
extern _pdfio_lzw_t *_pdfioLZWCreate(int def_code_size) _PDFIO_INTERNAL;
|
||||
extern void _pdfioLZWDelete(_pdfio_lzw_t *lzw) _PDFIO_INTERNAL;
|
||||
extern bool _pdfioLZWInflate(_pdfio_lzw_t *lzw) _PDFIO_INTERNAL;
|
||||
|
||||
extern void _pdfioObjDelete(pdfio_obj_t *obj) _PDFIO_INTERNAL;
|
||||
extern void *_pdfioObjGetExtension(pdfio_obj_t *obj) _PDFIO_INTERNAL;
|
||||
extern bool _pdfioObjLoad(pdfio_obj_t *obj) _PDFIO_INTERNAL;
|
||||
|
||||
293
pdfio-stream.c
293
pdfio-stream.c
@@ -15,6 +15,7 @@
|
||||
//
|
||||
|
||||
static ssize_t stream_get_bytes(pdfio_stream_t *st, void *buffer, size_t bytes);
|
||||
static ssize_t stream_inflate(pdfio_stream_t *st, uint8_t *buffer, size_t bytes, bool exactly);
|
||||
static unsigned char stream_paeth(unsigned char a, unsigned char b, unsigned char c);
|
||||
static ssize_t stream_read(pdfio_stream_t *st, char *buffer, size_t bytes);
|
||||
static bool stream_write(pdfio_stream_t *st, const void *buffer, size_t bytes);
|
||||
@@ -40,6 +41,8 @@ pdfioStreamClose(pdfio_stream_t *st) // I - Stream
|
||||
{
|
||||
if (st->filter == PDFIO_FILTER_FLATE)
|
||||
inflateEnd(&(st->flate));
|
||||
else if (st->filter == PDFIO_FILTER_LZW)
|
||||
_pdfioLZWDelete(st->lzw);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -523,9 +526,9 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
|
||||
// No filter, read as-is...
|
||||
st->filter = PDFIO_FILTER_NONE;
|
||||
}
|
||||
else if (!strcmp(filter, "FlateDecode"))
|
||||
else if (!strcmp(filter, "FlateDecode") || !strcmp(filter, "LZWDecode"))
|
||||
{
|
||||
// Flate compression
|
||||
// Flate or LZW compression
|
||||
pdfio_dict_t *params = pdfioDictGetDict(dict, "DecodeParms");
|
||||
// Decoding parameters
|
||||
int bpc = (int)pdfioDictGetNumber(params, "BitsPerComponent");
|
||||
@@ -536,12 +539,11 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
|
||||
// Number of columns
|
||||
int predictor = (int)pdfioDictGetNumber(params, "Predictor");
|
||||
// Predictor value, if any
|
||||
int status; // ZLIB status
|
||||
ssize_t rbytes; // Bytes read
|
||||
|
||||
PDFIO_DEBUG("_pdfioStreamOpen: FlateDecode - BitsPerComponent=%d, Colors=%d, Columns=%d, Predictor=%d\n", bpc, colors, columns, predictor);
|
||||
PDFIO_DEBUG("_pdfioStreamOpen: %s - BitsPerComponent=%d, Colors=%d, Columns=%d, Predictor=%d\n", filter, bpc, colors, columns, predictor);
|
||||
|
||||
st->filter = PDFIO_FILTER_FLATE;
|
||||
st->filter = !strcmp(filter, "FlateDecode") ? PDFIO_FILTER_FLATE : PDFIO_FILTER_LZW;
|
||||
|
||||
if (bpc == 0)
|
||||
{
|
||||
@@ -613,40 +615,41 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("_pdfioStreamOpen: pos=%ld\n", (long)_pdfioFileTell(st->pdf));
|
||||
if (st->cbsize > st->remaining)
|
||||
rbytes = _pdfioFileRead(st->pdf, st->cbuffer, st->remaining);
|
||||
else
|
||||
rbytes = _pdfioFileRead(st->pdf, st->cbuffer, st->cbsize);
|
||||
|
||||
if (rbytes <= 0)
|
||||
if ((rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize)) <= 0)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to read bytes for stream.");
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (st->crypto_cb)
|
||||
rbytes = (ssize_t)(st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, (size_t)rbytes);
|
||||
|
||||
st->flate.next_in = (Bytef *)st->cbuffer;
|
||||
st->flate.avail_in = (uInt)rbytes;
|
||||
|
||||
PDFIO_DEBUG("_pdfioStreamOpen: avail_in=%u, cbuffer=<%02X%02X%02X%02X%02X%02X%02X%02X...>\n", st->flate.avail_in, st->cbuffer[0], st->cbuffer[1], st->cbuffer[2], st->cbuffer[3], st->cbuffer[4], st->cbuffer[5], st->cbuffer[6], st->cbuffer[7]);
|
||||
|
||||
if ((status = inflateInit(&(st->flate))) != Z_OK)
|
||||
if (st->filter == PDFIO_FILTER_FLATE)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to start Flate filter: %s", zstrerror(status));
|
||||
goto error;
|
||||
}
|
||||
// Flate decompression...
|
||||
int status; // ZLIB status
|
||||
|
||||
st->remaining -= st->flate.avail_in;
|
||||
st->flate.next_in = (Bytef *)st->cbuffer;
|
||||
st->flate.avail_in = (uInt)rbytes;
|
||||
|
||||
PDFIO_DEBUG("_pdfioStreamOpen: avail_in=%u, cbuffer=<%02X%02X%02X%02X%02X%02X%02X%02X...>\n", st->flate.avail_in, st->cbuffer[0], st->cbuffer[1], st->cbuffer[2], st->cbuffer[3], st->cbuffer[4], st->cbuffer[5], st->cbuffer[6], st->cbuffer[7]);
|
||||
|
||||
if ((status = inflateInit(&(st->flate))) != Z_OK)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to start Flate filter: %s", zstrerror(status));
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// LZW decompression...
|
||||
if ((st->lzw = _pdfioLZWCreate(/*code_size*/8)) == NULL)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to initialize LZW filter: %s", strerror(errno));
|
||||
goto error;
|
||||
}
|
||||
|
||||
st->lzw->next_in = st->cbuffer;
|
||||
st->lzw->avail_in = (size_t)rbytes;
|
||||
}
|
||||
}
|
||||
#if 0 // TODO: Implement LZWDecode filter
|
||||
else if (!strcmp(filter, "LZWDecode"))
|
||||
{
|
||||
// LZW compression
|
||||
st->filter = PDFIO_FILTER_LZW;
|
||||
}
|
||||
#endif // 0
|
||||
else
|
||||
{
|
||||
// Something else we don't support
|
||||
@@ -1143,28 +1146,44 @@ stream_get_bytes(
|
||||
a85val = a85val * 85 + a85ch - '!';
|
||||
count ++;
|
||||
}
|
||||
else if (a85ch == 'z' && count == 0)
|
||||
{
|
||||
// 'z' == 0's
|
||||
a85val = 0;
|
||||
count = 5;
|
||||
|
||||
a85bufptr++;
|
||||
}
|
||||
else if (a85ch == '~')
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (!isspace(a85ch & 255))
|
||||
{
|
||||
// Invalid ASCII85Decode character...
|
||||
_pdfioFileError(st->pdf, "Invalid ASCII85Decode character in stream.");
|
||||
_pdfioFileError(st->pdf, "Invalid ASCII85Decode character '%c' in stream.", a85ch);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
st->a85bufptr = a85bufptr;
|
||||
|
||||
if (*a85bufptr == '~')
|
||||
break;
|
||||
|
||||
if (count < 2)
|
||||
{
|
||||
// Need at least 2 characters to decode a single byte...
|
||||
_pdfioFileError(st->pdf, "Invalid ASCII85Decode character in stream.");
|
||||
_pdfioFileError(st->pdf, "Invalid ASCII85Decode sequence in stream.");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
st->a85bufptr = a85bufptr;
|
||||
declen = count - 1;
|
||||
declen = count - 1;
|
||||
|
||||
// Add zero rounds to properly align the decoded value...
|
||||
// Add rounds to properly align the decoded value...
|
||||
while (count < 5)
|
||||
{
|
||||
a85val *= 85;
|
||||
a85val = a85val * 85 + 84;
|
||||
count ++;
|
||||
}
|
||||
|
||||
@@ -1178,6 +1197,8 @@ stream_get_bytes(
|
||||
st->a85decend = st->a85decode + declen;
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("stream_get_bytes: Returning %ld ASCII85 bytes for stream.\n", (long)rbytes);
|
||||
|
||||
return (rbytes);
|
||||
}
|
||||
else
|
||||
@@ -1196,11 +1217,100 @@ stream_get_bytes(
|
||||
(st->crypto_cb)(&st->crypto_ctx, (uint8_t *)buffer, (uint8_t *)buffer, (size_t)rbytes);
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("stream_get_bytes: Returning %ld raw bytes for stream.\n", (long)rbytes);
|
||||
|
||||
return (rbytes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'stream_inflate()' - Decompress bytes from a stream (Flate or LZW) into the specified buffer.
|
||||
//
|
||||
|
||||
static ssize_t
|
||||
stream_inflate(pdfio_stream_t *st, // I - Stream
|
||||
uint8_t *buffer, // I - Output buffer
|
||||
size_t bytes, // I - Number of bytes
|
||||
bool exactly) // I - Require exactly the number of bytes
|
||||
{
|
||||
ssize_t rbytes; // Bytes read
|
||||
|
||||
|
||||
// Setup decompression to the output buffer...
|
||||
if (st->filter == PDFIO_FILTER_FLATE)
|
||||
{
|
||||
st->flate.next_out = (Bytef *)buffer;
|
||||
st->flate.avail_out = (uInt)bytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
st->lzw->next_out = buffer;
|
||||
st->lzw->avail_out = bytes;
|
||||
}
|
||||
|
||||
// Loop to get the bytes...
|
||||
do
|
||||
{
|
||||
if (st->filter == PDFIO_FILTER_FLATE)
|
||||
{
|
||||
// Flate decompress
|
||||
int status; // Status of decompression
|
||||
|
||||
PDFIO_DEBUG("stream_inflate: avail_in=%u, avail_out=%u\n", st->flate.avail_in, st->flate.avail_out);
|
||||
|
||||
if (st->flate.avail_in == 0)
|
||||
{
|
||||
// Read more from the file...
|
||||
if ((rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize)) <= 0)
|
||||
return (-1); // End of file...
|
||||
|
||||
st->flate.next_in = (Bytef *)st->cbuffer;
|
||||
st->flate.avail_in = (uInt)rbytes;
|
||||
}
|
||||
|
||||
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
|
||||
{
|
||||
PDFIO_DEBUG("stream_inflate: inflate() returned %d\n", status);
|
||||
_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
bytes = (size_t)st->flate.avail_out;
|
||||
}
|
||||
else
|
||||
{
|
||||
// LZW decompress
|
||||
if (st->lzw->avail_in == 0)
|
||||
{
|
||||
// Read more from the file...
|
||||
if ((rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize)) <= 0)
|
||||
return (-1); // End of file...
|
||||
|
||||
st->lzw->next_in = st->cbuffer;
|
||||
st->lzw->avail_in = (size_t)rbytes;
|
||||
}
|
||||
|
||||
if (!_pdfioLZWInflate(st->lzw) && !st->lzw->saw_eod)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, st->lzw->error);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
bytes = st->lzw->avail_out;
|
||||
}
|
||||
}
|
||||
while (bytes > 0 && exactly);
|
||||
|
||||
if (exactly && bytes > 0)
|
||||
return (-1);
|
||||
else if (st->filter == PDFIO_FILTER_FLATE)
|
||||
return (st->flate.next_out - (Bytef *)buffer);
|
||||
else
|
||||
return (st->lzw->next_out - (uint8_t *)buffer);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'stream_paeth()' - PaethPredictor function for PNG decompression filter.
|
||||
//
|
||||
@@ -1228,47 +1338,20 @@ stream_read(pdfio_stream_t *st, // I - Stream
|
||||
char *buffer, // I - Buffer
|
||||
size_t bytes) // I - Number of bytes to read
|
||||
{
|
||||
uInt avail_in, avail_out; // Previous flate values
|
||||
|
||||
|
||||
if (st->filter == PDFIO_FILTER_NONE)
|
||||
{
|
||||
// No filtering...
|
||||
return (stream_get_bytes(st, buffer, bytes));
|
||||
}
|
||||
else if (st->filter == PDFIO_FILTER_FLATE)
|
||||
else if (st->filter == PDFIO_FILTER_FLATE || st->filter == PDFIO_FILTER_LZW)
|
||||
{
|
||||
// Deflate compression...
|
||||
int status; // Status of decompression
|
||||
|
||||
// Flate or LZW compression...
|
||||
if (st->predictor == _PDFIO_PREDICTOR_NONE)
|
||||
{
|
||||
// Decompress into the buffer...
|
||||
PDFIO_DEBUG("stream_read: No predictor.\n");
|
||||
|
||||
if (st->flate.avail_in == 0)
|
||||
{
|
||||
// Read more from the file...
|
||||
ssize_t rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize);
|
||||
// Bytes read
|
||||
|
||||
if (rbytes <= 0)
|
||||
return (-1); // End of file...
|
||||
|
||||
st->flate.next_in = (Bytef *)st->cbuffer;
|
||||
st->flate.avail_in = (uInt)rbytes;
|
||||
}
|
||||
|
||||
st->flate.next_out = (Bytef *)buffer;
|
||||
st->flate.avail_out = (uInt)bytes;
|
||||
|
||||
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
return (st->flate.next_out - (Bytef *)buffer);
|
||||
return (stream_inflate(st, (uint8_t *)buffer, bytes, /*exactly*/false));
|
||||
}
|
||||
else if (st->predictor == _PDFIO_PREDICTOR_TIFF2)
|
||||
{
|
||||
@@ -1276,9 +1359,9 @@ stream_read(pdfio_stream_t *st, // I - Stream
|
||||
// Size of pixel in bytes
|
||||
remaining = st->pbsize;
|
||||
// Remaining bytes
|
||||
unsigned char *bufptr = (unsigned char *)buffer,
|
||||
uint8_t *bufptr = (uint8_t *)buffer,
|
||||
// Pointer into buffer
|
||||
*bufsecond = (unsigned char *)buffer + pbpixel,
|
||||
*bufsecond = (uint8_t *)buffer + pbpixel,
|
||||
// Pointer to second pixel in buffer
|
||||
*sptr = st->psbuffer;
|
||||
// Current (raw) line
|
||||
@@ -1291,37 +1374,7 @@ stream_read(pdfio_stream_t *st, // I - Stream
|
||||
return (-1);
|
||||
}
|
||||
|
||||
st->flate.next_out = (Bytef *)sptr;
|
||||
st->flate.avail_out = (uInt)st->pbsize;
|
||||
|
||||
while (st->flate.avail_out > 0)
|
||||
{
|
||||
if (st->flate.avail_in == 0)
|
||||
{
|
||||
// Read more from the file...
|
||||
ssize_t rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize);
|
||||
// Bytes read
|
||||
|
||||
if (rbytes <= 0)
|
||||
return (-1); // End of file...
|
||||
|
||||
st->flate.next_in = (Bytef *)st->cbuffer;
|
||||
st->flate.avail_in = (uInt)rbytes;
|
||||
}
|
||||
|
||||
avail_in = st->flate.avail_in;
|
||||
avail_out = st->flate.avail_out;
|
||||
|
||||
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
|
||||
return (-1);
|
||||
}
|
||||
else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
|
||||
break;
|
||||
}
|
||||
|
||||
if (st->flate.avail_out > 0)
|
||||
if (stream_inflate(st, sptr, st->pbsize, /*exactly*/true) < 0)
|
||||
return (-1); // Early end of stream
|
||||
|
||||
for (; bufptr < bufsecond; remaining --, sptr ++)
|
||||
@@ -1338,9 +1391,9 @@ stream_read(pdfio_stream_t *st, // I - Stream
|
||||
// Size of pixel in bytes
|
||||
remaining = st->pbsize - 1;
|
||||
// Remaining bytes
|
||||
unsigned char *bufptr = (unsigned char *)buffer,
|
||||
uint8_t *bufptr = (uint8_t *)buffer,
|
||||
// Pointer into buffer
|
||||
*bufsecond = (unsigned char *)buffer + pbpixel,
|
||||
*bufsecond = (uint8_t *)buffer + pbpixel,
|
||||
// Pointer to second pixel in buffer
|
||||
*sptr = st->psbuffer + 1,
|
||||
// Current (raw) line
|
||||
@@ -1355,40 +1408,10 @@ stream_read(pdfio_stream_t *st, // I - Stream
|
||||
return (-1);
|
||||
}
|
||||
|
||||
st->flate.next_out = (Bytef *)sptr - 1;
|
||||
st->flate.avail_out = (uInt)st->pbsize;
|
||||
|
||||
while (st->flate.avail_out > 0)
|
||||
{
|
||||
if (st->flate.avail_in == 0)
|
||||
{
|
||||
// Read more from the file...
|
||||
ssize_t rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize);
|
||||
// Bytes read
|
||||
|
||||
if (rbytes <= 0)
|
||||
return (-1); // End of file...
|
||||
|
||||
st->flate.next_in = (Bytef *)st->cbuffer;
|
||||
st->flate.avail_in = (uInt)rbytes;
|
||||
}
|
||||
|
||||
avail_in = st->flate.avail_in;
|
||||
avail_out = st->flate.avail_out;
|
||||
|
||||
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
|
||||
return (-1);
|
||||
}
|
||||
else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
|
||||
break;
|
||||
}
|
||||
|
||||
if (st->flate.avail_out > 0)
|
||||
if (stream_inflate(st, sptr - 1, st->pbsize, /*exactly*/true) < 0)
|
||||
{
|
||||
// Early end of stream
|
||||
PDFIO_DEBUG("stream_read: Early EOF (remaining=%u, avail_in=%d, avail_out=%d, data_type=%d, next_in=<%02X%02X%02X%02X...>).\n", (unsigned)st->remaining, st->flate.avail_in, st->flate.avail_out, st->flate.data_type, st->flate.next_in[0], st->flate.next_in[1], st->flate.next_in[2], st->flate.next_in[3]);
|
||||
PDFIO_DEBUG("stream_read: Early EOF (remaining=%u).\n", (unsigned)st->remaining);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
@@ -1491,8 +1514,6 @@ stream_write(pdfio_stream_t *st, // I - Stream
|
||||
outbytes = cbytes;
|
||||
}
|
||||
|
||||
// fprintf(stderr, "stream_write: bytes=%u, outbytes=%u\n", (unsigned)bytes, (unsigned)outbytes);
|
||||
|
||||
if (!_pdfioFileWrite(st->pdf, st->cbuffer, outbytes))
|
||||
return (false);
|
||||
|
||||
|
||||
6
pdfio.h
6
pdfio.h
@@ -1,7 +1,7 @@
|
||||
//
|
||||
// Public header file for PDFio.
|
||||
//
|
||||
// Copyright © 2021-2025 by Michael R Sweet.
|
||||
// Copyright © 2021-2026 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
@@ -72,11 +72,11 @@ typedef enum pdfio_filter_e // Compression/decompression filters for streams
|
||||
PDFIO_FILTER_NONE, // No filter
|
||||
PDFIO_FILTER_ASCIIHEX, // ASCIIHexDecode filter (reading only)
|
||||
PDFIO_FILTER_ASCII85, // ASCII85Decode filter (reading only)
|
||||
PDFIO_FILTER_CCITTFAX, // CCITTFaxDecode filter
|
||||
PDFIO_FILTER_CCITTFAX, // CCITTFaxDecode filter (reading only)
|
||||
PDFIO_FILTER_CRYPT, // Encryption filter
|
||||
PDFIO_FILTER_DCT, // DCTDecode (JPEG) filter
|
||||
PDFIO_FILTER_FLATE, // FlateDecode filter
|
||||
PDFIO_FILTER_JBIG2, // JBIG2Decode filter
|
||||
PDFIO_FILTER_JBIG2, // JBIG2Decode filter (reading only)
|
||||
PDFIO_FILTER_JPX, // JPXDecode filter (reading only)
|
||||
PDFIO_FILTER_LZW, // LZWDecode filter (reading only)
|
||||
PDFIO_FILTER_RUNLENGTH, // RunLengthDecode filter (reading only)
|
||||
|
||||
@@ -160,6 +160,7 @@
|
||||
<ClCompile Include="pdfio-crypto.c" />
|
||||
<ClCompile Include="pdfio-dict.c" />
|
||||
<ClCompile Include="pdfio-file.c" />
|
||||
<ClCompile Include="pdfio-lzw.c" />
|
||||
<ClCompile Include="pdfio-md5.c" />
|
||||
<ClCompile Include="pdfio-object.c" />
|
||||
<ClCompile Include="pdfio-page.c" />
|
||||
|
||||
10
test.h
10
test.h
@@ -96,6 +96,16 @@ static int test_progress; // Current progress
|
||||
static char test_title[1024] = ""; // Current test title
|
||||
|
||||
|
||||
// Add printf syntax checking on supported compilers...
|
||||
#if defined(__has_extension) || defined(__GNUC__)
|
||||
# define TEST_FORMAT(a,b) __attribute__ ((__format__(__printf__,a,b)))
|
||||
static inline void testBegin(const char *title, ...) TEST_FORMAT(1,2);
|
||||
static inline void testEndMessage(bool pass, const char *message, ...) TEST_FORMAT(2,3);
|
||||
static inline void testError(const char *error, ...) TEST_FORMAT(1,2);
|
||||
static inline void testMessage(const char *error, ...) TEST_FORMAT(1,2);
|
||||
#endif // __has_extension || __GNUC__
|
||||
|
||||
|
||||
// Start a test
|
||||
static inline void
|
||||
testBegin(const char *title, ...) // I - printf-style title string
|
||||
|
||||
101
testpdfio.c
101
testpdfio.c
@@ -32,6 +32,7 @@
|
||||
//
|
||||
|
||||
static int do_crypto_tests(void);
|
||||
static int do_lzw_tests(void);
|
||||
static int do_pdfa_tests(void);
|
||||
static int do_test_file(const char *filename, const char *outfile, int objnum, const char *password, bool verbose);
|
||||
static int do_unit_tests(void);
|
||||
@@ -382,6 +383,102 @@ do_crypto_tests(void)
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'do_lzw_tests()' - Test the various LZW functions in PDFio.
|
||||
//
|
||||
|
||||
static int // O - Exit status
|
||||
do_lzw_tests(void)
|
||||
{
|
||||
int status = 0; // Exit status
|
||||
_pdfio_lzw_t *lzw; // LZW state
|
||||
uint8_t buffer[8192]; // Output buffer
|
||||
size_t bytes; // Output bytes
|
||||
static uint8_t iso32000_in[] = // ISO-32000-2 test case input
|
||||
{
|
||||
0x80, 0x0B, 0x60, 0x50, 0x22, 0x0C, 0x0C, 0x85, 0x01
|
||||
};
|
||||
static uint8_t iso32000_out[] = // ISO-32000-2 test case output
|
||||
{
|
||||
45, 45, 45, 45, 45, 65, 45, 45, 45, 66
|
||||
};
|
||||
|
||||
|
||||
testBegin("_pdfioLZWCreate(8)");
|
||||
testEnd((lzw = _pdfioLZWCreate(/*code_size*/8)) != NULL);
|
||||
if (!lzw)
|
||||
return (1);
|
||||
|
||||
testBegin("_pdfioLZWInflate(ISO 32000-2 test case)");
|
||||
|
||||
lzw->avail_in = sizeof(iso32000_in);
|
||||
lzw->next_in = iso32000_in;
|
||||
|
||||
lzw->avail_out = sizeof(buffer);
|
||||
lzw->next_out = buffer;
|
||||
|
||||
if (!_pdfioLZWInflate(lzw))
|
||||
{
|
||||
testEndMessage(false, "returned false");
|
||||
status = 1;
|
||||
}
|
||||
else if ((bytes = sizeof(buffer) - lzw->avail_out) != sizeof(iso32000_out))
|
||||
{
|
||||
testEndMessage(false, "got %u bytes, expected %u bytes", (unsigned)bytes, (unsigned)sizeof(iso32000_out));
|
||||
status = 1;
|
||||
}
|
||||
else if (memcmp(buffer, iso32000_out, bytes))
|
||||
{
|
||||
size_t i; // Looping var
|
||||
|
||||
testEndMessage(false, "got incorrect output");
|
||||
|
||||
testMessage(" EXPECTED %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", iso32000_out[0], iso32000_out[1], iso32000_out[2], iso32000_out[3], iso32000_out[4], iso32000_out[5], iso32000_out[6], iso32000_out[7], iso32000_out[8], iso32000_out[9]);
|
||||
|
||||
for (i = 0; i < bytes; i += 8)
|
||||
{
|
||||
switch (bytes - i)
|
||||
{
|
||||
case 1 :
|
||||
testMessage(" %s %02X", i == 0 ? "GOT" : " ", buffer[i + 0]);
|
||||
break;
|
||||
case 2 :
|
||||
testMessage(" %s %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1]);
|
||||
break;
|
||||
case 3 :
|
||||
testMessage(" %s %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2]);
|
||||
break;
|
||||
case 4 :
|
||||
testMessage(" %s %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3]);
|
||||
break;
|
||||
case 5 :
|
||||
testMessage(" %s %02X %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3], buffer[i + 4]);
|
||||
break;
|
||||
case 6 :
|
||||
testMessage(" %s %02X %02X %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3], buffer[i + 4], buffer[i + 5]);
|
||||
break;
|
||||
case 7 :
|
||||
testMessage(" %s %02X %02X %02X %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3], buffer[i + 4], buffer[i + 5], buffer[i + 6]);
|
||||
break;
|
||||
default :
|
||||
testMessage(" %s %02X %02X %02X %02X %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3], buffer[i + 4], buffer[i + 5], buffer[i + 6], buffer[i + 7]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
status = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
testEnd(true);
|
||||
}
|
||||
|
||||
_pdfioLZWDelete(lzw);
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'do_pdfa_tests()' - Run PDF/A generation and compliance tests.
|
||||
//
|
||||
@@ -1180,6 +1277,10 @@ do_unit_tests(void)
|
||||
if (do_crypto_tests())
|
||||
return (1);
|
||||
|
||||
// Do LZW tests...
|
||||
if (do_lzw_tests())
|
||||
return (1);
|
||||
|
||||
// Create a new PDF file...
|
||||
testBegin("pdfioFileCreate(\"testpdfio-out.pdf\", ...)");
|
||||
if ((outpdf = pdfioFileCreate("testpdfio-out.pdf", /*version*/"1.7", /*media_box*/NULL, /*crop_box*/NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL)
|
||||
|
||||
Reference in New Issue
Block a user