Add support for LZWDecode filter, needs more testing (Issue #11)

This commit is contained in:
Michael R Sweet
2026-01-16 09:53:51 -05:00
parent bdcd963352
commit 09520d250f
9 changed files with 620 additions and 141 deletions

View File

@@ -8,6 +8,7 @@ v1.7.0 - YYYY-MM-DD
- Now use TTF 1.1 or later for font support.
- Added support for basic compound stream filters for ASCII85Decode support
(Issue #11)
- Added support for LZWDecode filters (Issue #11)
- Fixed a buffer overflow in the (still not enabled) AES-256 code.

View File

@@ -1,7 +1,7 @@
#
# Makefile for PDFio.
#
# Copyright © 2021-2025 by Michael R Sweet.
# Copyright © 2021-2026 by Michael R Sweet.
#
# Licensed under Apache License v2.0. See the file "LICENSE" for more
# information.
@@ -91,6 +91,7 @@ PUBOBJS = \
pdfio-crypto.o \
pdfio-dict.o \
pdfio-file.o \
pdfio-lzw.o \
pdfio-md5.o \
pdfio-object.o \
pdfio-page.o \

309
pdfio-lzw.c Normal file
View File

@@ -0,0 +1,309 @@
//
// LZW decoding functions for PDFio.
//
// This code is used to support (legacy) PDF object streams using the LZWDecode
// filter as well as when embedding (legacy) GIF images. None of this is public
// API and we only support reading (decoding) since FlateDecode is superior in
// every way.
//
// Copyright © 2026 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
//
#include "pdfio-private.h"
#include <errno.h>
//
// Local functions...
//
static void lzw_clear(_pdfio_lzw_t *lzw);
static int lzw_get_code(_pdfio_lzw_t *lzw);
//
// '_pdfioLZWCreate()' - Create a LZW decompressor.
//
_pdfio_lzw_t * // O - LZW state
_pdfioLZWCreate(int code_size) // I - Data code size in bits (typically 8 for PDF, 2-8 for GIF)
{
_pdfio_lzw_t *lzw; // LZW state
if ((lzw = (_pdfio_lzw_t *)calloc(1, sizeof(_pdfio_lzw_t))) != NULL)
{
lzw->def_code_size = code_size + 1;
lzw->clear_code = (short)(1 << code_size);
lzw->eod_code = lzw->clear_code + 1;
lzw_clear(lzw);
}
return (lzw);
}
//
// '_pdfioLZWDelete()' - Delete a LZW decompressor.
//
// Releases the state with `free()`.  The state is a single allocation, so
// nothing else needs to be released.  Passing `NULL` is harmless because
// `free(NULL)` is a no-op.
//
void
_pdfioLZWDelete(_pdfio_lzw_t *lzw) // I - LZW state
{
free(lzw);
}
//
// '_pdfioLZWInflate()' - Decompress pending input data.
//
// Works like zlib's `inflate()`: the caller points "next_in"/"avail_in" at the
// compressed bytes and "next_out"/"avail_out" at the output buffer, and this
// function advances them as data is consumed and produced.  Decoding stops
// when the output buffer is full, when there is not enough input for another
// code, or when the end-of-data code is read.
//
// A decoded string that does not fit in the output buffer stays on the
// internal stack and is written first on the next call.
//
// `false` is returned with "error" set when the data is invalid.  `false` is
// also returned (with "saw_eod" still `true`) when this function is called
// again after the end-of-data code, so callers can tell the two apart by
// checking "saw_eod".
//
// Code widths grow one code early, which is the default ("/EarlyChange 1")
// for the PDF LZWDecode filter.
//

bool					// O - `true` on success, `false` on error
_pdfioLZWInflate(_pdfio_lzw_t *lzw)	// I - LZW state
{
  int		cur_code,		// Current code
		in_code;		// Input code
  uint16_t	*stack_end = lzw->stack + sizeof(lzw->stack) / sizeof(lzw->stack[0]);
					// One past the end of the output stack


  // Stop if we already saw the "end of data" code...
  if (lzw->saw_eod)
  {
    PDFIO_DEBUG("_pdfioLZWInflate: EOD, returning false.\n");
    lzw->error = "End of data.";
    return (false);
  }

  // Copy pending decompressed data (left on the stack by a previous call) to
  // the output buffer.  The stack holds strings in reverse, so pop from the
  // top...
  while (lzw->stptr > lzw->stack && lzw->avail_out > 0)
  {
    *(lzw->next_out++) = (uint8_t)*(--lzw->stptr);
    lzw->avail_out --;
  }

  // Loop as long as we have room in the output buffer and data in the input
  // buffer.  Whenever we get here with room in the output buffer the stack is
  // empty...
  while (lzw->avail_out > 0)
  {
    if ((in_code = lzw_get_code(lzw)) < 0)
    {
      // Out of data, stop now...
      PDFIO_DEBUG("_pdfioLZWInflate: Out of data.\n");
      break;
    }
    else if (in_code == lzw->clear_code)
    {
      // Clear the compression tables and reset...
      lzw_clear(lzw);
      PDFIO_DEBUG("_pdfioLZWInflate: Clear.\n");
      continue;
    }
    else if (in_code == lzw->eod_code)
    {
      // End of data...
      lzw->saw_eod = true;
      PDFIO_DEBUG("_pdfioLZWInflate: EOD.\n");
      break;
    }

    // If we get this far we have something to write to the output buffer and/or
    // stack...
    if (lzw->first_code == 0xffff)
    {
      // First code after a clear is written as-is and does not add a table
      // entry...
      lzw->first_code = lzw->old_code = (uint16_t)in_code;
      *(lzw->next_out++) = (uint8_t)in_code;
      lzw->avail_out --;

      PDFIO_DEBUG("_pdfioLZWInflate: first_code=%d.\n", in_code);
      continue;
    }

    PDFIO_DEBUG("_pdfioLZWInflate: in_code=%d.\n", in_code);

    cur_code = in_code;

    if (cur_code >= lzw->next_code)
    {
      // The code is not in the table yet: the string is the previous string
      // followed by its own first character, so push that character and expand
      // the previous code instead...
      *(lzw->stptr++) = lzw->first_code;
      cur_code        = lzw->old_code;
    }

    // Walk the prefix chain, pushing each suffix, until we reach a literal...
    while (cur_code >= lzw->clear_code)
    {
      PDFIO_DEBUG("_pdfioLZWInflate: cur_code=%d\n", cur_code);

      // Protect against overflow/loops...
      if (lzw->stptr >= stack_end)
      {
	PDFIO_DEBUG("_pdfioLZWInflate: Stack overflow, returning false.\n");
	lzw->error = "Output overflow.";
	return (false);
      }

      // Add this character to the output stack and move to the next character
      // in the sequence...
      *(lzw->stptr++) = lzw->table[cur_code].suffix;

      if (cur_code == lzw->table[cur_code].prefix_code)
      {
	PDFIO_DEBUG("_pdfioLZWInflate: Table loop on code %d, returning false.\n", cur_code);
	lzw->error = "Table loop detected.";
	return (false);
      }

      cur_code = lzw->table[cur_code].prefix_code;
    }

    if (lzw->stptr >= stack_end)
    {
      PDFIO_DEBUG("_pdfioLZWInflate: Stack overflow, returning false.\n");
      lzw->error = "Output overflow.";
      return (false);
    }

    // The literal that ends the chain is the first character of this string...
    *(lzw->stptr++) = lzw->first_code = lzw->table[cur_code].suffix;

    if ((cur_code = lzw->next_code) < 4096)
    {
      // Add "previous string + first character of this string" to the table...
      PDFIO_DEBUG("_pdfioLZWInflate: Adding code %d (%d,%d)\n", cur_code, lzw->old_code, lzw->first_code);

      lzw->table[cur_code].prefix_code = lzw->old_code;
      lzw->table[cur_code].suffix      = lzw->first_code;
      lzw->next_code ++;

      // PDF LZWDecode streams default to /EarlyChange 1, where the encoder
      // widens its codes one table entry before the current width is used up.
      // Since this decoder adds entries one code behind the encoder, the width
      // has to grow when the next free code is 511, 1023, or 2047 (for 8-bit
      // data) rather than 512, 1024, or 2048.
      //
      // NOTE(review): streams written with /EarlyChange 0 need the later
      // switch; supporting both requires a setting in the LZW state.
      if ((lzw->next_code + 1) >= lzw->next_size_code && lzw->next_size_code < 4096)
      {
	lzw->next_size_code *= 2;
	lzw->cur_code_size ++;
      }
    }

    lzw->old_code = (uint16_t)in_code;

    // Write as much of the string as fits, anything left stays on the stack
    // for the next call...
    while (lzw->stptr > lzw->stack && lzw->avail_out > 0)
    {
      *(lzw->next_out++) = (uint8_t)*(--lzw->stptr);
      lzw->avail_out --;
    }
  }

  PDFIO_DEBUG("_pdfioLZWInflate: Returning true, avail_in=%u, avail_out=%u.\n", (unsigned)lzw->avail_in, (unsigned)lzw->avail_out);

  return (true);
}
//
// 'lzw_clear()' - Clear the compression table.
//
// Resets the decoder to the state expected after a clear code: the initial
// code width, an empty output stack, no previous sequence, and a table that
// only contains the literal values below the clear code.  The input and
// output buffers are not touched.
//

static void
lzw_clear(_pdfio_lzw_t *lzw)		// I - LZW state
{
  const uint16_t clear = lzw->clear_code;
					// Clear code, also the number of literals
  uint16_t	literal;		// Current literal value
  _pdfio_lzws_t	*entry;			// Current table entry


  // Empty the output stack and forget the previous sequence (0xffff marks
  // "no code seen yet")...
  lzw->stptr      = lzw->stack;
  lzw->old_code   = 0xffff;
  lzw->first_code = 0xffff;

  // Go back to the initial code width; the first free code follows the clear
  // and end-of-data codes...
  lzw->next_size_code = 2 * clear;
  lzw->next_code      = clear + 2;
  lzw->cur_code_size  = lzw->def_code_size;

  // Wipe the table and seed one entry per literal value...
  memset(lzw->table, 0, sizeof(lzw->table));

  for (literal = 0, entry = lzw->table; literal < clear; literal ++, entry ++)
    entry->suffix = literal;
}
//
// 'lzw_get_code()' - Get a code from the input buffer.
//
// Codes are "cur_code_size" bits wide and packed most-significant-bit first.
// Input is staged in "in_bytes": when the staged bits cannot supply a whole
// code, the consumed bytes are dropped, and as much caller input as fits is
// appended.  -1 is returned, without consuming any bits, when a whole code is
// still not available.
//

static int				// O - Code or -1 if there is not enough data available
lzw_get_code(_pdfio_lzw_t *lzw)		// I - LZW state
{
  uint16_t	value = 0,		// Code being assembled
		pos;			// Bit position in the staging buffer
  uint8_t	needed,			// Bits still needed for the code
		take,			// Bits taken from the current byte
		shift,			// Bit offset within the current byte
		cur;			// Current byte


  // Fill input bytes as needed...
  if ((lzw->in_bit + lzw->cur_code_size) > lzw->in_bits)
  {
    uint16_t	have = lzw->in_bits / 8,// Number of staged bytes
		skip = lzw->in_bit / 8,	// Number of fully consumed bytes
		more;			// Number of bytes to stage


    if (lzw->avail_in == 0)
    {
      // No more data
      PDFIO_DEBUG("lzw_get_code: No data, returning -1.\n");
      return (-1);
    }

    if (skip > 0)
    {
      // Make room in the input buffer by dropping the consumed bytes, keeping
      // only the bit offset within the first remaining byte...
      memmove(lzw->in_bytes, lzw->in_bytes + skip, have - skip);
      have        -= skip;
      lzw->in_bit &= 7;
    }

    // Stage as much as fits, limited by what the caller has provided...
    more = sizeof(lzw->in_bytes) - have;
    if (more > lzw->avail_in)
      more = lzw->avail_in;

    memcpy(lzw->in_bytes + have, lzw->next_in, more);

    lzw->next_in  += more;
    lzw->avail_in -= more;
    lzw->in_bits  = 8 * (have + more);

    if ((lzw->in_bit + lzw->cur_code_size) > lzw->in_bits)
    {
      // Not enough data
      PDFIO_DEBUG("lzw_get_code: Not enough data, returning -1.\n");
      return (-1);
    }
  }

  PDFIO_DEBUG("lzw_get_code: in_bit=%u, in_bits=%u, in_bytes=<...%02X%02X...>, cur_code_size=%u\n", lzw->in_bit, lzw->in_bits, lzw->in_bytes[lzw->in_bit / 8], lzw->in_bytes[lzw->in_bit / 8 + 1], lzw->cur_code_size);

  // Now extract the code from the buffer, one byte (or part of one) at a
  // time...
  pos    = lzw->in_bit;
  needed = lzw->cur_code_size;

  while (needed > 0)
  {
    // See how many bits we can extract from the current byte...
    shift = (pos & 7);
    cur   = lzw->in_bytes[pos / 8];
    take  = 8 - shift;

    if (take > needed)
      take = needed;

    // Get those bits
    if (take == 8)
    {
      // Full byte from buffer
      value = (value << 8) | cur;
    }
    else
    {
      // Partial byte from buffer: move the wanted bits down to bit 0 and mask
      // off anything above them...
      value = (value << take) | ((cur >> (8 - take - shift)) & ((1 << take) - 1));
    }

    pos    += take;
    needed -= take;
  }

  // Save the updated position in the input buffer and return the code...
  lzw->in_bit = pos;

  PDFIO_DEBUG("lzw_get_code: Returning %u.\n", value);

  return ((int)value);
}

View File

@@ -211,6 +211,36 @@ typedef union _pdfio_crypto_ctx_u // Cryptographic contexts
} _pdfio_crypto_ctx_t;
typedef size_t (*_pdfio_crypto_cb_t)(_pdfio_crypto_ctx_t *ctx, uint8_t *outbuffer, const uint8_t *inbuffer, size_t len);
typedef struct _pdfio_lzws_s		// LZW string table entry
{
  uint16_t	prefix_code;		// Code of the string this entry extends
  uint16_t	suffix;			// Character appended to the prefix string
} _pdfio_lzws_t;
typedef struct _pdfio_lzw_s		// LZW decompressor state
{
  // Caller-supplied compressed input
  uint8_t	*next_in;		// Next input byte
  size_t	avail_in;		// Available input bytes

  // Staged input the codes are extracted from
  uint8_t	in_bytes[256];		// Current input bytes
  uint16_t	in_bit;			// Current input bit
  uint16_t	in_bits;		// Total input bits

  // Caller-supplied output buffer
  uint8_t	*next_out;		// Next output byte
  size_t	avail_out;		// Available output bytes

  // Code widths
  uint8_t	cur_code_size;		// Current code size
  uint8_t	def_code_size;		// Initial/default code size

  // Decoder position
  uint16_t	clear_code;		// Clear code
  uint16_t	eod_code;		// End code
  uint16_t	next_code;		// Next code to be used
  uint16_t	next_size_code;		// Code where we need to increase the code size
  uint16_t	first_code;		// First code in sequence
  uint16_t	old_code;		// Previous code in sequence

  // Decoded characters waiting to be written, in reverse order
  uint16_t	stack[8192];		// Output stack
  uint16_t	*stptr;			// Current stack pointer

  _pdfio_lzws_t	table[4096];		// String table
  bool		saw_eod;		// Saw end-of-data code?
  const char	*error;			// Error, if any
} _pdfio_lzw_t;
struct _pdfio_array_s
{
pdfio_file_t *pdf; // PDF file
@@ -349,11 +379,12 @@ struct _pdfio_stream_s // Stream
*a85decptr, // Pointer into decoded characters
*a85decend; // Last decoded character
z_stream flate; // Flate filter state
_pdfio_lzw_t *lzw; // LZW filter state
_pdfio_predictor_t predictor; // Predictor function, if any
size_t pbpixel, // Size of a pixel in bytes
pbsize, // Predictor buffer size, if any
cbsize; // Compressed data buffer size
unsigned char *cbuffer, // Compressed data buffer
uint8_t *cbuffer, // Compressed data buffer
*prbuffer, // Raw buffer (previous line), as needed
*psbuffer; // PNG filter buffer, as needed
_pdfio_crypto_cb_t crypto_cb; // Encryption/decryption callback, if any
@@ -420,6 +451,10 @@ extern off_t _pdfioFileSeek(pdfio_file_t *pdf, off_t offset, int whence) _PDFIO
extern off_t _pdfioFileTell(pdfio_file_t *pdf) _PDFIO_INTERNAL;
extern bool _pdfioFileWrite(pdfio_file_t *pdf, const void *buffer, size_t bytes) _PDFIO_INTERNAL;
extern _pdfio_lzw_t *_pdfioLZWCreate(int def_code_size) _PDFIO_INTERNAL;
extern void _pdfioLZWDelete(_pdfio_lzw_t *lzw) _PDFIO_INTERNAL;
extern bool _pdfioLZWInflate(_pdfio_lzw_t *lzw) _PDFIO_INTERNAL;
extern void _pdfioObjDelete(pdfio_obj_t *obj) _PDFIO_INTERNAL;
extern void *_pdfioObjGetExtension(pdfio_obj_t *obj) _PDFIO_INTERNAL;
extern bool _pdfioObjLoad(pdfio_obj_t *obj) _PDFIO_INTERNAL;

View File

@@ -15,6 +15,7 @@
//
static ssize_t stream_get_bytes(pdfio_stream_t *st, void *buffer, size_t bytes);
static ssize_t stream_inflate(pdfio_stream_t *st, uint8_t *buffer, size_t bytes, bool exactly);
static unsigned char stream_paeth(unsigned char a, unsigned char b, unsigned char c);
static ssize_t stream_read(pdfio_stream_t *st, char *buffer, size_t bytes);
static bool stream_write(pdfio_stream_t *st, const void *buffer, size_t bytes);
@@ -40,6 +41,8 @@ pdfioStreamClose(pdfio_stream_t *st) // I - Stream
{
if (st->filter == PDFIO_FILTER_FLATE)
inflateEnd(&(st->flate));
else if (st->filter == PDFIO_FILTER_LZW)
_pdfioLZWDelete(st->lzw);
}
else
{
@@ -523,9 +526,9 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
// No filter, read as-is...
st->filter = PDFIO_FILTER_NONE;
}
else if (!strcmp(filter, "FlateDecode"))
else if (!strcmp(filter, "FlateDecode") || !strcmp(filter, "LZWDecode"))
{
// Flate compression
// Flate or LZW compression
pdfio_dict_t *params = pdfioDictGetDict(dict, "DecodeParms");
// Decoding parameters
int bpc = (int)pdfioDictGetNumber(params, "BitsPerComponent");
@@ -536,12 +539,11 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
// Number of columns
int predictor = (int)pdfioDictGetNumber(params, "Predictor");
// Predictor value, if any
int status; // ZLIB status
ssize_t rbytes; // Bytes read
PDFIO_DEBUG("_pdfioStreamOpen: FlateDecode - BitsPerComponent=%d, Colors=%d, Columns=%d, Predictor=%d\n", bpc, colors, columns, predictor);
PDFIO_DEBUG("_pdfioStreamOpen: %s - BitsPerComponent=%d, Colors=%d, Columns=%d, Predictor=%d\n", filter, bpc, colors, columns, predictor);
st->filter = PDFIO_FILTER_FLATE;
st->filter = !strcmp(filter, "FlateDecode") ? PDFIO_FILTER_FLATE : PDFIO_FILTER_LZW;
if (bpc == 0)
{
@@ -613,40 +615,41 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
}
PDFIO_DEBUG("_pdfioStreamOpen: pos=%ld\n", (long)_pdfioFileTell(st->pdf));
if (st->cbsize > st->remaining)
rbytes = _pdfioFileRead(st->pdf, st->cbuffer, st->remaining);
else
rbytes = _pdfioFileRead(st->pdf, st->cbuffer, st->cbsize);
if (rbytes <= 0)
if ((rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize)) <= 0)
{
_pdfioFileError(st->pdf, "Unable to read bytes for stream.");
goto error;
}
if (st->crypto_cb)
rbytes = (ssize_t)(st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, (size_t)rbytes);
st->flate.next_in = (Bytef *)st->cbuffer;
st->flate.avail_in = (uInt)rbytes;
PDFIO_DEBUG("_pdfioStreamOpen: avail_in=%u, cbuffer=<%02X%02X%02X%02X%02X%02X%02X%02X...>\n", st->flate.avail_in, st->cbuffer[0], st->cbuffer[1], st->cbuffer[2], st->cbuffer[3], st->cbuffer[4], st->cbuffer[5], st->cbuffer[6], st->cbuffer[7]);
if ((status = inflateInit(&(st->flate))) != Z_OK)
if (st->filter == PDFIO_FILTER_FLATE)
{
_pdfioFileError(st->pdf, "Unable to start Flate filter: %s", zstrerror(status));
goto error;
}
// Flate decompression...
int status; // ZLIB status
st->remaining -= st->flate.avail_in;
st->flate.next_in = (Bytef *)st->cbuffer;
st->flate.avail_in = (uInt)rbytes;
PDFIO_DEBUG("_pdfioStreamOpen: avail_in=%u, cbuffer=<%02X%02X%02X%02X%02X%02X%02X%02X...>\n", st->flate.avail_in, st->cbuffer[0], st->cbuffer[1], st->cbuffer[2], st->cbuffer[3], st->cbuffer[4], st->cbuffer[5], st->cbuffer[6], st->cbuffer[7]);
if ((status = inflateInit(&(st->flate))) != Z_OK)
{
_pdfioFileError(st->pdf, "Unable to start Flate filter: %s", zstrerror(status));
goto error;
}
}
else
{
// LZW decompression...
if ((st->lzw = _pdfioLZWCreate(/*code_size*/8)) == NULL)
{
_pdfioFileError(st->pdf, "Unable to initialize LZW filter: %s", strerror(errno));
goto error;
}
st->lzw->next_in = st->cbuffer;
st->lzw->avail_in = (size_t)rbytes;
}
}
#if 0 // TODO: Implement LZWDecode filter
else if (!strcmp(filter, "LZWDecode"))
{
// LZW compression
st->filter = PDFIO_FILTER_LZW;
}
#endif // 0
else
{
// Something else we don't support
@@ -1143,28 +1146,44 @@ stream_get_bytes(
a85val = a85val * 85 + a85ch - '!';
count ++;
}
else if (a85ch == 'z' && count == 0)
{
// 'z' == 0's
a85val = 0;
count = 5;
a85bufptr++;
}
else if (a85ch == '~')
{
break;
}
else if (!isspace(a85ch & 255))
{
// Invalid ASCII85Decode character...
_pdfioFileError(st->pdf, "Invalid ASCII85Decode character in stream.");
_pdfioFileError(st->pdf, "Invalid ASCII85Decode character '%c' in stream.", a85ch);
return (-1);
}
}
st->a85bufptr = a85bufptr;
if (*a85bufptr == '~')
break;
if (count < 2)
{
// Need at least 2 characters to decode a single byte...
_pdfioFileError(st->pdf, "Invalid ASCII85Decode character in stream.");
_pdfioFileError(st->pdf, "Invalid ASCII85Decode sequence in stream.");
return (-1);
}
st->a85bufptr = a85bufptr;
declen = count - 1;
declen = count - 1;
// Add zero rounds to properly align the decoded value...
// Add rounds to properly align the decoded value...
while (count < 5)
{
a85val *= 85;
a85val = a85val * 85 + 84;
count ++;
}
@@ -1178,6 +1197,8 @@ stream_get_bytes(
st->a85decend = st->a85decode + declen;
}
PDFIO_DEBUG("stream_get_bytes: Returning %ld ASCII85 bytes for stream.\n", (long)rbytes);
return (rbytes);
}
else
@@ -1196,11 +1217,100 @@ stream_get_bytes(
(st->crypto_cb)(&st->crypto_ctx, (uint8_t *)buffer, (uint8_t *)buffer, (size_t)rbytes);
}
PDFIO_DEBUG("stream_get_bytes: Returning %ld raw bytes for stream.\n", (long)rbytes);
return (rbytes);
}
}
//
// 'stream_inflate()' - Decompress bytes from a stream (Flate or LZW) into the specified buffer.
//
// Compressed data is read into the stream's "cbuffer" with
// `stream_get_bytes()` whenever the decompressor has no input left.
//
// When "exactly" is `false` a single decompression pass is made and the number
// of bytes produced (possibly fewer than "bytes") is returned.  When "exactly"
// is `true` decompression is repeated until the buffer is full; -1 is returned
// if the compressed data ends first.
//
// -1 is also returned when no more compressed data can be read or when the
// data cannot be decompressed; decompression errors are reported through
// `_pdfioFileError()`.
//

static ssize_t				// O - Number of bytes or -1 on error/end of data
stream_inflate(pdfio_stream_t *st,	// I - Stream
               uint8_t        *buffer,	// I - Output buffer
               size_t         bytes,	// I - Number of bytes
               bool           exactly)	// I - Require exactly the number of bytes
{
  ssize_t	rbytes;			// Bytes read
  bool		done = false;		// Has the compressed data ended?


  // Setup decompression to the output buffer...
  if (st->filter == PDFIO_FILTER_FLATE)
  {
    st->flate.next_out  = (Bytef *)buffer;
    st->flate.avail_out = (uInt)bytes;
  }
  else
  {
    st->lzw->next_out  = buffer;
    st->lzw->avail_out = bytes;
  }

  // Loop to get the bytes...
  do
  {
    if (st->filter == PDFIO_FILTER_FLATE)
    {
      // Flate decompress
      int	status;			// Status of decompression
      uInt	prev_in,		// Input bytes before inflate()
		prev_out;		// Output bytes before inflate()

      PDFIO_DEBUG("stream_inflate: avail_in=%u, avail_out=%u\n", st->flate.avail_in, st->flate.avail_out);

      if (st->flate.avail_in == 0)
      {
	// Read more from the file...
	if ((rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize)) <= 0)
	  return (-1);			// End of file...

	st->flate.next_in  = (Bytef *)st->cbuffer;
	st->flate.avail_in = (uInt)rbytes;
      }

      prev_in  = st->flate.avail_in;
      prev_out = st->flate.avail_out;

      if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
      {
	PDFIO_DEBUG("stream_inflate: inflate() returned %d\n", status);
	_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
	return (-1);
      }

      // Once the deflate stream has ended inflate() keeps returning
      // Z_STREAM_END without consuming the remaining input, so looping for
      // more output would never finish.  Also stop if zlib made no progress.
      if (status == Z_STREAM_END || (prev_in == st->flate.avail_in && prev_out == st->flate.avail_out))
	done = true;

      bytes = (size_t)st->flate.avail_out;
    }
    else
    {
      // LZW decompress
      if (st->lzw->avail_in == 0)
      {
	// Read more from the file...
	if ((rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize)) <= 0)
	  return (-1);			// End of file...

	st->lzw->next_in  = st->cbuffer;
	st->lzw->avail_in = (size_t)rbytes;
      }

      if (!_pdfioLZWInflate(st->lzw) && !st->lzw->saw_eod)
      {
	_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, st->lzw->error);
	return (-1);
      }

      // Nothing more will be produced after the end-of-data code, even if
      // there are unused input bytes after it...
      if (st->lzw->saw_eod)
	done = true;

      bytes = st->lzw->avail_out;
    }
  }
  while (bytes > 0 && exactly && !done);

  if (exactly && bytes > 0)
    return (-1);			// Compressed data ended early
  else if (st->filter == PDFIO_FILTER_FLATE)
    return (st->flate.next_out - (Bytef *)buffer);
  else
    return (st->lzw->next_out - (uint8_t *)buffer);
}
//
// 'stream_paeth()' - PaethPredictor function for PNG decompression filter.
//
@@ -1228,47 +1338,20 @@ stream_read(pdfio_stream_t *st, // I - Stream
char *buffer, // I - Buffer
size_t bytes) // I - Number of bytes to read
{
uInt avail_in, avail_out; // Previous flate values
if (st->filter == PDFIO_FILTER_NONE)
{
// No filtering...
return (stream_get_bytes(st, buffer, bytes));
}
else if (st->filter == PDFIO_FILTER_FLATE)
else if (st->filter == PDFIO_FILTER_FLATE || st->filter == PDFIO_FILTER_LZW)
{
// Deflate compression...
int status; // Status of decompression
// Flate or LZW compression...
if (st->predictor == _PDFIO_PREDICTOR_NONE)
{
// Decompress into the buffer...
PDFIO_DEBUG("stream_read: No predictor.\n");
if (st->flate.avail_in == 0)
{
// Read more from the file...
ssize_t rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize);
// Bytes read
if (rbytes <= 0)
return (-1); // End of file...
st->flate.next_in = (Bytef *)st->cbuffer;
st->flate.avail_in = (uInt)rbytes;
}
st->flate.next_out = (Bytef *)buffer;
st->flate.avail_out = (uInt)bytes;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
return (-1);
}
return (st->flate.next_out - (Bytef *)buffer);
return (stream_inflate(st, (uint8_t *)buffer, bytes, /*exactly*/false));
}
else if (st->predictor == _PDFIO_PREDICTOR_TIFF2)
{
@@ -1276,9 +1359,9 @@ stream_read(pdfio_stream_t *st, // I - Stream
// Size of pixel in bytes
remaining = st->pbsize;
// Remaining bytes
unsigned char *bufptr = (unsigned char *)buffer,
uint8_t *bufptr = (uint8_t *)buffer,
// Pointer into buffer
*bufsecond = (unsigned char *)buffer + pbpixel,
*bufsecond = (uint8_t *)buffer + pbpixel,
// Pointer to second pixel in buffer
*sptr = st->psbuffer;
// Current (raw) line
@@ -1291,37 +1374,7 @@ stream_read(pdfio_stream_t *st, // I - Stream
return (-1);
}
st->flate.next_out = (Bytef *)sptr;
st->flate.avail_out = (uInt)st->pbsize;
while (st->flate.avail_out > 0)
{
if (st->flate.avail_in == 0)
{
// Read more from the file...
ssize_t rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize);
// Bytes read
if (rbytes <= 0)
return (-1); // End of file...
st->flate.next_in = (Bytef *)st->cbuffer;
st->flate.avail_in = (uInt)rbytes;
}
avail_in = st->flate.avail_in;
avail_out = st->flate.avail_out;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
return (-1);
}
else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
break;
}
if (st->flate.avail_out > 0)
if (stream_inflate(st, sptr, st->pbsize, /*exactly*/true) < 0)
return (-1); // Early end of stream
for (; bufptr < bufsecond; remaining --, sptr ++)
@@ -1338,9 +1391,9 @@ stream_read(pdfio_stream_t *st, // I - Stream
// Size of pixel in bytes
remaining = st->pbsize - 1;
// Remaining bytes
unsigned char *bufptr = (unsigned char *)buffer,
uint8_t *bufptr = (uint8_t *)buffer,
// Pointer into buffer
*bufsecond = (unsigned char *)buffer + pbpixel,
*bufsecond = (uint8_t *)buffer + pbpixel,
// Pointer to second pixel in buffer
*sptr = st->psbuffer + 1,
// Current (raw) line
@@ -1355,40 +1408,10 @@ stream_read(pdfio_stream_t *st, // I - Stream
return (-1);
}
st->flate.next_out = (Bytef *)sptr - 1;
st->flate.avail_out = (uInt)st->pbsize;
while (st->flate.avail_out > 0)
{
if (st->flate.avail_in == 0)
{
// Read more from the file...
ssize_t rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize);
// Bytes read
if (rbytes <= 0)
return (-1); // End of file...
st->flate.next_in = (Bytef *)st->cbuffer;
st->flate.avail_in = (uInt)rbytes;
}
avail_in = st->flate.avail_in;
avail_out = st->flate.avail_out;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
return (-1);
}
else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
break;
}
if (st->flate.avail_out > 0)
if (stream_inflate(st, sptr - 1, st->pbsize, /*exactly*/true) < 0)
{
// Early end of stream
PDFIO_DEBUG("stream_read: Early EOF (remaining=%u, avail_in=%d, avail_out=%d, data_type=%d, next_in=<%02X%02X%02X%02X...>).\n", (unsigned)st->remaining, st->flate.avail_in, st->flate.avail_out, st->flate.data_type, st->flate.next_in[0], st->flate.next_in[1], st->flate.next_in[2], st->flate.next_in[3]);
PDFIO_DEBUG("stream_read: Early EOF (remaining=%u).\n", (unsigned)st->remaining);
return (-1);
}
@@ -1491,8 +1514,6 @@ stream_write(pdfio_stream_t *st, // I - Stream
outbytes = cbytes;
}
// fprintf(stderr, "stream_write: bytes=%u, outbytes=%u\n", (unsigned)bytes, (unsigned)outbytes);
if (!_pdfioFileWrite(st->pdf, st->cbuffer, outbytes))
return (false);

View File

@@ -1,7 +1,7 @@
//
// Public header file for PDFio.
//
// Copyright © 2021-2025 by Michael R Sweet.
// Copyright © 2021-2026 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@@ -72,11 +72,11 @@ typedef enum pdfio_filter_e // Compression/decompression filters for streams
PDFIO_FILTER_NONE, // No filter
PDFIO_FILTER_ASCIIHEX, // ASCIIHexDecode filter (reading only)
PDFIO_FILTER_ASCII85, // ASCII85Decode filter (reading only)
PDFIO_FILTER_CCITTFAX, // CCITTFaxDecode filter
PDFIO_FILTER_CCITTFAX, // CCITTFaxDecode filter (reading only)
PDFIO_FILTER_CRYPT, // Encryption filter
PDFIO_FILTER_DCT, // DCTDecode (JPEG) filter
PDFIO_FILTER_FLATE, // FlateDecode filter
PDFIO_FILTER_JBIG2, // JBIG2Decode filter
PDFIO_FILTER_JBIG2, // JBIG2Decode filter (reading only)
PDFIO_FILTER_JPX, // JPXDecode filter (reading only)
PDFIO_FILTER_LZW, // LZWDecode filter (reading only)
PDFIO_FILTER_RUNLENGTH, // RunLengthDecode filter (reading only)

View File

@@ -160,6 +160,7 @@
<ClCompile Include="pdfio-crypto.c" />
<ClCompile Include="pdfio-dict.c" />
<ClCompile Include="pdfio-file.c" />
<ClCompile Include="pdfio-lzw.c" />
<ClCompile Include="pdfio-md5.c" />
<ClCompile Include="pdfio-object.c" />
<ClCompile Include="pdfio-page.c" />

10
test.h
View File

@@ -96,6 +96,16 @@ static int test_progress; // Current progress
static char test_title[1024] = ""; // Current test title
// Add printf syntax checking on supported compilers...
//
// TEST_FORMAT(a,b) applies the GCC-style "format" attribute, where "a" is the
// 1-based position of the format string argument and "b" is the position of
// the first argument consumed by it.  The attribute is attached to forward
// declarations of the printf-style test functions so that callers get format
// warnings; on other compilers the declarations are simply omitted.
#if defined(__has_extension) || defined(__GNUC__)
# define TEST_FORMAT(a,b) __attribute__ ((__format__(__printf__,a,b)))
static inline void testBegin(const char *title, ...) TEST_FORMAT(1,2);
// The format string is the second argument here, after "pass"
static inline void testEndMessage(bool pass, const char *message, ...) TEST_FORMAT(2,3);
static inline void testError(const char *error, ...) TEST_FORMAT(1,2);
static inline void testMessage(const char *error, ...) TEST_FORMAT(1,2);
#endif // __has_extension || __GNUC__
// Start a test
static inline void
testBegin(const char *title, ...) // I - printf-style title string

View File

@@ -32,6 +32,7 @@
//
static int do_crypto_tests(void);
static int do_lzw_tests(void);
static int do_pdfa_tests(void);
static int do_test_file(const char *filename, const char *outfile, int objnum, const char *password, bool verbose);
static int do_unit_tests(void);
@@ -382,6 +383,102 @@ do_crypto_tests(void)
}
//
// 'do_lzw_tests()' - Test the various LZW functions in PDFio.
//
// Creates an 8-bit LZW decompressor, decodes the ISO 32000-2 sample data in a
// single call, and compares the result with the expected output.  On a
// mismatch the expected and decoded bytes are dumped in hex.
//

static int				// O - Exit status
do_lzw_tests(void)
{
  static uint8_t iso32000_in[] =	// ISO-32000-2 test case input
  {
    0x80, 0x0B, 0x60, 0x50, 0x22, 0x0C, 0x0C, 0x85, 0x01
  };
  static uint8_t iso32000_out[] =	// ISO-32000-2 test case output
  {
    45, 45, 45, 45, 45, 65, 45, 45, 45, 66
  };
  _pdfio_lzw_t	*lzw;			// LZW state
  uint8_t	buffer[8192];		// Output buffer
  size_t	bytes;			// Output bytes
  int		status = 1;		// Exit status, cleared on success


  // Nothing else can run without a decompressor...
  testBegin("_pdfioLZWCreate(8)");
  lzw = _pdfioLZWCreate(/*code_size*/8);
  testEnd(lzw != NULL);

  if (lzw == NULL)
    return (1);

  // Decode the whole sample in one call...
  testBegin("_pdfioLZWInflate(ISO 32000-2 test case)");

  lzw->next_in   = iso32000_in;
  lzw->avail_in  = sizeof(iso32000_in);
  lzw->next_out  = buffer;
  lzw->avail_out = sizeof(buffer);

  if (!_pdfioLZWInflate(lzw))
  {
    testEndMessage(false, "returned false");
  }
  else
  {
    bytes = sizeof(buffer) - lzw->avail_out;

    if (bytes != sizeof(iso32000_out))
    {
      testEndMessage(false, "got %u bytes, expected %u bytes", (unsigned)bytes, (unsigned)sizeof(iso32000_out));
    }
    else if (memcmp(buffer, iso32000_out, bytes) != 0)
    {
      size_t	offset;			// Offset of the current row

      testEndMessage(false, "got incorrect output");
      testMessage(" EXPECTED %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", iso32000_out[0], iso32000_out[1], iso32000_out[2], iso32000_out[3], iso32000_out[4], iso32000_out[5], iso32000_out[6], iso32000_out[7], iso32000_out[8], iso32000_out[9]);

      // Dump what we got, up to 8 bytes per row with a label on the first...
      for (offset = 0; offset < bytes; offset += 8)
      {
	const char	*label = offset == 0 ? "GOT" : " ";
					// Row label
	const uint8_t	*row = buffer + offset;
					// Bytes in this row

	switch (bytes - offset)
	{
	  case 1 :
	      testMessage(" %s %02X", label, row[0]);
	      break;
	  case 2 :
	      testMessage(" %s %02X %02X", label, row[0], row[1]);
	      break;
	  case 3 :
	      testMessage(" %s %02X %02X %02X", label, row[0], row[1], row[2]);
	      break;
	  case 4 :
	      testMessage(" %s %02X %02X %02X %02X", label, row[0], row[1], row[2], row[3]);
	      break;
	  case 5 :
	      testMessage(" %s %02X %02X %02X %02X %02X", label, row[0], row[1], row[2], row[3], row[4]);
	      break;
	  case 6 :
	      testMessage(" %s %02X %02X %02X %02X %02X %02X", label, row[0], row[1], row[2], row[3], row[4], row[5]);
	      break;
	  case 7 :
	      testMessage(" %s %02X %02X %02X %02X %02X %02X %02X", label, row[0], row[1], row[2], row[3], row[4], row[5], row[6]);
	      break;
	  default :
	      testMessage(" %s %02X %02X %02X %02X %02X %02X %02X %02X", label, row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]);
	      break;
	}
      }
    }
    else
    {
      testEnd(true);
      status = 0;
    }
  }

  _pdfioLZWDelete(lzw);

  return (status);
}
//
// 'do_pdfa_tests()' - Run PDF/A generation and compliance tests.
//
@@ -1180,6 +1277,10 @@ do_unit_tests(void)
if (do_crypto_tests())
return (1);
// Do LZW tests...
if (do_lzw_tests())
return (1);
// Create a new PDF file...
testBegin("pdfioFileCreate(\"testpdfio-out.pdf\", ...)");
if ((outpdf = pdfioFileCreate("testpdfio-out.pdf", /*version*/"1.7", /*media_box*/NULL, /*crop_box*/NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL)