diff --git a/CHANGES.md b/CHANGES.md
index 4dbc0c5..ce2b071 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -8,6 +8,7 @@ v1.7.0 - YYYY-MM-DD
- Now use TTF 1.1 or later for font support.
- Added support for basic compound stream filters for ASCII85Decode support
(Issue #11)
+- Added support for LZWDecode filters (Issue #11)
- Fixed a buffer overflow in the (still not enabled) AES-256 code.
diff --git a/Makefile.in b/Makefile.in
index 867e6ca..5bc1842 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,7 +1,7 @@
#
# Makefile for PDFio.
#
-# Copyright © 2021-2025 by Michael R Sweet.
+# Copyright © 2021-2026 by Michael R Sweet.
#
# Licensed under Apache License v2.0. See the file "LICENSE" for more
# information.
@@ -91,6 +91,7 @@ PUBOBJS = \
pdfio-crypto.o \
pdfio-dict.o \
pdfio-file.o \
+ pdfio-lzw.o \
pdfio-md5.o \
pdfio-object.o \
pdfio-page.o \
diff --git a/pdfio-lzw.c b/pdfio-lzw.c
new file mode 100644
index 0000000..50243ec
--- /dev/null
+++ b/pdfio-lzw.c
@@ -0,0 +1,309 @@
+//
+// LZW decoding functions for PDFio.
+//
+// This code is used to support (legacy) PDF object streams using the LZWDecode
+// filter as well as when embedding (legacy) GIF images. None of this is public
+// API and we only support reading (decoding) since FlateDecode is superior in
+// every way.
+//
+// Copyright © 2026 by Michael R Sweet.
+//
+// Licensed under Apache License v2.0. See the file "LICENSE" for more
+// information.
+//
+
+#include "pdfio-private.h"
+
+
+//
+// Local functions...
+//
+
+static void lzw_clear(_pdfio_lzw_t *lzw);
+static int lzw_get_code(_pdfio_lzw_t *lzw);
+
+
+//
+// '_pdfioLZWCreate()' - Allocate and initialize a LZW decompressor.
+//
+
+_pdfio_lzw_t *				// O - LZW state or `NULL` if memory could not be allocated
+_pdfioLZWCreate(int code_size)		// I - Data code size in bits (typically 8 for PDF, 2-8 for GIF)
+{
+  _pdfio_lzw_t	*state = (_pdfio_lzw_t *)calloc(1, sizeof(_pdfio_lzw_t));
+					// New, zero-filled LZW state
+
+  if (state == NULL)
+    return (NULL);
+
+  // Codes start one bit wider than the data; the clear and end-of-data codes follow the data values...
+  state->clear_code    = (short)(1 << code_size);
+  state->eod_code      = state->clear_code + 1;
+  state->def_code_size = code_size + 1;
+  lzw_clear(state);
+
+  return (state);
+}
+
+
+//
+// '_pdfioLZWDelete()' - Delete a LZW decompressor, releasing the state allocated by '_pdfioLZWCreate()'.
+//
+
+void
+_pdfioLZWDelete(_pdfio_lzw_t *lzw) // I - LZW state (passed straight to `free()`, so `NULL` is harmless)
+{
+ free(lzw);
+}
+
+
+//
+// '_pdfioLZWInflate()' - Decompress pending input data from `next_in`/`avail_in` into `next_out`/`avail_out`.
+// Returns once the output is full, the input runs out, or the end-of-data code is read; call again to continue.
+
+bool // O - `true` on success (including running out of input), `false` on error or when called after end-of-data (see `lzw->error`)
+_pdfioLZWInflate(_pdfio_lzw_t *lzw) // I - LZW state
+{
+ int cur_code, // Current code while walking the string table
+ in_code; // Input code as read from the stream
+
+
+ // Stop if a previous call already saw the "end of data" code (callers can tell this apart using `saw_eod`)...
+ if (lzw->saw_eod)
+ {
+ PDFIO_DEBUG("_pdfioLZWInflate: EOD, returning false.\n");
+ lzw->error = "End of data.";
+ return (false);
+ }
+
+ // Copy decoded bytes still pending on the stack (left over from a previous call) to the output buffer, last pushed first...
+ while (lzw->stptr > lzw->stack && lzw->avail_out > 0)
+ {
+ *(lzw->next_out++) = *(--lzw->stptr);
+ lzw->avail_out --;
+ }
+
+ // Loop as long as we have room in the output buffer and data in the input
+ // buffer; the stack is always empty when an iteration starts...
+ while (lzw->avail_out > 0)
+ {
+ if ((in_code = lzw_get_code(lzw)) < 0)
+ {
+ // Out of data, stop now (still a success, the caller supplies more input)...
+ PDFIO_DEBUG("_pdfioLZWInflate: Out of data.\n");
+ break;
+ }
+ else if (in_code == lzw->clear_code)
+ {
+ // Clear the compression tables and reset to the initial code size...
+ lzw_clear(lzw);
+ PDFIO_DEBUG("_pdfioLZWInflate: Clear.\n");
+ continue;
+ }
+ else if (in_code == lzw->eod_code)
+ {
+ // End of data, this call still returns true but the next one returns false...
+ lzw->saw_eod = true;
+ PDFIO_DEBUG("_pdfioLZWInflate: EOD.\n");
+ break;
+ }
+
+ // If we get this far we have something to write to the output buffer and/or
+ // stack...
+ if (lzw->first_code == 0xffff)
+ {
+ // First code after create/clear (lzw_clear sets first_code to 0xffff): output as-is, no table entry is added...
+ lzw->first_code = lzw->old_code = in_code; // NOTE(review): in_code is not checked to be a literal (< clear_code) here
+ *(lzw->next_out++) = in_code;
+ lzw->avail_out --;
+
+ PDFIO_DEBUG("_pdfioLZWInflate: first_code=%d.\n", in_code);
+ continue;
+ }
+
+ PDFIO_DEBUG("_pdfioLZWInflate: in_code=%d.\n", in_code);
+
+ cur_code = in_code;
+
+ if (cur_code >= lzw->next_code) // Code not in the table yet: decode as the previous string plus its own first character
+ {
+ *(lzw->stptr++) = lzw->first_code; // Pushed first so it is output last
+ cur_code = lzw->old_code; // NOTE(review): codes greater than next_code are not rejected as invalid
+ }
+
+ while (cur_code >= lzw->clear_code) // Walk the prefix chain until a literal code is reached
+ {
+ PDFIO_DEBUG("_pdfioLZWInflate: cur_code=%d\n", cur_code);
+
+ // Protect against overflow/loops (a looping chain eventually fills the stack)...
+ if (lzw->stptr >= (lzw->stack + sizeof(lzw->stack) / sizeof(lzw->stack[0])))
+ {
+ PDFIO_DEBUG("_pdfioLZWInflate: Stack overflow, returning false.\n");
+ lzw->error = "Output overflow.";
+ return (false);
+ }
+
+ // Add this character to the output stack and move to the next character
+ // in the sequence (the string comes out of the table back to front)...
+ *(lzw->stptr++) = lzw->table[cur_code].suffix;
+
+ if (cur_code == lzw->table[cur_code].prefix_code) // Entry that points at itself
+ {
+ PDFIO_DEBUG("_pdfioLZWInflate: Table loop on code %d, returning false.\n", cur_code);
+ lzw->error = "Table loop detected.";
+ return (false);
+ }
+
+ cur_code = lzw->table[cur_code].prefix_code;
+ }
+
+ if (lzw->stptr >= (lzw->stack + sizeof(lzw->stack) / sizeof(lzw->stack[0]))) // Need room for the final literal
+ {
+ PDFIO_DEBUG("_pdfioLZWInflate: Stack overflow, returning false.\n");
+ lzw->error = "Output overflow.";
+ return (false);
+ }
+
+ *(lzw->stptr++) = lzw->first_code = lzw->table[cur_code].suffix; // Literal at the end of the chain is the first character of this string
+
+ if ((cur_code = lzw->next_code) < 4096) // Table not full: add the previous string plus the first character of this one
+ {
+ PDFIO_DEBUG("_pdfioLZWInflate: Adding code %d (%d,%d)\n", cur_code, lzw->old_code, lzw->first_code);
+
+ lzw->table[cur_code].prefix_code = lzw->old_code;
+ lzw->table[cur_code].suffix = lzw->first_code;
+ lzw->next_code ++;
+
+ if (lzw->next_code >= lzw->next_size_code && lzw->next_size_code < 4096) // Every code of the current size is assigned, use one more bit
+ {
+ lzw->next_size_code *= 2; // NOTE(review): PDF LZWDecode defaults to EarlyChange 1 (size grows one code sooner) - confirm this timing
+ lzw->cur_code_size ++;
+ }
+ }
+
+ lzw->old_code = (uint16_t)in_code; // Remember this code as the prefix for the next table entry
+
+ while (lzw->stptr > lzw->stack && lzw->avail_out > 0) // Pop the stack so the string is output in forward order
+ {
+ *(lzw->next_out++) = *(--lzw->stptr);
+ lzw->avail_out --;
+ }
+ }
+
+ PDFIO_DEBUG("_pdfioLZWInflate: Returning true, avail_in=%u, avail_out=%u.\n", (unsigned)lzw->avail_in, (unsigned)lzw->avail_out);
+
+ return (true);
+}
+
+
+//
+// 'lzw_clear()' - Reset the string table and code size to their initial state.
+//
+
+static void
+lzw_clear(_pdfio_lzw_t *lzw)		// I - LZW state
+{
+  uint16_t	literal;		// Current literal (data) code
+
+
+  // Empty the string table, then make every literal code decode to itself...
+  memset(lzw->table, 0, sizeof(lzw->table));
+  for (literal = 0; literal < lzw->clear_code; literal ++)
+    lzw->table[literal].suffix = literal;
+
+  // Restart at the initial code size, assigning codes just past the clear and end-of-data codes...
+  lzw->next_code      = lzw->clear_code + 2;
+  lzw->next_size_code = 2 * lzw->clear_code;
+  lzw->cur_code_size  = lzw->def_code_size;
+
+  lzw->old_code = lzw->first_code = 0xffff;	// 0xffff = no previous code yet
+  lzw->stptr    = lzw->stack;			// Nothing pending on the output stack
+}
+
+
+//
+// 'lzw_get_code()' - Pull the next variable-width code out of the input.
+//
+
+static int				// O - Code or -1 if there is not enough data available
+lzw_get_code(_pdfio_lzw_t *lzw)		// I - LZW state
+{
+  uint16_t	value = 0,		// Code being assembled
+		pos;			// Bit position in the byte buffer
+  uint8_t	want,			// Bits still needed for the code
+		take,			// Bits taken from the current byte
+		skip,			// Bits already used in the current byte
+		cur;			// Current byte
+
+
+  // Top up the local byte buffer when it holds less than one full code...
+  if ((lzw->in_bit + lzw->cur_code_size) > lzw->in_bits)
+  {
+    uint16_t	have = lzw->in_bits / 8,// Bytes currently buffered
+		used = lzw->in_bit / 8,	// Whole bytes already consumed
+		fill;			// Bytes to copy from the caller
+
+
+    if (lzw->avail_in == 0)
+    {
+      // The caller has nothing more for us
+      PDFIO_DEBUG("lzw_get_code: No data, returning -1.\n");
+      return (-1);
+    }
+
+    if (used > 0)
+    {
+      // Slide the unread bytes to the front, keeping the bit offset within the first byte
+      memmove(lzw->in_bytes, lzw->in_bytes + used, have - used);
+      have        -= used;
+      lzw->in_bit &= 7;
+    }
+
+    // Copy as much as fits, limited by what the caller supplied
+    fill = sizeof(lzw->in_bytes) - have;
+    if (fill > lzw->avail_in)
+      fill = lzw->avail_in;
+
+    memcpy(lzw->in_bytes + have, lzw->next_in, fill);
+    lzw->in_bits  = 8 * (have + fill);
+    lzw->next_in  += fill;
+    lzw->avail_in -= fill;
+
+    if ((lzw->in_bit + lzw->cur_code_size) > lzw->in_bits)
+    {
+      // Still short of a full code
+      PDFIO_DEBUG("lzw_get_code: Not enough data, returning -1.\n");
+      return (-1);
+    }
+  }
+
+  PDFIO_DEBUG("lzw_get_code: in_bit=%u, in_bits=%u, in_bytes=<...%02X%02X...>, cur_code_size=%u\n", lzw->in_bit, lzw->in_bits, lzw->in_bytes[lzw->in_bit / 8], lzw->in_bytes[lzw->in_bit / 8 + 1], lzw->cur_code_size);
+
+  // Assemble the code most-significant bit first, one buffer byte at a time...
+  pos  = lzw->in_bit;
+  want = lzw->cur_code_size;
+
+  while (want > 0)
+  {
+    // Take whatever is left of the current byte, up to the bits still needed
+    skip = (pos & 7);
+    cur  = lzw->in_bytes[pos / 8];
+    take = 8 - skip;
+    if (take > want)
+      take = want;
+
+    // Shift the wanted bits down to the bottom of the byte, mask off the
+    // bits that were already used, and append them to the code
+    value = (value << take) | ((cur >> (8 - take - skip)) & ((1 << take) - 1));
+
+    pos  += take;
+    want -= take;
+  }
+
+  // Remember where the next code starts and return this one...
+  lzw->in_bit = pos;
+
+  PDFIO_DEBUG("lzw_get_code: Returning %u.\n", value);
+
+  return ((int)value);
+}
diff --git a/pdfio-private.h b/pdfio-private.h
index 4549b63..3a2a618 100644
--- a/pdfio-private.h
+++ b/pdfio-private.h
@@ -211,6 +211,36 @@ typedef union _pdfio_crypto_ctx_u // Cryptographic contexts
} _pdfio_crypto_ctx_t;
typedef size_t (*_pdfio_crypto_cb_t)(_pdfio_crypto_ctx_t *ctx, uint8_t *outbuffer, const uint8_t *inbuffer, size_t len);
+typedef struct _pdfio_lzws_s		// LZW string table entry
+{
+  uint16_t	prefix_code;		// Code of the string this entry extends
+  uint16_t	suffix;			// Character this entry adds to the end of that string
+} _pdfio_lzws_t;
+
+typedef struct _pdfio_lzw_s		// LZW decoder state
+{
+  uint8_t	*next_in;		// Next compressed byte supplied by the caller
+  size_t	avail_in;		// Compressed bytes available at `next_in`
+  uint8_t	in_bytes[256];		// Local copy of the input being split into codes
+  uint16_t	in_bit;			// Bit offset of the next code in `in_bytes`
+  uint16_t	in_bits;		// Number of valid bits in `in_bytes`
+  uint8_t	*next_out;		// Where the next decoded byte is written
+  size_t	avail_out;		// Space remaining at `next_out`
+  uint8_t	cur_code_size;		// Current code size in bits
+  uint8_t	def_code_size;		// Initial/default code size in bits
+  uint16_t	clear_code;		// Code that resets the string table
+  uint16_t	eod_code;		// Code that ends the data
+  uint16_t	next_code;		// Next table code to be assigned
+  uint16_t	next_size_code;		// Code where we need to increase the code size
+  uint16_t	first_code;		// First character of the previous string (0xffff = none yet)
+  uint16_t	old_code;		// Previous code in sequence (0xffff = none yet)
+  uint16_t	stack[8192];		// Decoded characters waiting for output, last character first
+  uint16_t	*stptr;			// Next free slot in `stack`
+  _pdfio_lzws_t	table[4096];		// String table
+  bool		saw_eod;		// Saw end-of-data code?
+  const char	*error;			// Error message, if any
+} _pdfio_lzw_t;
+
struct _pdfio_array_s
{
pdfio_file_t *pdf; // PDF file
@@ -349,11 +379,12 @@ struct _pdfio_stream_s // Stream
*a85decptr, // Pointer into decoded characters
*a85decend; // Last decoded character
z_stream flate; // Flate filter state
+ _pdfio_lzw_t *lzw; // LZW filter state
_pdfio_predictor_t predictor; // Predictor function, if any
size_t pbpixel, // Size of a pixel in bytes
pbsize, // Predictor buffer size, if any
cbsize; // Compressed data buffer size
- unsigned char *cbuffer, // Compressed data buffer
+ uint8_t *cbuffer, // Compressed data buffer
*prbuffer, // Raw buffer (previous line), as needed
*psbuffer; // PNG filter buffer, as needed
_pdfio_crypto_cb_t crypto_cb; // Encryption/descryption callback, if any
@@ -420,6 +451,10 @@ extern off_t _pdfioFileSeek(pdfio_file_t *pdf, off_t offset, int whence) _PDFIO
extern off_t _pdfioFileTell(pdfio_file_t *pdf) _PDFIO_INTERNAL;
extern bool _pdfioFileWrite(pdfio_file_t *pdf, const void *buffer, size_t bytes) _PDFIO_INTERNAL;
+extern _pdfio_lzw_t *_pdfioLZWCreate(int def_code_size) _PDFIO_INTERNAL;
+extern void _pdfioLZWDelete(_pdfio_lzw_t *lzw) _PDFIO_INTERNAL;
+extern bool _pdfioLZWInflate(_pdfio_lzw_t *lzw) _PDFIO_INTERNAL;
+
extern void _pdfioObjDelete(pdfio_obj_t *obj) _PDFIO_INTERNAL;
extern void *_pdfioObjGetExtension(pdfio_obj_t *obj) _PDFIO_INTERNAL;
extern bool _pdfioObjLoad(pdfio_obj_t *obj) _PDFIO_INTERNAL;
diff --git a/pdfio-stream.c b/pdfio-stream.c
index 8c397e3..3dfeb58 100644
--- a/pdfio-stream.c
+++ b/pdfio-stream.c
@@ -15,6 +15,7 @@
//
static ssize_t stream_get_bytes(pdfio_stream_t *st, void *buffer, size_t bytes);
+static ssize_t stream_inflate(pdfio_stream_t *st, uint8_t *buffer, size_t bytes, bool exactly);
static unsigned char stream_paeth(unsigned char a, unsigned char b, unsigned char c);
static ssize_t stream_read(pdfio_stream_t *st, char *buffer, size_t bytes);
static bool stream_write(pdfio_stream_t *st, const void *buffer, size_t bytes);
@@ -40,6 +41,8 @@ pdfioStreamClose(pdfio_stream_t *st) // I - Stream
{
if (st->filter == PDFIO_FILTER_FLATE)
inflateEnd(&(st->flate));
+ else if (st->filter == PDFIO_FILTER_LZW)
+ _pdfioLZWDelete(st->lzw);
}
else
{
@@ -523,9 +526,9 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
// No filter, read as-is...
st->filter = PDFIO_FILTER_NONE;
}
- else if (!strcmp(filter, "FlateDecode"))
+ else if (!strcmp(filter, "FlateDecode") || !strcmp(filter, "LZWDecode"))
{
- // Flate compression
+ // Flate or LZW compression
pdfio_dict_t *params = pdfioDictGetDict(dict, "DecodeParms");
// Decoding parameters
int bpc = (int)pdfioDictGetNumber(params, "BitsPerComponent");
@@ -536,12 +539,11 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
// Number of columns
int predictor = (int)pdfioDictGetNumber(params, "Predictor");
// Predictory value, if any
- int status; // ZLIB status
ssize_t rbytes; // Bytes read
- PDFIO_DEBUG("_pdfioStreamOpen: FlateDecode - BitsPerComponent=%d, Colors=%d, Columns=%d, Predictor=%d\n", bpc, colors, columns, predictor);
+ PDFIO_DEBUG("_pdfioStreamOpen: %s - BitsPerComponent=%d, Colors=%d, Columns=%d, Predictor=%d\n", filter, bpc, colors, columns, predictor);
- st->filter = PDFIO_FILTER_FLATE;
+ st->filter = !strcmp(filter, "FlateDecode") ? PDFIO_FILTER_FLATE : PDFIO_FILTER_LZW;
if (bpc == 0)
{
@@ -613,40 +615,41 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
}
PDFIO_DEBUG("_pdfioStreamOpen: pos=%ld\n", (long)_pdfioFileTell(st->pdf));
- if (st->cbsize > st->remaining)
- rbytes = _pdfioFileRead(st->pdf, st->cbuffer, st->remaining);
- else
- rbytes = _pdfioFileRead(st->pdf, st->cbuffer, st->cbsize);
-
- if (rbytes <= 0)
+ if ((rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize)) <= 0)
{
_pdfioFileError(st->pdf, "Unable to read bytes for stream.");
goto error;
}
- if (st->crypto_cb)
- rbytes = (ssize_t)(st->crypto_cb)(&st->crypto_ctx, st->cbuffer, st->cbuffer, (size_t)rbytes);
-
- st->flate.next_in = (Bytef *)st->cbuffer;
- st->flate.avail_in = (uInt)rbytes;
-
- PDFIO_DEBUG("_pdfioStreamOpen: avail_in=%u, cbuffer=<%02X%02X%02X%02X%02X%02X%02X%02X...>\n", st->flate.avail_in, st->cbuffer[0], st->cbuffer[1], st->cbuffer[2], st->cbuffer[3], st->cbuffer[4], st->cbuffer[5], st->cbuffer[6], st->cbuffer[7]);
-
- if ((status = inflateInit(&(st->flate))) != Z_OK)
+ if (st->filter == PDFIO_FILTER_FLATE)
{
- _pdfioFileError(st->pdf, "Unable to start Flate filter: %s", zstrerror(status));
- goto error;
- }
+ // Flate decompression...
+ int status; // ZLIB status
- st->remaining -= st->flate.avail_in;
+ st->flate.next_in = (Bytef *)st->cbuffer;
+ st->flate.avail_in = (uInt)rbytes;
+
+ PDFIO_DEBUG("_pdfioStreamOpen: avail_in=%u, cbuffer=<%02X%02X%02X%02X%02X%02X%02X%02X...>\n", st->flate.avail_in, st->cbuffer[0], st->cbuffer[1], st->cbuffer[2], st->cbuffer[3], st->cbuffer[4], st->cbuffer[5], st->cbuffer[6], st->cbuffer[7]);
+
+ if ((status = inflateInit(&(st->flate))) != Z_OK)
+ {
+ _pdfioFileError(st->pdf, "Unable to start Flate filter: %s", zstrerror(status));
+ goto error;
+ }
+ }
+ else
+ {
+ // LZW decompression...
+ if ((st->lzw = _pdfioLZWCreate(/*code_size*/8)) == NULL)
+ {
+ _pdfioFileError(st->pdf, "Unable to initialize LZW filter: %s", strerror(errno));
+ goto error;
+ }
+
+ st->lzw->next_in = st->cbuffer;
+ st->lzw->avail_in = (size_t)rbytes;
+ }
}
-#if 0 // TODO: Implement LZWDecode filter
- else if (!strcmp(filter, "LZWDecode"))
- {
- // LZW compression
- st->filter = PDFIO_FILTER_LZW;
- }
-#endif // 0
else
{
// Something else we don't support
@@ -1143,28 +1146,44 @@ stream_get_bytes(
a85val = a85val * 85 + a85ch - '!';
count ++;
}
+ else if (a85ch == 'z' && count == 0)
+ {
+ // 'z' == 0's
+ a85val = 0;
+ count = 5;
+
+ a85bufptr++;
+ }
+ else if (a85ch == '~')
+ {
+ break;
+ }
else if (!isspace(a85ch & 255))
{
// Invalid ASCII85Decode character...
- _pdfioFileError(st->pdf, "Invalid ASCII85Decode character in stream.");
+ _pdfioFileError(st->pdf, "Invalid ASCII85Decode character '%c' in stream.", a85ch);
return (-1);
}
}
+ st->a85bufptr = a85bufptr;
+
+ if (*a85bufptr == '~')
+ break;
+
if (count < 2)
{
// Need at least 2 characters to decode a single byte...
- _pdfioFileError(st->pdf, "Invalid ASCII85Decode character in stream.");
+ _pdfioFileError(st->pdf, "Invalid ASCII85Decode sequence in stream.");
return (-1);
}
- st->a85bufptr = a85bufptr;
- declen = count - 1;
+ declen = count - 1;
- // Add zero rounds to properly align the decoded value...
+ // Add rounds to properly align the decoded value...
while (count < 5)
{
- a85val *= 85;
+ a85val = a85val * 85 + 84;
count ++;
}
@@ -1178,6 +1197,8 @@ stream_get_bytes(
st->a85decend = st->a85decode + declen;
}
+ PDFIO_DEBUG("stream_get_bytes: Returning %ld ASCII85 bytes for stream.\n", (long)rbytes);
+
return (rbytes);
}
else
@@ -1196,11 +1217,100 @@ stream_get_bytes(
(st->crypto_cb)(&st->crypto_ctx, (uint8_t *)buffer, (uint8_t *)buffer, (size_t)rbytes);
}
+ PDFIO_DEBUG("stream_get_bytes: Returning %ld raw bytes for stream.\n", (long)rbytes);
+
return (rbytes);
}
}
+//
+// 'stream_inflate()' - Decompress bytes from a stream (Flate or LZW) into the specified buffer.
+//
+// When "exactly" is true the function keeps decompressing until the buffer is
+// full and fails if the compressed data ends first.  Otherwise it makes a
+// single decompression pass and returns however many bytes that produced.
+//
+
+static ssize_t				// O - Number of bytes decompressed or -1 on error/early end of data
+stream_inflate(pdfio_stream_t *st,	// I - Stream
+               uint8_t        *buffer,	// I - Output buffer
+               size_t         bytes,	// I - Number of bytes
+               bool           exactly)	// I - Require exactly the number of bytes
+{
+  ssize_t	rbytes;			// Bytes read
+  bool		done = false;		// Decompressor cannot produce any more data?
+
+
+  // Setup decompression to the output buffer...
+  if (st->filter == PDFIO_FILTER_FLATE)
+  {
+    st->flate.next_out  = (Bytef *)buffer;
+    st->flate.avail_out = (uInt)bytes;
+  }
+  else
+  {
+    st->lzw->next_out  = buffer;
+    st->lzw->avail_out = bytes;
+  }
+
+  // Loop to get the bytes...
+  do
+  {
+    if (st->filter == PDFIO_FILTER_FLATE)
+    {
+      // Flate decompress
+      int	status;			// Status of decompression
+      uInt	avail_in, avail_out;	// Flate values before inflate()
+
+      PDFIO_DEBUG("stream_inflate: avail_in=%u, avail_out=%u\n", st->flate.avail_in, st->flate.avail_out);
+
+      if (st->flate.avail_in == 0)
+      {
+        // Read more from the file...
+        if ((rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize)) <= 0)
+          return (-1);			// End of file...
+
+        st->flate.next_in  = (Bytef *)st->cbuffer;
+        st->flate.avail_in = (uInt)rbytes;
+      }
+
+      avail_in  = st->flate.avail_in;
+      avail_out = st->flate.avail_out;
+
+      if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
+      {
+        PDFIO_DEBUG("stream_inflate: inflate() returned %d\n", status);
+        _pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
+        return (-1);
+      }
+
+      // Stop at the end of the Flate data or when inflate() makes no progress,
+      // otherwise input left over after the end would keep the "exactly" loop
+      // spinning forever...
+      done  = status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out);
+      bytes = (size_t)st->flate.avail_out;
+    }
+    else
+    {
+      // LZW decompress
+      size_t	avail_out = st->lzw->avail_out;
+					// Output space before decompressing
+      bool	eof = false;		// Unable to read more input?
+
+      if (st->lzw->avail_in == 0)
+      {
+        // Read more from the file.  Running out of input is not fatal yet
+        // since the LZW state may still hold buffered input bytes and decoded
+        // output from earlier calls...
+        if ((rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize)) > 0)
+        {
+          st->lzw->next_in  = st->cbuffer;
+          st->lzw->avail_in = (size_t)rbytes;
+        }
+        else
+        {
+          eof = true;
+        }
+      }
+
+      if (!_pdfioLZWInflate(st->lzw) && !st->lzw->saw_eod)
+      {
+        _pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, st->lzw->error);
+        return (-1);
+      }
+
+      bytes = st->lzw->avail_out;
+
+      if (eof && bytes == avail_out)
+        return (-1);			// End of file and nothing left in the LZW state...
+
+      // Nothing more will be produced once the end-of-data code has been
+      // seen, even if there is input left over after it...
+      done = st->lzw->saw_eod;
+    }
+  }
+  while (bytes > 0 && exactly && !done);
+
+  if (exactly && bytes > 0)
+    return (-1);			// Early end of data
+  else if (st->filter == PDFIO_FILTER_FLATE)
+    return (st->flate.next_out - (Bytef *)buffer);
+  else
+    return (st->lzw->next_out - (uint8_t *)buffer);
+}
+
+
//
// 'stream_paeth()' - PaethPredictor function for PNG decompression filter.
//
@@ -1228,47 +1338,20 @@ stream_read(pdfio_stream_t *st, // I - Stream
char *buffer, // I - Buffer
size_t bytes) // I - Number of bytes to read
{
- uInt avail_in, avail_out; // Previous flate values
-
-
if (st->filter == PDFIO_FILTER_NONE)
{
// No filtering...
return (stream_get_bytes(st, buffer, bytes));
}
- else if (st->filter == PDFIO_FILTER_FLATE)
+ else if (st->filter == PDFIO_FILTER_FLATE || st->filter == PDFIO_FILTER_LZW)
{
- // Deflate compression...
- int status; // Status of decompression
-
+ // Flate or LZW compression...
if (st->predictor == _PDFIO_PREDICTOR_NONE)
{
// Decompress into the buffer...
PDFIO_DEBUG("stream_read: No predictor.\n");
- if (st->flate.avail_in == 0)
- {
- // Read more from the file...
- ssize_t rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize);
- // Bytes read
-
- if (rbytes <= 0)
- return (-1); // End of file...
-
- st->flate.next_in = (Bytef *)st->cbuffer;
- st->flate.avail_in = (uInt)rbytes;
- }
-
- st->flate.next_out = (Bytef *)buffer;
- st->flate.avail_out = (uInt)bytes;
-
- if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
- {
- _pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
- return (-1);
- }
-
- return (st->flate.next_out - (Bytef *)buffer);
+ return (stream_inflate(st, (uint8_t *)buffer, bytes, /*exactly*/false));
}
else if (st->predictor == _PDFIO_PREDICTOR_TIFF2)
{
@@ -1276,9 +1359,9 @@ stream_read(pdfio_stream_t *st, // I - Stream
// Size of pixel in bytes
remaining = st->pbsize;
// Remaining bytes
- unsigned char *bufptr = (unsigned char *)buffer,
+ uint8_t *bufptr = (uint8_t *)buffer,
// Pointer into buffer
- *bufsecond = (unsigned char *)buffer + pbpixel,
+ *bufsecond = (uint8_t *)buffer + pbpixel,
// Pointer to second pixel in buffer
*sptr = st->psbuffer;
// Current (raw) line
@@ -1291,37 +1374,7 @@ stream_read(pdfio_stream_t *st, // I - Stream
return (-1);
}
- st->flate.next_out = (Bytef *)sptr;
- st->flate.avail_out = (uInt)st->pbsize;
-
- while (st->flate.avail_out > 0)
- {
- if (st->flate.avail_in == 0)
- {
- // Read more from the file...
- ssize_t rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize);
- // Bytes read
-
- if (rbytes <= 0)
- return (-1); // End of file...
-
- st->flate.next_in = (Bytef *)st->cbuffer;
- st->flate.avail_in = (uInt)rbytes;
- }
-
- avail_in = st->flate.avail_in;
- avail_out = st->flate.avail_out;
-
- if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
- {
- _pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
- return (-1);
- }
- else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
- break;
- }
-
- if (st->flate.avail_out > 0)
+ if (stream_inflate(st, sptr, st->pbsize, /*exactly*/true) < 0)
return (-1); // Early end of stream
for (; bufptr < bufsecond; remaining --, sptr ++)
@@ -1338,9 +1391,9 @@ stream_read(pdfio_stream_t *st, // I - Stream
// Size of pixel in bytes
remaining = st->pbsize - 1;
// Remaining bytes
- unsigned char *bufptr = (unsigned char *)buffer,
+ uint8_t *bufptr = (uint8_t *)buffer,
// Pointer into buffer
- *bufsecond = (unsigned char *)buffer + pbpixel,
+ *bufsecond = (uint8_t *)buffer + pbpixel,
// Pointer to second pixel in buffer
*sptr = st->psbuffer + 1,
// Current (raw) line
@@ -1355,40 +1408,10 @@ stream_read(pdfio_stream_t *st, // I - Stream
return (-1);
}
- st->flate.next_out = (Bytef *)sptr - 1;
- st->flate.avail_out = (uInt)st->pbsize;
-
- while (st->flate.avail_out > 0)
- {
- if (st->flate.avail_in == 0)
- {
- // Read more from the file...
- ssize_t rbytes = stream_get_bytes(st, st->cbuffer, st->cbsize);
- // Bytes read
-
- if (rbytes <= 0)
- return (-1); // End of file...
-
- st->flate.next_in = (Bytef *)st->cbuffer;
- st->flate.avail_in = (uInt)rbytes;
- }
-
- avail_in = st->flate.avail_in;
- avail_out = st->flate.avail_out;
-
- if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
- {
- _pdfioFileError(st->pdf, "Unable to decompress stream data for object %ld: %s", (long)st->obj->number, zstrerror(status));
- return (-1);
- }
- else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
- break;
- }
-
- if (st->flate.avail_out > 0)
+ if (stream_inflate(st, sptr - 1, st->pbsize, /*exactly*/true) < 0)
{
// Early end of stream
- PDFIO_DEBUG("stream_read: Early EOF (remaining=%u, avail_in=%d, avail_out=%d, data_type=%d, next_in=<%02X%02X%02X%02X...>).\n", (unsigned)st->remaining, st->flate.avail_in, st->flate.avail_out, st->flate.data_type, st->flate.next_in[0], st->flate.next_in[1], st->flate.next_in[2], st->flate.next_in[3]);
+ PDFIO_DEBUG("stream_read: Early EOF (remaining=%u).\n", (unsigned)st->remaining);
return (-1);
}
@@ -1491,8 +1514,6 @@ stream_write(pdfio_stream_t *st, // I - Stream
outbytes = cbytes;
}
-// fprintf(stderr, "stream_write: bytes=%u, outbytes=%u\n", (unsigned)bytes, (unsigned)outbytes);
-
if (!_pdfioFileWrite(st->pdf, st->cbuffer, outbytes))
return (false);
diff --git a/pdfio.h b/pdfio.h
index f7895f5..41255a0 100644
--- a/pdfio.h
+++ b/pdfio.h
@@ -1,7 +1,7 @@
//
// Public header file for PDFio.
//
-// Copyright © 2021-2025 by Michael R Sweet.
+// Copyright © 2021-2026 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@@ -72,11 +72,11 @@ typedef enum pdfio_filter_e // Compression/decompression filters for streams
PDFIO_FILTER_NONE, // No filter
PDFIO_FILTER_ASCIIHEX, // ASCIIHexDecode filter (reading only)
PDFIO_FILTER_ASCII85, // ASCII85Decode filter (reading only)
- PDFIO_FILTER_CCITTFAX, // CCITTFaxDecode filter
+ PDFIO_FILTER_CCITTFAX, // CCITTFaxDecode filter (reading only)
PDFIO_FILTER_CRYPT, // Encryption filter
PDFIO_FILTER_DCT, // DCTDecode (JPEG) filter
PDFIO_FILTER_FLATE, // FlateDecode filter
- PDFIO_FILTER_JBIG2, // JBIG2Decode filter
+ PDFIO_FILTER_JBIG2, // JBIG2Decode filter (reading only)
PDFIO_FILTER_JPX, // JPXDecode filter (reading only)
PDFIO_FILTER_LZW, // LZWDecode filter (reading only)
PDFIO_FILTER_RUNLENGTH, // RunLengthDecode filter (reading only)
diff --git a/pdfio.vcxproj b/pdfio.vcxproj
index 2943025..f6de726 100644
--- a/pdfio.vcxproj
+++ b/pdfio.vcxproj
@@ -160,6 +160,7 @@
+
diff --git a/test.h b/test.h
index 454a76c..86f8e87 100644
--- a/test.h
+++ b/test.h
@@ -96,6 +96,16 @@ static int test_progress; // Current progress
static char test_title[1024] = ""; // Current test title
+// Add printf syntax checking on supported compilers by declaring the printf-style test helpers with a format attribute (arguments are format string index, first variadic argument index)...
+#if defined(__has_extension) || defined(__GNUC__)
+# define TEST_FORMAT(a,b) __attribute__ ((__format__(__printf__,a,b)))
+static inline void testBegin(const char *title, ...) TEST_FORMAT(1,2);
+static inline void testEndMessage(bool pass, const char *message, ...) TEST_FORMAT(2,3);
+static inline void testError(const char *error, ...) TEST_FORMAT(1,2);
+static inline void testMessage(const char *error, ...) TEST_FORMAT(1,2);
+#endif // __has_extension || __GNUC__
+
+
// Start a test
static inline void
testBegin(const char *title, ...) // I - printf-style title string
diff --git a/testpdfio.c b/testpdfio.c
index cca0501..0c95fae 100644
--- a/testpdfio.c
+++ b/testpdfio.c
@@ -32,6 +32,7 @@
//
static int do_crypto_tests(void);
+static int do_lzw_tests(void);
static int do_pdfa_tests(void);
static int do_test_file(const char *filename, const char *outfile, int objnum, const char *password, bool verbose);
static int do_unit_tests(void);
@@ -382,6 +383,102 @@ do_crypto_tests(void)
}
+//
+// 'do_lzw_tests()' - Test the LZW functions in PDFio by decoding one known input and comparing against the expected bytes.
+//
+
+static int // O - Exit status (0 on success, 1 on failure)
+do_lzw_tests(void)
+{
+ int status = 0; // Exit status
+ _pdfio_lzw_t *lzw; // LZW state
+ uint8_t buffer[8192]; // Output buffer
+ size_t bytes; // Output bytes
+ static uint8_t iso32000_in[] = // ISO-32000-2 test case input (9-bit codes 256 45 258 258 65 259 66 257)
+ {
+ 0x80, 0x0B, 0x60, 0x50, 0x22, 0x0C, 0x0C, 0x85, 0x01
+ };
+ static uint8_t iso32000_out[] = // ISO-32000-2 test case output ("-----A---B")
+ {
+ 45, 45, 45, 45, 45, 65, 45, 45, 45, 66
+ };
+
+
+ testBegin("_pdfioLZWCreate(8)");
+ testEnd((lzw = _pdfioLZWCreate(/*code_size*/8)) != NULL);
+ if (!lzw)
+ return (1);
+
+ testBegin("_pdfioLZWInflate(ISO 32000-2 test case)");
+
+ lzw->avail_in = sizeof(iso32000_in);
+ lzw->next_in = iso32000_in;
+
+ lzw->avail_out = sizeof(buffer);
+ lzw->next_out = buffer;
+
+ if (!_pdfioLZWInflate(lzw)) // The whole input is decoded by a single call
+ {
+ testEndMessage(false, "returned false");
+ status = 1;
+ }
+ else if ((bytes = sizeof(buffer) - lzw->avail_out) != sizeof(iso32000_out)) // Bytes written = space used in the output buffer
+ {
+ testEndMessage(false, "got %u bytes, expected %u bytes", (unsigned)bytes, (unsigned)sizeof(iso32000_out));
+ status = 1;
+ }
+ else if (memcmp(buffer, iso32000_out, bytes))
+ {
+ size_t i; // Looping var
+
+ testEndMessage(false, "got incorrect output");
+
+ testMessage(" EXPECTED %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", iso32000_out[0], iso32000_out[1], iso32000_out[2], iso32000_out[3], iso32000_out[4], iso32000_out[5], iso32000_out[6], iso32000_out[7], iso32000_out[8], iso32000_out[9]);
+
+ for (i = 0; i < bytes; i += 8) // Dump what we got, up to 8 bytes per line
+ {
+ switch (bytes - i) // Number of bytes left to show
+ {
+ case 1 :
+ testMessage(" %s %02X", i == 0 ? "GOT" : " ", buffer[i + 0]);
+ break;
+ case 2 :
+ testMessage(" %s %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1]);
+ break;
+ case 3 :
+ testMessage(" %s %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2]);
+ break;
+ case 4 :
+ testMessage(" %s %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3]);
+ break;
+ case 5 :
+ testMessage(" %s %02X %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3], buffer[i + 4]);
+ break;
+ case 6 :
+ testMessage(" %s %02X %02X %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3], buffer[i + 4], buffer[i + 5]);
+ break;
+ case 7 :
+ testMessage(" %s %02X %02X %02X %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3], buffer[i + 4], buffer[i + 5], buffer[i + 6]);
+ break;
+ default : // 8 or more bytes left, show a full line
+ testMessage(" %s %02X %02X %02X %02X %02X %02X %02X %02X", i == 0 ? "GOT" : " ", buffer[i + 0], buffer[i + 1], buffer[i + 2], buffer[i + 3], buffer[i + 4], buffer[i + 5], buffer[i + 6], buffer[i + 7]);
+ break;
+ }
+ }
+
+ status = 1;
+ }
+ else
+ {
+ testEnd(true);
+ }
+
+ _pdfioLZWDelete(lzw);
+
+ return (status);
+}
+
+
//
// 'do_pdfa_tests()' - Run PDF/A generation and compliance tests.
//
@@ -1180,6 +1277,10 @@ do_unit_tests(void)
if (do_crypto_tests())
return (1);
+ // Do LZW tests...
+ if (do_lzw_tests())
+ return (1);
+
// Create a new PDF file...
testBegin("pdfioFileCreate(\"testpdfio-out.pdf\", ...)");
if ((outpdf = pdfioFileCreate("testpdfio-out.pdf", /*version*/"1.7", /*media_box*/NULL, /*crop_box*/NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL)