mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2026-04-09 13:32:31 +02:00
Fix implementation of LZWDecode filter to account for the EarlyChange parameter
(somewhat buried, very frustrating...).
Add some debugging and update the test suite to find page metadata in any parent page object.
This commit is contained in:
27
pdfio-lzw.c
27
pdfio-lzw.c
@@ -28,7 +28,8 @@ static int lzw_get_code(_pdfio_lzw_t *lzw);
|
||||
//
|
||||
|
||||
_pdfio_lzw_t * // O - LZW state
|
||||
_pdfioLZWCreate(int code_size) // I - Data code size in bits (typically 8 for PDF, 2-8 for GIF)
|
||||
_pdfioLZWCreate(int code_size, // I - Data code size in bits (typically 8 for PDF, 2-8 for GIF)
|
||||
int early) // I - Number of early codes
|
||||
{
|
||||
_pdfio_lzw_t *lzw; // LZW state
|
||||
|
||||
@@ -38,6 +39,7 @@ _pdfioLZWCreate(int code_size) // I - Data code size in bits (typically 8 for P
|
||||
lzw->def_code_size = code_size + 1;
|
||||
lzw->clear_code = (short)(1 << code_size);
|
||||
lzw->eod_code = lzw->clear_code + 1;
|
||||
lzw->early = early;
|
||||
|
||||
lzw_clear(lzw);
|
||||
}
|
||||
@@ -81,6 +83,7 @@ _pdfioLZWInflate(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
{
|
||||
*(lzw->next_out++) = *(--lzw->stptr);
|
||||
lzw->avail_out --;
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Unrolled value %d, stptr=%p(%ld), avail_out=%u\n", *(lzw->stptr), (void *)lzw->stptr, lzw->stptr - lzw->stack, (unsigned)lzw->avail_out);
|
||||
}
|
||||
|
||||
// Loop as long as we have room in the output buffer and data in the input
|
||||
@@ -121,19 +124,20 @@ _pdfioLZWInflate(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
continue;
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: in_code=%d.\n", in_code);
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: in_code=%d, old_code=%d.\n", in_code, lzw->old_code);
|
||||
|
||||
cur_code = in_code;
|
||||
|
||||
if (cur_code >= lzw->next_code)
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: New cur_code=%d, next_code=%d\n", cur_code, lzw->next_code);
|
||||
*(lzw->stptr++) = lzw->first_code;
|
||||
cur_code = lzw->old_code;
|
||||
}
|
||||
|
||||
while (cur_code >= lzw->clear_code)
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: cur_code=%d\n", cur_code);
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: cur_code=%d (%d,%d)\n", cur_code, lzw->table[cur_code].prefix_code, lzw->table[cur_code].suffix);
|
||||
|
||||
// Protect against overflow/loops...
|
||||
if (lzw->stptr >= (lzw->stack + sizeof(lzw->stack) / sizeof(lzw->stack[0])))
|
||||
@@ -168,16 +172,17 @@ _pdfioLZWInflate(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
|
||||
if ((cur_code = lzw->next_code) < 4096)
|
||||
{
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Adding code %d (%d,%d)\n", cur_code, lzw->old_code, lzw->first_code);
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Adding code %d (%d,%d), next_size_code=%d\n", cur_code, lzw->old_code, lzw->first_code, lzw->next_size_code);
|
||||
|
||||
lzw->table[cur_code].prefix_code = lzw->old_code;
|
||||
lzw->table[cur_code].suffix = lzw->first_code;
|
||||
lzw->next_code ++;
|
||||
|
||||
if (lzw->next_code >= lzw->next_size_code && lzw->next_size_code < 4096)
|
||||
if (lzw->next_code >= lzw->next_size_code && lzw->cur_code_size < 12)
|
||||
{
|
||||
lzw->next_size_code *= 2;
|
||||
lzw->cur_code_size ++;
|
||||
lzw->next_size_code = (1 << lzw->cur_code_size) - lzw->early;
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Increased code size to %u, next_size_code=%u\n", lzw->cur_code_size, lzw->next_size_code);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -187,6 +192,7 @@ _pdfioLZWInflate(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
{
|
||||
*(lzw->next_out++) = *(--lzw->stptr);
|
||||
lzw->avail_out --;
|
||||
PDFIO_DEBUG("_pdfioLZWInflate: Unrolled value %d, stptr=%p(%ld), avail_out=%u\n", *(lzw->stptr), (void *)lzw->stptr, lzw->stptr - lzw->stack, (unsigned)lzw->avail_out);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -208,7 +214,7 @@ lzw_clear(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
|
||||
lzw->cur_code_size = lzw->def_code_size;
|
||||
lzw->next_code = lzw->clear_code + 2;
|
||||
lzw->next_size_code = 2 * lzw->clear_code;
|
||||
lzw->next_size_code = (1 << lzw->cur_code_size) - lzw->early;
|
||||
lzw->first_code = 0xffff;
|
||||
lzw->old_code = 0xffff;
|
||||
|
||||
@@ -281,7 +287,7 @@ lzw_get_code(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
}
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("lzw_get_code: in_bit=%u, in_bits=%u, in_bytes=<...%02X%02X...>, cur_code_size=%u\n", lzw->in_bit, lzw->in_bits, lzw->in_bytes[lzw->in_bit / 8], lzw->in_bytes[lzw->in_bit / 8 + 1], lzw->cur_code_size);
|
||||
PDFIO_DEBUG("lzw_get_code: in_bit=%u, in_bits=%u, in_bytes=<...%02X%02X%02X...>, cur_code_size=%u\n", lzw->in_bit, lzw->in_bits, lzw->in_bytes[lzw->in_bit / 8], lzw->in_bytes[lzw->in_bit / 8 + 1], lzw->in_bytes[lzw->in_bit / 8 + 2], lzw->cur_code_size);
|
||||
|
||||
// Now extract the code from the buffer...
|
||||
for (code = 0, in_bit = lzw->in_bit, remaining = lzw->cur_code_size; remaining > 0; in_bit += bits, remaining -= bits)
|
||||
@@ -303,7 +309,12 @@ lzw_get_code(_pdfio_lzw_t *lzw) // I - LZW state
|
||||
// Save the updated position in the input buffer and return the code...
|
||||
lzw->in_bit = in_bit;
|
||||
|
||||
#ifdef DEBUG
|
||||
if (code >= 0x20 && code < 0x7f)
|
||||
PDFIO_DEBUG("lzw_get_code: Returning %u('%c').\n", code, code);
|
||||
else
|
||||
PDFIO_DEBUG("lzw_get_code: Returning %u.\n", code);
|
||||
#endif // DEBUG
|
||||
|
||||
return ((int)code);
|
||||
}
|
||||
|
||||
16
pdfio-page.c
16
pdfio-page.c
@@ -1,7 +1,7 @@
|
||||
//
|
||||
// PDF page functions for PDFio.
|
||||
//
|
||||
// Copyright © 2021-2022 by Michael R Sweet.
|
||||
// Copyright © 2021-2026 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
@@ -87,14 +87,28 @@ pdfioPageOpenStream(
|
||||
// Contents value
|
||||
|
||||
|
||||
PDFIO_DEBUG("pdfioPageOpenStream(page=%p(%lu), n=%lu, decode=%s)\n", (void *)page, page ? (unsigned long)page->number : 0, (unsigned long)n, decode ? "true" : "false");
|
||||
|
||||
if (!contents)
|
||||
{
|
||||
PDFIO_DEBUG("pdfioPageOpenStream: No contents.\n");
|
||||
return (NULL);
|
||||
}
|
||||
else if (contents->type == PDFIO_VALTYPE_ARRAY && n < pdfioArrayGetSize(contents->value.array))
|
||||
{
|
||||
PDFIO_DEBUG("pdfioPageOpenStream: Contents is array, opening numbered content stream.\n");
|
||||
return (pdfioObjOpenStream(pdfioArrayGetObj(contents->value.array, n), decode));
|
||||
}
|
||||
else if (n)
|
||||
{
|
||||
PDFIO_DEBUG("pdfioPageOpenStream: Numbered stream does not exist.\n");
|
||||
return (NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
PDFIO_DEBUG("pdfioPageOpenStream: Opening single content stream.\n");
|
||||
return (pdfioObjOpenStream(pdfioFileFindObj(page->pdf, contents->value.indirect.number), decode));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -227,7 +227,8 @@ typedef struct _pdfio_lzw_s // LZW state
|
||||
uint8_t *next_out; // Next output byte
|
||||
size_t avail_out; // Available output bytes
|
||||
uint8_t cur_code_size, // Current code size
|
||||
def_code_size; // Initial/default code size
|
||||
def_code_size, // Initial/default code size
|
||||
early; // Early code change offset
|
||||
uint16_t clear_code, // Clear code
|
||||
eod_code, // End code
|
||||
next_code, // Next code to be used
|
||||
@@ -451,7 +452,7 @@ extern off_t _pdfioFileSeek(pdfio_file_t *pdf, off_t offset, int whence) _PDFIO
|
||||
extern off_t _pdfioFileTell(pdfio_file_t *pdf) _PDFIO_INTERNAL;
|
||||
extern bool _pdfioFileWrite(pdfio_file_t *pdf, const void *buffer, size_t bytes) _PDFIO_INTERNAL;
|
||||
|
||||
extern _pdfio_lzw_t *_pdfioLZWCreate(int def_code_size) _PDFIO_INTERNAL;
|
||||
extern _pdfio_lzw_t *_pdfioLZWCreate(int def_code_size, int early) _PDFIO_INTERNAL;
|
||||
extern void _pdfioLZWDelete(_pdfio_lzw_t *lzw) _PDFIO_INTERNAL;
|
||||
extern bool _pdfioLZWInflate(_pdfio_lzw_t *lzw) _PDFIO_INTERNAL;
|
||||
|
||||
|
||||
@@ -610,7 +610,7 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
|
||||
st->cbsize = 4096;
|
||||
if ((st->cbuffer = malloc(st->cbsize)) == NULL)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to allocate %lu bytes for Flate compression buffer.", (unsigned long)st->cbsize);
|
||||
_pdfioFileError(st->pdf, "Unable to allocate %lu bytes for FlateDecode decompression buffer.", (unsigned long)st->cbsize);
|
||||
goto error;
|
||||
}
|
||||
|
||||
@@ -633,16 +633,28 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
|
||||
|
||||
if ((status = inflateInit(&(st->flate))) != Z_OK)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to start Flate filter: %s", zstrerror(status));
|
||||
_pdfioFileError(st->pdf, "Unable to start FlateDecode filter: %s", zstrerror(status));
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// LZW decompression...
|
||||
if ((st->lzw = _pdfioLZWCreate(/*code_size*/8)) == NULL)
|
||||
int early = 1;
|
||||
|
||||
if (pdfioDictGetType(params, "EarlyChange") == PDFIO_VALTYPE_NUMBER)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to initialize LZW filter: %s", strerror(errno));
|
||||
early = (int)pdfioDictGetNumber(params, "EarlyChange");
|
||||
if (early < 0 || early > 100)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Bad EarlyChange value %d for LZWDecode filter.", early);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if ((st->lzw = _pdfioLZWCreate(/*code_size*/8, early)) == NULL)
|
||||
{
|
||||
_pdfioFileError(st->pdf, "Unable to initialize LZWDecode filter: %s", strerror(errno));
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
||||
30
testpdfio.c
30
testpdfio.c
@@ -12,6 +12,7 @@
|
||||
//
|
||||
// Options:
|
||||
//
|
||||
// --decode Decode object stream
|
||||
// --help Show help
|
||||
// --password PASSWORD Set access password
|
||||
// --verbose Be verbose
|
||||
@@ -34,7 +35,7 @@
|
||||
static int do_crypto_tests(void);
|
||||
static int do_lzw_tests(void);
|
||||
static int do_pdfa_tests(void);
|
||||
static int do_test_file(const char *filename, const char *outfile, int objnum, const char *password, bool verbose);
|
||||
static int do_test_file(const char *filename, const char *outfile, int objnum, const char *password, bool decode, bool verbose);
|
||||
static int do_unit_tests(void);
|
||||
static int draw_image(pdfio_stream_t *st, const char *name, double x, double y, double w, double h, const char *label);
|
||||
static bool error_cb(pdfio_file_t *pdf, const char *message, bool *error);
|
||||
@@ -75,11 +76,16 @@ main(int argc, // I - Number of command-line arguments
|
||||
{
|
||||
int i; // Looping var
|
||||
const char *password = NULL; // Password
|
||||
bool verbose = false; // Be verbose?
|
||||
bool decode = false, // Decode object stream?
|
||||
verbose = false; // Be verbose?
|
||||
|
||||
for (i = 1; i < argc; i ++)
|
||||
{
|
||||
if (!strcmp(argv[i], "--help"))
|
||||
if (!strcmp(argv[i], "--decode"))
|
||||
{
|
||||
decode = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "--help"))
|
||||
{
|
||||
return (usage(stdout));
|
||||
}
|
||||
@@ -108,14 +114,14 @@ main(int argc, // I - Number of command-line arguments
|
||||
else if ((i + 1) < argc && isdigit(argv[i + 1][0] & 255))
|
||||
{
|
||||
// filename.pdf object-number
|
||||
if (do_test_file(argv[i], /*outfile*/NULL, atoi(argv[i + 1]), password, verbose))
|
||||
if (do_test_file(argv[i], /*outfile*/NULL, atoi(argv[i + 1]), password, decode, verbose))
|
||||
ret = 1;
|
||||
|
||||
i ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (do_test_file(argv[i], argv[i + 1], /*objnum*/0, password, verbose))
|
||||
if (do_test_file(argv[i], argv[i + 1], /*objnum*/0, password, decode, verbose))
|
||||
ret = 1;
|
||||
|
||||
if (argv[i + 1])
|
||||
@@ -405,7 +411,7 @@ do_lzw_tests(void)
|
||||
|
||||
|
||||
testBegin("_pdfioLZWCreate(8)");
|
||||
testEnd((lzw = _pdfioLZWCreate(/*code_size*/8)) != NULL);
|
||||
testEnd((lzw = _pdfioLZWCreate(/*code_size*/8, /*early*/1)) != NULL);
|
||||
if (!lzw)
|
||||
return (1);
|
||||
|
||||
@@ -538,6 +544,7 @@ do_test_file(const char *filename, // I - PDF filename
|
||||
const char *outfile, // I - Output filename, if any
|
||||
int objnum, // I - Object number to dump, if any
|
||||
const char *password, // I - Password for file
|
||||
bool decode, // I - Decode object?
|
||||
bool verbose) // I - Be verbose?
|
||||
{
|
||||
int status = 0; // Exit status
|
||||
@@ -586,7 +593,7 @@ do_test_file(const char *filename, // I - PDF filename
|
||||
|
||||
filter = pdfioDictGetName(dict, "Filter");
|
||||
|
||||
if ((st = pdfioObjOpenStream(obj, filter && !strcmp(filter, "FlateDecode"))) == NULL)
|
||||
if ((st = pdfioObjOpenStream(obj, decode || (filter && !strcmp(filter, "FlateDecode")))) == NULL)
|
||||
{
|
||||
_pdfioValueDebug(&obj->value, stdout);
|
||||
putchar('\n');
|
||||
@@ -655,10 +662,13 @@ do_test_file(const char *filename, // I - PDF filename
|
||||
|
||||
if (!pdfioDictGetRect(dict, "MediaBox", &media_box))
|
||||
{
|
||||
if ((obj = pdfioDictGetObj(dict, "Parent")) != NULL)
|
||||
pdfio_obj_t *parent; // Parent object
|
||||
|
||||
while ((parent = pdfioDictGetObj(dict, "Parent")) != NULL)
|
||||
{
|
||||
dict = pdfioObjGetDict(obj);
|
||||
pdfioDictGetRect(dict, "MediaBox", &media_box);
|
||||
dict = pdfioObjGetDict(parent);
|
||||
if (pdfioDictGetRect(dict, "MediaBox", &media_box))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user