Backport reading improvements/bug fixes from master.

This commit is contained in:
Michael R Sweet 2025-04-24 12:19:11 -04:00
parent f9c07a0346
commit 8f706b9fe7
No known key found for this signature in database
GPG Key ID: BE67C75EC81F3244
5 changed files with 175 additions and 100 deletions

View File

@ -134,19 +134,20 @@ _pdfioFileGetChar(pdfio_file_t *pdf) // I - PDF file
bool // O - `true` on success, `false` on error bool // O - `true` on success, `false` on error
_pdfioFileGets(pdfio_file_t *pdf, // I - PDF file _pdfioFileGets(pdfio_file_t *pdf, // I - PDF file
char *buffer, // I - Line buffer char *buffer, // I - Line buffer
size_t bufsize) // I - Size of line buffer size_t bufsize, // I - Size of line buffer
bool discard) // I - OK to discard excess line chars?
{ {
bool eol = false; // End of line? bool eol = false; // End of line?
char *bufptr = buffer, // Pointer into buffer char *bufptr = buffer, // Pointer into buffer
*bufend = buffer + bufsize - 1; // Pointer to end of buffer *bufend = buffer + bufsize - 1; // Pointer to end of buffer
PDFIO_DEBUG("_pdfioFileGets(pdf=%p, buffer=%p, bufsize=%lu) bufpos=%ld, buffer=%p, bufptr=%p, bufend=%p, offset=%lu\n", pdf, buffer, (unsigned long)bufsize, (long)pdf->bufpos, pdf->buffer, pdf->bufptr, pdf->bufend, (unsigned long)(pdf->bufpos + (pdf->bufptr - pdf->buffer))); PDFIO_DEBUG("_pdfioFileGets(pdf=%p, buffer=%p, bufsize=%lu, discard=%s) bufpos=%ld, buffer=%p, bufptr=%p, bufend=%p, offset=%lu\n", pdf, buffer, (unsigned long)bufsize, discard ? "true" : "false", (long)pdf->bufpos, pdf->buffer, pdf->bufptr, pdf->bufend, (unsigned long)(pdf->bufpos + (pdf->bufptr - pdf->buffer)));
while (!eol) while (!eol)
{ {
// If there are characters ready in the buffer, use them... // If there are characters ready in the buffer, use them...
while (!eol && pdf->bufptr < pdf->bufend && bufptr < bufend) while (!eol && pdf->bufptr < pdf->bufend)
{ {
char ch = *(pdf->bufptr++); // Next character in buffer char ch = *(pdf->bufptr++); // Next character in buffer
@ -168,8 +169,10 @@ _pdfioFileGets(pdfio_file_t *pdf, // I - PDF file
pdf->bufptr ++; pdf->bufptr ++;
} }
} }
else else if (bufptr < bufend)
*bufptr++ = ch; *bufptr++ = ch;
else if (!discard)
break;
} }
// Fill the read buffer as needed... // Fill the read buffer as needed...

View File

@ -1049,7 +1049,7 @@ pdfioFileOpen(
} }
// Read the header from the first line... // Read the header from the first line...
if (!_pdfioFileGets(pdf, line, sizeof(line))) if (!_pdfioFileGets(pdf, line, sizeof(line), true))
goto error; goto error;
if ((strncmp(line, "%PDF-1.", 7) && strncmp(line, "%PDF-2.", 7)) || !isdigit(line[7] & 255)) if ((strncmp(line, "%PDF-1.", 7) && strncmp(line, "%PDF-2.", 7)) || !isdigit(line[7] & 255))
@ -1063,7 +1063,7 @@ pdfioFileOpen(
pdf->version = strdup(line + 5); pdf->version = strdup(line + 5);
// Grab the last 1k of the file to find the start of the xref table... // Grab the last 1k of the file to find the start of the xref table...
if (_pdfioFileSeek(pdf, -1024, SEEK_END) < 0) if (_pdfioFileSeek(pdf, 1 - sizeof(line), SEEK_END) < 0)
{ {
_pdfioFileError(pdf, "Unable to read startxref data."); _pdfioFileError(pdf, "Unable to read startxref data.");
goto error; goto error;
@ -1075,28 +1075,36 @@ pdfioFileOpen(
goto error; goto error;
} }
PDFIO_DEBUG("pdfioOpen: Read %d bytes at end of file.\n", (int)bytes);
line[bytes] = '\0'; line[bytes] = '\0';
end = line + bytes - 9; end = line + bytes - 9;
for (ptr = line; ptr < end; ptr ++) for (ptr = line; ptr < end; ptr ++)
{ {
if (!memcmp(ptr, "startxref", 9)) if (!strncmp(ptr, "startxref", 9) && !strstr(ptr + 9, "startxref") && strtol(ptr + 9, NULL, 10) > 0)
break; break;
} }
if (ptr >= end) if (ptr >= end)
{ {
_pdfioFileError(pdf, "Unable to find start of xref table."); if (!_pdfioFileError(pdf, "WARNING: Unable to find start of cross-reference table, will attempt to rebuild."))
goto error; goto error;
}
xref_offset = (off_t)strtol(ptr + 9, NULL, 10);
if (!load_xref(pdf, xref_offset, password_cb, password_cbdata))
{
if (!repair_xref(pdf, password_cb, password_cbdata)) if (!repair_xref(pdf, password_cb, password_cbdata))
goto error; goto error;
} }
else
{
PDFIO_DEBUG("pdfioFileOpen: line=%p,ptr=%p(\"%s\")\n", line, ptr, ptr);
xref_offset = (off_t)strtol(ptr + 9, NULL, 10);
PDFIO_DEBUG("pdfioFileOpen: xref_offset=%lu\n", (unsigned long)xref_offset);
if (!load_xref(pdf, xref_offset, password_cb, password_cbdata))
goto error;
}
return (pdf); return (pdf);
@ -1744,31 +1752,32 @@ load_xref(
int generation; // Generation number int generation; // Generation number
_pdfio_token_t tb; // Token buffer/stack _pdfio_token_t tb; // Token buffer/stack
off_t line_offset; // Offset to start of line off_t line_offset; // Offset to start of line
pdfio_obj_t *pages_obj; // Pages object
while (!done) while (!done)
{ {
if (_pdfioFileSeek(pdf, xref_offset, SEEK_SET) != xref_offset) if (_pdfioFileSeek(pdf, xref_offset, SEEK_SET) != xref_offset)
{ {
_pdfioFileError(pdf, "Unable to seek to start of xref table."); PDFIO_DEBUG("load_xref: Unable to seek to %lu.\n", (unsigned long)xref_offset);
return (false); goto repair;
} }
do do
{ {
line_offset = _pdfioFileTell(pdf); line_offset = _pdfioFileTell(pdf);
if (!_pdfioFileGets(pdf, line, sizeof(line))) if (!_pdfioFileGets(pdf, line, sizeof(line), true))
{ {
_pdfioFileError(pdf, "Unable to read start of xref table."); PDFIO_DEBUG("load_xref: Unable to read line at offset %lu.\n", (unsigned long)line_offset);
return (false); goto repair;
} }
} }
while (!line[0]); while (!line[0]);
PDFIO_DEBUG("load_xref: line_offset=%lu, line='%s'\n", (unsigned long)line_offset, line); PDFIO_DEBUG("load_xref: line_offset=%lu, line='%s'\n", (unsigned long)line_offset, line);
if (isdigit(line[0] & 255) && strlen(line) > 4 && (!strcmp(line + strlen(line) - 4, " obj") || ((ptr = strstr(line, " obj")) != NULL && ptr[4] == '<'))) if (isdigit(line[0] & 255) && strlen(line) > 4 && (!strcmp(line + strlen(line) - 4, " obj") || ((ptr = strstr(line, " obj")) != NULL && (ptr[4] == '<' || isspace(ptr[4])))))
{ {
// Cross-reference stream // Cross-reference stream
pdfio_obj_t *obj; // Object pdfio_obj_t *obj; // Object
@ -1790,14 +1799,14 @@ load_xref(
if ((number = strtoimax(line, &ptr, 10)) < 1) if ((number = strtoimax(line, &ptr, 10)) < 1)
{ {
_pdfioFileError(pdf, "Bad xref table header '%s'.", line); PDFIO_DEBUG("load_xref: Unable to scan object number.\n");
return (false); goto repair;
} }
if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || (generation > 65535 && number != 0)) if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || (generation > 65535 && number != 0))
{ {
_pdfioFileError(pdf, "Bad xref table header '%s'.", line); PDFIO_DEBUG("load_xref: Unable to scan generation number (%u).\n", (unsigned)generation);
return (false); goto repair;
} }
while (isspace(*ptr & 255)) while (isspace(*ptr & 255))
@ -1805,14 +1814,14 @@ load_xref(
if (strncmp(ptr, "obj", 3)) if (strncmp(ptr, "obj", 3))
{ {
_pdfioFileError(pdf, "Bad xref table header '%s'.", line); PDFIO_DEBUG("load_xref: No 'obj' after object number and generation (saw '%s').\n", ptr);
return (false); goto repair;
} }
if (_pdfioFileSeek(pdf, line_offset + (off_t)(ptr + 3 - line), SEEK_SET) < 0) if (_pdfioFileSeek(pdf, line_offset + (off_t)(ptr + 3 - line), SEEK_SET) < 0)
{ {
_pdfioFileError(pdf, "Unable to seek to xref object %lu %u.", (unsigned long)number, (unsigned)generation); PDFIO_DEBUG("load_xref: Unable to seek to start of cross-reference object dictionary.\n");
return (false); goto repair;
} }
PDFIO_DEBUG("load_xref: Loading object %lu %u.\n", (unsigned long)number, (unsigned)generation); PDFIO_DEBUG("load_xref: Loading object %lu %u.\n", (unsigned long)number, (unsigned)generation);
@ -1827,21 +1836,21 @@ load_xref(
if (!_pdfioValueRead(pdf, obj, &tb, &trailer, 0)) if (!_pdfioValueRead(pdf, obj, &tb, &trailer, 0))
{ {
_pdfioFileError(pdf, "Unable to read cross-reference stream dictionary."); PDFIO_DEBUG("load_xref: Unable to read cross-reference object dictionary.\n");
return (false); goto repair;
} }
else if (trailer.type != PDFIO_VALTYPE_DICT) else if (trailer.type != PDFIO_VALTYPE_DICT)
{ {
_pdfioFileError(pdf, "Cross-reference stream does not have a dictionary."); PDFIO_DEBUG("load_xref: Expected dictionary for cross-reference object (type=%d).", trailer.type);
return (false); goto repair;
} }
obj->value = trailer; obj->value = trailer;
if (!_pdfioTokenGet(&tb, line, sizeof(line)) || strcmp(line, "stream")) if (!_pdfioTokenGet(&tb, line, sizeof(line)) || strcmp(line, "stream"))
{ {
_pdfioFileError(pdf, "Unable to get stream after xref dictionary."); PDFIO_DEBUG("load_xref: No stream token after dictionary (got '%s').\n", line);
return (false); goto repair;
} }
PDFIO_DEBUG("load_xref: tb.bufptr=%p, tb.bufend=%p, tb.bufptr[0]=0x%02x, tb.bufptr[0]=0x%02x\n", tb.bufptr, tb.bufend, tb.bufptr[0], tb.bufptr[1]); PDFIO_DEBUG("load_xref: tb.bufptr=%p, tb.bufend=%p, tb.bufptr[0]=0x%02x, tb.bufptr[0]=0x%02x\n", tb.bufptr, tb.bufend, tb.bufptr[0], tb.bufptr[1]);
@ -1859,8 +1868,8 @@ load_xref(
if ((w_array = pdfioDictGetArray(trailer.value.dict, "W")) == NULL) if ((w_array = pdfioDictGetArray(trailer.value.dict, "W")) == NULL)
{ {
_pdfioFileError(pdf, "Cross-reference stream does not have required W key."); PDFIO_DEBUG("load_xref: Missing W array in cross-reference objection dictionary.\n");
return (false); goto repair;
} }
w[0] = (size_t)pdfioArrayGetNumber(w_array, 0); w[0] = (size_t)pdfioArrayGetNumber(w_array, 0);
@ -1870,16 +1879,16 @@ load_xref(
w_2 = w[0]; w_2 = w[0];
w_3 = w[0] + w[1]; w_3 = w[0] + w[1];
if (w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer)) if (pdfioArrayGetSize(w_array) > 3 || w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
{ {
_pdfioFileError(pdf, "Cross-reference stream has invalid W key [%u %u %u].", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2]); PDFIO_DEBUG("load_xref: Bad W array in cross-reference objection dictionary.\n");
return (false); goto repair;
} }
if ((st = pdfioObjOpenStream(obj, true)) == NULL) if ((st = pdfioObjOpenStream(obj, true)) == NULL)
{ {
_pdfioFileError(pdf, "Unable to open cross-reference stream."); PDFIO_DEBUG("load_xref: Unable to open cross-reference stream.\n");
return (false); goto repair;
} }
for (index_n = 0; index_n < index_count; index_n += 2) for (index_n = 0; index_n < index_count; index_n += 2)
@ -1981,6 +1990,7 @@ load_xref(
else else
{ {
_pdfioFileError(pdf, "Too many object streams."); _pdfioFileError(pdf, "Too many object streams.");
pdfioStreamClose(st);
return (false); return (false);
} }
} }
@ -1989,7 +1999,10 @@ load_xref(
{ {
// Add this object... // Add this object...
if (!add_obj(pdf, (size_t)number, (unsigned short)generation, (off_t)offset)) if (!add_obj(pdf, (size_t)number, (unsigned short)generation, (off_t)offset))
{
pdfioStreamClose(st);
return (false); return (false);
}
} }
number ++; number ++;
@ -2037,7 +2050,7 @@ load_xref(
// Offset of current line // Offset of current line
PDFIO_DEBUG("load_xref: Reading xref table starting at offset %lu\n", (unsigned long)trailer_offset); PDFIO_DEBUG("load_xref: Reading xref table starting at offset %lu\n", (unsigned long)trailer_offset);
while (_pdfioFileGets(pdf, line, sizeof(line))) while (_pdfioFileGets(pdf, line, sizeof(line), false))
{ {
PDFIO_DEBUG("load_xref: '%s' at offset %lu\n", line, (unsigned long)trailer_offset); PDFIO_DEBUG("load_xref: '%s' at offset %lu\n", line, (unsigned long)trailer_offset);
@ -2062,8 +2075,8 @@ load_xref(
if (sscanf(line, "%jd%jd", &number, &num_objects) != 2) if (sscanf(line, "%jd%jd", &number, &num_objects) != 2)
{ {
_pdfioFileError(pdf, "Malformed xref table section '%s'.", line); PDFIO_DEBUG("load_xref: Unable to scan START COUNT from line.\n");
return (false); goto repair;
} }
// Read this group of objects... // Read this group of objects...
@ -2071,41 +2084,45 @@ load_xref(
{ {
// Read a line from the file and validate it... // Read a line from the file and validate it...
if (_pdfioFileRead(pdf, line, 20) != 20) if (_pdfioFileRead(pdf, line, 20) != 20)
return (false); {
PDFIO_DEBUG("load_xref: Unable to read 20 byte xref record.\n");
goto repair;
}
line[20] = '\0'; line[20] = '\0';
if (strcmp(line + 18, "\r\n") && strcmp(line + 18, " \n") && strcmp(line + 18, " \r")) if (strcmp(line + 18, "\r\n") && strcmp(line + 18, "\r\r") && strcmp(line + 18, " \n") && strcmp(line + 18, " \r"))
{ {
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); PDFIO_DEBUG("load_xref: Bad end-of-line <%02X%02X>\n", line[18], line[19]);
return (false); goto repair;
} }
line[18] = '\0'; line[18] = '\0';
// Parse the line // Parse the line
if ((offset = strtoimax(line, &ptr, 10)) < 0) if ((offset = strtoimax(line, &ptr, 10)) < 0)
{ {
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); PDFIO_DEBUG("load_xref: Unable to scan offset.\n");
return (false); goto repair;
} }
if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || (generation > 65535 && offset != 0)) if ((generation = (int)strtol(ptr, &ptr, 10)) < 0 || (generation > 65535 && offset != 0))
{ {
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); PDFIO_DEBUG("load_xref: Unable to scan generation (%u).\n", (unsigned)generation);
return (false); goto repair;
} }
if (*ptr != ' ') if (*ptr != ' ')
{ {
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); PDFIO_DEBUG("load_xref: Missing space before type.\n");
return (false); goto repair;
} }
ptr ++; ptr ++;
if (*ptr != 'f' && *ptr != 'n') if (*ptr != 'f' && *ptr != 'n')
{ {
_pdfioFileError(pdf, "Malformed xref table entry '%s'.", line); PDFIO_DEBUG("load_xref: Bad type '%c'.\n", *ptr);
return (false); goto repair;
} }
if (*ptr == 'f') if (*ptr == 'f')
@ -2124,21 +2141,21 @@ load_xref(
if (strncmp(line, "trailer", 7)) if (strncmp(line, "trailer", 7))
{ {
_pdfioFileError(pdf, "Missing trailer."); PDFIO_DEBUG("load_xref: No trailer after xref table.\n");
return (false); goto repair;
} }
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf); _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0)) if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0))
{ {
_pdfioFileError(pdf, "Unable to read trailer dictionary."); PDFIO_DEBUG("load_xref: Unable to read trailer dictionary.\n");
return (false); goto repair;
} }
else if (trailer.type != PDFIO_VALTYPE_DICT) else if (trailer.type != PDFIO_VALTYPE_DICT)
{ {
_pdfioFileError(pdf, "Trailer is not a dictionary."); PDFIO_DEBUG("load_xref: Trailer not a dictionary (type=%d).\n", trailer.type);
return (false); goto repair;
} }
PDFIO_DEBUG("load_xref: Got trailer dict.\n"); PDFIO_DEBUG("load_xref: Got trailer dict.\n");
@ -2160,8 +2177,7 @@ load_xref(
} }
else else
{ {
_pdfioFileError(pdf, "Bad xref table header '%s'.", line); goto repair;
return (false);
} }
PDFIO_DEBUG("load_xref: Contents of trailer dictionary:\n"); PDFIO_DEBUG("load_xref: Contents of trailer dictionary:\n");
@ -2190,13 +2206,31 @@ load_xref(
if ((pdf->root_obj = pdfioDictGetObj(pdf->trailer_dict, "Root")) == NULL) if ((pdf->root_obj = pdfioDictGetObj(pdf->trailer_dict, "Root")) == NULL)
{ {
_pdfioFileError(pdf, "Missing Root object."); PDFIO_DEBUG("load_xref: Missing Root object.\n");
return (false); goto repair;
} }
PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number); PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);
return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages"), 0)); if ((pages_obj = pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages")) == NULL)
{
PDFIO_DEBUG("load_xref: Missing Pages object.\n");
goto repair;
}
PDFIO_DEBUG("load_xref: Pages=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);
return (load_pages(pdf, pages_obj, 0));
// If we get here the cross-reference table is busted - try repairing if the
// error callback says to proceed...
repair:
if (_pdfioFileError(pdf, "WARNING: Cross-reference is damaged, will attempt to rebuild."))
return (repair_xref(pdf, password_cb, password_data));
else
return (false);
} }
@ -2210,7 +2244,7 @@ repair_xref(
pdfio_password_cb_t password_cb, // I - Password callback or `NULL` for none pdfio_password_cb_t password_cb, // I - Password callback or `NULL` for none
void *password_data) // I - Password callback data, if any void *password_data) // I - Password callback data, if any
{ {
char line[16384], // Line from file char line[1024], // Line from file
*ptr; // Pointer into line *ptr; // Pointer into line
off_t line_offset; // Offset in file off_t line_offset; // Offset in file
intmax_t number; // Object number intmax_t number; // Object number
@ -2218,16 +2252,21 @@ repair_xref(
size_t i; // Looping var size_t i; // Looping var
size_t num_sobjs = 0; // Number of object streams size_t num_sobjs = 0; // Number of object streams
pdfio_obj_t *sobjs[16384]; // Object streams to load pdfio_obj_t *sobjs[16384]; // Object streams to load
pdfio_obj_t *pages_obj; // Pages object
// Let caller know something is wrong... // Clear trailer data...
_pdfioFileError(pdf, "WARNING: Cross-reference table is damaged, attempting to rebuild."); pdf->trailer_dict = NULL;
pdf->root_obj = NULL;
pdf->info_obj = NULL;
pdf->pages_obj = NULL;
pdf->encrypt_obj = NULL;
// Read from the beginning of the file, looking for // Read from the beginning of the file, looking for objects...
if ((line_offset = _pdfioFileSeek(pdf, 0, SEEK_SET)) < 0) if ((line_offset = _pdfioFileSeek(pdf, 0, SEEK_SET)) < 0)
return (false); return (false);
while (_pdfioFileGets(pdf, line, sizeof(line))) while (_pdfioFileGets(pdf, line, sizeof(line), true))
{ {
// See if this is the start of an object... // See if this is the start of an object...
if (line[0] >= '1' && line[0] <= '9') if (line[0] >= '1' && line[0] <= '9')
@ -2244,43 +2283,61 @@ repair_xref(
pdfio_obj_t *obj; // Object pdfio_obj_t *obj; // Object
_pdfio_token_t tb; // Token buffer/stack _pdfio_token_t tb; // Token buffer/stack
PDFIO_DEBUG("OBJECT %ld %d at offset %ld\n", (long)number, generation, (long)line_offset); PDFIO_DEBUG("repair_xref: OBJECT %ld %d at offset %ld\n", (long)number, generation, (long)line_offset);
if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, line_offset)) == NULL) if ((obj = pdfioFileFindObj(pdf, (size_t)number)) != NULL)
{
obj->offset = line_offset;
}
else if ((obj = add_obj(pdf, (size_t)number, (unsigned short)generation, line_offset)) == NULL)
{ {
_pdfioFileError(pdf, "Unable to allocate memory for object."); _pdfioFileError(pdf, "Unable to allocate memory for object.");
return (false); return (false);
} }
if (ptr[3])
{
// Probably the start of the object dictionary, rewind the file so
// we can read it...
_pdfioFileSeek(pdf, line_offset + (ptr - line + 3), SEEK_SET);
}
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf); _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, obj, &tb, &obj->value, 0)) if (!_pdfioValueRead(pdf, obj, &tb, &obj->value, 0))
{ {
_pdfioFileError(pdf, "Unable to read cross-reference stream dictionary."); if (!_pdfioFileError(pdf, "WARNING: Unable to read object dictionary/value."))
return (false); return (false);
else
continue;
} }
if (_pdfioTokenGet(&tb, line, sizeof(line)) && strcmp(line, "stream")) if (_pdfioTokenGet(&tb, line, sizeof(line)))
{ {
const char *type = pdfioObjGetType(obj); const char *type = pdfioObjGetType(obj);
// Object type // Object type
_pdfioTokenFlush(&tb); _pdfioTokenFlush(&tb);
obj->stream_offset = _pdfioFileTell(pdf);
if (type && !strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0]))) if (type && !strcmp(line, "stream"))
{ {
sobjs[num_sobjs] = obj; // Possible object or XRef stream...
num_sobjs ++; obj->stream_offset = _pdfioFileTell(pdf);
}
if (type && !strcmp(type, "XRef") && !pdf->trailer_dict) if (!strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
{ {
// Save the trailer dictionary... sobjs[num_sobjs] = obj;
pdf->trailer_dict = pdfioObjGetDict(obj); num_sobjs ++;
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt"); }
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
} if (!strcmp(type, "XRef") && !pdf->trailer_dict)
{
// Save the trailer dictionary...
pdf->trailer_dict = pdfioObjGetDict(obj);
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
}
}
} }
} }
} }
@ -2298,7 +2355,7 @@ repair_xref(
_pdfioFileSeek(pdf, line_offset + 7, SEEK_SET); _pdfioFileSeek(pdf, line_offset + 7, SEEK_SET);
} }
PDFIO_DEBUG("TRAILER at offset %ld\n", (long)line_offset); PDFIO_DEBUG("repair_xref: TRAILER at offset %ld\n", (long)line_offset);
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf); _pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0)) if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0))
@ -2314,7 +2371,7 @@ repair_xref(
_pdfioTokenFlush(&tb); _pdfioTokenFlush(&tb);
if (!pdf->trailer_dict) if (_pdfioDictGetValue(trailer.value.dict, "Root"))
{ {
// Save the trailer dictionary and grab the root (catalog) and info // Save the trailer dictionary and grab the root (catalog) and info
// objects... // objects...
@ -2351,8 +2408,16 @@ repair_xref(
PDFIO_DEBUG("repair_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number); PDFIO_DEBUG("repair_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);
if ((pages_obj = pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages")) == NULL)
{
_pdfioFileError(pdf, "Missing Pages object.");
return (false);
}
PDFIO_DEBUG("repair_xref: Pages=%p(%lu)\n", pages_obj, (unsigned long)pages_obj->number);
// Load pages... // Load pages...
return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages"), 0)); return (load_pages(pdf, pages_obj, 0));
} }

View File

@ -141,6 +141,7 @@ pdfioObjCreateStream(
pdfio_obj_t *obj, // I - Object pdfio_obj_t *obj, // I - Object
pdfio_filter_t filter) // I - Type of compression to apply pdfio_filter_t filter) // I - Type of compression to apply
{ {
pdfio_stream_t *st; // Stream
pdfio_obj_t *length_obj = NULL; // Length object, if any pdfio_obj_t *length_obj = NULL; // Length object, if any
@ -194,11 +195,13 @@ pdfioObjCreateStream(
if (!_pdfioFilePuts(obj->pdf, "stream\n")) if (!_pdfioFilePuts(obj->pdf, "stream\n"))
return (NULL); return (NULL);
obj->stream_offset = _pdfioFileTell(obj->pdf); obj->stream_offset = _pdfioFileTell(obj->pdf);
obj->pdf->current_obj = obj;
// Return the new stream... // Return the new stream...
return (_pdfioStreamCreate(obj, length_obj, 0, filter)); if ((st = _pdfioStreamCreate(obj, length_obj, 0, filter)) != NULL)
obj->pdf->current_obj = obj;
return (st);
} }
@ -532,6 +535,9 @@ pdfio_stream_t * // O - Stream or `NULL` on error
pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
bool decode) // I - Decode/decompress data? bool decode) // I - Decode/decompress data?
{ {
pdfio_stream_t *st; // Stream
// Range check input... // Range check input...
if (!obj) if (!obj)
return (NULL); return (NULL);
@ -554,9 +560,10 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
return (NULL); return (NULL);
// Open the stream... // Open the stream...
obj->pdf->current_obj = obj; if ((st = _pdfioStreamOpen(obj, decode)) != NULL)
obj->pdf->current_obj = obj;
return (_pdfioStreamOpen(obj, decode)); return (st);
} }

View File

@ -384,7 +384,7 @@ extern bool _pdfioFileError(pdfio_file_t *pdf, const char *format, ...) _PDFIO_
extern pdfio_obj_t *_pdfioFileFindMappedObj(pdfio_file_t *pdf, pdfio_file_t *src_pdf, size_t src_number) _PDFIO_INTERNAL; extern pdfio_obj_t *_pdfioFileFindMappedObj(pdfio_file_t *pdf, pdfio_file_t *src_pdf, size_t src_number) _PDFIO_INTERNAL;
extern bool _pdfioFileFlush(pdfio_file_t *pdf) _PDFIO_INTERNAL; extern bool _pdfioFileFlush(pdfio_file_t *pdf) _PDFIO_INTERNAL;
extern int _pdfioFileGetChar(pdfio_file_t *pdf) _PDFIO_INTERNAL; extern int _pdfioFileGetChar(pdfio_file_t *pdf) _PDFIO_INTERNAL;
extern bool _pdfioFileGets(pdfio_file_t *pdf, char *buffer, size_t bufsize) _PDFIO_INTERNAL; extern bool _pdfioFileGets(pdfio_file_t *pdf, char *buffer, size_t bufsize, bool discard) _PDFIO_INTERNAL;
extern ssize_t _pdfioFilePeek(pdfio_file_t *pdf, void *buffer, size_t bytes) _PDFIO_INTERNAL; extern ssize_t _pdfioFilePeek(pdfio_file_t *pdf, void *buffer, size_t bytes) _PDFIO_INTERNAL;
extern bool _pdfioFilePrintf(pdfio_file_t *pdf, const char *format, ...) _PDFIO_INTERNAL; extern bool _pdfioFilePrintf(pdfio_file_t *pdf, const char *format, ...) _PDFIO_INTERNAL;
extern bool _pdfioFilePuts(pdfio_file_t *pdf, const char *s) _PDFIO_INTERNAL; extern bool _pdfioFilePuts(pdfio_file_t *pdf, const char *s) _PDFIO_INTERNAL;

View File

@ -259,7 +259,7 @@ _pdfioStreamCreate(
{ {
colors = 1; colors = 1;
} }
else if (colors < 0 || colors > 4) else if (colors < 0 || colors > 32)
{ {
_pdfioFileError(st->pdf, "Unsupported Colors value %d.", colors); _pdfioFileError(st->pdf, "Unsupported Colors value %d.", colors);
free(st); free(st);
@ -532,7 +532,7 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
{ {
colors = 1; colors = 1;
} }
else if (colors < 0 || colors > 4) else if (colors < 0 || colors > 32)
{ {
_pdfioFileError(st->pdf, "Unsupported Colors value %d.", colors); _pdfioFileError(st->pdf, "Unsupported Colors value %d.", colors);
goto error; goto error;