From f4aa951165e521321b67fdb020d7bcb7591f193f Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Tue, 12 Dec 2023 12:24:49 -0500 Subject: [PATCH] Fix _pdfioFileSeek with whence==SEEK_CUR Fix seek offset after trailer. Look at the last 1k of the file to find the startxref marker. --- pdfio-common.c | 8 ++++---- pdfio-file.c | 43 ++++++++++++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/pdfio-common.c b/pdfio-common.c index 0ee3b53..733aadc 100644 --- a/pdfio-common.c +++ b/pdfio-common.c @@ -141,7 +141,7 @@ _pdfioFileGets(pdfio_file_t *pdf, // I - PDF file *bufend = buffer + bufsize - 1; // Pointer to end of buffer - PDFIO_DEBUG("_pdfioFileGets(pdf=%p, buffer=%p, bufsize=%lu) bufpos=%ld, buffer=%p, bufptr=%p, bufend=%p\n", pdf, buffer, (unsigned long)bufsize, (long)pdf->bufpos, pdf->buffer, pdf->bufptr, pdf->bufend); + PDFIO_DEBUG("_pdfioFileGets(pdf=%p, buffer=%p, bufsize=%lu) bufpos=%ld, buffer=%p, bufptr=%p, bufend=%p, offset=%lu\n", pdf, buffer, (unsigned long)bufsize, (long)pdf->bufpos, pdf->buffer, pdf->bufptr, pdf->bufend, (unsigned long)(pdf->bufpos + (pdf->bufptr - pdf->buffer))); while (!eol) { @@ -356,12 +356,12 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file off_t offset, // I - Offset int whence) // I - Offset base { - PDFIO_DEBUG("_pdfioFileSeek(pdf=%p, offset=%ld, whence=%d)\n", pdf, (long)offset, whence); + PDFIO_DEBUG("_pdfioFileSeek(pdf=%p, offset=%ld, whence=%d) pdf->bufpos=%lu\n", pdf, (long)offset, whence, (unsigned long)(pdf ? pdf->bufpos : 0)); // Adjust offset for relative seeks... if (whence == SEEK_CUR) { - offset += pdf->bufpos; + offset += pdf->bufpos + (pdf->bufptr - pdf->buffer); whence = SEEK_SET; } @@ -404,7 +404,7 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file return (-1); } - PDFIO_DEBUG("_pdfioFileSeek: Reset bufpos=%ld.\n", (long)pdf->bufpos); + PDFIO_DEBUG("_pdfioFileSeek: Reset bufpos=%ld, offset=%lu.\n", (long)pdf->bufpos, (unsigned long)offset); PDFIO_DEBUG("_pdfioFileSeek: buffer=%p, bufptr=%p, bufend=%p\n", pdf->buffer, pdf->bufptr, pdf->bufend); pdf->bufpos = offset; diff --git a/pdfio-file.c b/pdfio-file.c index 53eee57..486b1ba 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -1154,8 +1154,10 @@ pdfioFileOpen( void *error_data) // I - Error callback data, if any { pdfio_file_t *pdf; // PDF file - char line[1024], // Line from file - *ptr; // Pointer into line + char line[1025], // Line from file + *ptr, // Pointer into line + *end; // End of line + ssize_t bytes; // Bytes read off_t xref_offset; // Offset to xref table @@ -1210,21 +1212,29 @@ pdfioFileOpen( // Copy the version number... pdf->version = strdup(line + 5); - // Grab the last 32 characters of the file to find the start of the xref table... - if (_pdfioFileSeek(pdf, -32, SEEK_END) < 0) + // Grab the last 1k of the file to find the start of the xref table... + if (_pdfioFileSeek(pdf, -1024, SEEK_END) < 0) { _pdfioFileError(pdf, "Unable to read startxref data."); goto error; } - if (_pdfioFileRead(pdf, line, 32) < 32) + if ((bytes = _pdfioFileRead(pdf, line, sizeof(line) - 1)) < 1) { _pdfioFileError(pdf, "Unable to read startxref data."); goto error; } - line[32] = '\0'; - if ((ptr = strstr(line, "startxref")) == NULL) + line[bytes] = '\0'; + end = line + bytes - 9; + + for (ptr = line; ptr < end; ptr ++) + { + if (!memcmp(ptr, "startxref", 9)) + break; + } + + if (ptr >= end) { _pdfioFileError(pdf, "Unable to find start of xref table."); goto error; @@ -1407,7 +1417,7 @@ add_obj(pdfio_file_t *pdf, // I - PDF file obj->generation = generation; obj->offset = offset; - PDFIO_DEBUG("add_obj: obj=%p, ->pdf=%p, ->number=%lu\n", obj, pdf, (unsigned long)obj->number); + PDFIO_DEBUG("add_obj: obj=%p, ->pdf=%p, ->number=%lu, ->offset=%lu\n", obj, pdf, (unsigned long)obj->number, (unsigned long)offset); // Re-sort object array as needed... if (pdf->num_objs > 1 && pdf->objs[pdf->num_objs - 2]->number > number) @@ -1976,22 +1986,31 @@ load_xref( else if (!strncmp(line, "xref", 4) && (!line[4] || isspace(line[4] & 255))) { // Read the xref tables + off_t trailer_offset = _pdfioFileTell(pdf); + // Offset of current line + + PDFIO_DEBUG("load_xref: Reading xref table starting at offset %lu\n", (unsigned long)trailer_offset); while (_pdfioFileGets(pdf, line, sizeof(line))) { + PDFIO_DEBUG("load_xref: '%s' at offset %lu\n", line, (unsigned long)trailer_offset); + if (!strncmp(line, "trailer", 7) && (!line[7] || isspace(line[7] & 255))) { if (line[7]) { // Probably the start of the trailer dictionary, rewind the file so // we can read it... - _pdfioFileSeek(pdf, 7 - strlen(line), SEEK_CUR); + _pdfioFileSeek(pdf, trailer_offset + 7, SEEK_SET); } break; } - else if (!line[0]) + else { - continue; + trailer_offset = _pdfioFileTell(pdf); + + if (!line[0]) + continue; } if (sscanf(line, "%jd%jd", &number, &num_objects) != 2) @@ -2052,6 +2071,8 @@ load_xref( if (!add_obj(pdf, (size_t)number, (unsigned short)generation, offset)) return (false); } + + trailer_offset = _pdfioFileTell(pdf); } if (strncmp(line, "trailer", 7))