Backport more changes from master.

This commit is contained in:
Michael R Sweet 2025-04-24 12:29:53 -04:00
parent 8f706b9fe7
commit e6588d3960
No known key found for this signature in database
GPG Key ID: BE67C75EC81F3244
11 changed files with 175 additions and 30 deletions

View File

@ -343,8 +343,8 @@ password_cb(void *data, const char *filename)
```
The error callback is called for both errors and warnings and accepts the
`pdfio_file_t` pointer, a message string, and the callback pointer value, for
example:
`pdfio_file_t` pointer, a message string, and the callback pointer value. It
returns `true` to continue processing the file or `false` to stop, for example:
```c
bool
@ -354,13 +354,17 @@ error_cb(pdfio_file_t *pdf, const char *message, void *data)
fprintf(stderr, "%s: %s\n", pdfioFileGetName(pdf), message);
// Return false to treat warnings as errors
return (false);
// Return true for warning messages (continue) and false for errors (stop)
return (!strncmp(message, "WARNING:", 8));
}
```
The default error callback (`NULL`) does the equivalent of the above.
> Note: Many errors are unrecoverable; for those errors PDFio ignores the return
> value from the error callback and always stops processing the PDF file.
> Warning messages start with the prefix "WARNING:" while errors have no prefix.
Each PDF file contains one or more pages. The [`pdfioFileGetNumPages`](@@)
function returns the number of pages in the file while the
[`pdfioFileGetPage`](@@) function gets the specified page in the PDF file:

View File

@ -640,6 +640,8 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
//
// 'pdfioArrayRemove()' - Remove an array entry.
//
// @since PDFio v1.4@
//
bool // O - `true` on success, `false` otherwise
pdfioArrayRemove(pdfio_array_t *a, // I - Array

View File

@ -47,7 +47,7 @@ _pdfioFileConsume(pdfio_file_t *pdf, // I - PDF file
// `false` to halt.
//
bool // O - `false` to stop
bool // O - `false` to stop, `true` to continue
_pdfioFileDefaultError(
pdfio_file_t *pdf, // I - PDF file
const char *message, // I - Error message
@ -57,7 +57,7 @@ _pdfioFileDefaultError(
fprintf(stderr, "%s: %s\n", pdf->filename, message);
return (false);
return (!strncmp(message, "WARNING:", 8));
}
@ -427,7 +427,7 @@ off_t // O - Offset from beginning of file
_pdfioFileTell(pdfio_file_t *pdf) // I - PDF file
{
if (pdf->bufptr)
return (pdf->bufpos + (pdf->bufptr - pdf->buffer));
return (pdf->bufpos + (off_t)(pdf->bufptr - pdf->buffer));
else
return (pdf->bufpos);
}
@ -455,7 +455,7 @@ _pdfioFileWrite(pdfio_file_t *pdf, // I - PDF file
if (!write_buffer(pdf, buffer, bytes))
return (false);
pdf->bufpos += bytes;
pdf->bufpos += (off_t)bytes;
return (true);
}
@ -481,7 +481,7 @@ fill_buffer(pdfio_file_t *pdf) // I - PDF file
// Advance current position in file as needed...
if (pdf->bufend)
pdf->bufpos += pdf->bufend - pdf->buffer;
pdf->bufpos += (off_t)(pdf->bufend - pdf->buffer);
// Try reading from the file...
if ((bytes = read_buffer(pdf, pdf->buffer, sizeof(pdf->buffer))) <= 0)

View File

@ -632,6 +632,8 @@ pdfioContentPathCurve23(
//
// 'pdfioContentPathEnd()' - Clear the current path.
//
// @since PDFio v1.1@
//
bool // O - `true` on success, `false` on failure
pdfioContentPathEnd(pdfio_stream_t *st) // I - Stream
@ -1111,6 +1113,8 @@ pdfioContentTextEnd(pdfio_stream_t *st) // I - Stream
// on "size". The text string must always use the UTF-8 (Unicode) encoding but
// any control characters (such as newlines) are ignored.
//
// @since PDFio v1.2@
//
double // O - Width
pdfioContentTextMeasure(
@ -1267,6 +1271,8 @@ pdfioContentTextMoveTo(
//
// 'pdfioContentTextNewLine()' - Move to the next line.
//
// @since PDFio v1.2@
//
bool // O - `true` on success, `false` on failure
pdfioContentTextNewLine(
@ -1298,6 +1304,8 @@ pdfioContentTextNextLine(
// specifies that the current font maps to full Unicode. The "s" argument
// specifies a UTF-8 encoded string.
//
// @since PDFio v1.2@
//
bool // O - `true` on success, `false` on failure
pdfioContentTextNewLineShow(
@ -1346,6 +1354,8 @@ pdfioContentTextNewLineShow(
// argument specifies that the current font maps to full Unicode. The "format"
// argument specifies a UTF-8 encoded `printf`-style format string.
//
// @since PDFio v1.2@
//
bool // O - `true` on success, `false` on failure
pdfioContentTextNewLineShowf(

View File

@ -20,6 +20,8 @@ static int compare_pairs(_pdfio_pair_t *a, _pdfio_pair_t *b);
//
// 'pdfioDictClear()' - Remove a key/value pair from a dictionary.
//
// @since PDFio v1.4@
//
bool // O - `true` if cleared, `false` otherwise
pdfioDictClear(pdfio_dict_t *dict, // I - Dictionary
@ -345,6 +347,8 @@ pdfioDictGetDict(pdfio_dict_t *dict, // I - Dictionary
//
// 'pdfioDictGetKey()' - Get the key for the specified pair.
//
// @since PDFio v1.4@
//
const char * // O - Key for specified pair
pdfioDictGetKey(pdfio_dict_t *dict, // I - Dictionary
@ -375,6 +379,8 @@ pdfioDictGetName(pdfio_dict_t *dict, // I - Dictionary
//
// 'pdfioDictGetNumPairs()' - Get the number of key/value pairs in a dictionary.
//
// @since PDFio v1.4@
//
size_t // O - Number of pairs
pdfioDictGetNumPairs(pdfio_dict_t *dict)// I - Dictionary
@ -562,6 +568,8 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary
// The iteration continues as long as the callback returns `true` or all keys
// have been iterated.
//
// @since PDFio v1.1@
//
void
pdfioDictIterateKeys(
@ -635,9 +643,11 @@ _pdfioDictRead(pdfio_file_t *pdf, // I - PDF file
{
// Issue 118: Discard duplicate key/value pairs and report the problem as a
// warning message...
_pdfioFileError(pdf, "WARNING: Discarding value for duplicate dictionary key '%s'.", key + 1);
_pdfioValueDelete(&value);
continue;
if (_pdfioFileError(pdf, "WARNING: Discarding value for duplicate dictionary key '%s'.", key + 1))
continue;
else
break;
}
else if (!_pdfioDictSetValue(dict, pdfioStringCreate(pdf, key + 1), &value))
break;

View File

@ -181,8 +181,9 @@ pdfioFileClose(pdfio_file_t *pdf) // I - PDF file
// of 8.27x11in (the intersection of US Letter and ISO A4) is used.
//
// The "error_cb" and "error_cbdata" arguments specify an error handler callback
// and its data pointer - if `NULL` the default error handler is used that
// writes error messages to `stderr`.
// and its data pointer - if `NULL` then the default error handler is used that
// writes error messages to `stderr`. The error handler callback should return
// `true` to continue writing the PDF file or `false` to stop.
//
pdfio_file_t * // O - PDF file or `NULL` on error
@ -260,6 +261,8 @@ pdfioFileCreateArrayObj(
// This function creates a new object with a name value in a PDF file.
// You must call @link pdfioObjClose@ to write the object to the file.
//
// @since PDFio v1.4@
//
pdfio_obj_t * // O - New object
pdfioFileCreateNameObj(
@ -289,6 +292,8 @@ pdfioFileCreateNameObj(
// This function creates a new object with a number value in a PDF file.
// You must call @link pdfioObjClose@ to write the object to the file.
//
// @since PDFio v1.2@
//
pdfio_obj_t * // O - New object
pdfioFileCreateNumberObj(
@ -415,8 +420,9 @@ _pdfioFileCreateObj(
// of 8.27x11in (the intersection of US Letter and ISO A4) is used.
//
// The "error_cb" and "error_cbdata" arguments specify an error handler callback
// and its data pointer - if `NULL` the default error handler is used that
// writes error messages to `stderr`.
// and its data pointer - if `NULL` then the default error handler is used that
// writes error messages to `stderr`. The error handler callback should return
// `true` to continue writing the PDF file or `false` to stop.
//
// > *Note*: Files created using this API are slightly larger than those
// > created using the @link pdfioFileCreate@ function since stream lengths are
@ -518,6 +524,8 @@ pdfioFileCreatePage(pdfio_file_t *pdf, // I - PDF file
// This function creates a new object with a string value in a PDF file.
// You must call @link pdfioObjClose@ to write the object to the file.
//
// @since PDFio v1.2@
//
pdfio_obj_t * // O - New object
pdfioFileCreateStringObj(
@ -987,8 +995,14 @@ pdfioFileGetVersion(
// PDF file requires a password, the open will always fail.
//
// The "error_cb" and "error_cbdata" arguments specify an error handler callback
// and its data pointer - if `NULL` the default error handler is used that
// writes error messages to `stderr`.
// and its data pointer - if `NULL` then the default error handler is used that
// writes error messages to `stderr`. The error handler callback should return
// `true` to continue reading the PDF file or `false` to stop.
//
// > Note: Error messages starting with "WARNING:" are actually warning
// > messages - the callback should normally return `true` to allow PDFio to
// > try to resolve the issue. In addition, some errors are unrecoverable, in
// > which case PDFio ignores the return value of the error callback.
//
pdfio_file_t * // O - PDF file
@ -1685,7 +1699,10 @@ load_pages(pdfio_file_t *pdf, // I - PDF file
}
if ((type = pdfioDictGetName(dict, "Type")) == NULL || (strcmp(type, "Pages") && strcmp(type, "Page")))
return (false);
{
if (!_pdfioFileError(pdf, "WARNING: No Type value for pages object."))
return (false);
}
// If there is a Kids array, then this is a parent node and we have to look
// at the child objects...
@ -1879,6 +1896,8 @@ load_xref(
w_2 = w[0];
w_3 = w[0] + w[1];
PDFIO_DEBUG("W=[%u %u %u], w_total=%u\n", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2], (unsigned)w_total);
if (pdfioArrayGetSize(w_array) > 3 || w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
{
PDFIO_DEBUG("load_xref: Bad W array in cross-reference objection dictionary.\n");
@ -1908,7 +1927,20 @@ load_xref(
{
count --;
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]);
#ifdef DEBUG
if (w_total > 5)
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X%02X...\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]);
else if (w_total == 5)
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]);
else if (w_total == 4)
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3]);
else if (w_total == 3)
PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2]);
else if (w_total == 2)
PDFIO_DEBUG("load_xref: number=%u %02X%02X\n", (unsigned)number, buffer[0], buffer[1]);
else
PDFIO_DEBUG("load_xref: number=%u %02X\n", (unsigned)number, buffer[0]);
#endif // DEBUG
// Check whether this is an object definition...
if (w[0] > 0)
@ -2252,6 +2284,7 @@ repair_xref(
size_t i; // Looping var
size_t num_sobjs = 0; // Number of object streams
pdfio_obj_t *sobjs[16384]; // Object streams to load
pdfio_dict_t *backup_trailer = NULL; // Backup trailer dictionary
pdfio_obj_t *pages_obj; // Pages object
@ -2319,13 +2352,14 @@ repair_xref(
_pdfioTokenFlush(&tb);
if (type && !strcmp(line, "stream"))
{
// Possible object or XRef stream...
if (type && !strcmp(line, "stream"))
{
// Possible object or XRef stream...
obj->stream_offset = _pdfioFileTell(pdf);
if (!strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
{
PDFIO_DEBUG("repair_xref: Object stream...\n");
sobjs[num_sobjs] = obj;
num_sobjs ++;
}
@ -2333,11 +2367,24 @@ repair_xref(
if (!strcmp(type, "XRef") && !pdf->trailer_dict)
{
// Save the trailer dictionary...
PDFIO_DEBUG("repair_xref: XRef stream...\n");
pdf->trailer_dict = pdfioObjGetDict(obj);
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
}
}
else if (type && !strcmp(line, "endobj"))
{
// Possible catalog or pages object...
if (!strcmp(type, "Catalog"))
{
PDFIO_DEBUG("repair_xref: Catalog (root) object...\n");
if (!backup_trailer)
backup_trailer = pdfioDictCreate(pdf);
pdfioDictSetObj(backup_trailer, "Root", obj);
}
}
}
}
}
@ -2348,6 +2395,8 @@ repair_xref(
_pdfio_token_t tb; // Token buffer/stack
_pdfio_value_t trailer; // Trailer
PDFIO_DEBUG("repair_xref: line=\"%s\"\n", line);
if (line[7])
{
// Probably the start of the trailer dictionary, rewind the file so
@ -2375,6 +2424,8 @@ repair_xref(
{
// Save the trailer dictionary and grab the root (catalog) and info
// objects...
PDFIO_DEBUG("repair_xref: Using this trailer dictionary.\n");
pdf->trailer_dict = trailer.value.dict;
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
@ -2385,11 +2436,18 @@ repair_xref(
line_offset = _pdfioFileTell(pdf);
}
PDFIO_DEBUG("repair_xref: Stopped at line_offset=%lu\n", (unsigned long)line_offset);
if (!pdf->trailer_dict && backup_trailer)
pdf->trailer_dict = backup_trailer;
// If the trailer contains an Encrypt key, try unlocking the file...
if (pdf->encrypt_obj && !_pdfioCryptoUnlock(pdf, password_cb, password_data))
return (false);
// Load any stream objects...
PDFIO_DEBUG("repair_xref: Found %lu stream objects.\n", (unsigned long)num_sobjs);
for (i = 0; i < num_sobjs; i ++)
{
if (!load_obj_stream(sobjs[i]))
@ -2567,12 +2625,16 @@ write_trailer(pdfio_file_t *pdf) // I - PDF file
buffer[2] = (obj->offset >> 8) & 255;
buffer[3] = obj->offset & 255;
break;
#ifdef _WIN32
default :
#endif // _WIN32
case 4 :
buffer[1] = (obj->offset >> 24) & 255;
buffer[2] = (obj->offset >> 16) & 255;
buffer[3] = (obj->offset >> 8) & 255;
buffer[4] = obj->offset & 255;
break;
#ifndef _WIN32 // Windows off_t is 32-bits?!?
case 5 :
buffer[1] = (obj->offset >> 32) & 255;
buffer[2] = (obj->offset >> 24) & 255;
@ -2607,6 +2669,7 @@ write_trailer(pdfio_file_t *pdf) // I - PDF file
buffer[7] = (obj->offset >> 8) & 255;
buffer[8] = obj->offset & 255;
break;
#endif // !_WIN32
}
if (!pdfioStreamWrite(xref_st, buffer, offsize + 2))

View File

@ -333,6 +333,8 @@ pdfioObjGetLength(pdfio_obj_t *obj) // I - Object
//
// 'pdfioObjGetName()' - Get the name value associated with an object.
//
// @since PDFio v1.4@
//
const char * // O - Name value or `NULL` on error
pdfioObjGetName(pdfio_obj_t *obj) // I - Object

View File

@ -10,7 +10,7 @@
#ifndef PDFIO_PRIVATE_H
# define PDFIO_PRIVATE_H
# ifdef _WIN32
# define _CRT_SECURE_NO_WARNINGS // Disable bogus VS warnings/errors...
# define _CRT_SECURE_NO_WARNINGS 1 // Disable bogus VS warnings/errors...
# endif // _WIN32
# include "pdfio.h"
# include <stdarg.h>
@ -28,16 +28,16 @@
# define access _access // Map standard POSIX/C99 names
# define close _close
# define fileno _fileno
# define lseek _lseek
# define lseek(f,o,w) (off_t)_lseek((f),(long)(o),(w))
# define mkdir(d,p) _mkdir(d)
# define open _open
# define read _read
# define read(f,b,s) _read((f),(b),(unsigned)(s))
# define rmdir _rmdir
# define snprintf _snprintf
# define strdup _strdup
# define unlink _unlink
# define vsnprintf _vsnprintf
# define write _write
# define write(f,b,s) _write((f),(b),(unsigned)(s))
# ifndef F_OK
# define F_OK 00 // POSIX parameters/flags
# define W_OK 02

View File

@ -697,8 +697,9 @@ pdfioStreamPeek(pdfio_stream_t *st, // I - Stream
// 'pdfioStreamPrintf()' - Write a formatted string to a stream.
//
// This function writes a formatted string to a stream. In addition to the
// standard `printf` format characters, you can use "%N" to format a PDF name
// value ("/Name") and "%S" to format a PDF string ("(String)") value.
// standard `printf` format characters, you can use "%H" to format an HTML/XML
// string value, "%N" to format a PDF name value ("/Name"), and "%S" to format
// a PDF string ("(String)") value.
//
bool // O - `true` on success, `false` on failure

View File

@ -486,6 +486,59 @@ _pdfio_vsnprintf(pdfio_file_t *pdf, // I - PDF file
}
break;
case 'H' : // XML/HTML string
if ((s = va_arg(ap, char *)) == NULL)
s = "(null)";
// Loop through the literal string...
while (*s)
{
// Escape special characters
if (*s == '&')
{
// &amp;
if (bufptr < bufend)
{
_pdfio_strlcpy(bufptr, "&amp;", (size_t)(bufend - bufptr + 1));
bufptr += strlen(bufptr);
}
bytes += 5;
}
else if (*s == '<')
{
// &lt;
if (bufptr < bufend)
{
_pdfio_strlcpy(bufptr, "&lt;", (size_t)(bufend - bufptr + 1));
bufptr += strlen(bufptr);
}
bytes += 4;
}
else if (*s == '>')
{
// &gt;
if (bufptr < bufend)
{
_pdfio_strlcpy(bufptr, "&gt;", (size_t)(bufend - bufptr + 1));
bufptr += strlen(bufptr);
}
bytes += 4;
}
else
{
// Literal character...
if (bufptr < bufend)
*bufptr++ = *s;
bytes ++;
}
s ++;
}
break;
case 'S' : // PDF string
if ((s = va_arg(ap, char *)) == NULL)
s = "(null)";

View File

@ -46,7 +46,7 @@ extern "C" {
//
# if _WIN32
typedef __int64 ssize_t; // POSIX type not present on Windows... @private@
typedef __int64 ssize_t; // POSIX type not present on Windows @private@
# endif // _WIN32
typedef struct _pdfio_array_s pdfio_array_t;
@ -62,7 +62,7 @@ typedef bool (*pdfio_error_cb_t)(pdfio_file_t *pdf, const char *message, void *d
typedef enum pdfio_encryption_e // PDF encryption modes
{
PDFIO_ENCRYPTION_NONE = 0, // No encryption
PDFIO_ENCRYPTION_RC4_40, // 40-bit RC4 encryption (PDF 1.3)
PDFIO_ENCRYPTION_RC4_40, // 40-bit RC4 encryption (PDF 1.3, reading only)
PDFIO_ENCRYPTION_RC4_128, // 128-bit RC4 encryption (PDF 1.4)
PDFIO_ENCRYPTION_AES_128, // 128-bit AES encryption (PDF 1.6)
PDFIO_ENCRYPTION_AES_256 // 256-bit AES encryption (PDF 2.0) @exclude all@