diff --git a/doc/pdfio.md b/doc/pdfio.md index 711c16d..8709d33 100644 --- a/doc/pdfio.md +++ b/doc/pdfio.md @@ -343,8 +343,8 @@ password_cb(void *data, const char *filename) ``` The error callback is called for both errors and warnings and accepts the -`pdfio_file_t` pointer, a message string, and the callback pointer value, for -example: +`pdfio_file_t` pointer, a message string, and the callback pointer value. It +returns `true` to continue processing the file or `false` to stop, for example: ```c bool @@ -354,13 +354,17 @@ error_cb(pdfio_file_t *pdf, const char *message, void *data) fprintf(stderr, "%s: %s\n", pdfioFileGetName(pdf), message); - // Return false to treat warnings as errors - return (false); + // Return true for warning messages (continue) and false for errors (stop) + return (!strncmp(message, "WARNING:", 8)); } ``` The default error callback (`NULL`) does the equivalent of the above. +> Note: Many errors are unrecoverable, so PDFio ignores the return value from +> the error callback and always stops processing the PDF file. Warning messages +> start with the prefix "WARNING:" while errors have no prefix. + Each PDF file contains one or more pages. The [`pdfioFileGetNumPages`](@@) function returns the number of pages in the file while the [`pdfioFileGetPage`](@@) function gets the specified page in the PDF file: diff --git a/pdfio-array.c b/pdfio-array.c index 67b89d7..de2dd30 100644 --- a/pdfio-array.c +++ b/pdfio-array.c @@ -640,6 +640,8 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file // // 'pdfioArrayRemove()' - Remove an array entry. // +// @since PDFio v1.4@ +// bool // O - `true` on success, `false` otherwise pdfioArrayRemove(pdfio_array_t *a, // I - Array diff --git a/pdfio-common.c b/pdfio-common.c index d8b159e..e88b081 100644 --- a/pdfio-common.c +++ b/pdfio-common.c @@ -47,7 +47,7 @@ _pdfioFileConsume(pdfio_file_t *pdf, // I - PDF file // `false` to halt. 
// -bool // O - `false` to stop +bool // O - `false` to stop, `true` to continue _pdfioFileDefaultError( pdfio_file_t *pdf, // I - PDF file const char *message, // I - Error message @@ -57,7 +57,7 @@ _pdfioFileDefaultError( fprintf(stderr, "%s: %s\n", pdf->filename, message); - return (false); + return (!strncmp(message, "WARNING:", 8)); } @@ -427,7 +427,7 @@ off_t // O - Offset from beginning of file _pdfioFileTell(pdfio_file_t *pdf) // I - PDF file { if (pdf->bufptr) - return (pdf->bufpos + (pdf->bufptr - pdf->buffer)); + return (pdf->bufpos + (off_t)(pdf->bufptr - pdf->buffer)); else return (pdf->bufpos); } @@ -455,7 +455,7 @@ _pdfioFileWrite(pdfio_file_t *pdf, // I - PDF file if (!write_buffer(pdf, buffer, bytes)) return (false); - pdf->bufpos += bytes; + pdf->bufpos += (off_t)bytes; return (true); } @@ -481,7 +481,7 @@ fill_buffer(pdfio_file_t *pdf) // I - PDF file // Advance current position in file as needed... if (pdf->bufend) - pdf->bufpos += pdf->bufend - pdf->buffer; + pdf->bufpos += (off_t)(pdf->bufend - pdf->buffer); // Try reading from the file... if ((bytes = read_buffer(pdf, pdf->buffer, sizeof(pdf->buffer))) <= 0) diff --git a/pdfio-content.c b/pdfio-content.c index 29566f9..46a8dcf 100644 --- a/pdfio-content.c +++ b/pdfio-content.c @@ -632,6 +632,8 @@ pdfioContentPathCurve23( // // 'pdfioContentPathEnd()' - Clear the current path. // +// @since PDFio v1.1@ +// bool // O - `true` on success, `false` on failure pdfioContentPathEnd(pdfio_stream_t *st) // I - Stream @@ -1111,6 +1113,8 @@ pdfioContentTextEnd(pdfio_stream_t *st) // I - Stream // on "size". The text string must always use the UTF-8 (Unicode) encoding but // any control characters (such as newlines) are ignored. // +// @since PDFio v1.2@ +// double // O - Width pdfioContentTextMeasure( @@ -1267,6 +1271,8 @@ pdfioContentTextMoveTo( // // 'pdfioContentTextNewLine()' - Move to the next line. 
// +// @since PDFio v1.2@ +// bool // O - `true` on success, `false` on failure pdfioContentTextNewLine( @@ -1298,6 +1304,8 @@ pdfioContentTextNextLine( // specifies that the current font maps to full Unicode. The "s" argument // specifies a UTF-8 encoded string. // +// @since PDFio v1.2@ +// bool // O - `true` on success, `false` on failure pdfioContentTextNewLineShow( @@ -1346,6 +1354,8 @@ pdfioContentTextNewLineShow( // argument specifies that the current font maps to full Unicode. The "format" // argument specifies a UTF-8 encoded `printf`-style format string. // +// @since PDFio v1.2@ +// bool // O - `true` on success, `false` on failure pdfioContentTextNewLineShowf( diff --git a/pdfio-dict.c b/pdfio-dict.c index 8702846..642871c 100644 --- a/pdfio-dict.c +++ b/pdfio-dict.c @@ -20,6 +20,8 @@ static int compare_pairs(_pdfio_pair_t *a, _pdfio_pair_t *b); // // 'pdfioDictClear()' - Remove a key/value pair from a dictionary. // +// @since PDFio v1.4@ +// bool // O - `true` if cleared, `false` otherwise pdfioDictClear(pdfio_dict_t *dict, // I - Dictionary @@ -345,6 +347,8 @@ pdfioDictGetDict(pdfio_dict_t *dict, // I - Dictionary // // 'pdfioDictGetKey()' - Get the key for the specified pair. // +// @since PDFio v1.4@ +// const char * // O - Key for specified pair pdfioDictGetKey(pdfio_dict_t *dict, // I - Dictionary @@ -375,6 +379,8 @@ pdfioDictGetName(pdfio_dict_t *dict, // I - Dictionary // // 'pdfioDictGetNumPairs()' - Get the number of key/value pairs in a dictionary. // +// @since PDFio v1.4@ +// size_t // O - Number of pairs pdfioDictGetNumPairs(pdfio_dict_t *dict)// I - Dictionary @@ -562,6 +568,8 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary // The iteration continues as long as the callback returns `true` or all keys // have been iterated. 
// +// @since PDFio v1.1@ +// void pdfioDictIterateKeys( @@ -635,9 +643,11 @@ _pdfioDictRead(pdfio_file_t *pdf, // I - PDF file { // Issue 118: Discard duplicate key/value pairs, in the future this will // be a warning message... - _pdfioFileError(pdf, "WARNING: Discarding value for duplicate dictionary key '%s'.", key + 1); _pdfioValueDelete(&value); - continue; + if (_pdfioFileError(pdf, "WARNING: Discarding value for duplicate dictionary key '%s'.", key + 1)) + continue; + else + break; } else if (!_pdfioDictSetValue(dict, pdfioStringCreate(pdf, key + 1), &value)) break; diff --git a/pdfio-file.c b/pdfio-file.c index 0cbbd28..eebc578 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -181,8 +181,9 @@ pdfioFileClose(pdfio_file_t *pdf) // I - PDF file // of 8.27x11in (the intersection of US Letter and ISO A4) is used. // // The "error_cb" and "error_cbdata" arguments specify an error handler callback -// and its data pointer - if `NULL` the default error handler is used that -// writes error messages to `stderr`. +// and its data pointer - if `NULL` then the default error handler is used that +// writes error messages to `stderr`. The error handler callback should return +// `true` to continue writing the PDF file or `false` to stop. // pdfio_file_t * // O - PDF file or `NULL` on error @@ -260,6 +261,8 @@ pdfioFileCreateArrayObj( // This function creates a new object with a name value in a PDF file. // You must call @link pdfioObjClose@ to write the object to the file. // +// @since PDFio v1.4@ +// pdfio_obj_t * // O - New object pdfioFileCreateNameObj( @@ -289,6 +292,8 @@ pdfioFileCreateNameObj( // This function creates a new object with a number value in a PDF file. // You must call @link pdfioObjClose@ to write the object to the file. // +// @since PDFio v1.2@ +// pdfio_obj_t * // O - New object pdfioFileCreateNumberObj( @@ -415,8 +420,9 @@ _pdfioFileCreateObj( // of 8.27x11in (the intersection of US Letter and ISO A4) is used. 
// // The "error_cb" and "error_cbdata" arguments specify an error handler callback -// and its data pointer - if `NULL` the default error handler is used that -// writes error messages to `stderr`. +// and its data pointer - if `NULL` then the default error handler is used that +// writes error messages to `stderr`. The error handler callback should return +// `true` to continue writing the PDF file or `false` to stop. // // > *Note*: Files created using this API are slightly larger than those // > created using the @link pdfioFileCreate@ function since stream lengths are @@ -518,6 +524,8 @@ pdfioFileCreatePage(pdfio_file_t *pdf, // I - PDF file // This function creates a new object with a string value in a PDF file. // You must call @link pdfioObjClose@ to write the object to the file. // +// @since PDFio v1.2@ +// pdfio_obj_t * // O - New object pdfioFileCreateStringObj( @@ -987,8 +995,14 @@ pdfioFileGetVersion( // PDF file requires a password, the open will always fail. // // The "error_cb" and "error_cbdata" arguments specify an error handler callback -// and its data pointer - if `NULL` the default error handler is used that -// writes error messages to `stderr`. +// and its data pointer - if `NULL` then the default error handler is used that +// writes error messages to `stderr`. The error handler callback should return +// `true` to continue reading the PDF file or `false` to stop. +// +// > Note: Error messages starting with "WARNING:" are actually warning +// > messages - the callback should normally return `true` to allow PDFio to +// > try to resolve the issue. In addition, some errors are unrecoverable and +// > ignore the return value of the error callback. 
// pdfio_file_t * // O - PDF file @@ -1685,7 +1699,10 @@ load_pages(pdfio_file_t *pdf, // I - PDF file } if ((type = pdfioDictGetName(dict, "Type")) == NULL || (strcmp(type, "Pages") && strcmp(type, "Page"))) - return (false); + { + if (!_pdfioFileError(pdf, "WARNING: No Type value for pages object.")) + return (false); + } // If there is a Kids array, then this is a parent node and we have to look // at the child objects... @@ -1879,6 +1896,8 @@ load_xref( w_2 = w[0]; w_3 = w[0] + w[1]; + PDFIO_DEBUG("W=[%u %u %u], w_total=%u\n", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2], (unsigned)w_total); + if (pdfioArrayGetSize(w_array) > 3 || w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer)) { PDFIO_DEBUG("load_xref: Bad W array in cross-reference objection dictionary.\n"); @@ -1908,7 +1927,20 @@ load_xref( { count --; - PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]); +#ifdef DEBUG + if (w_total > 5) + PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X%02X...\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]); + else if (w_total == 5) + PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4]); + else if (w_total == 4) + PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2], buffer[3]); + else if (w_total == 3) + PDFIO_DEBUG("load_xref: number=%u %02X%02X%02X\n", (unsigned)number, buffer[0], buffer[1], buffer[2]); + else if (w_total == 2) + PDFIO_DEBUG("load_xref: number=%u %02X%02X\n", (unsigned)number, buffer[0], buffer[1]); + else + PDFIO_DEBUG("load_xref: number=%u %02X\n", (unsigned)number, buffer[0]); +#endif // DEBUG // Check whether this is an object definition... 
if (w[0] > 0) @@ -2252,6 +2284,7 @@ repair_xref( size_t i; // Looping var size_t num_sobjs = 0; // Number of object streams pdfio_obj_t *sobjs[16384]; // Object streams to load + pdfio_dict_t *backup_trailer = NULL; // Backup trailer dictionary pdfio_obj_t *pages_obj; // Pages object @@ -2319,13 +2352,14 @@ repair_xref( _pdfioTokenFlush(&tb); - if (type && !strcmp(line, "stream")) - { - // Possible object or XRef stream... + if (type && !strcmp(line, "stream")) + { + // Possible object or XRef stream... obj->stream_offset = _pdfioFileTell(pdf); if (!strcmp(type, "ObjStm") && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0]))) { + PDFIO_DEBUG("repair_xref: Object stream...\n"); sobjs[num_sobjs] = obj; num_sobjs ++; } @@ -2333,11 +2367,24 @@ repair_xref( if (!strcmp(type, "XRef") && !pdf->trailer_dict) { // Save the trailer dictionary... + PDFIO_DEBUG("repair_xref: XRef stream...\n"); pdf->trailer_dict = pdfioObjGetDict(obj); pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt"); pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID"); } } + else if (type && !strcmp(line, "endobj")) + { + // Possible catalog or pages object... + if (!strcmp(type, "Catalog")) + { + PDFIO_DEBUG("repair_xref: Catalog (root) object...\n"); + if (!backup_trailer) + backup_trailer = pdfioDictCreate(pdf); + + pdfioDictSetObj(backup_trailer, "Root", obj); + } + } } } } @@ -2348,6 +2395,8 @@ repair_xref( _pdfio_token_t tb; // Token buffer/stack _pdfio_value_t trailer; // Trailer + PDFIO_DEBUG("repair_xref: line=\"%s\"\n", line); + if (line[7]) { // Probably the start of the trailer dictionary, rewind the file so @@ -2375,6 +2424,8 @@ repair_xref( { // Save the trailer dictionary and grab the root (catalog) and info // objects... 
+ PDFIO_DEBUG("repair_xref: Using this trailer dictionary.\n"); + pdf->trailer_dict = trailer.value.dict; pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt"); pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID"); @@ -2385,11 +2436,18 @@ repair_xref( line_offset = _pdfioFileTell(pdf); } + PDFIO_DEBUG("repair_xref: Stopped at line_offset=%lu\n", (unsigned long)line_offset); + + if (!pdf->trailer_dict && backup_trailer) + pdf->trailer_dict = backup_trailer; + // If the trailer contains an Encrypt key, try unlocking the file... if (pdf->encrypt_obj && !_pdfioCryptoUnlock(pdf, password_cb, password_data)) return (false); // Load any stream objects... + PDFIO_DEBUG("repair_xref: Found %lu stream objects.\n", (unsigned long)num_sobjs); + for (i = 0; i < num_sobjs; i ++) { if (!load_obj_stream(sobjs[i])) @@ -2567,12 +2625,16 @@ write_trailer(pdfio_file_t *pdf) // I - PDF file buffer[2] = (obj->offset >> 8) & 255; buffer[3] = obj->offset & 255; break; +#ifdef _WIN32 + default : +#endif // _WIN32 case 4 : buffer[1] = (obj->offset >> 24) & 255; buffer[2] = (obj->offset >> 16) & 255; buffer[3] = (obj->offset >> 8) & 255; buffer[4] = obj->offset & 255; break; +#ifndef _WIN32 // Windows off_t is 32-bits?!? case 5 : buffer[1] = (obj->offset >> 32) & 255; buffer[2] = (obj->offset >> 24) & 255; @@ -2607,6 +2669,7 @@ write_trailer(pdfio_file_t *pdf) // I - PDF file buffer[7] = (obj->offset >> 8) & 255; buffer[8] = obj->offset & 255; break; +#endif // !_WIN32 } if (!pdfioStreamWrite(xref_st, buffer, offsize + 2)) diff --git a/pdfio-object.c b/pdfio-object.c index 45ea152..58a977e 100644 --- a/pdfio-object.c +++ b/pdfio-object.c @@ -333,6 +333,8 @@ pdfioObjGetLength(pdfio_obj_t *obj) // I - Object // // 'pdfioObjGetName()' - Get the name value associated with an object. 
// +// @since PDFio v1.4@ +// const char * // O - Dictionary or `NULL` on error pdfioObjGetName(pdfio_obj_t *obj) // I - Object diff --git a/pdfio-private.h b/pdfio-private.h index d6c7320..0823f7a 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -10,7 +10,7 @@ #ifndef PDFIO_PRIVATE_H # define PDFIO_PRIVATE_H # ifdef _WIN32 -# define _CRT_SECURE_NO_WARNINGS // Disable bogus VS warnings/errors... +# define _CRT_SECURE_NO_WARNINGS 1 // Disable bogus VS warnings/errors... # endif // _WIN32 # include "pdfio.h" # include @@ -28,16 +28,16 @@ # define access _access // Map standard POSIX/C99 names # define close _close # define fileno _fileno -# define lseek _lseek +# define lseek(f,o,w) (off_t)_lseek((f),(long)(o),(w)) # define mkdir(d,p) _mkdir(d) # define open _open -# define read _read +# define read(f,b,s) _read((f),(b),(unsigned)(s)) # define rmdir _rmdir # define snprintf _snprintf # define strdup _strdup # define unlink _unlink # define vsnprintf _vsnprintf -# define write _write +# define write(f,b,s) _write((f),(b),(unsigned)(s)) # ifndef F_OK # define F_OK 00 // POSIX parameters/flags # define W_OK 02 diff --git a/pdfio-stream.c b/pdfio-stream.c index 9fa76b0..e4e6703 100644 --- a/pdfio-stream.c +++ b/pdfio-stream.c @@ -697,8 +697,9 @@ pdfioStreamPeek(pdfio_stream_t *st, // I - Stream // 'pdfioStreamPrintf()' - Write a formatted string to a stream. // // This function writes a formatted string to a stream. In addition to the -// standard `printf` format characters, you can use "%N" to format a PDF name -// value ("/Name") and "%S" to format a PDF string ("(String)") value. +// standard `printf` format characters, you can use "%H" to format a HTML/XML +// string value, "%N" to format a PDF name value ("/Name"), and "%S" to format +// a PDF string ("(String)") value. 
// bool // O - `true` on success, `false` on failure diff --git a/pdfio-string.c b/pdfio-string.c index d26f156..d8033de 100644 --- a/pdfio-string.c +++ b/pdfio-string.c @@ -486,6 +486,59 @@ _pdfio_vsnprintf(pdfio_file_t *pdf, // I - PDF file } break; + case 'H' : // XML/HTML string + if ((s = va_arg(ap, char *)) == NULL) + s = "(null)"; + + // Loop through the literal string... + while (*s) + { + // Escape special characters + if (*s == '&') + { + // &amp; + if (bufptr < bufend) + { + _pdfio_strlcpy(bufptr, "&amp;", (size_t)(bufend - bufptr + 1)); + bufptr += strlen(bufptr); + } + + bytes += 5; + } + else if (*s == '<') + { + // &lt; + if (bufptr < bufend) + { + _pdfio_strlcpy(bufptr, "&lt;", (size_t)(bufend - bufptr + 1)); + bufptr += strlen(bufptr); + } + + bytes += 4; + } + else if (*s == '>') + { + // &gt; + if (bufptr < bufend) + { + _pdfio_strlcpy(bufptr, "&gt;", (size_t)(bufend - bufptr + 1)); + bufptr += strlen(bufptr); + } + + bytes += 4; + } + else + { + // Literal character... + if (bufptr < bufend) + *bufptr++ = *s; + bytes ++; + } + + s ++; + } + break; + case 'S' : // PDF string if ((s = va_arg(ap, char *)) == NULL) s = "(null)"; diff --git a/pdfio.h b/pdfio.h index c2fbaac..d54eb72 100644 --- a/pdfio.h +++ b/pdfio.h @@ -46,7 +46,7 @@ extern "C" { // # if _WIN32 -typedef __int64 ssize_t; // POSIX type not present on Windows... 
@private@ +typedef __int64 ssize_t; // POSIX type not present on Windows @private@ # endif // _WIN32 typedef struct _pdfio_array_s pdfio_array_t; @@ -62,7 +62,7 @@ typedef bool (*pdfio_error_cb_t)(pdfio_file_t *pdf, const char *message, void *d typedef enum pdfio_encryption_e // PDF encryption modes { PDFIO_ENCRYPTION_NONE = 0, // No encryption - PDFIO_ENCRYPTION_RC4_40, // 40-bit RC4 encryption (PDF 1.3) + PDFIO_ENCRYPTION_RC4_40, // 40-bit RC4 encryption (PDF 1.3, reading only) PDFIO_ENCRYPTION_RC4_128, // 128-bit RC4 encryption (PDF 1.4) PDFIO_ENCRYPTION_AES_128, // 128-bit AES encryption (PDF 1.6) PDFIO_ENCRYPTION_AES_256 // 256-bit AES encryption (PDF 2.0) @exclude all@