From 81aeef46d257a752545bdf8fcea06244eb16244f Mon Sep 17 00:00:00 2001
From: Michael R Sweet
Date: Sun, 13 Apr 2025 16:56:30 -0400
Subject: [PATCH] Add XMP metadata to output (Issue #103)

---
Reviewer notes (commentary placed after the "---" line is not part of the
commit and is skipped when the patch is applied):

- The line structure of this patch was restored from the "+" markers; the
  resulting line counts match every "@@" hunk header and the diffstat.
- In the "%H" case of _pdfio_vsnprintf() the replacement strings are the
  entities "&amp;", "&lt;" and "&gt;".  The copy under review had them decoded
  to the bare characters, which contradicts the "bytes += 5/4/4" accounting
  next to them and would emit unescaped XML.
- The XML markup inside the write_metadata() string literals was missing from
  the copy under review.  The element names, namespaces and packet wrapper
  below are reconstructed from the XMP, PDF and Dublin Core schemas - TODO
  confirm them byte-for-byte against the upstream commit before applying.
- Horizontal whitespace (tabs versus spaces, comment alignment) is a best-effort
  reconstruction; apply with whitespace tolerance if context lines do not match.
- Possible follow-up, not changed here: get_iso_date() does not check the
  result of gmtime_r()/gmtime_s() before formatting the "struct tm".

 CHANGES.md     |   1 +
 pdfio-file.c   | 113 ++++++++++++++++++++++++++++++++++++++++++++++++-
 pdfio-stream.c |   5 ++-
 pdfio-string.c |  53 +++++++++++++++++++++++
 4 files changed, 169 insertions(+), 3 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 7ae6863..6d42de7 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -5,6 +5,7 @@ Changes in PDFio
 v1.6.0 - YYYY-MM-DD
 -------------------
 
+- Added generation of XMP metadata when writing PDFs (Issue #103)
 - Added "standard" `PDFIO_CS_CGATS001` color space for non-device CMYK
   (Issue #104)
 - Added CMYK JPEG support with embedded ICC profiles or using the CGATS001
diff --git a/pdfio-file.c b/pdfio-file.c
index bcb8a3a..6a3d4a7 100644
--- a/pdfio-file.c
+++ b/pdfio-file.c
@@ -22,11 +22,13 @@ static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short gen
 static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b);
 static pdfio_file_t *create_common(const char *filename, int fd, pdfio_output_cb_t output_cb, void *output_cbdata, const char *version, pdfio_rect_t *media_box, pdfio_rect_t *crop_box, pdfio_error_cb_t error_cb, void *error_cbdata);
 static const char *get_info_string(pdfio_file_t *pdf, const char *key);
+static char *get_iso_date(time_t t, char *buffer, size_t bufsize);
 static struct lconv *get_lconv(void);
 static bool load_obj_stream(pdfio_obj_t *obj);
 static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
 static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data);
 static bool repair_xref(pdfio_file_t *pdf, pdfio_password_cb_t password_cb, void *password_data);
+static bool write_metadata(pdfio_file_t *pdf);
 static bool write_pages(pdfio_file_t *pdf);
 static bool write_trailer(pdfio_file_t *pdf);
 
@@ -128,7 +130,7 @@ pdfioFileClose(pdfio_file_t *pdf) // I - PDF file
       pdfioFileAddOutputIntent(pdf, /*subtype*/"GTS_PDFA1", /*condition*/"CMYK", /*cond_id*/"CGATS001", /*reg_name*/NULL, /*info*/"CMYK Printing", /*profile*/NULL);
 
     // Close and write out the last bits...
-    if (pdfioObjClose(pdf->info_obj) && write_pages(pdf) && pdfioObjClose(pdf->root_obj) && write_trailer(pdf))
+    if (write_metadata(pdf) && pdfioObjClose(pdf->info_obj) && write_pages(pdf) && pdfioObjClose(pdf->root_obj) && write_trailer(pdf))
       ret = _pdfioFileFlush(pdf);
   }
 
@@ -1536,6 +1538,32 @@ get_info_string(pdfio_file_t *pdf, // I - PDF file
 }
 
 
+//
+// 'get_iso_date()' - Convert a time_t value to an ISO 8601 date/time value.
+//
+
+static char *                           // O - Date string
+get_iso_date(time_t t,                  // I - Time value in seconds
+             char   *buffer,            // I - Date buffer
+             size_t bufsize)            // I - Size of date buffer
+{
+  struct tm d;                          // Date values
+
+
+  // Convert time to UTC date
+#if _WIN32
+  gmtime_s(&d, &t);
+#else
+  gmtime_r(&t, &d);
+#endif // _WIN32
+
+  // Format the string and return...
+  snprintf(buffer, bufsize, "%04d-%02d-%02dT%02d:%02d:%02dZ", d.tm_year + 1900, d.tm_mon + 1, d.tm_mday, d.tm_hour, d.tm_min, d.tm_sec);
+
+  return (buffer);
+}
+
+
 //
 // 'get_lconv()' - Get any locale-specific numeric information.
 //
@@ -2360,6 +2388,89 @@ repair_xref(
 }
 
 
+//
+// 'write_metadata()' - Write an XMP metadata stream.
+//
+
+static bool                             // O - `true` on success, `false` on failure
+write_metadata(pdfio_file_t *pdf)       // I - PDF file
+{
+  pdfio_dict_t   *dict;                 // XMP object dictionary
+  pdfio_obj_t    *obj;                  // XMP object
+  pdfio_stream_t *st;                   // XMP stream
+  bool           status = true;         // Write status
+  const char     *value;                // Value from info dictionary
+  time_t         t;                     // Date/time value in seconds
+  char           d[64];                 // Date/time string (ISO 8601)
+
+
+  // Create the Metadata object...
+  if ((dict = pdfioDictCreate(pdf)) == NULL)
+    return (false);
+
+  pdfioDictSetName(dict, "Type", "Metadata");
+  pdfioDictSetName(dict, "Subtype", "XML");
+
+  if ((obj = pdfioFileCreateObj(pdf, dict)) == NULL)
+    return (false);
+
+  // Write the XMP stream...
+  if ((st = pdfioObjCreateStream(obj, PDFIO_FILTER_NONE)) == NULL)
+    return (false);
+
+  status &= pdfioStreamPuts(st, "<?xpacket begin=\"\357\273\277\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n");
+  status &= pdfioStreamPuts(st, "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n");
+  status &= pdfioStreamPuts(st, "  <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n");
+
+  status &= pdfioStreamPuts(st, "    <rdf:Description rdf:about=\"\" xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\">\n");
+  t      = pdfioFileGetCreationDate(pdf);
+  status &= pdfioStreamPrintf(st, "      <xmp:CreateDate>%H</xmp:CreateDate>\n", get_iso_date(t, d, sizeof(d)));
+  if ((value = pdfioFileGetCreator(pdf)) != NULL)
+    status &= pdfioStreamPrintf(st, "      <xmp:CreatorTool>%H</xmp:CreatorTool>\n", value);
+  status &= pdfioStreamPrintf(st, "      <xmp:MetadataDate>%H</xmp:MetadataDate>\n", d);
+  if ((t = pdfioFileGetModificationDate(pdf)) > 0)
+    status &= pdfioStreamPrintf(st, "      <xmp:ModifyDate>%H</xmp:ModifyDate>\n", get_iso_date(t, d, sizeof(d)));
+  status &= pdfioStreamPuts(st, "    </rdf:Description>\n");
+
+  status &= pdfioStreamPuts(st, "    <rdf:Description rdf:about=\"\" xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\">\n");
+  status &= pdfioStreamPrintf(st, "      <pdf:Producer>%H</pdf:Producer>\n", pdfioFileGetProducer(pdf));
+  if ((value = pdfioFileGetKeywords(pdf)) != NULL)
+    status &= pdfioStreamPrintf(st, "      <pdf:Keywords>%H</pdf:Keywords>\n", value);
+  status &= pdfioStreamPuts(st, "    </rdf:Description>\n");
+
+  status &= pdfioStreamPuts(st, "    <rdf:Description rdf:about=\"\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n");
+  status &= pdfioStreamPrintf(st, "      <dc:format>application/pdf</dc:format>\n");
+  if ((value = pdfioFileGetTitle(pdf)) != NULL)
+    status &= pdfioStreamPrintf(st, "      <dc:title><rdf:Alt><rdf:li xml:lang=\"x-default\">%H</rdf:li></rdf:Alt></dc:title>\n", value);
+  if ((value = pdfioFileGetAuthor(pdf)) != NULL)
+    status &= pdfioStreamPrintf(st, "      <dc:creator><rdf:Seq><rdf:li>%H</rdf:li></rdf:Seq></dc:creator>\n", value);
+  if ((value = pdfioFileGetSubject(pdf)) != NULL)
+    status &= pdfioStreamPrintf(st, "      <dc:description><rdf:Alt><rdf:li xml:lang=\"x-default\">%H</rdf:li></rdf:Alt></dc:description>\n", value);
+  status &= pdfioStreamPuts(st, "    </rdf:Description>\n");
+
+  // TODO: Need a better way to choose the output profile - something that lets
+  // us choose the base PDF version and PDF/A, PDF/E, PDF/X, etc.
+#if 0
+  status &= pdfioStreamPuts(st, "    <rdf:Description rdf:about=\"\" xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n");
+  status &= pdfioStreamPuts(st, "      <pdfaid:conformance>A</pdfaid:conformance>\n");
+  status &= pdfioStreamPuts(st, "      <pdfaid:part>1</pdfaid:part>\n");
+  status &= pdfioStreamPuts(st, "    </rdf:Description>\n");
+#endif // 0
+
+  status &= pdfioStreamPuts(st, "  </rdf:RDF>\n");
+  status &= pdfioStreamPuts(st, "</x:xmpmeta>\n");
+  status &= pdfioStreamPuts(st, "<?xpacket end=\"r\"?>\n");
+
+  status &= pdfioStreamClose(st);
+
+  if (!status)
+    return (false);
+
+  // If we get this far, add the Metadata key/value to the catalog/root object.
+  return (pdfioDictSetObj(pdfioFileGetCatalog(pdf), "Metadata", obj));
+}
+
+
 //
 // 'write_pages()' - Write the PDF pages objects.
 //
diff --git a/pdfio-stream.c b/pdfio-stream.c
index bc3658b..3a13d43 100644
--- a/pdfio-stream.c
+++ b/pdfio-stream.c
@@ -690,8 +690,9 @@ pdfioStreamPeek(pdfio_stream_t *st, // I - Stream
 // 'pdfioStreamPrintf()' - Write a formatted string to a stream.
 //
 // This function writes a formatted string to a stream. In addition to the
-// standard `printf` format characters, you can use "%N" to format a PDF name
-// value ("/Name") and "%S" to format a PDF string ("(String)") value.
+// standard `printf` format characters, you can use "%H" to format a HTML/XML
+// string value, "%N" to format a PDF name value ("/Name"), and "%S" to format
+// a PDF string ("(String)") value.
 //
 
 bool // O - `true` on success, `false` on failure
diff --git a/pdfio-string.c b/pdfio-string.c
index 1295169..9cc5883 100644
--- a/pdfio-string.c
+++ b/pdfio-string.c
@@ -486,6 +486,59 @@ _pdfio_vsnprintf(pdfio_file_t *pdf, // I - PDF file
               }
               break;
 
+          case 'H' : // XML/HTML string
+              if ((s = va_arg(ap, char *)) == NULL)
+                s = "(null)";
+
+              // Loop through the literal string...
+              while (*s)
+              {
+                // Escape special characters
+                if (*s == '&')
+                {
+                  // &amp;
+                  if (bufptr < bufend)
+                  {
+                    _pdfio_strlcpy(bufptr, "&amp;", (size_t)(bufend - bufptr + 1));
+                    bufptr += strlen(bufptr);
+                  }
+
+                  bytes += 5;
+                }
+                else if (*s == '<')
+                {
+                  // &lt;
+                  if (bufptr < bufend)
+                  {
+                    _pdfio_strlcpy(bufptr, "&lt;", (size_t)(bufend - bufptr + 1));
+                    bufptr += strlen(bufptr);
+                  }
+
+                  bytes += 4;
+                }
+                else if (*s == '>')
+                {
+                  // &gt;
+                  if (bufptr < bufend)
+                  {
+                    _pdfio_strlcpy(bufptr, "&gt;", (size_t)(bufend - bufptr + 1));
+                    bufptr += strlen(bufptr);
+                  }
+
+                  bytes += 4;
+                }
+                else
+                {
+                  // Literal character...
+                  if (bufptr < bufend)
+                    *bufptr++ = *s;
+                  bytes ++;
+                }
+
+                s ++;
+              }
+              break;
+
           case 'S' : // PDF string
               if ((s = va_arg(ap, char *)) == NULL)
                 s = "(null)";