diff --git a/examples/pdf2text.c b/examples/pdf2text.c index 151ca83..fe8be68 100644 --- a/examples/pdf2text.c +++ b/examples/pdf2text.c @@ -1417,4 +1417,4 @@ puts_utf16(const char *s) // I - Hex string put_utf8(ch); } -} \ No newline at end of file +} diff --git a/examples/tagged_document.pdf b/examples/tagged_document.pdf new file mode 100644 index 0000000..ca13b9f Binary files /dev/null and b/examples/tagged_document.pdf differ diff --git a/pdfio-content.c b/pdfio-content.c index 9ee1f2f..bf4df9e 100644 --- a/pdfio-content.c +++ b/pdfio-content.c @@ -1709,6 +1709,11 @@ pdfioFileCreateFontObjFromBase( pdfio_dict_t *dict; // Font dictionary pdfio_obj_t *obj; // Font object + if (pdf && pdf->pdfa != _PDFIO_PDFA_NONE) + { + _pdfioFileError(pdf, "Base fonts are not allowed in PDF/A files; use pdfioFileCreateFontObjFromFile to embed a font."); + return (NULL); + } if ((dict = pdfioDictCreate(pdf)) == NULL) return (NULL); @@ -2073,6 +2078,12 @@ pdfioFileCreateImageObjFromData( }; + if (pdf && (pdf->pdfa == _PDFIO_PDFA_1A || pdf->pdfa == _PDFIO_PDFA_1B) && alpha) + { + _pdfioFileError(pdf, "Images with transparency (alpha channels) are not allowed in PDF/A-1 files."); + return (NULL); + } + // Range check input... 
if (!pdf || !data || !width || !height || num_colors < 1 || num_colors == 2 || num_colors > 4) return (NULL); @@ -2739,6 +2750,12 @@ copy_png(pdfio_dict_t *dict, // I - Dictionary depth = png_get_bit_depth(pp, info); color_type = png_get_color_type(pp, info); + if ((dict->pdf->pdfa == _PDFIO_PDFA_1A || dict->pdf->pdfa == _PDFIO_PDFA_1B) && (color_type & PNG_COLOR_MASK_ALPHA)) + { + _pdfioFileError(dict->pdf, "PNG images with transparency (alpha channels) are not allowed in PDF/A-1 files."); + goto finish_png; + } + if (color_type & PNG_COLOR_MASK_PALETTE) num_colors = 1; else if (color_type & PNG_COLOR_MASK_COLOR) diff --git a/pdfio-file.c b/pdfio-file.c index 5d46724..75532e7 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -1114,8 +1114,9 @@ pdfioFileOpen( char message[8192]; // Message string temp.filename = (char *)filename; - snprintf(message, sizeof(message), "Unable to allocate memory for PDF file - %s", strerror(errno)); + snprintf(message, sizeof(message), "Unable to allocate memory for PDF file: %s", strerror(errno)); (error_cb)(&temp, message, error_cbdata); + return (NULL); } @@ -1328,21 +1329,26 @@ pdfioFileSetPermissions( if (!pdf) return (false); - if (pdf->num_objs > 3) // First three objects are pages, info, and root + if (pdf->pdfa != _PDFIO_PDFA_NONE && encryption != PDFIO_ENCRYPTION_NONE) { - _pdfioFileError(pdf, "You must call pdfioFileSetPermissions before adding any objects."); + _pdfioFileError(pdf, "Encryption is not allowed for PDF/A files."); return (false); } if (encryption == PDFIO_ENCRYPTION_NONE) return (true); + if (pdf->num_objs > 3) // First three objects are pages, info, and root + { + _pdfioFileError(pdf, "You must call pdfioFileSetPermissions before adding any objects."); + return (false); + } + pdf->encrypt_metadata = true; return (_pdfioCryptoLock(pdf, permissions, encryption, owner_password, user_password)); } - // // 'pdfioFileSetSubject()' - Set the subject for a PDF file. 
// @@ -1514,7 +1520,7 @@ create_common( unsigned char id_value[16]; // File ID value time_t curtime; // Creation date/time _pdfio_sha256_t ctx; // Hashing context - + const char *file_version; // Actual PDF version string PDFIO_DEBUG("create_common(filename=\"%s\", fd=%d, output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, fd, (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata); @@ -1522,13 +1528,12 @@ create_common( if (!filename || (fd < 0 && !output_cb)) return (NULL); - if (!version) - version = "2.0"; if (!error_cb) { error_cb = _pdfioFileDefaultError; error_cbdata = NULL; + } // Allocate a PDF file structure... @@ -1550,7 +1555,52 @@ create_common( pdf->output_cb = output_cb; pdf->output_ctx = output_cbdata; pdf->filename = strdup(filename); - pdf->version = strdup(!strncmp(version, "PCLm-", 5) ? "1.4" : version); + + if (!version) + { + version = "2.0"; + } + + if (!strncmp(version, "PDF/A-1", 7)) + { + file_version = "1.4"; + if (version[7] == 'a') + pdf->pdfa = _PDFIO_PDFA_1A; + else + pdf->pdfa = _PDFIO_PDFA_1B; // Default to 'b' + } + else if (!strncmp(version, "PDF/A-2", 7)) + { + file_version = "1.7"; + if (version[7] == 'a') + pdf->pdfa = _PDFIO_PDFA_2A; + else if (version[7] == 'u') + pdf->pdfa = _PDFIO_PDFA_2U; + else + pdf->pdfa = _PDFIO_PDFA_2B; // Default to 'b' + } + else if (!strncmp(version, "PDF/A-3", 7)) + { + file_version = "1.7"; + if (version[7] == 'a') + pdf->pdfa = _PDFIO_PDFA_3A; + else if (version[7] == 'u') + pdf->pdfa = _PDFIO_PDFA_3U; + else + pdf->pdfa = _PDFIO_PDFA_3B; // Default to 'b' + } + else if (!strncmp(version, "PDF/A-4", 7)) + { + file_version = "2.0"; + pdf->pdfa = _PDFIO_PDFA_4; + } + else + { + file_version = !strncmp(version, "PCLm-", 5) ? "1.4" : version; // PCLm output is still stored as PDF 1.4 + pdf->pdfa = _PDFIO_PDFA_NONE; + } + + pdf->version = strdup(file_version); pdf->mode = _PDFIO_MODE_WRITE; pdf->error_cb = error_cb; pdf->error_data = 
error_cbdata; @@ -1580,17 +1630,19 @@ create_common( pdf->crop_box.y2 = 11.0f * 72.0f; } - // Write a standard PDF header... + // Write the PDF header (special case for PCLm, otherwise standard/PDF-A header) if (!strncmp(version, "PCLm-", 5)) { if (!_pdfioFilePrintf(pdf, "%%PDF-1.4\n%%%s\n", version)) goto error; } - else if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version)) + else { - goto error; + if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", pdf->version)) + goto error; } + // Create the pages object... if ((dict = pdfioDictCreate(pdf)) == NULL) goto error; @@ -2692,6 +2744,34 @@ write_metadata(pdfio_file_t *pdf) // I - PDF file status &= pdfioStreamPuts(st, " \n"); #endif // 0 + if (pdf->pdfa != _PDFIO_PDFA_NONE) + { + static const char * const pdfa_versions[] = + { + "1A", // _PDFIO_PDFA_1A + "1B", // _PDFIO_PDFA_1B + "2A", // _PDFIO_PDFA_2A + "2B", // _PDFIO_PDFA_2B + "2U", // _PDFIO_PDFA_2U + "3A", // _PDFIO_PDFA_3A + "3B", // _PDFIO_PDFA_3B + "3U", // _PDFIO_PDFA_3U + "4", // _PDFIO_PDFA_4 + }; + const char *version_info = pdfa_versions[pdf->pdfa - _PDFIO_PDFA_1A]; + const char *conformance; + conformance = version_info + 1; + + status &= pdfioStreamPuts(st, " \n"); + status &= pdfioStreamPrintf(st, " %c\n",version_info[0]); + if (*conformance) + status &= pdfioStreamPrintf(st, " %s\n", conformance); + status &= pdfioStreamPuts(st, " \n"); + } + + + + status &= pdfioStreamPuts(st, " \n"); status &= pdfioStreamPuts(st, "\n"); status &= pdfioStreamPuts(st, "\n"); diff --git a/pdfio-private.h b/pdfio-private.h index 2531a1c..a82e46f 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -105,6 +105,20 @@ typedef enum _pdfio_mode_e // Read/write mode _PDFIO_MODE_WRITE // Write a PDF file } _pdfio_mode_t; +typedef enum _pdfio_pdfa_e // PDF/A version constants +{ + _PDFIO_PDFA_NONE = 0, // Not a PDF/A file + _PDFIO_PDFA_1A, // PDF/A-1a:2005 + _PDFIO_PDFA_1B, // PDF/A-1b:2005 + _PDFIO_PDFA_2A, // PDF/A-2a:2011 + _PDFIO_PDFA_2B, // 
PDF/A-2b:2011 + _PDFIO_PDFA_2U, // PDF/A-2u:2011 + _PDFIO_PDFA_3A, // PDF/A-3a:2012 + _PDFIO_PDFA_3B, // PDF/A-3b:2012 + _PDFIO_PDFA_3U, // PDF/A-3u:2012 + _PDFIO_PDFA_4, // PDF/A-4:2020 +} _pdfio_pdfa_t; + typedef enum _pdfio_predictor_e // PNG predictor constants { _PDFIO_PREDICTOR_NONE = 1, // No predictor (default) @@ -239,6 +253,7 @@ struct _pdfio_file_s // PDF file structure unsigned char file_id[32]; // File identifier bytes struct lconv *loc; // Locale data char *version; // Version number + _pdfio_pdfa_t pdfa; // PDF/A conformance pdfio_rect_t media_box, // Default MediaBox value crop_box; // Default CropBox value _pdfio_mode_t mode; // Read/write mode diff --git a/testpdfio.c b/testpdfio.c index 230ab0a..60035d8 100644 --- a/testpdfio.c +++ b/testpdfio.c @@ -51,7 +51,8 @@ static int write_jpeg_test(pdfio_file_t *pdf, const char *title, int number, pdf static int write_png_tests(pdfio_file_t *pdf, int number, pdfio_obj_t *font); static int write_text_test(pdfio_file_t *pdf, int first_page, pdfio_obj_t *font, const char *filename); static int write_unit_file(pdfio_file_t *inpdf, const char *outname, pdfio_file_t *outpdf, size_t *num_pages, size_t *first_image); - +static int do_pdfa_tests(void); +static int create_pdfa_test_file(const char *filename, const char *pdfa_version); // // 'main()' - Main entry for test program. @@ -128,6 +129,107 @@ main(int argc, // I - Number of command-line arguments return (ret); } +// +// 'create_pdfa_test_file()' - A helper function to generate a simple PDF/A file. 
+// +static int // O - 0 on success, 1 on error +create_pdfa_test_file( + const char *filename, // I - Name of the PDF file to create + const char *pdfa_version) // I - PDF/A version string (e.g., "PDF/A-1b") +{ + pdfio_file_t *pdf; // Output PDF file + pdfio_rect_t media_box = { 0.0, 0.0, 612.0, 792.0 }; + // Media box for US Letter + pdfio_obj_t *font; // Font object + pdfio_dict_t *page_dict; // Page dictionary + pdfio_stream_t *st; // Page content stream + bool error = false; // Error flag + + + testBegin("pdfioFileCreate(%s)", pdfa_version); + + if ((pdf = pdfioFileCreate(filename, pdfa_version, &media_box, NULL, (pdfio_error_cb_t)error_cb, &error)) == NULL) + { + testEnd(false); + return (1); + } + + // Embed a font, which is required for PDF/A + if ((font = pdfioFileCreateFontObjFromFile(pdf, "testfiles/OpenSans-Regular.ttf", false)) == NULL) + { + pdfioFileClose(pdf); + testEnd(false); + return (1); + } + + page_dict = pdfioDictCreate(pdf); + pdfioPageDictAddFont(page_dict, "F1", font); + st = pdfioFileCreatePage(pdf, page_dict); + + pdfioContentSetTextFont(st, "F1", 12.0); + pdfioContentTextBegin(st); + pdfioContentTextMoveTo(st, 72.0, 720.0); + pdfioContentTextShowf(st, false, "This is a compliance test for %s.", pdfa_version); + pdfioContentTextEnd(st); + + pdfioStreamClose(st); + + if (pdfioFileClose(pdf)) + { + testEnd(true); + return (0); + } + else + { + testEnd(false); + return (1); + } +} + + +// +// 'do_pdfa_tests()' - Run PDF/A generation and compliance tests. 
+// +static int // O - 0 on success, 1 on error +do_pdfa_tests(void) +{ + int status = 0; // Overall status + pdfio_file_t *fail_pdf; // PDF file for failure test + pdfio_rect_t media_box = { 0.0, 0.0, 612.0, 792.0 }; + // US Letter media box + bool error = false; // Error flag + + // Test creation of various PDF/A standards + status |= create_pdfa_test_file("testpdfio-pdfa-1b.pdf", "PDF/A-1b"); + status |= create_pdfa_test_file("testpdfio-pdfa-2b.pdf", "PDF/A-2b"); + status |= create_pdfa_test_file("testpdfio-pdfa-2u.pdf", "PDF/A-2u"); + status |= create_pdfa_test_file("testpdfio-pdfa-3b.pdf", "PDF/A-3b"); + status |= create_pdfa_test_file("testpdfio-pdfa-3u.pdf", "PDF/A-3u"); + status |= create_pdfa_test_file("testpdfio-pdfa-4.pdf", "PDF/A-4"); + + // Test that encryption is not allowed for PDF/A files + testBegin("pdfioFileCreate(testpdfio-pdfa-rc4.pdf)"); + if ((fail_pdf = pdfioFileCreate("testpdfio-pdfa-rc4.pdf", "PDF/A-1b", &media_box, NULL, (pdfio_error_cb_t)error_cb, &error)) == NULL) + { + testEndMessage(false, "pdfioFileCreate failed for encryption test."); + return (1); + } + + if (pdfioFileSetPermissions(fail_pdf, PDFIO_PERMISSION_ALL, PDFIO_ENCRYPTION_RC4_128, "owner", "user")) + { + testEndMessage(false, "encryption allowed on PDF/A file"); + status = 1; + } + else + { + // This is the expected outcome + testEnd(true); + } + pdfioFileClose(fail_pdf); + + return (status); +} + // // 'do_crypto_tests()' - Test the various cryptographic functions in PDFio. @@ -1057,6 +1159,8 @@ do_unit_tests(void) if (do_crypto_tests()) return (1); + + // Create a new PDF file... testBegin("pdfioFileCreate(\"testpdfio-out.pdf\", ...)"); if ((outpdf = pdfioFileCreate("testpdfio-out.pdf", NULL, NULL, NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL) @@ -1222,7 +1326,12 @@ do_unit_tests(void) if (read_unit_file(temppdf, num_pages, first_image, false)) return (1); + pdfioFileClose(inpdf); + + // Do PDF/A tests... + if (do_pdfa_tests()) + return (1); return (0);