Merge pull request #137 from vididvidid/feature/pdfa-subset-support

Feature: Add method for creating PDF/A compliant files
This commit is contained in:
Michael R Sweet
2025-10-05 13:43:10 -04:00
committed by GitHub
6 changed files with 234 additions and 13 deletions

View File

@@ -1417,4 +1417,4 @@ puts_utf16(const char *s) // I - Hex string
put_utf8(ch);
}
}
}

Binary file not shown.

View File

@@ -1709,6 +1709,11 @@ pdfioFileCreateFontObjFromBase(
pdfio_dict_t *dict; // Font dictionary
pdfio_obj_t *obj; // Font object
if (pdf && pdf->pdfa != _PDFIO_PDFA_NONE)
{
_pdfioFileError(pdf, "Base fonts are not allowed in PDF/A files; use pdfioFileCreateFontObjFromFile to embed a font.");
return (NULL);
}
if ((dict = pdfioDictCreate(pdf)) == NULL)
return (NULL);
@@ -2073,6 +2078,12 @@ pdfioFileCreateImageObjFromData(
};
if (pdf && (pdf->pdfa == _PDFIO_PDFA_1A || pdf->pdfa == _PDFIO_PDFA_1B) && alpha)
{
_pdfioFileError(pdf, "Images with transparency (alpha channels) are not allowed in PDF/A-1 files.");
return (NULL);
}
// Range check input...
if (!pdf || !data || !width || !height || num_colors < 1 || num_colors == 2 || num_colors > 4)
return (NULL);
@@ -2739,6 +2750,12 @@ copy_png(pdfio_dict_t *dict, // I - Dictionary
depth = png_get_bit_depth(pp, info);
color_type = png_get_color_type(pp, info);
if ((dict->pdf->pdfa == _PDFIO_PDFA_1A || dict->pdf->pdfa == _PDFIO_PDFA_1B) && (color_type & PNG_COLOR_MASK_ALPHA))
{
_pdfioFileError(dict->pdf, "PNG images with transparency (alpha channels) are not allowed in PDF/A-1 files.");
goto finish_png;
}
if (color_type & PNG_COLOR_MASK_PALETTE)
num_colors = 1;
else if (color_type & PNG_COLOR_MASK_COLOR)

View File

@@ -1114,8 +1114,9 @@ pdfioFileOpen(
char message[8192]; // Message string
temp.filename = (char *)filename;
snprintf(message, sizeof(message), "Unable to allocate memory for PDF file - %s", strerror(errno));
snprintf(message, sizeof(message), "Unable to allocate memory for PDF file: %s", strerror(errno));
(error_cb)(&temp, message, error_cbdata);
return (NULL);
}
@@ -1328,21 +1329,26 @@ pdfioFileSetPermissions(
if (!pdf)
return (false);
if (pdf->num_objs > 3) // First three objects are pages, info, and root
if (pdf->pdfa != _PDFIO_PDFA_NONE && encryption != PDFIO_ENCRYPTION_NONE)
{
_pdfioFileError(pdf, "You must call pdfioFileSetPermissions before adding any objects.");
_pdfioFileError(pdf, "Encryption is not allowed for PDF/A files.");
return (false);
}
if (encryption == PDFIO_ENCRYPTION_NONE)
return (true);
if (pdf->num_objs > 3) // First three objects are pages, info, and root
{
_pdfioFileError(pdf, "You must call pdfioFileSetPermissions before adding any objects.");
return (false);
}
pdf->encrypt_metadata = true;
return (_pdfioCryptoLock(pdf, permissions, encryption, owner_password, user_password));
}
//
// 'pdfioFileSetSubject()' - Set the subject for a PDF file.
//
@@ -1514,7 +1520,7 @@ create_common(
unsigned char id_value[16]; // File ID value
time_t curtime; // Creation date/time
_pdfio_sha256_t ctx; // Hashing context
const char *file_version; // Actual PDF version string
PDFIO_DEBUG("create_common(filename=\"%s\", fd=%d, output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, fd, (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
@@ -1522,13 +1528,12 @@ create_common(
if (!filename || (fd < 0 && !output_cb))
return (NULL);
if (!version)
version = "2.0";
if (!error_cb)
{
error_cb = _pdfioFileDefaultError;
error_cbdata = NULL;
}
// Allocate a PDF file structure...
@@ -1550,7 +1555,52 @@ create_common(
pdf->output_cb = output_cb;
pdf->output_ctx = output_cbdata;
pdf->filename = strdup(filename);
pdf->version = strdup(!strncmp(version, "PCLm-", 5) ? "1.4" : version);
if (!version)
{
version = "2.0";
}
if (!strncmp(version, "PDF/A-1", 7))
{
file_version = "1.4";
if (version[7] == 'a')
pdf->pdfa = _PDFIO_PDFA_1A;
else
pdf->pdfa = _PDFIO_PDFA_1B; // Default to 'b'
}
else if (!strncmp(version, "PDF/A-2", 7))
{
file_version = "1.7";
if (version[7] == 'a')
pdf->pdfa = _PDFIO_PDFA_2A;
else if (version[7] == 'u')
pdf->pdfa = _PDFIO_PDFA_2U;
else
pdf->pdfa = _PDFIO_PDFA_2B; // Default to 'b'
}
else if (!strncmp(version, "PDF/A-3", 7))
{
file_version = "1.7";
if (version[7] == 'a')
pdf->pdfa = _PDFIO_PDFA_3A;
else if (version[7] == 'u')
pdf->pdfa = _PDFIO_PDFA_3U;
else
pdf->pdfa = _PDFIO_PDFA_3B; // Default to 'b'
}
else if (!strncmp(version, "PDF/A-4", 7))
{
file_version = "2.0";
pdf->pdfa = _PDFIO_PDFA_4;
}
else
{
file_version = version;
pdf->pdfa = _PDFIO_PDFA_NONE;
}
pdf->version = strdup(file_version);
pdf->mode = _PDFIO_MODE_WRITE;
pdf->error_cb = error_cb;
pdf->error_data = error_cbdata;
@@ -1580,17 +1630,19 @@ create_common(
pdf->crop_box.y2 = 11.0f * 72.0f;
}
// Write a standard PDF header...
// Write the PDF header (special case for PCLm, otherwise standard/PDF-A header)
if (!strncmp(version, "PCLm-", 5))
{
if (!_pdfioFilePrintf(pdf, "%%PDF-1.4\n%%%s\n", version))
goto error;
}
else if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version))
else
{
goto error;
if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", pdf->version))
goto error;
}
// Create the pages object...
if ((dict = pdfioDictCreate(pdf)) == NULL)
goto error;
@@ -2692,6 +2744,34 @@ write_metadata(pdfio_file_t *pdf) // I - PDF file
status &= pdfioStreamPuts(st, " </rdf:Description>\n");
#endif // 0
if (pdf->pdfa != _PDFIO_PDFA_NONE)
{
static const char * const pdfa_versions[] =
{
"1A", // _PDFIO_PDFA_1A
"1B", // _PDFIO_PDFA_1B
"2A", // _PDFIO_PDFA_2A
"2B", // _PDFIO_PDFA_2B
"2U", // _PDFIO_PDFA_2U
"3A", // _PDFIO_PDFA_3A
"3B", // _PDFIO_PDFA_3B
"3U", // _PDFIO_PDFA_3U
"4", // _PDFIO_PDFA_4
};
const char *version_info = pdfa_versions[pdf->pdfa - _PDFIO_PDFA_1A];
const char *conformance;
conformance = version_info + 1;
status &= pdfioStreamPuts(st, " <rdf:Description rdf:about=\"\" xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n");
status &= pdfioStreamPrintf(st, " <pdfaid:part>%c</pdfaid:part>\n",version_info[0]);
if (*conformance)
status &= pdfioStreamPrintf(st, " <pdfaid:conformance>%s</pdfaid:conformance>\n", conformance);
status &= pdfioStreamPuts(st, " </rdf:Description>\n");
}
status &= pdfioStreamPuts(st, " </rdf:RDF>\n");
status &= pdfioStreamPuts(st, "</x:xmpmeta>\n");
status &= pdfioStreamPuts(st, "<?xpacket end=\"r\"?>\n");

View File

@@ -105,6 +105,20 @@ typedef enum _pdfio_mode_e // Read/write mode
_PDFIO_MODE_WRITE // Write a PDF file
} _pdfio_mode_t;
// PDF/A conformance level requested for a file.
//
// The enumerators are numbered explicitly because write_metadata() indexes
// its table of part/conformance strings with "pdfa - _PDFIO_PDFA_1A"; the
// values from _PDFIO_PDFA_1A through _PDFIO_PDFA_4 must therefore stay
// contiguous and in this order.
typedef enum _pdfio_pdfa_e
{
  _PDFIO_PDFA_NONE = 0,			// Not a PDF/A file
  _PDFIO_PDFA_1A   = 1,			// PDF/A-1a:2005
  _PDFIO_PDFA_1B   = 2,			// PDF/A-1b:2005
  _PDFIO_PDFA_2A   = 3,			// PDF/A-2a:2011
  _PDFIO_PDFA_2B   = 4,			// PDF/A-2b:2011
  _PDFIO_PDFA_2U   = 5,			// PDF/A-2u:2011
  _PDFIO_PDFA_3A   = 6,			// PDF/A-3a:2012
  _PDFIO_PDFA_3B   = 7,			// PDF/A-3b:2012
  _PDFIO_PDFA_3U   = 8,			// PDF/A-3u:2012
  _PDFIO_PDFA_4    = 9			// PDF/A-4:2020
} _pdfio_pdfa_t;
typedef enum _pdfio_predictor_e // PNG predictor constants
{
_PDFIO_PREDICTOR_NONE = 1, // No predictor (default)
@@ -239,6 +253,7 @@ struct _pdfio_file_s // PDF file structure
unsigned char file_id[32]; // File identifier bytes
struct lconv *loc; // Locale data
char *version; // Version number
_pdfio_pdfa_t pdfa; // PDF/A conformance
pdfio_rect_t media_box, // Default MediaBox value
crop_box; // Default CropBox value
_pdfio_mode_t mode; // Read/write mode

View File

@@ -51,7 +51,8 @@ static int write_jpeg_test(pdfio_file_t *pdf, const char *title, int number, pdf
static int write_png_tests(pdfio_file_t *pdf, int number, pdfio_obj_t *font);
static int write_text_test(pdfio_file_t *pdf, int first_page, pdfio_obj_t *font, const char *filename);
static int write_unit_file(pdfio_file_t *inpdf, const char *outname, pdfio_file_t *outpdf, size_t *num_pages, size_t *first_image);
static int do_pdfa_tests(void);
static int create_pdfa_test_file(const char *filename, const char *pdfa_version);
//
// 'main()' - Main entry for test program.
@@ -128,6 +129,107 @@ main(int argc, // I - Number of command-line arguments
return (ret);
}
//
// 'create_pdfa_test_file()' - A helper function to generate a simple PDF/A file.
//
// Creates "filename" with the requested PDF/A version string, embeds the
// OpenSans test font, and writes one page containing a single line of text.
// Every PDFio call is checked; the test is reported as failed (and the file
// is still closed) as soon as any step fails.
//

static int				// O - 0 on success, 1 on error
create_pdfa_test_file(
    const char *filename,		// I - Name of the PDF file to create
    const char *pdfa_version)		// I - PDF/A version string (e.g., "PDF/A-1b")
{
  pdfio_file_t		*pdf;		// Output PDF file
  pdfio_rect_t		media_box = { 0.0, 0.0, 612.0, 792.0 };
					// Media box for US Letter
  pdfio_obj_t		*font;		// Font object
  pdfio_dict_t		*page_dict;	// Page dictionary
  pdfio_stream_t	*st;		// Page content stream
  bool			error = false;	// Error flag handed to error_cb as its data pointer
  bool			ok;		// Did all content operations succeed?


  testBegin("pdfioFileCreate(%s)", pdfa_version);

  if ((pdf = pdfioFileCreate(filename, pdfa_version, &media_box, NULL, (pdfio_error_cb_t)error_cb, &error)) == NULL)
  {
    // Nothing to clean up when the file could not be created...
    testEnd(false);
    return (1);
  }

  // Embed a font, which is required for PDF/A
  if ((font = pdfioFileCreateFontObjFromFile(pdf, "testfiles/OpenSans-Regular.ttf", false)) == NULL)
    goto fail;

  // Build the page resources...
  if ((page_dict = pdfioDictCreate(pdf)) == NULL)
    goto fail;

  if (!pdfioPageDictAddFont(page_dict, "F1", font))
    goto fail;

  // Write the page content...
  if ((st = pdfioFileCreatePage(pdf, page_dict)) == NULL)
    goto fail;

  ok = pdfioContentSetTextFont(st, "F1", 12.0);
  ok = ok && pdfioContentTextBegin(st);
  ok = ok && pdfioContentTextMoveTo(st, 72.0, 720.0);
  ok = ok && pdfioContentTextShowf(st, false, "This is a compliance test for %s.", pdfa_version);
  ok = ok && pdfioContentTextEnd(st);

  // Always close the stream, even when a content operation failed...
  if (!pdfioStreamClose(st))
    ok = false;

  if (!ok)
    goto fail;

  // Closing the file writes the remaining objects; its result decides the test
  if (!pdfioFileClose(pdf))
  {
    testEnd(false);
    return (1);
  }

  testEnd(true);
  return (0);

  // Common failure path: release the file and report the failed test
  fail:

  pdfioFileClose(pdf);
  testEnd(false);
  return (1);
}
//
// 'do_pdfa_tests()' - Run PDF/A generation and compliance tests.
//
static int // O - 0 on success, 1 on error
do_pdfa_tests(void)
{
int status = 0; // Overall status
pdfio_file_t *fail_pdf; // PDF file for failure test
pdfio_rect_t media_box = { 0.0, 0.0, 612.0, 792.0 };
// US Letter media box
bool error = false; // Error flag
// Test creation of various PDF/A standards
status |= create_pdfa_test_file("testpdfio-pdfa-1b.pdf", "PDF/A-1b");
status |= create_pdfa_test_file("testpdfio-pdfa-2b.pdf", "PDF/A-2b");
status |= create_pdfa_test_file("testpdfio-pdfa-2u.pdf", "PDF/A-2u");
status |= create_pdfa_test_file("testpdfio-pdfa-3b.pdf", "PDF/A-3b");
status |= create_pdfa_test_file("testpdfio-pdfa-3u.pdf", "PDF/A-3u");
status |= create_pdfa_test_file("testpdfio-pdfa-4.pdf", "PDF/A-4");
// Test that encryption is not allowed for PDF/A files
testBegin("pdfioFileCreate(testpdfio-pdfa-rc4.pdf)");
if ((fail_pdf = pdfioFileCreate("testpdfio-pdfa-rc4.pdf", "PDF/A-1b", &media_box, NULL, (pdfio_error_cb_t)error_cb, &error)) == NULL)
{
testEndMessage(false, "pdfioFileCreate failed for encryption test.");
return (1);
}
if (pdfioFileSetPermissions(fail_pdf, PDFIO_PERMISSION_ALL, PDFIO_ENCRYPTION_RC4_128, "owner", "user"))
{
testEndMessage(false, "encryption allowed on PDF/A file");
status = 1;
}
else
{
// This is the expected outcome
testEnd(true);
}
pdfioFileClose(fail_pdf);
return (status);
}
//
// 'do_crypto_tests()' - Test the various cryptographic functions in PDFio.
@@ -1057,6 +1159,8 @@ do_unit_tests(void)
if (do_crypto_tests())
return (1);
// Create a new PDF file...
testBegin("pdfioFileCreate(\"testpdfio-out.pdf\", ...)");
if ((outpdf = pdfioFileCreate("testpdfio-out.pdf", NULL, NULL, NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL)
@@ -1222,7 +1326,12 @@ do_unit_tests(void)
if (read_unit_file(temppdf, num_pages, first_image, false))
return (1);
pdfioFileClose(inpdf);
// Do PDF/A tests...
if (do_pdfa_tests())
return (1);
return (0);