diff --git a/examples/create_tagged_pdf b/examples/create_tagged_pdf
new file mode 100755
index 0000000..9614c99
Binary files /dev/null and b/examples/create_tagged_pdf differ
diff --git a/examples/create_tagged_pdf.c b/examples/create_tagged_pdf.c
new file mode 100644
index 0000000..96654db
--- /dev/null
+++ b/examples/create_tagged_pdf.c
@@ -0,0 +1,90 @@
+#include <pdfio.h>
+#include <pdfio-content.h>
+#include <stdio.h>
+
+int main(void)
+{
+  // 1. Basic PDF setup
+  pdfio_file_t *pdf;
+  pdfio_rect_t media_box = {0.0, 0.0, 612.0, 792.0}; // US Letter size
+
+  // Create the PDF file
+  if ((pdf = pdfioFileCreate("tagged_document.pdf", "2.0", &media_box, &media_box, NULL, NULL)) == NULL)
+  {
+    puts("Error: Could not create PDF file.");
+    return (1);
+  }
+
+  // 2. Build the Structure Tree Root (StructTreeRoot)
+  // This is the master "table of contents" for all tags.
+  pdfio_dict_t *struct_tree_root_dict = pdfioDictCreate(pdf);
+  pdfioDictSetName(struct_tree_root_dict, "Type", "StructTreeRoot");
+
+  // Create the top-level document element tag: /Document
+  pdfio_dict_t *doc_elem_dict = pdfioDictCreate(pdf);
+  pdfioDictSetName(doc_elem_dict, "Type", "StructElem");
+  pdfioDictSetName(doc_elem_dict, "S", "Document"); // 'S' is the structure type
+
+  // Create the paragraph element tag: /P
+  pdfio_dict_t *p_elem_dict = pdfioDictCreate(pdf);
+  pdfioDictSetName(p_elem_dict, "Type", "StructElem");
+  pdfioDictSetName(p_elem_dict, "S", "P"); // 'S' is the structure type (Paragraph)
+  pdfioDictSetNumber(p_elem_dict, "K", 0); // 'K' is the content, pointing to MCID 0 on the page
+
+  // Link the paragraph as a child of the document element
+  pdfio_array_t *doc_kids = pdfioArrayCreate(pdf);
+  pdfioArrayAppendDict(doc_kids, p_elem_dict);
+  pdfioDictSetArray(doc_elem_dict, "K", doc_kids);
+
+  // Link the document element as a child of the StructTreeRoot
+  pdfio_array_t *root_kids = pdfioArrayCreate(pdf);
+  pdfioArrayAppendDict(root_kids, doc_elem_dict);
+  pdfioDictSetArray(struct_tree_root_dict, "K", root_kids);
+
+  // Create a PDF object for the StructTreeRoot and link it to the main catalog
+  pdfio_obj_t *struct_tree_root_obj = pdfioFileCreateObj(pdf, struct_tree_root_dict);
+  pdfioDictSetObj(pdfioFileGetCatalog(pdf), "StructTreeRoot", struct_tree_root_obj);
+
+  // 3. Create a page and its content
+  pdfio_dict_t *page_dict = pdfioDictCreate(pdf);
+  pdfio_obj_t *helvetica = pdfioFileCreateFontObjFromBase(pdf, "Helvetica");
+  pdfioPageDictAddFont(page_dict, "F1", helvetica);
+
+  pdfio_stream_t *st = pdfioFileCreatePage(pdf, page_dict);
+  if (!st)
+  {
+    puts("Error: Could not create page.");
+    pdfioFileClose(pdf);
+    return (1);
+  }
+
+  // 4. Write the tagged content to the page stream
+  pdfioContentTextBegin(st);
+  pdfioContentSetTextFont(st, "F1", 24.0);
+  pdfioContentTextMoveTo(st, 72.0, 700.0);
+
+  // Create a dictionary for the marked content, specifying the ID
+  pdfio_dict_t *p_mcid_dict = pdfioDictCreate(pdf);
+  pdfioDictSetNumber(p_mcid_dict, "MCID", 0); // This ID must match the 'K' value in the StructElem
+
+  // Use the functions from pdfio-content.c to wrap the text
+  pdfioContentBeginMarked(st, "P", p_mcid_dict); // Start tag for Paragraph
+  pdfioContentTextShow(st, false, "This is a tagged paragraph.");
+  pdfioContentEndMarked(st); // End tag
+
+  pdfioContentTextEnd(st);
+
+  // 5. Finalize and close, reporting a failure from either close call
+  bool closed = pdfioStreamClose(st);
+  if (!pdfioFileClose(pdf))
+    closed = false;
+
+  if (!closed)
+  {
+    puts("Error: Could not finish writing tagged_document.pdf.");
+    return (1);
+  }
+
+  puts("Successfully created tagged_document.pdf");
+  return (0);
+}
diff --git a/examples/pdf2text.c b/examples/pdf2text.c
index 151ca83..fe8be68 100644
--- a/examples/pdf2text.c
+++ b/examples/pdf2text.c
@@ -1417,4 +1417,4 @@ puts_utf16(const char *s) // I - Hex string
 
     put_utf8(ch);
   }
-}
\ No newline at end of file
+}
diff --git a/examples/tagged_document.pdf b/examples/tagged_document.pdf
new file mode 100644
index 0000000..ca13b9f
Binary files /dev/null and b/examples/tagged_document.pdf differ
diff --git a/pdfio-file.c b/pdfio-file.c
index 5d46724..7a45446 100644
--- a/pdfio-file.c
+++ b/pdfio-file.c
@@ -1514,7 +1514,7 @@ create_common(
   unsigned char id_value[16]; // File ID value
   time_t curtime; // Creation date/time
   _pdfio_sha256_t ctx; // Hashing context
-
+  const char *actual_version; // Actual PDF version string
 
   PDFIO_DEBUG("create_common(filename=\"%s\", fd=%d, output_cb=%p, output_cbdata=%p, version=\"%s\", media_box=%p, crop_box=%p, error_cb=%p, error_cbdata=%p)\n", filename, fd, (void *)output_cb, (void *)output_cbdata, version, (void *)media_box, (void *)crop_box, (void *)error_cb, (void *)error_cbdata);
 
@@ -1550,7 +1550,56 @@ create_common(
   pdf->output_cb = output_cb;
   pdf->output_ctx = output_cbdata;
   pdf->filename = strdup(filename);
-  pdf->version = strdup(!strncmp(version, "PCLm-", 5) ? "1.4" : version);
+
+  // Map the requested version string to the PDF version written to the
+  // file and record the PDF/A conformance level, if any.  A missing or
+  // unrecognized conformance letter selects level "b".
+  if (!strncmp(version, "PCLm-", 5))
+  {
+    // "PCLm-" versions are written as PDF 1.4, as before this change
+    actual_version = "1.4";
+    pdf->pdfa = _PDFIO_PDFA_NONE;
+  }
+  else if (!strncmp(version, "PDF/A-1", 7))
+  {
+    actual_version = "1.4";
+    if (version[7] == 'a')
+      pdf->pdfa = _PDFIO_PDFA_1A;
+    else
+      pdf->pdfa = _PDFIO_PDFA_1B; // Default to 'b'
+  }
+  else if (!strncmp(version, "PDF/A-2", 7))
+  {
+    actual_version = "1.7";
+    if (version[7] == 'a')
+      pdf->pdfa = _PDFIO_PDFA_2A;
+    else if (version[7] == 'u')
+      pdf->pdfa = _PDFIO_PDFA_2U;
+    else
+      pdf->pdfa = _PDFIO_PDFA_2B; // Default to 'b'
+  }
+  else if (!strncmp(version, "PDF/A-3", 7))
+  {
+    actual_version = "1.7";
+    if (version[7] == 'a')
+      pdf->pdfa = _PDFIO_PDFA_3A;
+    else if (version[7] == 'u')
+      pdf->pdfa = _PDFIO_PDFA_3U;
+    else
+      pdf->pdfa = _PDFIO_PDFA_3B; // Default to 'b'
+  }
+  else if (!strncmp(version, "PDF/A-4", 7))
+  {
+    actual_version = "2.0";
+    pdf->pdfa = _PDFIO_PDFA_4;
+  }
+  else
+  {
+    actual_version = version;
+    pdf->pdfa = _PDFIO_PDFA_NONE;
+  }
+
+  pdf->version = strdup(actual_version);
   pdf->mode = _PDFIO_MODE_WRITE;
   pdf->error_cb = error_cb;
   pdf->error_data = error_cbdata;
diff --git a/pdfio-private.h b/pdfio-private.h
index 2531a1c..d8f1319 100644
--- a/pdfio-private.h
+++ b/pdfio-private.h
@@ -105,6 +105,20 @@ typedef enum _pdfio_mode_e // Read/write mode
   _PDFIO_MODE_WRITE // Write a PDF file
 } _pdfio_mode_t;
 
+typedef enum _pdfio_pdfa_e // PDF/A conformance levels
+{
+  _PDFIO_PDFA_NONE = 0, // Not a PDF/A file
+  _PDFIO_PDFA_1A, // PDF/A-1a:2005
+  _PDFIO_PDFA_1B, // PDF/A-1b:2005
+  _PDFIO_PDFA_2A, // PDF/A-2a:2011
+  _PDFIO_PDFA_2B, // PDF/A-2b:2011
+  _PDFIO_PDFA_2U, // PDF/A-2u:2011
+  _PDFIO_PDFA_3A, // PDF/A-3a:2012
+  _PDFIO_PDFA_3B, // PDF/A-3b:2012
+  _PDFIO_PDFA_3U, // PDF/A-3u:2012
+  _PDFIO_PDFA_4 // PDF/A-4:2020
+} _pdfio_pdfa_t;
+
 typedef enum _pdfio_predictor_e // PNG predictor constants
 {
   _PDFIO_PREDICTOR_NONE = 1, // No predictor (default)
@@ -239,6 +253,7 @@ struct _pdfio_file_s // PDF file structure
   unsigned char file_id[32]; // File identifier bytes
   struct lconv *loc; // Locale data
   char *version; // Version number
+  _pdfio_pdfa_t pdfa; // PDF/A conformance
   pdfio_rect_t media_box, // Default MediaBox value
       crop_box; // Default CropBox value
   _pdfio_mode_t mode; // Read/write mode