From 10c8d222cf719598ed1e9777ac6a7d37bd3ff9a3 Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Wed, 23 Jun 2021 21:26:01 -0400 Subject: [PATCH] Test both CP1252 and Unicode font rendering. Add cached CP1252 and Unicode font encoding objects. Clean up CP1252 ("simple") TrueType font embedding. --- pdfio-content.c | 316 +++++++++++++++++++++++++++++++++++++++++------- pdfio-private.h | 2 + testpdfio.c | 26 ++-- 3 files changed, 283 insertions(+), 61 deletions(-) diff --git a/pdfio-content.c b/pdfio-content.c index 0a0366f..11a53be 100644 --- a/pdfio-content.c +++ b/pdfio-content.c @@ -1225,9 +1225,7 @@ pdfioFileCreateFontObjFromFile( bool unicode) // I - Force Unicode { ttf_t *font; // TrueType font - int ch, // Current character - firstch, // First character - lastch; // Last character + int ch; // Current character ttf_rect_t bounds; // Font bounds pdfio_dict_t *dict, // Font dictionary *desc, // Font descriptor @@ -1241,6 +1239,233 @@ pdfioFileCreateFontObjFromFile( int fd; // File unsigned char buffer[16384]; // Read buffer ssize_t bytes; // Bytes read + static const char * const cp1252[] = // Glyphs for CP1252 encoding + { + "space", + "exclam", + "quotedbl", + "numbersign", + "dollar", + "percent", + "ampersand", + "quotesingle", + "parenleft", + "parenright", + "asterisk", + "plus", + "comma", + "hyphen", + "period", + "slash", + "zero", + "one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + "colon", + "semicolon", + "less", + "equal", + "greater", + "question", + "at", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "bracketleft", + "backslash", + "bracketright", + "asciicircum", + "underscore", + "grave", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "braceleft", + "bar", + "braceright", + "asciitilde", + "", + "Euro", + "", + "quotesinglbase", + "florin", + "quotedblbase", + "ellipsis", + "dagger", + "daggerdbl", + "circumflex", + "perthousand", + "Scaron", + "guilsinglleft", + "OE", + "", + "Zcaron", + "", + "", + "quoteleft", + "quoteright", + "quotedblleft", + "quotedblright", + "bullet", + "endash", + "emdash", + "tilde", + "trademark", + "scaron", + "guilsinglright", + "oe", + "", + "zcaron", + "Ydieresis", + "space", + "exclamdown", + "cent", + "sterling", + "currency", + "yen", + "brokenbar", + "section", + "dieresis", + "copyright", + "ordfeminine", + "guillemotleft", + "logicalnot", + "minus", + "registered", + "macron", + "degree", + "plusminus", + "twosuperior", + "threesuperior", + "acute", + "mu", + "paragraph", + "periodcentered", + "cedilla", + "onesuperior", + "ordmasculine", + "guillemotright", + "onequarter", + "onehalf", + "threequarters", + "questiondown", + "Agrave", + "Aacute", + "Acircumflex", + "Atilde", + "Adieresis", + "Aring", + "AE", + "Ccedilla", + "Egrave", + "Eacute", + "Ecircumflex", + "Edieresis", + "Igrave", + "Iacute", + "Icircumflex", + "Idieresis", + "Eth", + "Ntilde", + "Ograve", + "Oacute", + "Ocircumflex", + "Otilde", + "Odieresis", + "multiply", + "Oslash", + "Ugrave", + "Uacute", + "Ucircumflex", + "Udieresis", + "Yacute", + "Thorn", + "germandbls", + "agrave", + "aacute", + "acircumflex", + "atilde", + "adieresis", + "aring", + "ae", + "ccedilla", + "egrave", + "eacute", + "ecircumflex", + "edieresis", + "igrave", + "iacute", + "icircumflex", + "idieresis", + "eth", + "ntilde", + "ograve", + "oacute", + "ocircumflex", + "otilde", + "odieresis", + "divide", + "oslash", + "ugrave", + "uacute", + "ucircumflex", + "udieresis", + "yacute", + "thorn", + "ydieresis" + }; // Range check input... @@ -1491,46 +1716,51 @@ pdfioFileCreateFontObjFromFile( else { // Simple (CP1282 or custom encoding) 8-bit font... - pdfio_array_t *widths; // Font widths array - pdfio_obj_t *widths_obj; // Font widths object - - // Create the widths array and object... - if ((widths = pdfioArrayCreate(pdf)) == NULL) + if (ttfGetMaxChar(font) >= 255 && !pdf->cp1252_obj) { - ttfDelete(font); - return (NULL); + bool chindex; // Need character index? + pdfio_dict_t *cp1252_dict; // Encoding dictionary + pdfio_array_t *cp1252_array; // Differences array + + if ((cp1252_dict = pdfioDictCreate(pdf)) == NULL || (cp1252_array = pdfioArrayCreate(pdf)) == NULL) + { + ttfDelete(font); + return (NULL); + } + + for (ch = 0, chindex = true; ch < (int)(sizeof(cp1252) / sizeof(cp1252[0])); ch ++) + { + if (cp1252[ch][0]) + { + // Add this character... + if (chindex) + { + // Add the initial index... + pdfioArrayAppendNumber(cp1252_array, ch + 32); + chindex = false; + } + + pdfioArrayAppendName(cp1252_array, cp1252[ch]); + } + else + { + // Flag that we need a new index... + chindex = true; + } + } + + pdfioDictSetName(cp1252_dict, "Type", "Encoding"); + pdfioDictSetArray(cp1252_dict, "Differences", cp1252_array); + + if ((pdf->cp1252_obj = pdfioFileCreateObj(pdf, cp1252_dict)) == NULL) + { + ttfDelete(font); + return (NULL); + } + + pdfioObjClose(pdf->cp1252_obj); } - firstch = ttfGetMinChar(font); - lastch = ttfGetMaxChar(font); - - if (lastch < 255) - { - // Provide widths for all characters... - for (ch = firstch; ch <= lastch; ch ++) - pdfioArrayAppendNumber(widths, ttfGetWidth(font, ch)); - } - else - { - // Provide widths only for CP1252 characters... - lastch = 255; - - for (ch = firstch; ch < 128; ch ++) - pdfioArrayAppendNumber(widths, ttfGetWidth(font, ch)); - for (; ch < 160; ch ++) - pdfioArrayAppendNumber(widths, ttfGetWidth(font, _pdfio_cp1252[ch - 128])); - for (; ch <= lastch && ch < 128; ch ++) - pdfioArrayAppendNumber(widths, ttfGetWidth(font, ch)); - } - - if ((widths_obj = pdfioFileCreateArrayObj(pdf, widths)) == NULL) - { - ttfDelete(font); - return (NULL); - } - - pdfioObjClose(widths_obj); - // Create a TrueType font object... if ((dict = pdfioDictCreate(pdf)) == NULL) { @@ -1541,12 +1771,10 @@ pdfioFileCreateFontObjFromFile( pdfioDictSetName(dict, "Type", "Font"); pdfioDictSetName(dict, "Subtype", "TrueType"); pdfioDictSetName(dict, "BaseFont", basefont); - pdfioDictSetName(dict, "Encoding", "WinAnsi"); + if (ttfGetMaxChar(font) >= 255) + pdfioDictSetObj(dict, "Encoding", pdf->cp1252_obj); pdfioDictSetObj(dict, "FontDescriptor", desc_obj); - pdfioDictSetNumber(dict, "FirstChar", firstch); - pdfioDictSetNumber(dict, "LastChar", lastch); - pdfioDictSetObj(dict, "Widths", widths_obj); if ((obj = pdfioFileCreateObj(pdf, dict)) == NULL) { diff --git a/pdfio-private.h b/pdfio-private.h index 929b4c8..e3cc1ba 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -224,6 +224,8 @@ struct _pdfio_file_s // PDF file structure pdfio_obj_t *info; // Information object/dictionary pdfio_obj_t *pages_root; // Root pages object pdfio_obj_t *encrypt; // Encryption object/dictionary + pdfio_obj_t *cp1252_obj, // CP1252 font encoding object + *unicode_obj; // Unicode font encoding object pdfio_array_t *id_array; // ID array // Allocated data elements diff --git a/testpdfio.c b/testpdfio.c index d012a36..fb57157 100644 --- a/testpdfio.c +++ b/testpdfio.c @@ -35,7 +35,7 @@ static ssize_t token_peek_cb(const char **s, char *buffer, size_t bytes); static int verify_image(pdfio_file_t *pdf, size_t number); static int write_color_patch(pdfio_stream_t *st, bool device); static int write_color_test(pdfio_file_t *pdf, int number, pdfio_obj_t *font); -static int write_font_test(pdfio_file_t *pdf, int number, pdfio_obj_t *font); +static int write_font_test(pdfio_file_t *pdf, int number, pdfio_obj_t *font, bool unicode); static int write_header_footer(pdfio_stream_t *st, const char *title, int number); static pdfio_obj_t *write_image_object(pdfio_file_t *pdf, _pdfio_predictor_t predictor); static int write_images_test(pdfio_file_t *pdf, int number, pdfio_obj_t *font); @@ -332,11 +332,14 @@ do_unit_tests(void) return (1); // Test TrueType fonts... - if (write_font_test(outpdf, 8, helvetica)) + if (write_font_test(outpdf, 8, helvetica, false)) + return (1); + + if (write_font_test(outpdf, 9, helvetica, true)) return (1); // Print this text file... - if (write_text_test(outpdf, 9, helvetica, "README.md")) + if (write_text_test(outpdf, 10, helvetica, "README.md")) return (1); // Close the test PDF file... @@ -1090,12 +1093,12 @@ write_color_test(pdfio_file_t *pdf, // I - PDF file static int // O - 1 on failure, 0 on success write_font_test(pdfio_file_t *pdf, // I - PDF file int number, // I - Page number - pdfio_obj_t *font) // I - Page number font + pdfio_obj_t *font, // I - Page number font + bool unicode) // I - Use Unicode font? { pdfio_dict_t *dict; // Page dictionary pdfio_stream_t *st; // Page contents stream pdfio_obj_t *opensans; // OpenSans-Regular font - bool unicode; // Unicode? int i; // Looping var static const char * const welcomes[] =// "Welcome" in many languages { @@ -1250,23 +1253,12 @@ write_font_test(pdfio_file_t *pdf, // I - PDF file }; - unicode = true; - -#if 1 fputs("pdfioFileCreateFontObjFromFile(OpenSans-Regular.ttf): ", stdout); if ((opensans = pdfioFileCreateFontObjFromFile(pdf, "testfiles/OpenSans-Regular.ttf", unicode)) != NULL) puts("PASS"); else return (1); -#else - fputs("pdfioFileCreateFontObjFromFile(NotoSansJP-Regular.otf): ", stdout); - if ((opensans = pdfioFileCreateFontObjFromFile(pdf, "testfiles/NotoSansJP-Regular.otf", unicode)) != NULL) - puts("PASS"); - else - return (1); -#endif // 1 - fputs("pdfioDictCreate: ", stdout); if ((dict = pdfioDictCreate(pdf)) != NULL) puts("PASS"); @@ -1292,7 +1284,7 @@ write_font_test(pdfio_file_t *pdf, // I - PDF file else return (1); - if (write_header_footer(st, "TrueType Font Test", number)) + if (write_header_footer(st, unicode ? "Unicode TrueType Font Test" : "CP1252 TrueType Font Test", number)) goto error; fputs("pdfioContentTextBegin(): ", stdout);