Add ToUnicode map.

This commit is contained in:
Michael R Sweet 2023-11-19 20:49:30 -05:00
parent 088646e1cf
commit 97934ab995
No known key found for this signature in database
GPG Key ID: BE67C75EC81F3244
2 changed files with 105 additions and 105 deletions

View File

@ -8,6 +8,7 @@ v1.1.4 (Month DD, YYYY)
- Fixed detection of encrypted strings that are too short (Issue #52) - Fixed detection of encrypted strings that are too short (Issue #52)
- Fixed a TrueType CMAP decoding bug. - Fixed a TrueType CMAP decoding bug.
- Fixed a text rendering issue for Asian text. - Fixed a text rendering issue for Asian text.
- Added a ToUnicode map for Unicode text to support text copying.
v1.1.3 (November 15, 2023) v1.1.3 (November 15, 2023)

View File

@ -1289,13 +1289,13 @@ pdfioFileCreateFontObjFromFile(
pdfio_dict_t *dict, // Font dictionary pdfio_dict_t *dict, // Font dictionary
*desc, // Font descriptor *desc, // Font descriptor
*file; // Font file dictionary *file; // Font file dictionary
pdfio_obj_t *obj, // Font object pdfio_obj_t *obj = NULL, // Font object
*desc_obj, // Font descriptor object *desc_obj, // Font descriptor object
*file_obj; // Font file object *file_obj; // Font file object
const char *basefont; // Base font name const char *basefont; // Base font name
pdfio_array_t *bbox; // Font bounding box array pdfio_array_t *bbox; // Font bounding box array
pdfio_stream_t *st; // Font stream pdfio_stream_t *st; // Font stream
int fd; // File int fd = -1; // File
unsigned char buffer[16384]; // Read buffer unsigned char buffer[16384]; // Read buffer
ssize_t bytes; // Bytes read ssize_t bytes; // Bytes read
@ -1324,48 +1324,32 @@ pdfioFileCreateFontObjFromFile(
// Create the font file dictionary and object... // Create the font file dictionary and object...
if ((file = pdfioDictCreate(pdf)) == NULL) if ((file = pdfioDictCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
close(fd);
return (NULL);
}
pdfioDictSetName(file, "Filter", "FlateDecode"); pdfioDictSetName(file, "Filter", "FlateDecode");
if ((file_obj = pdfioFileCreateObj(pdf, file)) == NULL) if ((file_obj = pdfioFileCreateObj(pdf, file)) == NULL)
{ goto done;
ttfDelete(font);
close(fd);
return (NULL);
}
if ((st = pdfioObjCreateStream(file_obj, PDFIO_FILTER_FLATE)) == NULL) if ((st = pdfioObjCreateStream(file_obj, PDFIO_FILTER_FLATE)) == NULL)
{ goto done;
ttfDelete(font);
close(fd);
return (NULL);
}
while ((bytes = read(fd, buffer, sizeof(buffer))) > 0) while ((bytes = read(fd, buffer, sizeof(buffer))) > 0)
{ {
if (!pdfioStreamWrite(st, buffer, (size_t)bytes)) if (!pdfioStreamWrite(st, buffer, (size_t)bytes))
{ {
ttfDelete(font);
close(fd);
pdfioStreamClose(st); pdfioStreamClose(st);
return (NULL); goto done;
} }
} }
close(fd); close(fd);
fd = -1;
pdfioStreamClose(st); pdfioStreamClose(st);
// Create the font descriptor dictionary and object... // Create the font descriptor dictionary and object...
if ((bbox = pdfioArrayCreate(pdf)) == NULL) if ((bbox = pdfioArrayCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
ttfGetBounds(font, &bounds); ttfGetBounds(font, &bounds);
@ -1375,10 +1359,7 @@ pdfioFileCreateFontObjFromFile(
pdfioArrayAppendNumber(bbox, bounds.top); pdfioArrayAppendNumber(bbox, bounds.top);
if ((desc = pdfioDictCreate(pdf)) == NULL) if ((desc = pdfioDictCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
basefont = pdfioStringCreate(pdf, ttfGetPostScriptName(font)); basefont = pdfioStringCreate(pdf, ttfGetPostScriptName(font));
@ -1397,22 +1378,25 @@ pdfioFileCreateFontObjFromFile(
pdfioDictSetNumber(desc, "StemV", ttfGetWeight(font) / 4 + 25); pdfioDictSetNumber(desc, "StemV", ttfGetWeight(font) / 4 + 25);
if ((desc_obj = pdfioFileCreateObj(pdf, desc)) == NULL) if ((desc_obj = pdfioFileCreateObj(pdf, desc)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
pdfioObjClose(desc_obj); pdfioObjClose(desc_obj);
if (unicode) if (unicode)
{ {
// Unicode (CID) font... // Unicode (CID) font...
pdfio_dict_t *cid2gid; // CIDToGIDMap dictionary pdfio_dict_t *cid2gid, // CIDToGIDMap dictionary
pdfio_obj_t *cid2gid_obj; // CIDToGIDMap object *to_unicode; // ToUnicode dictionary
pdfio_obj_t *cid2gid_obj, // CIDToGIDMap object
*to_unicode_obj;// ToUnicode object
size_t i, // Looping var size_t i, // Looping var
start, // Start character start, // Start character
num_cmap; // Number of CMap entries num_cmap; // Number of CMap entries
const int *cmap; // CMap entries const int *cmap; // CMap entries
int glyph, // Current glyph
min_glyph, // First glyph
max_glyph; // Last glyph
unsigned short glyphs[65536]; // Glyph to Unicode mapping
unsigned char *bufptr, // Pointer into buffer unsigned char *bufptr, // Pointer into buffer
*bufend; // End of buffer *bufend; // End of buffer
pdfio_dict_t *type2; // CIDFontType2 font dictionary pdfio_dict_t *type2; // CIDFontType2 font dictionary
@ -1423,34 +1407,36 @@ pdfioFileCreateFontObjFromFile(
*temp_array; // Temporary width sub-array *temp_array; // Temporary width sub-array
int w0, w1; // Widths int w0, w1; // Widths
// Create a CIDSystemInfo mapping to Adobe UCS2 v0 (Unicode)
if ((sidict = pdfioDictCreate(pdf)) == NULL)
goto done;
pdfioDictSetString(sidict, "Registry", "Adobe");
pdfioDictSetString(sidict, "Ordering", "Identity");
pdfioDictSetNumber(sidict, "Supplement", 0);
// Create a CIDToGIDMap object for the Unicode font... // Create a CIDToGIDMap object for the Unicode font...
if ((cid2gid = pdfioDictCreate(pdf)) == NULL) if ((cid2gid = pdfioDictCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
#ifndef DEBUG #ifndef DEBUG
pdfioDictSetName(cid2gid, "Filter", "FlateDecode"); pdfioDictSetName(cid2gid, "Filter", "FlateDecode");
#endif // !DEBUG #endif // !DEBUG
if ((cid2gid_obj = pdfioFileCreateObj(pdf, cid2gid)) == NULL) if ((cid2gid_obj = pdfioFileCreateObj(pdf, cid2gid)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
#ifdef DEBUG #ifdef DEBUG
if ((st = pdfioObjCreateStream(cid2gid_obj, PDFIO_FILTER_NONE)) == NULL) if ((st = pdfioObjCreateStream(cid2gid_obj, PDFIO_FILTER_NONE)) == NULL)
#else #else
if ((st = pdfioObjCreateStream(cid2gid_obj, PDFIO_FILTER_FLATE)) == NULL) if ((st = pdfioObjCreateStream(cid2gid_obj, PDFIO_FILTER_FLATE)) == NULL)
#endif // DEBUG #endif // DEBUG
{ goto done;
ttfDelete(font);
return (NULL);
}
cmap = ttfGetCMap(font, &num_cmap); cmap = ttfGetCMap(font, &num_cmap);
min_glyph = 65536;
max_glyph = 0;
memset(glyphs, 0, sizeof(glyphs));
PDFIO_DEBUG("pdfioFileCreateFontObjFromFile: num_cmap=%u\n", (unsigned)num_cmap); PDFIO_DEBUG("pdfioFileCreateFontObjFromFile: num_cmap=%u\n", (unsigned)num_cmap);
@ -1468,6 +1454,12 @@ pdfioFileCreateFontObjFromFile(
// Map to specified glyph... // Map to specified glyph...
*bufptr++ = (unsigned char)(cmap[i] >> 8); *bufptr++ = (unsigned char)(cmap[i] >> 8);
*bufptr++ = (unsigned char)(cmap[i] & 255); *bufptr++ = (unsigned char)(cmap[i] & 255);
glyphs[cmap[i]] = i;
if (cmap[i] < min_glyph)
min_glyph = cmap[i];
if (cmap[i] > max_glyph)
max_glyph = cmap[i];
} }
if (bufptr >= bufend) if (bufptr >= bufend)
@ -1476,8 +1468,7 @@ pdfioFileCreateFontObjFromFile(
if (!pdfioStreamWrite(st, buffer, (size_t)(bufptr - buffer))) if (!pdfioStreamWrite(st, buffer, (size_t)(bufptr - buffer)))
{ {
pdfioStreamClose(st); pdfioStreamClose(st);
ttfDelete(font); goto done;
return (NULL);
} }
bufptr = buffer; bufptr = buffer;
@ -1490,32 +1481,64 @@ pdfioFileCreateFontObjFromFile(
if (!pdfioStreamWrite(st, buffer, (size_t)(bufptr - buffer))) if (!pdfioStreamWrite(st, buffer, (size_t)(bufptr - buffer)))
{ {
pdfioStreamClose(st); pdfioStreamClose(st);
ttfDelete(font); goto done;
return (NULL);
} }
} }
pdfioStreamClose(st); pdfioStreamClose(st);
// ToUnicode mapping object
to_unicode = pdfioDictCreate(pdf);
pdfioDictSetName(to_unicode, "Type", "CMap");
pdfioDictSetName(to_unicode, "CMapName", "Adobe-Identity-UCS2");
pdfioDictSetDict(to_unicode, "CIDSystemInfo", sidict);
#ifndef DEBUG
pdfioDictSetName(to_unicode, "Filter", "FlateDecode");
#endif // !DEBUG
if ((to_unicode_obj = pdfioFileCreateObj(pdf, to_unicode)) == NULL)
goto done;
#ifdef DEBUG
if ((st = pdfioObjCreateStream(to_unicode_obj, PDFIO_FILTER_NONE)) == NULL)
#else
if ((st = pdfioObjCreateStream(to_unicode_obj, PDFIO_FILTER_FLATE)) == NULL)
#endif // DEBUG
goto done;
pdfioStreamPuts(st,
"stream\n"
"/CIDInit /ProcSet findresource begin\n"
"12 dict begin\n"
"begincmap\n"
"/CIDSystemInfo<<\n"
"/Registry (Adobe)\n"
"/Ordering (UCS2)\n"
"/Supplement 0\n"
">> def\n"
"/CMapName /Adobe-Identity-UCS2 def\n"
"/CMapType 2 def\n"
"1 begincodespacerange\n"
"<0000> <FFFF>\n"
"endcodespacerange\n"
"1 beginbfrange\n"
"<0000> <FFFF> <0000>\n"
"endbfrange\n"
"endcmap\n"
"CMapName currentdict /CMap defineresource pop\n"
"end\n"
"end\n");
pdfioStreamClose(st);
// Create a CIDFontType2 dictionary for the Unicode font... // Create a CIDFontType2 dictionary for the Unicode font...
if ((type2 = pdfioDictCreate(pdf)) == NULL) if ((type2 = pdfioDictCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
if ((sidict = pdfioDictCreate(pdf)) == NULL)
{
ttfDelete(font);
return (NULL);
}
// Width array // Width array
if ((w_array = pdfioArrayCreate(pdf)) == NULL) if ((w_array = pdfioArrayCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
for (start = 0, w0 = ttfGetWidth(font, 0), i = 1; i < 65536; start = i, w0 = w1, i ++) for (start = 0, w0 = ttfGetWidth(font, 0), i = 1; i < 65536; start = i, w0 = w1, i ++)
{ {
@ -1535,10 +1558,7 @@ pdfioFileCreateFontObjFromFile(
pdfioArrayAppendNumber(w_array, start); pdfioArrayAppendNumber(w_array, start);
if ((temp_array = pdfioArrayCreate(pdf)) == NULL) if ((temp_array = pdfioArrayCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
pdfioArrayAppendNumber(temp_array, w0); pdfioArrayAppendNumber(temp_array, w0);
for (w0 = w1, i ++; i < 65536; w0 = w1, i ++) for (w0 = w1, i ++; i < 65536; w0 = w1, i ++)
@ -1558,11 +1578,6 @@ pdfioFileCreateFontObjFromFile(
} }
} }
// CIDSystemInfo mapping to Adobe UCS2 v0 (Unicode)
pdfioDictSetString(sidict, "Registry", "Adobe");
pdfioDictSetString(sidict, "Ordering", "Identity");
pdfioDictSetNumber(sidict, "Supplement", 0);
// Then the dictionary for the CID base font... // Then the dictionary for the CID base font...
pdfioDictSetName(type2, "Type", "Font"); pdfioDictSetName(type2, "Type", "Font");
pdfioDictSetName(type2, "Subtype", "CIDFontType2"); pdfioDictSetName(type2, "Subtype", "CIDFontType2");
@ -1573,54 +1588,38 @@ pdfioFileCreateFontObjFromFile(
pdfioDictSetArray(type2, "W", w_array); pdfioDictSetArray(type2, "W", w_array);
if ((type2_obj = pdfioFileCreateObj(pdf, type2)) == NULL) if ((type2_obj = pdfioFileCreateObj(pdf, type2)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
pdfioObjClose(type2_obj); pdfioObjClose(type2_obj);
// Create a Type 0 font object... // Create a Type 0 font object...
if ((descendants = pdfioArrayCreate(pdf)) == NULL) if ((descendants = pdfioArrayCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
pdfioArrayAppendObj(descendants, type2_obj); pdfioArrayAppendObj(descendants, type2_obj);
if ((dict = pdfioDictCreate(pdf)) == NULL) if ((dict = pdfioDictCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
pdfioDictSetName(dict, "Type", "Font"); pdfioDictSetName(dict, "Type", "Font");
pdfioDictSetName(dict, "Subtype", "Type0"); pdfioDictSetName(dict, "Subtype", "Type0");
pdfioDictSetName(dict, "BaseFont", basefont); pdfioDictSetName(dict, "BaseFont", basefont);
pdfioDictSetArray(dict, "DescendantFonts", descendants); pdfioDictSetArray(dict, "DescendantFonts", descendants);
pdfioDictSetName(dict, "Encoding", "Identity-H"); pdfioDictSetName(dict, "Encoding", "Identity-H");
pdfioDictSetObj(dict, "ToUnicode", to_unicode_obj);
if ((obj = pdfioFileCreateObj(pdf, dict)) == NULL) if ((obj = pdfioFileCreateObj(pdf, dict)) != NULL)
return (NULL); pdfioObjClose(obj);
pdfioObjClose(obj);
} }
else else
{ {
// Simple (CP1252 or custom encoding) 8-bit font... // Simple (CP1252 or custom encoding) 8-bit font...
if (ttfGetMaxChar(font) >= 255 && !pdf->cp1252_obj && !create_cp1252(pdf)) if (ttfGetMaxChar(font) >= 255 && !pdf->cp1252_obj && !create_cp1252(pdf))
{ goto done;
ttfDelete(font);
return (NULL);
}
// Create a TrueType font object... // Create a TrueType font object...
if ((dict = pdfioDictCreate(pdf)) == NULL) if ((dict = pdfioDictCreate(pdf)) == NULL)
{ goto done;
ttfDelete(font);
return (NULL);
}
pdfioDictSetName(dict, "Type", "Font"); pdfioDictSetName(dict, "Type", "Font");
pdfioDictSetName(dict, "Subtype", "TrueType"); pdfioDictSetName(dict, "Subtype", "TrueType");
@ -1630,15 +1629,15 @@ pdfioFileCreateFontObjFromFile(
pdfioDictSetObj(dict, "FontDescriptor", desc_obj); pdfioDictSetObj(dict, "FontDescriptor", desc_obj);
if ((obj = pdfioFileCreateObj(pdf, dict)) == NULL) if ((obj = pdfioFileCreateObj(pdf, dict)) != NULL)
{ pdfioObjClose(obj);
ttfDelete(font);
return (NULL);
}
pdfioObjClose(obj);
} }
done:
if (fd >= 0)
close(fd);
ttfDelete(font); ttfDelete(font);
return (obj); return (obj);