mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2024-12-26 13:28:22 +01:00
Add ToUnicode map.
This commit is contained in:
parent
088646e1cf
commit
97934ab995
@ -8,6 +8,7 @@ v1.1.4 (Month DD, YYYY)
|
|||||||
- Fixed detection of encrypted strings that are too short (Issue #52)
|
- Fixed detection of encrypted strings that are too short (Issue #52)
|
||||||
- Fixed a TrueType CMAP decoding bug.
|
- Fixed a TrueType CMAP decoding bug.
|
||||||
- Fixed a text rendering issue for Asian text.
|
- Fixed a text rendering issue for Asian text.
|
||||||
|
- Added a ToUnicode map for Unicode text to support text copying.
|
||||||
|
|
||||||
|
|
||||||
v1.1.3 (November 15, 2023)
|
v1.1.3 (November 15, 2023)
|
||||||
|
209
pdfio-content.c
209
pdfio-content.c
@ -1289,13 +1289,13 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
pdfio_dict_t *dict, // Font dictionary
|
pdfio_dict_t *dict, // Font dictionary
|
||||||
*desc, // Font descriptor
|
*desc, // Font descriptor
|
||||||
*file; // Font file dictionary
|
*file; // Font file dictionary
|
||||||
pdfio_obj_t *obj, // Font object
|
pdfio_obj_t *obj = NULL, // Font object
|
||||||
*desc_obj, // Font descriptor object
|
*desc_obj, // Font descriptor object
|
||||||
*file_obj; // Font file object
|
*file_obj; // Font file object
|
||||||
const char *basefont; // Base font name
|
const char *basefont; // Base font name
|
||||||
pdfio_array_t *bbox; // Font bounding box array
|
pdfio_array_t *bbox; // Font bounding box array
|
||||||
pdfio_stream_t *st; // Font stream
|
pdfio_stream_t *st; // Font stream
|
||||||
int fd; // File
|
int fd = -1; // File
|
||||||
unsigned char buffer[16384]; // Read buffer
|
unsigned char buffer[16384]; // Read buffer
|
||||||
ssize_t bytes; // Bytes read
|
ssize_t bytes; // Bytes read
|
||||||
|
|
||||||
@ -1324,48 +1324,32 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
|
|
||||||
// Create the font file dictionary and object...
|
// Create the font file dictionary and object...
|
||||||
if ((file = pdfioDictCreate(pdf)) == NULL)
|
if ((file = pdfioDictCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
close(fd);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pdfioDictSetName(file, "Filter", "FlateDecode");
|
pdfioDictSetName(file, "Filter", "FlateDecode");
|
||||||
|
|
||||||
if ((file_obj = pdfioFileCreateObj(pdf, file)) == NULL)
|
if ((file_obj = pdfioFileCreateObj(pdf, file)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
close(fd);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((st = pdfioObjCreateStream(file_obj, PDFIO_FILTER_FLATE)) == NULL)
|
if ((st = pdfioObjCreateStream(file_obj, PDFIO_FILTER_FLATE)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
close(fd);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((bytes = read(fd, buffer, sizeof(buffer))) > 0)
|
while ((bytes = read(fd, buffer, sizeof(buffer))) > 0)
|
||||||
{
|
{
|
||||||
if (!pdfioStreamWrite(st, buffer, (size_t)bytes))
|
if (!pdfioStreamWrite(st, buffer, (size_t)bytes))
|
||||||
{
|
{
|
||||||
ttfDelete(font);
|
|
||||||
close(fd);
|
|
||||||
pdfioStreamClose(st);
|
pdfioStreamClose(st);
|
||||||
return (NULL);
|
goto done;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
close(fd);
|
close(fd);
|
||||||
|
fd = -1;
|
||||||
pdfioStreamClose(st);
|
pdfioStreamClose(st);
|
||||||
|
|
||||||
// Create the font descriptor dictionary and object...
|
// Create the font descriptor dictionary and object...
|
||||||
if ((bbox = pdfioArrayCreate(pdf)) == NULL)
|
if ((bbox = pdfioArrayCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
ttfGetBounds(font, &bounds);
|
ttfGetBounds(font, &bounds);
|
||||||
|
|
||||||
@ -1375,10 +1359,7 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
pdfioArrayAppendNumber(bbox, bounds.top);
|
pdfioArrayAppendNumber(bbox, bounds.top);
|
||||||
|
|
||||||
if ((desc = pdfioDictCreate(pdf)) == NULL)
|
if ((desc = pdfioDictCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
basefont = pdfioStringCreate(pdf, ttfGetPostScriptName(font));
|
basefont = pdfioStringCreate(pdf, ttfGetPostScriptName(font));
|
||||||
|
|
||||||
@ -1397,22 +1378,25 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
pdfioDictSetNumber(desc, "StemV", ttfGetWeight(font) / 4 + 25);
|
pdfioDictSetNumber(desc, "StemV", ttfGetWeight(font) / 4 + 25);
|
||||||
|
|
||||||
if ((desc_obj = pdfioFileCreateObj(pdf, desc)) == NULL)
|
if ((desc_obj = pdfioFileCreateObj(pdf, desc)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pdfioObjClose(desc_obj);
|
pdfioObjClose(desc_obj);
|
||||||
|
|
||||||
if (unicode)
|
if (unicode)
|
||||||
{
|
{
|
||||||
// Unicode (CID) font...
|
// Unicode (CID) font...
|
||||||
pdfio_dict_t *cid2gid; // CIDToGIDMap dictionary
|
pdfio_dict_t *cid2gid, // CIDToGIDMap dictionary
|
||||||
pdfio_obj_t *cid2gid_obj; // CIDToGIDMap object
|
*to_unicode; // ToUnicode dictionary
|
||||||
|
pdfio_obj_t *cid2gid_obj, // CIDToGIDMap object
|
||||||
|
*to_unicode_obj;// ToUnicode object
|
||||||
size_t i, // Looping var
|
size_t i, // Looping var
|
||||||
start, // Start character
|
start, // Start character
|
||||||
num_cmap; // Number of CMap entries
|
num_cmap; // Number of CMap entries
|
||||||
const int *cmap; // CMap entries
|
const int *cmap; // CMap entries
|
||||||
|
int glyph, // Current glyph
|
||||||
|
min_glyph, // First glyph
|
||||||
|
max_glyph; // Last glyph
|
||||||
|
unsigned short glyphs[65536]; // Glyph to Unicode mapping
|
||||||
unsigned char *bufptr, // Pointer into buffer
|
unsigned char *bufptr, // Pointer into buffer
|
||||||
*bufend; // End of buffer
|
*bufend; // End of buffer
|
||||||
pdfio_dict_t *type2; // CIDFontType2 font dictionary
|
pdfio_dict_t *type2; // CIDFontType2 font dictionary
|
||||||
@ -1423,34 +1407,36 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
*temp_array; // Temporary width sub-array
|
*temp_array; // Temporary width sub-array
|
||||||
int w0, w1; // Widths
|
int w0, w1; // Widths
|
||||||
|
|
||||||
|
// Create a CIDSystemInfo mapping to Adobe UCS2 v0 (Unicode)
|
||||||
|
if ((sidict = pdfioDictCreate(pdf)) == NULL)
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
pdfioDictSetString(sidict, "Registry", "Adobe");
|
||||||
|
pdfioDictSetString(sidict, "Ordering", "Identity");
|
||||||
|
pdfioDictSetNumber(sidict, "Supplement", 0);
|
||||||
|
|
||||||
// Create a CIDToGIDMap object for the Unicode font...
|
// Create a CIDToGIDMap object for the Unicode font...
|
||||||
if ((cid2gid = pdfioDictCreate(pdf)) == NULL)
|
if ((cid2gid = pdfioDictCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef DEBUG
|
#ifndef DEBUG
|
||||||
pdfioDictSetName(cid2gid, "Filter", "FlateDecode");
|
pdfioDictSetName(cid2gid, "Filter", "FlateDecode");
|
||||||
#endif // !DEBUG
|
#endif // !DEBUG
|
||||||
|
|
||||||
if ((cid2gid_obj = pdfioFileCreateObj(pdf, cid2gid)) == NULL)
|
if ((cid2gid_obj = pdfioFileCreateObj(pdf, cid2gid)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if ((st = pdfioObjCreateStream(cid2gid_obj, PDFIO_FILTER_NONE)) == NULL)
|
if ((st = pdfioObjCreateStream(cid2gid_obj, PDFIO_FILTER_NONE)) == NULL)
|
||||||
#else
|
#else
|
||||||
if ((st = pdfioObjCreateStream(cid2gid_obj, PDFIO_FILTER_FLATE)) == NULL)
|
if ((st = pdfioObjCreateStream(cid2gid_obj, PDFIO_FILTER_FLATE)) == NULL)
|
||||||
#endif // DEBUG
|
#endif // DEBUG
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
cmap = ttfGetCMap(font, &num_cmap);
|
cmap = ttfGetCMap(font, &num_cmap);
|
||||||
|
min_glyph = 65536;
|
||||||
|
max_glyph = 0;
|
||||||
|
memset(glyphs, 0, sizeof(glyphs));
|
||||||
|
|
||||||
PDFIO_DEBUG("pdfioFileCreateFontObjFromFile: num_cmap=%u\n", (unsigned)num_cmap);
|
PDFIO_DEBUG("pdfioFileCreateFontObjFromFile: num_cmap=%u\n", (unsigned)num_cmap);
|
||||||
|
|
||||||
@ -1468,6 +1454,12 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
// Map to specified glyph...
|
// Map to specified glyph...
|
||||||
*bufptr++ = (unsigned char)(cmap[i] >> 8);
|
*bufptr++ = (unsigned char)(cmap[i] >> 8);
|
||||||
*bufptr++ = (unsigned char)(cmap[i] & 255);
|
*bufptr++ = (unsigned char)(cmap[i] & 255);
|
||||||
|
|
||||||
|
glyphs[cmap[i]] = i;
|
||||||
|
if (cmap[i] < min_glyph)
|
||||||
|
min_glyph = cmap[i];
|
||||||
|
if (cmap[i] > max_glyph)
|
||||||
|
max_glyph = cmap[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bufptr >= bufend)
|
if (bufptr >= bufend)
|
||||||
@ -1476,8 +1468,7 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
if (!pdfioStreamWrite(st, buffer, (size_t)(bufptr - buffer)))
|
if (!pdfioStreamWrite(st, buffer, (size_t)(bufptr - buffer)))
|
||||||
{
|
{
|
||||||
pdfioStreamClose(st);
|
pdfioStreamClose(st);
|
||||||
ttfDelete(font);
|
goto done;
|
||||||
return (NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bufptr = buffer;
|
bufptr = buffer;
|
||||||
@ -1490,32 +1481,64 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
if (!pdfioStreamWrite(st, buffer, (size_t)(bufptr - buffer)))
|
if (!pdfioStreamWrite(st, buffer, (size_t)(bufptr - buffer)))
|
||||||
{
|
{
|
||||||
pdfioStreamClose(st);
|
pdfioStreamClose(st);
|
||||||
ttfDelete(font);
|
goto done;
|
||||||
return (NULL);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pdfioStreamClose(st);
|
pdfioStreamClose(st);
|
||||||
|
|
||||||
|
// ToUnicode mapping object
|
||||||
|
to_unicode = pdfioDictCreate(pdf);
|
||||||
|
pdfioDictSetName(to_unicode, "Type", "CMap");
|
||||||
|
pdfioDictSetName(to_unicode, "CMapName", "Adobe-Identity-UCS2");
|
||||||
|
pdfioDictSetDict(to_unicode, "CIDSystemInfo", sidict);
|
||||||
|
|
||||||
|
#ifndef DEBUG
|
||||||
|
pdfioDictSetName(to_unicode, "Filter", "FlateDecode");
|
||||||
|
#endif // !DEBUG
|
||||||
|
|
||||||
|
if ((to_unicode_obj = pdfioFileCreateObj(pdf, to_unicode)) == NULL)
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
if ((st = pdfioObjCreateStream(to_unicode_obj, PDFIO_FILTER_NONE)) == NULL)
|
||||||
|
#else
|
||||||
|
if ((st = pdfioObjCreateStream(to_unicode_obj, PDFIO_FILTER_FLATE)) == NULL)
|
||||||
|
#endif // DEBUG
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
pdfioStreamPuts(st,
|
||||||
|
"stream\n"
|
||||||
|
"/CIDInit /ProcSet findresource begin\n"
|
||||||
|
"12 dict begin\n"
|
||||||
|
"begincmap\n"
|
||||||
|
"/CIDSystemInfo<<\n"
|
||||||
|
"/Registry (Adobe)\n"
|
||||||
|
"/Ordering (UCS2)\n"
|
||||||
|
"/Supplement 0\n"
|
||||||
|
">> def\n"
|
||||||
|
"/CMapName /Adobe-Identity-UCS2 def\n"
|
||||||
|
"/CMapType 2 def\n"
|
||||||
|
"1 begincodespacerange\n"
|
||||||
|
"<0000> <FFFF>\n"
|
||||||
|
"endcodespacerange\n"
|
||||||
|
"1 beginbfrange\n"
|
||||||
|
"<0000> <FFFF> <0000>\n"
|
||||||
|
"endbfrange\n"
|
||||||
|
"endcmap\n"
|
||||||
|
"CMapName currentdict /CMap defineresource pop\n"
|
||||||
|
"end\n"
|
||||||
|
"end\n");
|
||||||
|
|
||||||
|
pdfioStreamClose(st);
|
||||||
|
|
||||||
// Create a CIDFontType2 dictionary for the Unicode font...
|
// Create a CIDFontType2 dictionary for the Unicode font...
|
||||||
if ((type2 = pdfioDictCreate(pdf)) == NULL)
|
if ((type2 = pdfioDictCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((sidict = pdfioDictCreate(pdf)) == NULL)
|
|
||||||
{
|
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Width array
|
// Width array
|
||||||
if ((w_array = pdfioArrayCreate(pdf)) == NULL)
|
if ((w_array = pdfioArrayCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (start = 0, w0 = ttfGetWidth(font, 0), i = 1; i < 65536; start = i, w0 = w1, i ++)
|
for (start = 0, w0 = ttfGetWidth(font, 0), i = 1; i < 65536; start = i, w0 = w1, i ++)
|
||||||
{
|
{
|
||||||
@ -1535,10 +1558,7 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
pdfioArrayAppendNumber(w_array, start);
|
pdfioArrayAppendNumber(w_array, start);
|
||||||
|
|
||||||
if ((temp_array = pdfioArrayCreate(pdf)) == NULL)
|
if ((temp_array = pdfioArrayCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pdfioArrayAppendNumber(temp_array, w0);
|
pdfioArrayAppendNumber(temp_array, w0);
|
||||||
for (w0 = w1, i ++; i < 65536; w0 = w1, i ++)
|
for (w0 = w1, i ++; i < 65536; w0 = w1, i ++)
|
||||||
@ -1558,11 +1578,6 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// CIDSystemInfo mapping to Adobe UCS2 v0 (Unicode)
|
|
||||||
pdfioDictSetString(sidict, "Registry", "Adobe");
|
|
||||||
pdfioDictSetString(sidict, "Ordering", "Identity");
|
|
||||||
pdfioDictSetNumber(sidict, "Supplement", 0);
|
|
||||||
|
|
||||||
// Then the dictionary for the CID base font...
|
// Then the dictionary for the CID base font...
|
||||||
pdfioDictSetName(type2, "Type", "Font");
|
pdfioDictSetName(type2, "Type", "Font");
|
||||||
pdfioDictSetName(type2, "Subtype", "CIDFontType2");
|
pdfioDictSetName(type2, "Subtype", "CIDFontType2");
|
||||||
@ -1573,54 +1588,38 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
pdfioDictSetArray(type2, "W", w_array);
|
pdfioDictSetArray(type2, "W", w_array);
|
||||||
|
|
||||||
if ((type2_obj = pdfioFileCreateObj(pdf, type2)) == NULL)
|
if ((type2_obj = pdfioFileCreateObj(pdf, type2)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pdfioObjClose(type2_obj);
|
pdfioObjClose(type2_obj);
|
||||||
|
|
||||||
// Create a Type 0 font object...
|
// Create a Type 0 font object...
|
||||||
if ((descendants = pdfioArrayCreate(pdf)) == NULL)
|
if ((descendants = pdfioArrayCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pdfioArrayAppendObj(descendants, type2_obj);
|
pdfioArrayAppendObj(descendants, type2_obj);
|
||||||
|
|
||||||
if ((dict = pdfioDictCreate(pdf)) == NULL)
|
if ((dict = pdfioDictCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pdfioDictSetName(dict, "Type", "Font");
|
pdfioDictSetName(dict, "Type", "Font");
|
||||||
pdfioDictSetName(dict, "Subtype", "Type0");
|
pdfioDictSetName(dict, "Subtype", "Type0");
|
||||||
pdfioDictSetName(dict, "BaseFont", basefont);
|
pdfioDictSetName(dict, "BaseFont", basefont);
|
||||||
pdfioDictSetArray(dict, "DescendantFonts", descendants);
|
pdfioDictSetArray(dict, "DescendantFonts", descendants);
|
||||||
pdfioDictSetName(dict, "Encoding", "Identity-H");
|
pdfioDictSetName(dict, "Encoding", "Identity-H");
|
||||||
|
pdfioDictSetObj(dict, "ToUnicode", to_unicode_obj);
|
||||||
|
|
||||||
if ((obj = pdfioFileCreateObj(pdf, dict)) == NULL)
|
if ((obj = pdfioFileCreateObj(pdf, dict)) != NULL)
|
||||||
return (NULL);
|
pdfioObjClose(obj);
|
||||||
|
|
||||||
pdfioObjClose(obj);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Simple (CP1252 or custom encoding) 8-bit font...
|
// Simple (CP1252 or custom encoding) 8-bit font...
|
||||||
if (ttfGetMaxChar(font) >= 255 && !pdf->cp1252_obj && !create_cp1252(pdf))
|
if (ttfGetMaxChar(font) >= 255 && !pdf->cp1252_obj && !create_cp1252(pdf))
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a TrueType font object...
|
// Create a TrueType font object...
|
||||||
if ((dict = pdfioDictCreate(pdf)) == NULL)
|
if ((dict = pdfioDictCreate(pdf)) == NULL)
|
||||||
{
|
goto done;
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pdfioDictSetName(dict, "Type", "Font");
|
pdfioDictSetName(dict, "Type", "Font");
|
||||||
pdfioDictSetName(dict, "Subtype", "TrueType");
|
pdfioDictSetName(dict, "Subtype", "TrueType");
|
||||||
@ -1630,15 +1629,15 @@ pdfioFileCreateFontObjFromFile(
|
|||||||
|
|
||||||
pdfioDictSetObj(dict, "FontDescriptor", desc_obj);
|
pdfioDictSetObj(dict, "FontDescriptor", desc_obj);
|
||||||
|
|
||||||
if ((obj = pdfioFileCreateObj(pdf, dict)) == NULL)
|
if ((obj = pdfioFileCreateObj(pdf, dict)) != NULL)
|
||||||
{
|
pdfioObjClose(obj);
|
||||||
ttfDelete(font);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pdfioObjClose(obj);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
|
||||||
|
if (fd >= 0)
|
||||||
|
close(fd);
|
||||||
|
|
||||||
ttfDelete(font);
|
ttfDelete(font);
|
||||||
|
|
||||||
return (obj);
|
return (obj);
|
||||||
|
Loading…
Reference in New Issue
Block a user