Update support for UTF-16 string tokens (Issue #141)

This commit is contained in:
Michael R Sweet
2025-12-02 14:37:18 -05:00
parent 789b74af39
commit 597c9f8cda
3 changed files with 21 additions and 2 deletions

View File

@@ -6,6 +6,7 @@ v1.6.1 - YYYY-MM-DD
-------------------
- Added missing input checking to `pdfioFileCreateFontObjFromBase` function.
- Updated support for UTF-16 strings (Issue #141)
- Updated Xcode project to use installed PNG library.
- Fixed some clang warnings.

View File

@@ -464,8 +464,12 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary
_pdfio_value_t *value = _pdfioDictGetValue(dict, key);
PDFIO_DEBUG("pdfioDictGetString(dict=%p, key=\"%s\")\n", (void *)dict, key);
PDFIO_DEBUG("pdfioDictGetString: value=%p(type=%d)\n", (void *)value, value ? value->type : 0);
if (value && value->type == PDFIO_VALTYPE_STRING)
{
PDFIO_DEBUG("pdfioDictGetString: Returning \"%s\".\n", value->value.string);
return (value->value.string);
}
else if (value && value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096)
@@ -476,6 +480,8 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary
if (!(value->value.binary.datalen & 1) && (!memcmp(value->value.binary.data, "\376\377", 2) || !memcmp(value->value.binary.data, "\377\376", 2)))
{
// Copy UTF-16...
PDFIO_DEBUG("pdfioDictGetString: Converting UTF-16 to UTF-8 string.\n");
_pdfio_utf16cpy(temp, value->value.binary.data, value->value.binary.datalen, sizeof(temp));
}
else
@@ -489,10 +495,13 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary
value->type = PDFIO_VALTYPE_STRING;
value->value.string = pdfioStringCreate(dict->pdf, temp);
PDFIO_DEBUG("pdfioDictGetString: Returning \"%s\".\n", value->value.string);
return (value->value.string);
}
else
{
PDFIO_DEBUG("pdfioDictGetString: Returning NULL.\n");
return (NULL);
}
}

View File

@@ -393,9 +393,18 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
return (false);
}
if (saw_nul)
if ((bufptr - buffer) > 3 && ((bufptr - buffer) & 1) != 0 && (!memcmp(buffer, "(\377\376", 3) || !memcmp(buffer, "(\376\377", 3)))
{
  // UTF-16 string: the token is the leading '(' followed by a byte order
  // mark (FF FE or FE FF) and an even number of bytes, so the total length
  // is odd.  Convert everything after the '(' to UTF-8 in place...
  PDFIO_DEBUG("_pdfioTokenRead: Converting string to UTF-8.\n");
  _pdfio_utf16cpy(buffer + 1, (unsigned char *)buffer + 1, bufptr - buffer - 1, bufsize - 1);

  PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
  return (true);
}
else if (saw_nul)
{
// Convert to a hex (binary) string...
// Contains nul characters, convert to a hex (binary) string...
char *litptr, // Pointer to literal character
*hexptr; // Pointer to hex character
size_t bytes = (size_t)(bufptr - buffer - 1);