diff --git a/CHANGES.md b/CHANGES.md index a3303e9..7cc0bf6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,7 @@ v1.6.1 - YYYY-MM-DD ------------------- - Added missing input checking to `pdfioFileCreateFontObjFromBase` function. +- Updated support for UTF-16 strings (Issue #141) - Updated Xcode project to use installed PNG library. - Fixed some clang warnings. diff --git a/pdfio-dict.c b/pdfio-dict.c index 663423f..660d1f9 100644 --- a/pdfio-dict.c +++ b/pdfio-dict.c @@ -464,8 +464,12 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary _pdfio_value_t *value = _pdfioDictGetValue(dict, key); + PDFIO_DEBUG("pdfioDictGetString(dict=%p, key=\"%s\")\n", (void *)dict, key); + PDFIO_DEBUG("pdfioDictGetString: value=%p(type=%d)\n", (void *)value, value ? value->type : 0); + if (value && value->type == PDFIO_VALTYPE_STRING) { + PDFIO_DEBUG("pdfioDictGetString: Returning \"%s\".\n", value->value.string); return (value->value.string); } else if (value && value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096) @@ -476,6 +480,8 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary if (!(value->value.binary.datalen & 1) && (!memcmp(value->value.binary.data, "\376\377", 2) || !memcmp(value->value.binary.data, "\377\376", 2))) { // Copy UTF-16... 
+ PDFIO_DEBUG("pdfioDictGetString: Converting UTF-16 to UTF-8 string.\n"); + _pdfio_utf16cpy(temp, value->value.binary.data, value->value.binary.datalen, sizeof(temp)); } else @@ -489,10 +495,13 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary value->type = PDFIO_VALTYPE_STRING; value->value.string = pdfioStringCreate(dict->pdf, temp); + PDFIO_DEBUG("pdfioDictGetString: Returning \"%s\".\n", value->value.string); + return (value->value.string); } else { + PDFIO_DEBUG("pdfioDictGetString: Returning NULL.\n"); return (NULL); } } diff --git a/pdfio-token.c b/pdfio-token.c index c13adb8..49ece38 100644 --- a/pdfio-token.c +++ b/pdfio-token.c @@ -393,9 +393,18 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack return (false); } - if (saw_nul) + if ((bufptr - buffer) > 3 && ((bufptr - buffer) & 1) != 0 && (!memcmp(buffer, "(\377\376", 3) || !memcmp(buffer, "(\376\377", 3))) + { + // UTF-16 string (BOM right after the opening paren), convert to UTF-8 in place... + PDFIO_DEBUG("_pdfioTokenRead: Converting string to UTF-8.\n"); + _pdfio_utf16cpy(buffer + 1, (unsigned char *)buffer + 1, bufptr - buffer - 1, bufsize - 1); + + PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer); + return (true); + } + else if (saw_nul) { - // Contains nul characters, convert to a hex (binary) string... + // Contains nul characters, convert to a hex (binary) string... char *litptr, // Pointer to literal character *hexptr; // Pointer to hex character size_t bytes = (size_t)(bufptr - buffer - 1);