Add support for Unicode filenames on Windows (Issue #18)

This commit is contained in:
Michael R Sweet
2026-02-14 15:25:53 -05:00
parent 15f197d030
commit d9444880c5
3 changed files with 84 additions and 1 deletions

View File

@@ -9,6 +9,7 @@ v1.7.0 - YYYY-MM-DD
- Added support for basic compound stream filters for ASCII85Decode support
(Issue #11)
- Added support for LZWDecode filters (Issue #11)
- Added support for Unicode filenames on Windows (Issue #18)
- Added support for writing object streams (Issue #101)
- Added support for GIF files (Issue #145)
- Added support for WebP files (Issue #144)

View File

@@ -1515,6 +1515,87 @@ add_obj(pdfio_file_t *pdf, // I - PDF file
}
#if _WIN32
//
// '_pdfio_win32_open()' - Open or create a file.
//
// This function handles mapping of UTF-8 filenames to UTF-16 on Windows and
// then passes the converted name, open flags, and permissions to `_wopen`.
//
// The filename must be well-formed UTF-8.  Stray continuation bytes, truncated
// sequences, overlong encodings, encoded surrogates (U+D800 to U+DFFF), and
// values above U+10FFFF are all rejected.  Filenames that do not fit in the
// local UTF-16 buffer are rejected as well.  In every rejection case, and when
// "filename" is `NULL`, `errno` is set to `EINVAL` and -1 is returned without
// calling `_wopen`.
//

int                                     // O - File descriptor or -1 on error
_pdfio_win32_open(const char *filename, // I - UTF-8 filename
                  int        oflag,     // I - Open flags
                  int        mode)      // I - File permissions
{
  wchar_t               utf16name[MAXPATHLEN + 1],
                                        // UTF-16 filename
                        *utf16ptr,      // Pointer into UTF-16 filename
                        *utf16end;      // Last position where a character may start
  const unsigned char   *utf8ptr;       // Pointer into UTF-8 filename
  int                   unich;          // Unicode character


  // Range check input...
  if (!filename)
  {
    errno = EINVAL;
    return (-1);
  }

  // Convert the UTF-8 string to UTF-16.  Bytes are read as unsigned values so
  // that lead and continuation bytes never sign-extend.  Two words are kept in
  // reserve so a surrogate pair plus the terminating nul always fit...
  utf8ptr  = (const unsigned char *)filename;
  utf16ptr = utf16name;
  utf16end = utf16name + sizeof(utf16name) / sizeof(utf16name[0]) - 2;

  while (*utf8ptr && utf16ptr < utf16end)
  {
    unich = *utf8ptr++;

    if (unich & 0x80)
    {
      // Multi-byte sequence.  Each continuation byte is only examined after
      // the previous one has been validated (and is therefore not nul), so we
      // never read past the end of the string...
      if ((unich & 0xe0) == 0xc0 && (utf8ptr[0] & 0xc0) == 0x80)
      {
        // 2-byte UTF-8
        unich = ((unich & 0x1f) << 6) | (utf8ptr[0] & 0x3f);
        utf8ptr ++;

        if (unich < 0x80)
        {
          // Overlong encoding...
          errno = EINVAL;
          return (-1);
        }
      }
      else if ((unich & 0xf0) == 0xe0 && (utf8ptr[0] & 0xc0) == 0x80 && (utf8ptr[1] & 0xc0) == 0x80)
      {
        // 3-byte UTF-8
        unich = ((unich & 0x0f) << 12) | ((utf8ptr[0] & 0x3f) << 6) | (utf8ptr[1] & 0x3f);
        utf8ptr += 2;

        if (unich < 0x800 || (unich >= 0xd800 && unich <= 0xdfff))
        {
          // Overlong encoding or UTF-8 encoded surrogate...
          errno = EINVAL;
          return (-1);
        }
      }
      else if ((unich & 0xf8) == 0xf0 && (utf8ptr[0] & 0xc0) == 0x80 && (utf8ptr[1] & 0xc0) == 0x80 && (utf8ptr[2] & 0xc0) == 0x80)
      {
        // 4-byte UTF-8
        unich = ((unich & 0x07) << 18) | ((utf8ptr[0] & 0x3f) << 12) | ((utf8ptr[1] & 0x3f) << 6) | (utf8ptr[2] & 0x3f);
        utf8ptr += 3;

        if (unich < 0x10000 || unich > 0x10ffff)
        {
          // Overlong encoding or beyond the Unicode range...
          errno = EINVAL;
          return (-1);
        }
      }
      else
      {
        // Invalid UTF-8 char...
        errno = EINVAL;
        return (-1);
      }
    }

    // Copy the unicode character...
    if (unich > 0xffff)
    {
      // Two-word sequence: surrogates encode the 20-bit offset from U+10000,
      // not the raw code point...
      unich -= 0x10000;

      *utf16ptr++ = (wchar_t)(0xd800 | ((unich >> 10) & 0x03ff));
      *utf16ptr++ = (wchar_t)(0xdc00 | (unich & 0x03ff));
    }
    else
    {
      // One-word...
      *utf16ptr++ = (wchar_t)unich;
    }
  }

  *utf16ptr = L'\0';

  if (*utf8ptr)
  {
    // Filename too long...
    errno = EINVAL;
    return (-1);
  }

  // Pass on to _wopen...
  return (_wopen(utf16name, oflag, mode));
}
#endif // _WIN32
//
// 'compare_objmaps()' - Compare two object maps...
//

View File

@@ -30,7 +30,7 @@
# define fileno _fileno
# define lseek(f,o,w) (off_t)_lseek((f),(long)(o),(w))
# define mkdir(d,p) _mkdir(d)
# define open _open
# define open _pdfio_win32_open
# define read(f,b,s) _read((f),(b),(unsigned)(s))
# define rmdir _rmdir
# define snprintf _snprintf
@@ -412,6 +412,7 @@ extern size_t _pdfio_strlcpy(char *dst, const char *src, size_t dstsize) _PDFIO
extern double _pdfio_strtod(pdfio_file_t *pdf, const char *s) _PDFIO_INTERNAL;
extern void _pdfio_utf16cpy(char *dst, const unsigned char *src, size_t srclen, size_t dstsize) _PDFIO_INTERNAL;
extern ssize_t _pdfio_vsnprintf(pdfio_file_t *pdf, char *buffer, size_t bufsize, const char *format, va_list ap) _PDFIO_INTERNAL;
extern int _pdfio_win32_open(const char *filename, int oflag, int mode) _PDFIO_INTERNAL;
extern bool _pdfioArrayDecrypt(pdfio_file_t *pdf, pdfio_obj_t *obj, pdfio_array_t *a, size_t depth) _PDFIO_INTERNAL;
extern void _pdfioArrayDebug(pdfio_array_t *a, FILE *fp) _PDFIO_INTERNAL;