From e88262223342b67c7caf628130a2a63de9ca0012 Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Sat, 27 Jan 2024 18:22:16 -0500 Subject: [PATCH] Fix locale support (Issue #61) --- CHANGES.md | 6 + pdfio-common.c | 4 +- pdfio-file.c | 27 +++- pdfio-private.h | 7 +- pdfio-stream.c | 4 +- pdfio-string.c | 354 +++++++++++++++++++++++++++++++++++++++++++++++- pdfio-value.c | 4 +- 7 files changed, 395 insertions(+), 11 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 7db271b..20ae06c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,12 @@ Changes in PDFio ================ +v1.2.1 (Month DD, YYYY) +----------------------- + +- Updated number support to avoid locale issues (Issue #61) + + v1.2.0 (January 24, 2024) ------------------------- diff --git a/pdfio-common.c b/pdfio-common.c index 733aadc..e795687 100644 --- a/pdfio-common.c +++ b/pdfio-common.c @@ -1,7 +1,7 @@ // // Common support functions for pdfio. // -// Copyright © 2021-2023 by Michael R Sweet. +// Copyright © 2021-2024 by Michael R Sweet. // // Licensed under Apache License v2.0. See the file "LICENSE" for more // information. @@ -261,7 +261,7 @@ _pdfioFilePrintf(pdfio_file_t *pdf, // I - PDF file // Format the string... va_start(ap, format); - vsnprintf(buffer, sizeof(buffer), format, ap); + _pdfio_vsnprintf(pdf, buffer, sizeof(buffer), format, ap); va_end(ap); // Write it... diff --git a/pdfio-file.c b/pdfio-file.c index a3ded25..2c94a02 100644 --- a/pdfio-file.c +++ b/pdfio-file.c @@ -1,7 +1,7 @@ // // PDF file functions for PDFio. // -// Copyright © 2021-2023 by Michael R Sweet. +// Copyright © 2021-2024 by Michael R Sweet. // // Licensed under Apache License v2.0. See the file "LICENSE" for more // information. 
@@ -20,6 +20,7 @@ static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset); static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b); static const char *get_info_string(pdfio_file_t *pdf, const char *key); +static struct lconv *get_lconv(void); static bool load_obj_stream(pdfio_obj_t *obj); static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth); static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data); @@ -217,6 +218,7 @@ pdfioFileCreate( return (NULL); } + pdf->loc = get_lconv(); pdf->filename = strdup(filename); pdf->version = strdup(version); pdf->mode = _PDFIO_MODE_WRITE; @@ -259,7 +261,7 @@ pdfioFileCreate( } // Write a standard PDF header... - if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version)) + if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%PDF\303\254o\n", version)) goto error; // Create the pages object... @@ -1218,6 +1220,7 @@ pdfioFileOpen( return (NULL); } + pdf->loc = get_lconv(); pdf->filename = strdup(filename); pdf->mode = _PDFIO_MODE_READ; pdf->error_cb = error_cb; @@ -1576,6 +1579,26 @@ get_info_string(pdfio_file_t *pdf, // I - PDF file } +// +// 'get_lconv()' - Get any locale-specific numeric information. +// + +static struct lconv * // O - Locale information or `NULL` +get_lconv(void) +{ + struct lconv *loc; // Locale information + + + if ((loc = localeconv()) != NULL) + { + if (!loc->decimal_point || !strcmp(loc->decimal_point, ".")) + loc = NULL; + } + + return (loc); +} + + // // 'load_obj_stream()' - Load an object stream. // diff --git a/pdfio-private.h b/pdfio-private.h index 9a1b95b..1a3efa2 100644 --- a/pdfio-private.h +++ b/pdfio-private.h @@ -1,7 +1,7 @@ // // Private header file for PDFio. // -// Copyright © 2021-2023 by Michael R Sweet. +// Copyright © 2021-2024 by Michael R Sweet. // // Licensed under Apache License v2.0. See the file "LICENSE" for more // information. 
@@ -20,6 +20,7 @@ # include # include # include +# include <locale.h> # ifdef _WIN32 # include # include @@ -224,6 +225,7 @@ typedef struct _pdfio_objmap_s // PDF object map struct _pdfio_file_s // PDF file structure { char *filename; // Filename + struct lconv *loc; // Locale data char *version; // Version number pdfio_rect_t media_box, // Default MediaBox value crop_box; // Default CropBox value @@ -322,6 +324,9 @@ struct _pdfio_stream_s // Stream // Functions... // +extern double _pdfio_strtod(pdfio_file_t *pdf, const char *s) _PDFIO_INTERNAL; +extern ssize_t _pdfio_vsnprintf(pdfio_file_t *pdf, char *buffer, size_t bufsize, const char *format, va_list ap) _PDFIO_INTERNAL; + extern bool _pdfioArrayDecrypt(pdfio_file_t *pdf, pdfio_obj_t *obj, pdfio_array_t *a, size_t depth) _PDFIO_INTERNAL; extern void _pdfioArrayDebug(pdfio_array_t *a, FILE *fp) _PDFIO_INTERNAL; extern void _pdfioArrayDelete(pdfio_array_t *a) _PDFIO_INTERNAL; diff --git a/pdfio-stream.c b/pdfio-stream.c index 2bfdcae..3a7f419 100644 --- a/pdfio-stream.c +++ b/pdfio-stream.c @@ -1,7 +1,7 @@ // // PDF stream functions for PDFio. // -// Copyright © 2021-2023 by Michael R Sweet. +// Copyright © 2021-2024 by Michael R Sweet. // // Licensed under Apache License v2.0. See the file "LICENSE" for more // information. @@ -687,7 +687,7 @@ pdfioStreamPrintf( // Format the string... va_start(ap, format); - vsnprintf(buffer, sizeof(buffer), format, ap); + _pdfio_vsnprintf(st->pdf, buffer, sizeof(buffer), format, ap); va_end(ap); // Write the string... diff --git a/pdfio-string.c b/pdfio-string.c index 6aad602..ce17fd0 100644 --- a/pdfio-string.c +++ b/pdfio-string.c @@ -1,7 +1,7 @@ // -// PDF dictionary functions for PDFio. +// PDF string functions for PDFio. // -// Copyright © 2021 by Michael R Sweet. +// Copyright © 2021-2024 by Michael R Sweet. // // Licensed under Apache License v2.0. See the file "LICENSE" for more // information. 
@@ -17,6 +17,356 @@ static int compare_strings(char **a, char **b); +// +// '_pdfio_strtod()' - Convert a string to a double value. +// +// This function wraps strtod() to avoid locale issues. +// + +double // O - Double value +_pdfio_strtod(pdfio_file_t *pdf, // I - PDF file + const char *s) // I - String +{ + char temp[64], // Temporary buffer + *tempptr; // Pointer into temporary buffer + + + // See if the locale has a special decimal point string... + if (!pdf->loc) + return (strtod(s, NULL)); + + // Copy leading sign, numbers, period, and then numbers... + tempptr = temp; + temp[sizeof(temp) - 1] = '\0'; + + while (*s && *s != '.') + { + if (tempptr < (temp + sizeof(temp) - 1)) + *tempptr++ = *s++; + else + return (0.0); + } + + if (*s == '.') + { + // Convert decimal point to locale equivalent... + size_t declen = strlen(pdf->loc->decimal_point); + // Length of decimal point + s ++; + + if (declen <= (sizeof(temp) - (size_t)(tempptr - temp))) + { + memcpy(tempptr, pdf->loc->decimal_point, declen); + tempptr += declen; + } + else + { + return (0.0); + } + } + + // Copy any remaining characters... + while (*s) + { + if (tempptr < (temp + sizeof(temp) - 1)) + *tempptr++ = *s++; + else + return (0.0); + } + + // Nul-terminate the temporary string and convert the string... + *tempptr = '\0'; + + return (strtod(temp, NULL)); +} + + +// +// '_pdfio_vsnprintf()' - Format a string. +// +// This function emulates vsnprintf() to avoid locale issues. 
+// + +ssize_t // O - Number of bytes +_pdfio_vsnprintf(pdfio_file_t *pdf, // I - PDF file + char *buffer, // I - Output buffer + size_t bufsize, // I - Size of output buffer + const char *format, // I - printf-style format string + va_list ap) // I - Pointer to additional arguments +{ + char *bufptr, // Pointer to position in buffer + *bufend, // Pointer to end of buffer + size, // Size character (h, l, L) + type; // Format type character + int width, // Width of field + prec; // Number of characters of precision + char tformat[100], // Temporary format string for snprintf() + *tptr, // Pointer into temporary format + temp[1024], // Buffer for formatted numbers + *tempptr; // Pointer into buffer + char *s; // Pointer to string + ssize_t bytes; // Total number of bytes needed + const char *dec = pdf->loc ? pdf->loc->decimal_point : "."; + // Decimal point string + char *decptr; // Pointer to decimal point + + + // Loop through the format string, formatting as needed... + bufptr = buffer; + bufend = buffer + bufsize - 1; + *bufend = '\0'; + bytes = 0; + + while (*format) + { + if (*format == '%') + { + // Format character... + tptr = tformat; + *tptr++ = *format++; + + if (*format == '%') + { + if (bufptr < bufend) + *bufptr++ = *format; + bytes ++; + format ++; + continue; + } + else if (strchr(" -+#\'", *format)) + { + *tptr++ = *format++; + } + + if (*format == '*') + { + // Get width from argument... + format ++; + width = va_arg(ap, int); + + snprintf(tptr, sizeof(tformat) - (size_t)(tptr - tformat), "%d", width); + tptr += strlen(tptr); + } + else + { + width = 0; + + while (isdigit(*format & 255)) + { + if (tptr < (tformat + sizeof(tformat) - 1)) + *tptr++ = *format; + + width = width * 10 + *format++ - '0'; + } + } + + if (*format == '.') + { + if (tptr < (tformat + sizeof(tformat) - 1)) + *tptr++ = *format; + + format ++; + + if (*format == '*') + { + // Get precision from argument... 
+ format ++; + prec = va_arg(ap, int); + + snprintf(tptr, sizeof(tformat) - (size_t)(tptr - tformat), "%d", prec); + tptr += strlen(tptr); + } + else + { + prec = 0; + + while (isdigit(*format & 255)) + { + if (tptr < (tformat + sizeof(tformat) - 1)) + *tptr++ = *format; + + prec = prec * 10 + *format++ - '0'; + } + } + } + + if (*format == 'l' && format[1] == 'l') + { + size = 'L'; + + if (tptr < (tformat + sizeof(tformat) - 2)) + { + *tptr++ = 'l'; + *tptr++ = 'l'; + } + + format += 2; + } + else if (*format == 'h' || *format == 'l' || *format == 'L') + { + if (tptr < (tformat + sizeof(tformat) - 1)) + *tptr++ = *format; + + size = *format++; + } + else + { + size = 0; + } + + if (!*format) + break; + + if (tptr < (tformat + sizeof(tformat) - 1)) + *tptr++ = *format; + + type = *format++; + *tptr = '\0'; + + switch (type) + { + case 'E' : // Floating point formats + case 'G' : + case 'e' : + case 'f' : + case 'g' : + if ((size_t)(width + 2) > sizeof(temp)) + break; + + snprintf(temp, sizeof(temp), tformat, va_arg(ap, double)); + + if ((decptr = strstr(temp, dec)) != NULL) + { + // Convert locale decimal point to "." + tempptr = decptr + strlen(dec); + if (tempptr > (decptr + 1)) + memmove(decptr + 1, tempptr, strlen(tempptr) + 1); + *decptr = '.'; + + // Strip trailing 0's... 
+ for (tempptr = temp + strlen(temp) - 1; tempptr > temp && *tempptr == '0'; tempptr --) + *tempptr = '\0'; + + if (*tempptr == '.') + *tempptr = '\0'; // Strip trailing decimal point + } + + // Copy to the output buffer + bytes += (int)strlen(temp); + + if (bufptr < bufend) + { + strncpy(bufptr, temp, (size_t)(bufend - bufptr - 1)); + bufptr += strlen(bufptr); + } + break; + + case 'B' : // Integer formats + case 'X' : + case 'b' : + case 'd' : + case 'i' : + case 'o' : + case 'u' : + case 'x' : + if ((size_t)(width + 2) > sizeof(temp)) + break; + +# ifdef HAVE_LONG_LONG + if (size == 'L') + snprintf(temp, sizeof(temp), tformat, va_arg(ap, long long)); + else +# endif // HAVE_LONG_LONG + if (size == 'l') + snprintf(temp, sizeof(temp), tformat, va_arg(ap, long)); + else + snprintf(temp, sizeof(temp), tformat, va_arg(ap, int)); + + bytes += (int)strlen(temp); + + if (bufptr < bufend) + { + strncpy(bufptr, temp, (size_t)(bufend - bufptr - 1)); + bufptr += strlen(bufptr); + } + break; + + case 'p' : // Pointer value + if ((size_t)(width + 2) > sizeof(temp)) + break; + + snprintf(temp, sizeof(temp), tformat, va_arg(ap, void *)); + + bytes += (int)strlen(temp); + + if (bufptr < bufend) + { + strncpy(bufptr, temp, (size_t)(bufend - bufptr - 1)); + bufptr += strlen(bufptr); + } + break; + + case 'c' : // Character or character array + bytes += width; + + if (bufptr < bufend) + { + if (width <= 1) + { + *bufptr++ = (char)va_arg(ap, int); + } + else + { + if ((bufptr + width) > bufend) + width = (int)(bufend - bufptr); + + memcpy(bufptr, va_arg(ap, char *), (size_t)width); + bufptr += width; + } + } + break; + + case 's' : // String + if ((s = va_arg(ap, char *)) == NULL) + s = "(null)"; + + bytes += strlen(s); + + if (bufptr < bufend) + { + strncpy(bufptr, s, (size_t)(bufend - bufptr - 1)); + bufptr += strlen(bufptr); + } + break; + + case 'n' : // Output number of chars so far + *(va_arg(ap, int *)) = (int)bytes; + break; + } + } + else + { + // Literal character... 
+ bytes ++; + + if (bufptr < bufend) + *bufptr++ = *format++; + } + } + + // Nul-terminate the string and return the number of characters needed. + if (bufptr < bufend) + { + // Everything fit in the buffer... + *bufptr = '\0'; + } + + fprintf(stderr, "_pdfio_vsnprintf: %ld \"%s\"\n", (long)bytes, buffer); + + return (bytes); +} + + // // 'pdfioStringCreate()' - Create a durable literal string. // diff --git a/pdfio-value.c b/pdfio-value.c index 6f46976..6a7ed3a 100644 --- a/pdfio-value.c +++ b/pdfio-value.c @@ -1,7 +1,7 @@ // // PDF value functions for PDFio. // -// Copyright © 2021-2023 by Michael R Sweet. +// Copyright © 2021-2024 by Michael R Sweet. // // Licensed under Apache License v2.0. See the file "LICENSE" for more // information. @@ -497,7 +497,7 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file // If we get here, we have a number... v->type = PDFIO_VALTYPE_NUMBER; - v->value.number = (double)strtod(token, NULL); + v->value.number = _pdfio_strtod(pdf, token); } else if (!strcmp(token, "true") || !strcmp(token, "false")) {