Update token reader to handle balanced parens and to allow arbitrary backslash escapes.

Update unit test to omit path information from filename.
This commit is contained in:
Michael R Sweet 2021-05-09 21:32:09 -04:00
parent 5f43761537
commit af8c731699
No known key found for this signature in database
GPG Key ID: 999559A027815955
2 changed files with 29 additions and 9 deletions

View File

@ -148,7 +148,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
char *buffer, // I - String buffer char *buffer, // I - String buffer
size_t bufsize) // I - Size of string buffer size_t bufsize) // I - Size of string buffer
{ {
int ch; // Character int ch, // Character
parens = 0; // Parenthesis level
char *bufptr, // Pointer into buffer char *bufptr, // Pointer into buffer
*bufend, // End of buffer *bufend, // End of buffer
state = '\0'; // Current state state = '\0'; // Current state
@ -210,7 +211,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
switch (state) switch (state)
{ {
case '(' : // Literal string case '(' : // Literal string
while ((ch = get_char(tb)) != EOF && ch != ')') while ((ch = get_char(tb)) != EOF)
{ {
if (ch == '\\') if (ch == '\\')
{ {
@ -267,10 +268,22 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
break; break;
default : default :
_pdfioFileError(tb->pdf, "Unknown escape '\\%c' in literal string.", ch); // Ignore backslash per PDF spec...
return (false); break;
} }
} }
else if (ch == '(')
{
// Keep track of nested parentheses
parens ++;
}
else if (ch == ')')
{
if (parens == 0)
break;
parens --;
}
if (bufptr < bufend) if (bufptr < bufend)
{ {

View File

@ -37,16 +37,23 @@ main(int argc, // I - Number of command-line arguments
{ {
if ((pdf = pdfioFileOpen(argv[i], NULL, NULL)) != NULL) if ((pdf = pdfioFileOpen(argv[i], NULL, NULL)) != NULL)
{ {
const char *filename; // Base filename for messages
if ((filename = strrchr(argv[i], '/')) != NULL)
filename ++;
else
filename = argv[i];
num_objs = pdfioFileGetNumObjects(pdf); num_objs = pdfioFileGetNumObjects(pdf);
num_pages = pdfioFileGetNumPages(pdf); num_pages = pdfioFileGetNumPages(pdf);
printf("%s: PDF %s, %d pages, %d objects.\n", argv[i], pdfioFileGetVersion(pdf), (int)num_pages, (int)num_objs); printf("%s: PDF %s, %d pages, %d objects.\n", filename, pdfioFileGetVersion(pdf), (int)num_pages, (int)num_objs);
for (n = 0; n < num_pages; n ++) for (n = 0; n < num_pages; n ++)
{ {
if ((obj = pdfioFileGetPage(pdf, n)) == NULL) if ((obj = pdfioFileGetPage(pdf, n)) == NULL)
{ {
printf("%s: Unable to get page #%d.\n", argv[i], (int)n + 1); printf("%s: Unable to get page #%d.\n", filename, (int)n + 1);
} }
else else
{ {
@ -64,7 +71,7 @@ main(int argc, // I - Number of command-line arguments
} }
} }
printf("%s: Page #%d is %gx%g.\n", argv[i], (int)n + 1, media_box.x2, media_box.y2); printf("%s: Page #%d is %gx%g.\n", filename, (int)n + 1, media_box.x2, media_box.y2);
} }
} }
@ -72,7 +79,7 @@ main(int argc, // I - Number of command-line arguments
{ {
if ((obj = pdfioFileGetObject(pdf, n)) == NULL) if ((obj = pdfioFileGetObject(pdf, n)) == NULL)
{ {
printf("%s: Unable to get object #%d.\n", argv[i], (int)n); printf("%s: Unable to get object #%d.\n", filename, (int)n);
} }
else else
{ {
@ -81,7 +88,7 @@ main(int argc, // I - Number of command-line arguments
dict = pdfioObjGetDict(obj); dict = pdfioObjGetDict(obj);
printf("%s: %u %u obj dict=%p(%lu)\n", argv[i], (unsigned)pdfioObjGetNumber(obj), (unsigned)pdfioObjGetGeneration(obj), dict, dict ? (unsigned long)dict->num_pairs : 0UL); printf("%s: %u %u obj dict=%p(%lu)\n", filename, (unsigned)pdfioObjGetNumber(obj), (unsigned)pdfioObjGetGeneration(obj), dict, dict ? (unsigned long)dict->num_pairs : 0UL);
if (dict) if (dict)
{ {
for (np = dict->num_pairs, pair = dict->pairs; np > 0; np --, pair ++) for (np = dict->num_pairs, pair = dict->pairs; np > 0; np --, pair ++)