Update token reader to handle balanced parens and to allow arbitrary backslash escapes.

Update unit test to print only the base filename, omitting the directory path, in its messages.
This commit is contained in:
Michael R Sweet 2021-05-09 21:32:09 -04:00
parent 5f43761537
commit af8c731699
No known key found for this signature in database
GPG Key ID: 999559A027815955
2 changed files with 29 additions and 9 deletions

View File

@ -148,7 +148,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
char *buffer, // I - String buffer
size_t bufsize) // I - Size of string buffer
{
int ch; // Character
int ch, // Character
parens = 0; // Parenthesis level
char *bufptr, // Pointer into buffer
*bufend, // End of buffer
state = '\0'; // Current state
@ -210,7 +211,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
switch (state)
{
case '(' : // Literal string
while ((ch = get_char(tb)) != EOF && ch != ')')
while ((ch = get_char(tb)) != EOF)
{
if (ch == '\\')
{
@ -267,10 +268,22 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
break;
default :
_pdfioFileError(tb->pdf, "Unknown escape '\\%c' in literal string.", ch);
return (false);
// Ignore backslash per PDF spec...
break;
}
}
else if (ch == '(')
{
// Keep track of parenthesis
parens ++;
}
else if (ch == ')')
{
if (parens == 0)
break;
parens --;
}
if (bufptr < bufend)
{

View File

@ -37,16 +37,23 @@ main(int argc, // I - Number of command-line arguments
{
if ((pdf = pdfioFileOpen(argv[i], NULL, NULL)) != NULL)
{
const char *filename; // Base filename for messages
if ((filename = strrchr(argv[i], '/')) != NULL)
filename ++;
else
filename = argv[i];
num_objs = pdfioFileGetNumObjects(pdf);
num_pages = pdfioFileGetNumPages(pdf);
printf("%s: PDF %s, %d pages, %d objects.\n", argv[i], pdfioFileGetVersion(pdf), (int)num_pages, (int)num_objs);
printf("%s: PDF %s, %d pages, %d objects.\n", filename, pdfioFileGetVersion(pdf), (int)num_pages, (int)num_objs);
for (n = 0; n < num_pages; n ++)
{
if ((obj = pdfioFileGetPage(pdf, n)) == NULL)
{
printf("%s: Unable to get page #%d.\n", argv[i], (int)n + 1);
printf("%s: Unable to get page #%d.\n", filename, (int)n + 1);
}
else
{
@ -64,7 +71,7 @@ main(int argc, // I - Number of command-line arguments
}
}
printf("%s: Page #%d is %gx%g.\n", argv[i], (int)n + 1, media_box.x2, media_box.y2);
printf("%s: Page #%d is %gx%g.\n", filename, (int)n + 1, media_box.x2, media_box.y2);
}
}
@ -72,7 +79,7 @@ main(int argc, // I - Number of command-line arguments
{
if ((obj = pdfioFileGetObject(pdf, n)) == NULL)
{
printf("%s: Unable to get object #%d.\n", argv[i], (int)n);
printf("%s: Unable to get object #%d.\n", filename, (int)n);
}
else
{
@ -81,7 +88,7 @@ main(int argc, // I - Number of command-line arguments
dict = pdfioObjGetDict(obj);
printf("%s: %u %u obj dict=%p(%lu)\n", argv[i], (unsigned)pdfioObjGetNumber(obj), (unsigned)pdfioObjGetGeneration(obj), dict, dict ? (unsigned long)dict->num_pairs : 0UL);
printf("%s: %u %u obj dict=%p(%lu)\n", filename, (unsigned)pdfioObjGetNumber(obj), (unsigned)pdfioObjGetGeneration(obj), dict, dict ? (unsigned long)dict->num_pairs : 0UL);
if (dict)
{
for (np = dict->num_pairs, pair = dict->pairs; np > 0; np --, pair ++)