2021-05-01 23:50:52 +02:00
|
|
|
|
//
|
2021-05-30 13:10:44 +02:00
|
|
|
|
// PDF token parsing functions for PDFio.
|
2021-05-01 23:50:52 +02:00
|
|
|
|
//
|
|
|
|
|
// Copyright © 2021 by Michael R Sweet.
|
|
|
|
|
//
|
|
|
|
|
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
|
|
|
|
// information.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// Include necessary headers...
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
#include "pdfio-private.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// This file parses PDF language syntax:
|
|
|
|
|
//
|
|
|
|
|
// << dict >> "<<" and ">>" delimit a dictionary
|
|
|
|
|
// (string) "(" and ")" delimit a string
|
|
|
|
|
// [array] "[" and "]" delimit an array
|
|
|
|
|
// <hex-string> "<" and ">" delimit a hex string
|
2021-05-02 04:38:11 +02:00
|
|
|
|
// {...} "{" and "}" are reserved as future delimiters
|
2021-05-01 23:50:52 +02:00
|
|
|
|
// /name "/" starts a name with any special characters
|
|
|
|
|
// quoted as "#HH" where HH is the byte value in hex.
|
|
|
|
|
// %comment "%" starts a comment to the end of a line
|
2021-05-02 04:38:11 +02:00
|
|
|
|
// keyword A keyword consists of other unreserved characters
|
2021-05-01 23:50:52 +02:00
|
|
|
|
// [-+]?[0-9]*(.[0-9]*)? A number optionally starts with "+" or "-".
|
|
|
|
|
//
|
|
|
|
|
// Newlines are CR, LF, or CR LF.
|
|
|
|
|
//
|
|
|
|
|
// Strings and names are returned with the leading delimiter ("(string",
|
|
|
|
|
// "<hex-string", "/name") and all escaping/whitespace removal resolved.
|
|
|
|
|
// Other delimiters, keywords, and numbers are returned as-is.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
|
2021-05-02 04:38:11 +02:00
|
|
|
|
//
|
|
|
|
|
// Constants...
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
#define PDFIO_NUMBER_CHARS "0123456789-+." // First characters that make _pdfioTokenRead() start a number token
|
|
|
|
|
#define PDFIO_DELIM_CHARS "<>(){}[]/%" // Delimiter characters; each one ends a keyword or name and starts its own token
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
2021-05-08 13:38:44 +02:00
|
|
|
|
// Local functions...
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
static int get_char(_pdfio_token_t *tb);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// '_pdfioTokenClear()' - Clear the token stack.
|
2021-05-02 04:38:11 +02:00
|
|
|
|
//
|
|
|
|
|
|
2021-05-08 13:38:44 +02:00
|
|
|
|
void
|
|
|
|
|
_pdfioTokenClear(_pdfio_token_t *tb) // I - Token buffer/stack
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
PDFIO_DEBUG("_pdfioTokenClear(tb=%p)\n", tb);
|
|
|
|
|
|
|
|
|
|
while (tb->num_tokens > 0)
|
|
|
|
|
{
|
|
|
|
|
tb->num_tokens --;
|
|
|
|
|
free(tb->tokens[tb->num_tokens]);
|
|
|
|
|
tb->tokens[tb->num_tokens] = NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-05-02 04:38:11 +02:00
|
|
|
|
|
|
|
|
|
|
2021-05-10 23:37:57 +02:00
|
|
|
|
//
|
|
|
|
|
// '_pdfioTokenFlush()' - Flush (consume) any bytes that have been used.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
_pdfioTokenFlush(_pdfio_token_t *tb) // I - Token buffer/stack
|
|
|
|
|
{
|
|
|
|
|
if (tb->bufptr > tb->buffer)
|
|
|
|
|
{
|
|
|
|
|
size_t remaining = (size_t)(tb->bufend - tb->bufptr);
|
|
|
|
|
// Remaining bytes in buffer
|
|
|
|
|
|
|
|
|
|
// Consume what we've used...
|
|
|
|
|
PDFIO_DEBUG("_pdfioTokenFlush: Consuming %d bytes.\n", (int)(tb->bufptr - tb->buffer));
|
|
|
|
|
(tb->consume_cb)(tb->cb_data, (size_t)(tb->bufptr - tb->buffer));
|
|
|
|
|
|
|
|
|
|
if (remaining > 0)
|
|
|
|
|
{
|
|
|
|
|
// Shuffle remaining bytes for next call...
|
|
|
|
|
memmove(tb->buffer, tb->bufptr, remaining);
|
|
|
|
|
tb->bufptr = tb->buffer;
|
|
|
|
|
tb->bufend = tb->buffer + remaining;
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
|
unsigned char *ptr; // Pointer into buffer
|
|
|
|
|
|
|
|
|
|
PDFIO_DEBUG("_pdfioTokenFlush: Remainder '");
|
|
|
|
|
for (ptr = tb->buffer; ptr < tb->bufend; ptr ++)
|
|
|
|
|
{
|
|
|
|
|
if (*ptr < ' ' || *ptr == 0x7f)
|
|
|
|
|
PDFIO_DEBUG("\\%03o", *ptr);
|
|
|
|
|
else
|
|
|
|
|
PDFIO_DEBUG("%c", *ptr);
|
|
|
|
|
}
|
|
|
|
|
PDFIO_DEBUG("'\n");
|
|
|
|
|
#endif // DEBUG
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// Nothing left, reset pointers...
|
2021-08-23 20:31:54 +02:00
|
|
|
|
PDFIO_DEBUG("_pdfioTokenFlush: Resetting pointers.\n");
|
2021-05-10 23:37:57 +02:00
|
|
|
|
tb->bufptr = tb->bufend = tb->buffer;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2021-05-02 04:38:11 +02:00
|
|
|
|
//
|
2021-05-08 13:38:44 +02:00
|
|
|
|
// '_pdfioTokenGet()' - Get a token.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
bool // O - `true` on success, `false` on failure
|
|
|
|
|
_pdfioTokenGet(_pdfio_token_t *tb, // I - Token buffer/stack
|
|
|
|
|
char *buffer, // I - String buffer
|
|
|
|
|
size_t bufsize) // I - Size of string buffer
|
|
|
|
|
{
|
|
|
|
|
// See if we have a token waiting on the stack...
|
|
|
|
|
if (tb->num_tokens > 0)
|
|
|
|
|
{
|
|
|
|
|
// Yes, return it...
|
|
|
|
|
tb->num_tokens --;
|
|
|
|
|
strncpy(buffer, tb->tokens[tb->num_tokens], bufsize - 1);
|
|
|
|
|
buffer[bufsize - 1] = '\0';
|
|
|
|
|
|
|
|
|
|
PDFIO_DEBUG("_pdfioTokenGet(tb=%p, buffer=%p, bufsize=%u): Popping '%s' from stack.\n", tb, buffer, (unsigned)bufsize, buffer);
|
|
|
|
|
|
|
|
|
|
free(tb->tokens[tb->num_tokens]);
|
|
|
|
|
tb->tokens[tb->num_tokens] = NULL;
|
|
|
|
|
|
|
|
|
|
return (true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// No, read a new one...
|
|
|
|
|
return (_pdfioTokenRead(tb, buffer, bufsize));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2021-05-02 04:38:11 +02:00
|
|
|
|
//
|
2021-05-08 13:38:44 +02:00
|
|
|
|
// '_pdfioTokenInit()' - Initialize a token buffer/stack.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
_pdfioTokenInit(
|
|
|
|
|
_pdfio_token_t *ts, // I - Token buffer/stack
|
|
|
|
|
pdfio_file_t *pdf, // I - PDF file
|
|
|
|
|
_pdfio_tconsume_cb_t consume_cb, // I - Consume callback
|
|
|
|
|
_pdfio_tpeek_cb_t peek_cb, // I - Peek callback
|
|
|
|
|
void *cb_data) // I - Callback data
|
|
|
|
|
{
|
|
|
|
|
// Zero everything out and then initialize key pointers...
|
|
|
|
|
memset(ts, 0, sizeof(_pdfio_token_t));
|
|
|
|
|
|
|
|
|
|
ts->pdf = pdf;
|
|
|
|
|
ts->consume_cb = consume_cb;
|
|
|
|
|
ts->peek_cb = peek_cb;
|
|
|
|
|
ts->cb_data = cb_data;
|
|
|
|
|
ts->bufptr = ts->buffer;
|
|
|
|
|
ts->bufend = ts->buffer;
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-02 04:38:11 +02:00
|
|
|
|
|
2021-05-08 13:38:44 +02:00
|
|
|
|
//
|
|
|
|
|
// '_pdfioTokenPush()' - Push a token on the token stack.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
_pdfioTokenPush(_pdfio_token_t *tb, // I - Token buffer/stack
|
|
|
|
|
const char *token) // I - Token to push
|
|
|
|
|
{
|
|
|
|
|
if (tb->num_tokens < (sizeof(tb->tokens) / sizeof(tb->tokens[0])))
|
|
|
|
|
{
|
|
|
|
|
if ((tb->tokens[tb->num_tokens ++] = strdup(token)) == NULL)
|
|
|
|
|
tb->num_tokens --;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-05-02 04:38:11 +02:00
|
|
|
|
|
|
|
|
|
|
2021-05-01 23:50:52 +02:00
|
|
|
|
//
|
|
|
|
|
// '_pdfioTokenRead()' - Read a token from a file/stream.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
bool // O - `true` on success, `false` on failure
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
|
|
|
|
|
char *buffer, // I - String buffer
|
|
|
|
|
size_t bufsize) // I - Size of string buffer
|
2021-05-01 23:50:52 +02:00
|
|
|
|
{
|
2021-05-10 03:32:09 +02:00
|
|
|
|
int ch, // Character
|
|
|
|
|
parens = 0; // Parenthesis level
|
2021-05-08 13:38:44 +02:00
|
|
|
|
char *bufptr, // Pointer into buffer
|
|
|
|
|
*bufend, // End of buffer
|
|
|
|
|
state = '\0'; // Current state
|
2021-05-02 04:38:11 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// "state" is:
|
|
|
|
|
//
|
|
|
|
|
// - '\0' for idle
|
2021-05-05 03:31:58 +02:00
|
|
|
|
// - '(' for literal string
|
2021-05-02 04:38:11 +02:00
|
|
|
|
// - '/' for name
|
|
|
|
|
// - '<' for possible hex string or dict
|
|
|
|
|
// - '>' for possible dict
|
|
|
|
|
// - '%' for comment
|
|
|
|
|
// - 'K' for keyword
|
|
|
|
|
// - 'N' for number
|
|
|
|
|
|
|
|
|
|
// Read the next token, skipping any leading whitespace...
|
|
|
|
|
bufptr = buffer;
|
|
|
|
|
bufend = buffer + bufsize - 1;
|
|
|
|
|
|
|
|
|
|
// Skip leading whitespace...
|
2021-05-08 13:38:44 +02:00
|
|
|
|
while ((ch = get_char(tb)) != EOF)
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
if (ch == '%')
|
|
|
|
|
{
|
|
|
|
|
// Skip comment
|
2021-05-08 13:38:44 +02:00
|
|
|
|
while ((ch = get_char(tb)) != EOF)
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
if (ch == '\n' || ch == '\r')
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (!isspace(ch))
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ch == EOF)
|
|
|
|
|
return (false);
|
|
|
|
|
|
|
|
|
|
// Check for delimiters...
|
|
|
|
|
if (strchr(PDFIO_DELIM_CHARS, ch) != NULL)
|
|
|
|
|
{
|
|
|
|
|
*bufptr++ = state = (char)ch;
|
|
|
|
|
}
|
|
|
|
|
else if (strchr(PDFIO_NUMBER_CHARS, ch) != NULL)
|
|
|
|
|
{
|
|
|
|
|
// Number
|
|
|
|
|
state = 'N';
|
|
|
|
|
*bufptr++ = (char)ch;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// Keyword
|
|
|
|
|
state = 'K';
|
|
|
|
|
*bufptr++ = (char)ch;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (state)
|
|
|
|
|
{
|
2021-05-05 03:31:58 +02:00
|
|
|
|
case '(' : // Literal string
|
2021-05-10 03:32:09 +02:00
|
|
|
|
while ((ch = get_char(tb)) != EOF)
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
if (ch == '\\')
|
|
|
|
|
{
|
|
|
|
|
// Quoted character...
|
|
|
|
|
int i; // Looping var
|
|
|
|
|
|
2021-05-08 13:38:44 +02:00
|
|
|
|
switch (ch = get_char(tb))
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
case '0' : // Octal character escape
|
|
|
|
|
case '1' :
|
|
|
|
|
case '2' :
|
|
|
|
|
case '3' :
|
|
|
|
|
case '4' :
|
|
|
|
|
case '5' :
|
|
|
|
|
case '6' :
|
|
|
|
|
case '7' :
|
|
|
|
|
for (ch -= '0', i = 0; i < 2; i ++)
|
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
int tch = get_char(tb); // Next char
|
2021-05-02 04:38:11 +02:00
|
|
|
|
|
|
|
|
|
if (tch >= '0' && tch <= '7')
|
|
|
|
|
ch = (char)((ch << 3) | (tch - '0'));
|
|
|
|
|
else
|
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
tb->bufptr --;
|
2021-05-02 04:38:11 +02:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '\\' :
|
|
|
|
|
case '(' :
|
|
|
|
|
case ')' :
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'n' :
|
|
|
|
|
ch = '\n';
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'r' :
|
|
|
|
|
ch = '\r';
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 't' :
|
|
|
|
|
ch = '\t';
|
|
|
|
|
break;
|
2021-05-01 23:50:52 +02:00
|
|
|
|
|
2021-05-02 04:38:11 +02:00
|
|
|
|
case 'b' :
|
|
|
|
|
ch = '\b';
|
|
|
|
|
break;
|
2021-05-01 23:50:52 +02:00
|
|
|
|
|
2021-05-02 04:38:11 +02:00
|
|
|
|
case 'f' :
|
|
|
|
|
ch = '\f';
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default :
|
2021-05-10 03:32:09 +02:00
|
|
|
|
// Ignore blackslash per PDF spec...
|
|
|
|
|
break;
|
2021-05-02 04:38:11 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
2021-05-10 03:32:09 +02:00
|
|
|
|
else if (ch == '(')
|
|
|
|
|
{
|
|
|
|
|
// Keep track of parenthesis
|
|
|
|
|
parens ++;
|
|
|
|
|
}
|
|
|
|
|
else if (ch == ')')
|
|
|
|
|
{
|
|
|
|
|
if (parens == 0)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
parens --;
|
|
|
|
|
}
|
2021-05-02 04:38:11 +02:00
|
|
|
|
|
|
|
|
|
if (bufptr < bufend)
|
|
|
|
|
{
|
|
|
|
|
// Normal character...
|
|
|
|
|
*bufptr++ = (char)ch;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// Out of space
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Token too large.");
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ch != ')')
|
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Unterminated string literal.");
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'K' : // keyword
|
2021-05-08 13:38:44 +02:00
|
|
|
|
while ((ch = get_char(tb)) != EOF && !isspace(ch))
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
if (strchr(PDFIO_DELIM_CHARS, ch) != NULL)
|
|
|
|
|
{
|
|
|
|
|
// End of keyword...
|
2021-05-08 13:38:44 +02:00
|
|
|
|
tb->bufptr --;
|
2021-05-02 04:38:11 +02:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
else if (bufptr < bufend)
|
|
|
|
|
{
|
|
|
|
|
// Normal character...
|
|
|
|
|
*bufptr++ = (char)ch;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// Out of space...
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Token too large.");
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'N' : // number
|
2021-05-08 13:38:44 +02:00
|
|
|
|
while ((ch = get_char(tb)) != EOF && !isspace(ch))
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
if (!isdigit(ch) && ch != '.')
|
|
|
|
|
{
|
|
|
|
|
// End of number...
|
2021-05-08 13:38:44 +02:00
|
|
|
|
tb->bufptr --;
|
2021-05-02 04:38:11 +02:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
else if (bufptr < bufend)
|
|
|
|
|
{
|
|
|
|
|
// Normal character...
|
|
|
|
|
*bufptr++ = (char)ch;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// Out of space...
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Token too large.");
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '/' : // "/name"
|
2021-05-08 13:38:44 +02:00
|
|
|
|
while ((ch = get_char(tb)) != EOF && !isspace(ch))
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
2021-05-05 03:31:58 +02:00
|
|
|
|
if (strchr(PDFIO_DELIM_CHARS, ch) != NULL)
|
|
|
|
|
{
|
|
|
|
|
// End of keyword...
|
2021-05-08 13:38:44 +02:00
|
|
|
|
tb->bufptr --;
|
2021-05-05 03:31:58 +02:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
else if (ch == '#')
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
// Quoted character (#xx) in name...
|
|
|
|
|
int i; // Looping var
|
|
|
|
|
|
|
|
|
|
for (i = 0, ch = 0; i < 2; i ++)
|
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
int tch = get_char(tb);
|
2021-05-02 04:38:11 +02:00
|
|
|
|
|
|
|
|
|
if (!isxdigit(tch & 255))
|
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Bad # escape in name.");
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
else if (isdigit(tch))
|
2021-06-04 17:03:24 +02:00
|
|
|
|
ch = ((ch & 255) << 4) | (tch - '0');
|
2021-05-02 04:38:11 +02:00
|
|
|
|
else
|
2021-06-04 17:03:24 +02:00
|
|
|
|
ch = ((ch & 255) << 4) | (tolower(tch) - 'a' + 10);
|
2021-05-02 04:38:11 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (bufptr < bufend)
|
|
|
|
|
{
|
|
|
|
|
*bufptr++ = (char)ch;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// Out of space
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Token too large.");
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '<' : // Potential hex string
|
2021-05-08 13:38:44 +02:00
|
|
|
|
if ((ch = get_char(tb)) == '<')
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
// Dictionary delimiter
|
|
|
|
|
*bufptr++ = (char)ch;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
else if (!isspace(ch & 255) && !isxdigit(ch & 255))
|
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Syntax error: '<%c'", ch);
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-08 13:38:44 +02:00
|
|
|
|
while ((ch = get_char(tb)) != EOF && ch != '>')
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
if (isxdigit(ch))
|
|
|
|
|
{
|
|
|
|
|
if (bufptr < bufend)
|
|
|
|
|
{
|
|
|
|
|
// Hex digit
|
|
|
|
|
*bufptr++ = (char)ch;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// Too large
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Token too large.");
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (!isspace(ch))
|
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Invalid hex string character '%c'.", ch);
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ch == EOF)
|
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Unterminated hex string.");
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '>' : // Dictionary
|
2021-05-08 13:38:44 +02:00
|
|
|
|
if ((ch = get_char(tb)) == '>')
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
*bufptr++ = '>';
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2021-05-08 13:38:44 +02:00
|
|
|
|
_pdfioFileError(tb->pdf, "Syntax error: '>%c'.", ch);
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (false);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-08 13:38:44 +02:00
|
|
|
|
while (tb->bufptr < tb->bufend && isspace(*(tb->bufptr)))
|
|
|
|
|
tb->bufptr ++;
|
|
|
|
|
|
2021-05-02 04:38:11 +02:00
|
|
|
|
*bufptr = '\0';
|
|
|
|
|
|
2021-05-08 13:38:44 +02:00
|
|
|
|
PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
|
2021-05-04 18:59:10 +02:00
|
|
|
|
|
2021-05-02 04:38:11 +02:00
|
|
|
|
return (bufptr > buffer);
|
2021-05-01 23:50:52 +02:00
|
|
|
|
}
|
|
|
|
|
|
2021-05-02 04:38:11 +02:00
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// 'get_char()' - Get a character from the token buffer.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
static int // O - Character or `EOF` on end-of-file
|
2021-05-08 13:38:44 +02:00
|
|
|
|
get_char(_pdfio_token_t *tb) // I - Token buffer
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
ssize_t bytes; // Bytes peeked
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Refill the buffer as needed...
|
|
|
|
|
if (tb->bufptr >= tb->bufend)
|
|
|
|
|
{
|
|
|
|
|
// Consume previous bytes...
|
|
|
|
|
if (tb->bufend > tb->buffer)
|
2021-05-10 23:37:57 +02:00
|
|
|
|
{
|
|
|
|
|
PDFIO_DEBUG("get_char: Consuming %d bytes.\n", (int)(tb->bufend - tb->buffer));
|
2021-05-08 13:38:44 +02:00
|
|
|
|
(tb->consume_cb)(tb->cb_data, (size_t)(tb->bufend - tb->buffer));
|
2021-05-10 23:37:57 +02:00
|
|
|
|
}
|
2021-05-02 04:38:11 +02:00
|
|
|
|
|
|
|
|
|
// Peek new bytes...
|
2021-05-08 13:38:44 +02:00
|
|
|
|
if ((bytes = (tb->peek_cb)(tb->cb_data, tb->buffer, sizeof(tb->buffer))) <= 0)
|
2021-05-02 04:38:11 +02:00
|
|
|
|
{
|
|
|
|
|
tb->bufptr = tb->bufend = tb->buffer;
|
|
|
|
|
return (EOF);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Update pointers...
|
|
|
|
|
tb->bufptr = tb->buffer;
|
|
|
|
|
tb->bufend = tb->buffer + bytes;
|
2021-05-10 23:37:57 +02:00
|
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
|
unsigned char *ptr; // Pointer into buffer
|
|
|
|
|
|
|
|
|
|
PDFIO_DEBUG("get_char: Read '");
|
|
|
|
|
for (ptr = tb->buffer; ptr < tb->bufend; ptr ++)
|
|
|
|
|
{
|
|
|
|
|
if (*ptr < ' ' || *ptr == 0x7f)
|
|
|
|
|
PDFIO_DEBUG("\\%03o", *ptr);
|
|
|
|
|
else
|
|
|
|
|
PDFIO_DEBUG("%c", *ptr);
|
|
|
|
|
}
|
|
|
|
|
PDFIO_DEBUG("'\n");
|
|
|
|
|
#endif // DEBUG
|
2021-05-02 04:38:11 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Return the next character...
|
|
|
|
|
return (*(tb->bufptr)++);
|
|
|
|
|
}
|