Fix pdfioStreamGetToken implementation (wasn't flushing input), update
pdfiototext code to better handle different text operators that affect the
location of the text.
This commit is contained in:
Michael R Sweet 2022-03-01 09:18:56 -05:00
parent 93a3fcea6c
commit f7f2969e3a
No known key found for this signature in database
GPG Key ID: 999559A027815955
3 changed files with 19 additions and 9 deletions

View File

@@ -6,7 +6,8 @@ v1.0.1 (Month DD, YYYY)
----------------------- -----------------------
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions. - Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
- Added pdfiototext demo utility. - Added demo pdfiototext utility.
- Fixed bug in `pdfioStreamGetToken`.
v1.0.0 (December 14, 2021) v1.0.0 (December 14, 2021)

View File

@@ -1,7 +1,7 @@
// //
// PDF stream functions for PDFio. // PDF stream functions for PDFio.
// //
// Copyright © 2021 by Michael R Sweet. // Copyright © 2021-2022 by Michael R Sweet.
// //
// Licensed under Apache License v2.0. See the file "LICENSE" for more // Licensed under Apache License v2.0. See the file "LICENSE" for more
// information. // information.
@@ -372,6 +372,7 @@ pdfioStreamGetToken(
size_t bufsize) // I - Size of string buffer size_t bufsize) // I - Size of string buffer
{ {
_pdfio_token_t tb; // Token buffer/stack _pdfio_token_t tb; // Token buffer/stack
bool ret; // Return value
// Range check input... // Range check input...
@@ -381,7 +382,10 @@ pdfioStreamGetToken(
// Read using the token engine... // Read using the token engine...
_pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st); _pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
return (_pdfioTokenRead(&tb, buffer, bufsize)); ret = _pdfioTokenRead(&tb, buffer, bufsize);
_pdfioTokenFlush(&tb);
return (ret);
} }

View File

@@ -12,6 +12,7 @@
// //
#include "pdfio.h" #include "pdfio.h"
#include <string.h>
// //
@@ -43,7 +44,7 @@ main(int argc, // I - Number of command-line arguments
if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL) if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
return (1); return (1);
printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file)); // printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
// Try grabbing content from all of the pages... // Try grabbing content from all of the pages...
for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++) for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
@@ -53,14 +54,14 @@ main(int argc, // I - Number of command-line arguments
num_streams = pdfioPageGetNumStreams(obj); num_streams = pdfioPageGetNumStreams(obj);
printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams); // printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
for (j = 0; j < num_streams; j ++) for (j = 0; j < num_streams; j ++)
{ {
if ((st = pdfioPageOpenStream(obj, j, true)) == NULL) if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
continue; continue;
printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st); // printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
first = true; first = true;
while (pdfioStreamGetToken(st, buffer, sizeof(buffer))) while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
@@ -68,13 +69,17 @@ main(int argc, // I - Number of command-line arguments
if (buffer[0] == '(') if (buffer[0] == '(')
{ {
if (first) if (first)
{
putchar(' ');
first = false; first = false;
} else
putchar(' ');
fputs(buffer + 1, stdout); fputs(buffer + 1, stdout);
} }
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
{
putchar('\n');
first = true;
}
} }
if (!first) if (!first)