Fix the pdfioStreamGetToken implementation (it wasn't flushing input) and update
the pdfiototext code to better handle the different text operators that affect
the location of the text.
This commit is contained in:
Michael R Sweet 2022-03-01 09:18:56 -05:00
parent 93a3fcea6c
commit f7f2969e3a
No known key found for this signature in database
GPG Key ID: 999559A027815955
3 changed files with 19 additions and 9 deletions

View File

@ -6,7 +6,8 @@ v1.0.1 (Month DD, YYYY)
-----------------------
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
- Added pdfiototext demo utility.
- Added demo pdfiototext utility.
- Fixed a bug in `pdfioStreamGetToken` where the input was not being flushed.
v1.0.0 (December 14, 2021)

View File

@ -1,7 +1,7 @@
//
// PDF stream functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -372,6 +372,7 @@ pdfioStreamGetToken(
size_t bufsize) // I - Size of string buffer
{
_pdfio_token_t tb; // Token buffer/stack
bool ret; // Return value
// Range check input...
@ -381,7 +382,10 @@ pdfioStreamGetToken(
// Read using the token engine...
_pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
return (_pdfioTokenRead(&tb, buffer, bufsize));
ret = _pdfioTokenRead(&tb, buffer, bufsize);
_pdfioTokenFlush(&tb);
return (ret);
}

View File

@ -12,6 +12,7 @@
//
#include "pdfio.h"
#include <string.h>
//
@ -43,7 +44,7 @@ main(int argc, // I - Number of command-line arguments
if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
return (1);
printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
// printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
// Try grabbing content from all of the pages...
for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
@ -53,14 +54,14 @@ main(int argc, // I - Number of command-line arguments
num_streams = pdfioPageGetNumStreams(obj);
printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
// printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
for (j = 0; j < num_streams; j ++)
{
if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
continue;
printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
// printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
first = true;
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
@ -68,13 +69,17 @@ main(int argc, // I - Number of command-line arguments
if (buffer[0] == '(')
{
if (first)
{
putchar(' ');
first = false;
}
else
putchar(' ');
fputs(buffer + 1, stdout);
}
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
{
putchar('\n');
first = true;
}
}
if (!first)