mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2024-12-25 21:08:21 +01:00
Fix the pdfioStreamGetToken implementation (it wasn't flushing input), and update the
pdfiototext code to better handle the different text operators that affect the location of the text.
This commit is contained in:
parent
93a3fcea6c
commit
f7f2969e3a
@ -6,7 +6,8 @@ v1.0.1 (Month DD, YYYY)
|
||||
-----------------------
|
||||
|
||||
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
|
||||
- Added pdfiototext demo utility.
|
||||
- Added demo pdfiototext utility.
|
||||
- Fixed bug in `pdfioStreamGetToken`.
|
||||
|
||||
|
||||
v1.0.0 (December 14, 2021)
|
||||
|
@ -1,7 +1,7 @@
|
||||
//
|
||||
// PDF stream functions for PDFio.
|
||||
//
|
||||
// Copyright © 2021 by Michael R Sweet.
|
||||
// Copyright © 2021-2022 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
@ -372,6 +372,7 @@ pdfioStreamGetToken(
|
||||
size_t bufsize) // I - Size of string buffer
|
||||
{
|
||||
_pdfio_token_t tb; // Token buffer/stack
|
||||
bool ret; // Return value
|
||||
|
||||
|
||||
// Range check input...
|
||||
@ -381,7 +382,10 @@ pdfioStreamGetToken(
|
||||
// Read using the token engine...
|
||||
_pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
|
||||
|
||||
return (_pdfioTokenRead(&tb, buffer, bufsize));
|
||||
ret = _pdfioTokenRead(&tb, buffer, bufsize);
|
||||
_pdfioTokenFlush(&tb);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
//
|
||||
|
||||
#include "pdfio.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
//
|
||||
@ -43,7 +44,7 @@ main(int argc, // I - Number of command-line arguments
|
||||
if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
|
||||
return (1);
|
||||
|
||||
printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
|
||||
// printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
|
||||
|
||||
// Try grabbing content from all of the pages...
|
||||
for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
|
||||
@ -53,14 +54,14 @@ main(int argc, // I - Number of command-line arguments
|
||||
|
||||
num_streams = pdfioPageGetNumStreams(obj);
|
||||
|
||||
printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
|
||||
// printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
|
||||
|
||||
for (j = 0; j < num_streams; j ++)
|
||||
{
|
||||
if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
|
||||
continue;
|
||||
|
||||
printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
|
||||
// printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
|
||||
|
||||
first = true;
|
||||
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
|
||||
@ -68,13 +69,17 @@ main(int argc, // I - Number of command-line arguments
|
||||
if (buffer[0] == '(')
|
||||
{
|
||||
if (first)
|
||||
{
|
||||
putchar(' ');
|
||||
first = false;
|
||||
}
|
||||
else
|
||||
putchar(' ');
|
||||
|
||||
fputs(buffer + 1, stdout);
|
||||
}
|
||||
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
|
||||
{
|
||||
putchar('\n');
|
||||
first = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!first)
|
||||
|
Loading…
Reference in New Issue
Block a user