mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2024-12-26 13:28:22 +01:00
Fix the pdfioStreamGetToken implementation (it wasn't flushing input) and update the
pdfiototext code to better handle the different text operators that affect the location of the text.
This commit is contained in:
parent
93a3fcea6c
commit
f7f2969e3a
@ -6,7 +6,8 @@ v1.0.1 (Month DD, YYYY)
|
|||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
|
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
|
||||||
- Added pdfiototext demo utility.
|
- Added demo pdfiototext utility.
|
||||||
|
- Fixed bug in `pdfioStreamGetToken`.
|
||||||
|
|
||||||
|
|
||||||
v1.0.0 (December 14, 2021)
|
v1.0.0 (December 14, 2021)
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
//
|
//
|
||||||
// PDF stream functions for PDFio.
|
// PDF stream functions for PDFio.
|
||||||
//
|
//
|
||||||
// Copyright © 2021 by Michael R Sweet.
|
// Copyright © 2021-2022 by Michael R Sweet.
|
||||||
//
|
//
|
||||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||||
// information.
|
// information.
|
||||||
@ -372,6 +372,7 @@ pdfioStreamGetToken(
|
|||||||
size_t bufsize) // I - Size of string buffer
|
size_t bufsize) // I - Size of string buffer
|
||||||
{
|
{
|
||||||
_pdfio_token_t tb; // Token buffer/stack
|
_pdfio_token_t tb; // Token buffer/stack
|
||||||
|
bool ret; // Return value
|
||||||
|
|
||||||
|
|
||||||
// Range check input...
|
// Range check input...
|
||||||
@ -381,7 +382,10 @@ pdfioStreamGetToken(
|
|||||||
// Read using the token engine...
|
// Read using the token engine...
|
||||||
_pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
|
_pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
|
||||||
|
|
||||||
return (_pdfioTokenRead(&tb, buffer, bufsize));
|
ret = _pdfioTokenRead(&tb, buffer, bufsize);
|
||||||
|
_pdfioTokenFlush(&tb);
|
||||||
|
|
||||||
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
#include "pdfio.h"
|
#include "pdfio.h"
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
@ -43,7 +44,7 @@ main(int argc, // I - Number of command-line arguments
|
|||||||
if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
|
if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
|
||||||
return (1);
|
return (1);
|
||||||
|
|
||||||
printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
|
// printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
|
||||||
|
|
||||||
// Try grabbing content from all of the pages...
|
// Try grabbing content from all of the pages...
|
||||||
for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
|
for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
|
||||||
@ -53,14 +54,14 @@ main(int argc, // I - Number of command-line arguments
|
|||||||
|
|
||||||
num_streams = pdfioPageGetNumStreams(obj);
|
num_streams = pdfioPageGetNumStreams(obj);
|
||||||
|
|
||||||
printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
|
// printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
|
||||||
|
|
||||||
for (j = 0; j < num_streams; j ++)
|
for (j = 0; j < num_streams; j ++)
|
||||||
{
|
{
|
||||||
if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
|
if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
|
// printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
|
||||||
|
|
||||||
first = true;
|
first = true;
|
||||||
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
|
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
|
||||||
@ -68,13 +69,17 @@ main(int argc, // I - Number of command-line arguments
|
|||||||
if (buffer[0] == '(')
|
if (buffer[0] == '(')
|
||||||
{
|
{
|
||||||
if (first)
|
if (first)
|
||||||
{
|
|
||||||
putchar(' ');
|
|
||||||
first = false;
|
first = false;
|
||||||
}
|
else
|
||||||
|
putchar(' ');
|
||||||
|
|
||||||
fputs(buffer + 1, stdout);
|
fputs(buffer + 1, stdout);
|
||||||
}
|
}
|
||||||
|
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
|
||||||
|
{
|
||||||
|
putchar('\n');
|
||||||
|
first = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!first)
|
if (!first)
|
||||||
|
Loading…
Reference in New Issue
Block a user