13 Commits

Author SHA1 Message Date
6378047026 Update VC project. 2022-03-02 09:31:33 -05:00
54578144a0 Update documentation and prep for 1.0.1 release. 2022-03-02 09:30:01 -05:00
f7f2969e3a Fix pdfioStreamGetToken implementation (wasn't flushing input), update
pdfiototext code to better handle different text operators that affect the
location of the text.
2022-03-01 09:18:56 -05:00
93a3fcea6c Add missing pdfioPageGetNumStreams and pdfioPageOpenStream functions.
Add initial version of pdfiototext text extraction utility.
2022-02-28 15:00:25 -05:00
fa20982e5d Coverity certs are fixed. 2021-12-15 18:20:54 -05:00
44d20eba1b Add stub code for AES-256 to force Coverity to re-analyze... 2021-12-15 07:35:55 -05:00
c0b7925cdf Fix typo. 2021-12-15 07:28:17 -05:00
68dcf021b2 Download Entrust root cert for validation. 2021-12-15 07:25:44 -05:00
b0a8e60968 Also allow posts to coverity.com while we wait for Ubuntu to pick up the new Entrust root certificate. 2021-12-15 07:10:13 -05:00
9d47745e43 Prep for 1.0rc1. 2021-12-15 06:53:09 -05:00
b0bf2e04b9 Coverity's certificate has expired. 2021-12-14 16:26:57 -05:00
f030112372 See what is happening when downloading Coverity build tool (drop quiet option). 2021-12-14 16:21:49 -05:00
79c4b6f8a8 See what is happening when downloading Coverity build tool. 2021-12-14 16:20:34 -05:00
15 changed files with 341 additions and 43 deletions

1
.gitignore vendored
View File

@ -8,6 +8,7 @@
/doc/pdfio.epub
/packages
/pdfio.xcodeproj/xcshareddata
/pdfiototext
/testpdfio
/testpdfio-*.pdf
/x64

View File

@ -2,6 +2,14 @@ Changes in PDFio
================
v1.0.1 (March 2, 2022)
----------------------
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
- Added demo pdfiototext utility.
- Fixed bug in `pdfioStreamGetToken`.
v1.0.0 (December 14, 2021)
--------------------------

View File

@ -1,7 +1,7 @@
#
# Makefile for PDFio.
#
# Copyright © 2021 by Michael R Sweet.
# Copyright © 2021-2022 by Michael R Sweet.
#
# Licensed under Apache License v2.0. See the file "LICENSE" for more
# information.
@ -26,7 +26,7 @@ DSONAME =
LDFLAGS =
LIBS = -lm -lz
RANLIB = ranlib
VERSION = 1.0.0
VERSION = 1.0.1
prefix = /usr/local
@ -62,10 +62,12 @@ LIBOBJS = \
ttf.o
OBJS = \
$(LIBOBJS) \
pdfiototext.o \
testpdfio.o
TARGETS = \
$(DSONAME) \
libpdfio.a \
pdfiototext \
testpdfio
@ -82,6 +84,9 @@ all-shared:
debug:
$(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-g -fsanitize=address -DDEBUG=1" clean all
macos:
$(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" clean all
# Clean everything
clean:
@ -154,6 +159,11 @@ pdfio1.def: $(LIBOBJS) Makefile
grep -v '^_ttf' | sed -e '1,$$s/^_//' | sort >>$@
# pdfio text extraction (demo, doesn't handle a lot of things yet)
pdfiototext: pdfiototext.o libpdfio.a
$(CC) $(LDFLAGS) $(COMMONFLAGS) -o $@ pdfiototext.o libpdfio.a $(LIBS)
# pdfio test program
testpdfio: testpdfio.o libpdfio.a
$(CC) $(LDFLAGS) $(COMMONFLAGS) -o $@ testpdfio.o libpdfio.a $(LIBS)
@ -167,7 +177,7 @@ ttf.o: ttf.h
# Make documentation using Codedoc <https://www.msweet.org/codedoc>
DOCFLAGS = \
--author "Michael R Sweet" \
--copyright "Copyright (c) 2021 by Michael R Sweet" \
--copyright "Copyright (c) 2021-2022 by Michael R Sweet" \
--docversion $(VERSION)
.PHONY: doc

View File

@ -1,4 +1,4 @@
.TH pdfio 3 "pdf read/write library" "2021-12-14" "pdf read/write library"
.TH pdfio 3 "pdf read/write library" "2022-03-02" "pdf read/write library"
.SH NAME
pdfio \- pdf read/write library
.SH Introduction
@ -34,7 +34,7 @@ PDFio is
.I not
concerned with rendering or viewing a PDF file, although a PDF RIP or viewer could be written using it.
.PP
PDFio is Copyright \[co] 2021 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
PDFio is Copyright \[co] 2021\-2022 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
.SS Requirements
.PP
PDFio requires the following to build the software:
@ -156,7 +156,7 @@ There is also an Xcode project ("pdfio.xcodeproj") you can use on macOS which ge
You can reproduce this with the makefile using:
.nf
sudo make COMMONFLAGS="\-Os \-mmacosx\-version\-min=10.14 \-arch x86_64 \-arch arm64" install
sudo make macos install
.fi
.SS Detecting PDFio
.PP
@ -254,7 +254,7 @@ Each PDF file contains one or more pages. The pdfioFileGetNumPages function retu
}
.fi
.PP
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page.
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page. Use the pdfioPageGetNumStreams and pdfioPageOpenStream functions to access the content streams for each page.
.PP
The pdfioFileClose function closes a PDF file and frees all memory that was used for it:
.nf
@ -324,6 +324,14 @@ Some PDF objects have an associated data stream, such as for pages, images, ICC
.PP
The first argument is the object pointer. The second argument is a boolean value that specifies whether you want to decode (typically decompress) the stream data or return it as\-is.
.PP
When reading a page stream you'll use the pdfioPageOpenStream function instead:
.nf
pdfio_file_t *pdf = pdfioFileOpen(...);
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
pdfio_stream_t *st = pdfioPageOpenStream(obj, 0, true);
.fi
.PP
Once you have the stream open, you can use one of several functions to read from it:
.IP \(bu 5
.PP
@ -353,12 +361,21 @@ To create a stream for a new object, call the pdfioObjCreateStream function:
.nf
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
.fi
.PP
The first argument is the newly created object. The second argument is either PDFIO_FILTER_NONE to specify that any encoding is done by your program or PDFIO_FILTER_FLATE to specify that PDFio should Flate compress the stream.
.PP
To create a page content stream call the pdfioFileCreatePage function:
.nf
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_dict_t *dict = pdfioDictCreate(pdf);
// ... set page dictionary keys and values ...
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
.fi
.PP
Once you have created the stream, use any of the following functions to write to the stream:
.IP \(bu 5
.PP
@ -2693,6 +2710,24 @@ bool pdfioPageDictAddImage (
pdfio_obj_t *obj
);
.fi
.SS pdfioPageGetNumStreams
Get the number of content streams for a page object.
.PP
.nf
size_t pdfioPageGetNumStreams (
pdfio_obj_t *page
);
.fi
.SS pdfioPageOpenStream
Open a content stream for a page.
.PP
.nf
pdfio_stream_t * pdfioPageOpenStream (
pdfio_obj_t *page,
size_t n,
bool decode
);
.fi
.SS pdfioStreamClose
Close a (data) stream in a PDF file.
.PP
@ -2947,4 +2982,4 @@ typedef uint8_t state_t[4][4];
Michael R Sweet
.SH COPYRIGHT
.PP
Copyright (c) 2021 by Michael R Sweet
Copyright (c) 2021-2022 by Michael R Sweet

View File

@ -1,13 +1,13 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>PDFio Programming Manual v1.0.0</title>
<title>PDFio Programming Manual v1.0.1</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="generator" content="codedoc v3.7">
<meta name="author" content="Michael R Sweet">
<meta name="language" content="en-US">
<meta name="copyright" content="Copyright © 2021 by Michael R Sweet">
<meta name="version" content="1.0.0">
<meta name="copyright" content="Copyright © 2021-2022 by Michael R Sweet">
<meta name="version" content="1.0.1">
<style type="text/css"><!--
body {
background: white;
@ -245,9 +245,9 @@ span.string {
<body>
<div class="header">
<p><img class="title" src="pdfio-512.png"></p>
<h1 class="title">PDFio Programming Manual v1.0.0</h1>
<h1 class="title">PDFio Programming Manual v1.0.1</h1>
<p>Michael R Sweet</p>
<p>Copyright © 2021 by Michael R Sweet</p>
<p>Copyright © 2021-2022 by Michael R Sweet</p>
</div>
<div class="contents">
<h2 class="title">Contents</h2>
@ -425,6 +425,8 @@ span.string {
<li><a href="#pdfioPageDictAddColorSpace">pdfioPageDictAddColorSpace</a></li>
<li><a href="#pdfioPageDictAddFont">pdfioPageDictAddFont</a></li>
<li><a href="#pdfioPageDictAddImage">pdfioPageDictAddImage</a></li>
<li><a href="#pdfioPageGetNumStreams">pdfioPageGetNumStreams</a></li>
<li><a href="#pdfioPageOpenStream">pdfioPageOpenStream</a></li>
<li><a href="#pdfioStreamClose">pdfioStreamClose</a></li>
<li><a href="#pdfioStreamConsume">pdfioStreamConsume</a></li>
<li><a href="#pdfioStreamGetToken">pdfioStreamGetToken</a></li>
@ -491,7 +493,7 @@ span.string {
</li>
</ul>
<p>PDFio is <em>not</em> concerned with rendering or viewing a PDF file, although a PDF RIP or viewer could be written using it.</p>
<p>PDFio is Copyright © 2021 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files &quot;LICENSE&quot; and &quot;NOTICE&quot; for more information.</p>
<p>PDFio is Copyright © 2021-2022 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files &quot;LICENSE&quot; and &quot;NOTICE&quot; for more information.</p>
<h3 class="title" id="requirements">Requirements</h3>
<p>PDFio requires the following to build the software:</p>
<ul>
@ -557,7 +559,7 @@ make install-shared
<pre><code>sudo xcodebuild install
</code></pre>
<p>You can reproduce this with the makefile using:</p>
<pre><code>sudo make COMMONFLAGS=&quot;-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64&quot; install
<pre><code>sudo make macos install
</code></pre>
<h3 class="title" id="detecting-pdfio">Detecting PDFio</h3>
<p>PDFio can be detected using the <code>pkg-config</code> command, for example:</p>
@ -621,7 +623,7 @@ pdfio_obj_t *page; <span class="comment">// Current page</span>
<span class="comment">// do something with page</span>
}
</code></pre>
<p>Each page is represented by a &quot;page tree&quot; object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more &quot;content&quot; objects that contain the images, fonts, text, and graphics that appear on the page.</p>
<p>Each page is represented by a &quot;page tree&quot; object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more &quot;content&quot; objects that contain the images, fonts, text, and graphics that appear on the page. Use the <a href="#pdfioPageGetNumStreams"><code>pdfioPageGetNumStreams</code></a> and <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> functions to access the content streams for each page.</p>
<p>The <a href="#pdfioFileClose"><code>pdfioFileClose</code></a> function closes a PDF file and frees all memory that was used for it:</p>
<pre><code class="language-c">pdfioFileClose(pdf);
</code></pre>
@ -663,6 +665,11 @@ pdfio_obj_t *obj = pdfioFileFindObj(pdf, number);
pdfio_stream_t *st = pdfioObjOpenStream(obj, <span class="reserved">true</span>);
</code></pre>
<p>The first argument is the object pointer. The second argument is a boolean value that specifies whether you want to decode (typically decompress) the stream data or return it as-is.</p>
<p>When reading a page stream you'll use the <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> function instead:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileOpen(...);
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
pdfio_stream_t *st = pdfioPageOpenStream(obj, <span class="number">0</span>, <span class="reserved">true</span>);
</code></pre>
<p>Once you have the stream open, you can use one of several functions to read from it:</p>
<ul>
<li><p><a href="#pdfioStreamConsume"><code>pdfioStreamConsume</code></a> reads and discards a number of bytes in the stream</p>
@ -679,10 +686,16 @@ pdfio_stream_t *st = pdfioObjOpenStream(obj, <span class="reserved">true</span>)
</code></pre>
<p>To create a stream for a new object, call the <a href="#pdfioObjCreateStream"><code>pdfioObjCreateStream</code></a> function:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
</code></pre>
<p>The first argument is the newly created object. The second argument is either <code>PDFIO_FILTER_NONE</code> to specify that any encoding is done by your program or <code>PDFIO_FILTER_FLATE</code> to specify that PDFio should Flate compress the stream.</p>
<p>To create a page content stream call the <a href="#pdfioFileCreatePage"><code>pdfioFileCreatePage</code></a> function:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_dict_t *dict = pdfioDictCreate(pdf);
<span class="comment">// ... set page dictionary keys and values ...</span>
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
</code></pre>
<p>Once you have created the stream, use any of the following functions to write to the stream:</p>
<ul>
<li><p><a href="#pdfioStreamPrintf"><code>pdfioStreamPrintf</code></a> writes a formatted string to the stream</p>
@ -3264,6 +3277,32 @@ bool pdfioPageDictAddImage(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, const
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioPageGetNumStreams">pdfioPageGetNumStreams</a></h3>
<p class="description">Get the number of content streams for a page object.</p>
<p class="code">
size_t pdfioPageGetNumStreams(<a href="#pdfio_obj_t">pdfio_obj_t</a> *page);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>page</th>
<td class="description">Page object</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Number of streams</p>
<h3 class="function"><a id="pdfioPageOpenStream">pdfioPageOpenStream</a></h3>
<p class="description">Open a content stream for a page.</p>
<p class="code">
<a href="#pdfio_stream_t">pdfio_stream_t</a> *pdfioPageOpenStream(<a href="#pdfio_obj_t">pdfio_obj_t</a> *page, size_t n, bool decode);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>page</th>
<td class="description">Page object</td></tr>
<tr><th>n</th>
<td class="description">Stream index (0-based)</td></tr>
<tr><th>decode</th>
<td class="description"><code>true</code> to decode/decompress stream</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Stream</p>
<h3 class="function"><a id="pdfioStreamClose">pdfioStreamClose</a></h3>
<p class="description">Close a (data) stream in a PDF file.</p>
<p class="code">

View File

@ -15,8 +15,8 @@ goals of pdfio are:
PDFio is *not* concerned with rendering or viewing a PDF file, although a PDF
RIP or viewer could be written using it.
PDFio is Copyright © 2021 by Michael R Sweet and is licensed under the Apache
License Version 2.0 with an (optional) exception to allow linking against
PDFio is Copyright © 2021-2022 by Michael R Sweet and is licensed under the
Apache License Version 2.0 with an (optional) exception to allow linking against
GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
@ -104,7 +104,7 @@ generates a static library that will be installed under "/usr/local" with:
You can reproduce this with the makefile using:
sudo make COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" install
sudo make macos install
Detecting PDFio
@ -209,7 +209,8 @@ for (i = 0, count = pdfioFileGetNumPages(pdf); i < count; i ++)
Each page is represented by a "page tree" object (what [`pdfioFileGetPage`](@@)
returns) that specifies information about the page and one or more "content"
objects that contain the images, fonts, text, and graphics that appear on the
page.
page. Use the [`pdfioPageGetNumStreams`](@@) and [`pdfioPageOpenStream`](@@)
functions to access the content streams for each page.
The [`pdfioFileClose`](@@) function closes a PDF file and frees all memory that
was used for it:
@ -294,6 +295,15 @@ The first argument is the object pointer. The second argument is a boolean
value that specifies whether you want to decode (typically decompress) the
stream data or return it as-is.
When reading a page stream you'll use the [`pdfioPageOpenStream`](@@) function
instead:
```c
pdfio_file_t *pdf = pdfioFileOpen(...);
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
pdfio_stream_t *st = pdfioPageOpenStream(obj, 0, true);
```
Once you have the stream open, you can use one of several functions to read
from it:
@ -315,14 +325,23 @@ function:
```c
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
```
The first argument is the newly created object. The second argument is either
`PDFIO_FILTER_NONE` to specify that any encoding is done by your program or
`PDFIO_FILTER_FLATE` to specify that PDFio should Flate compress the stream.
To create a page content stream call the [`pdfioFileCreatePage`](@@) function:
```c
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_dict_t *dict = pdfioDictCreate(pdf);
// ... set page dictionary keys and values ...
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
```
Once you have created the stream, use any of the following functions to write
to the stream:

View File

@ -663,7 +663,12 @@ _pdfioCryptoUnlock(
length = 128;
}
}
// TODO: Implement AES-256 - V6 R6
else if (version == 6 && revision == 6)
{
// TODO: Implement AES-256 - V6 R6
pdf->encryption = PDFIO_ENCRYPTION_AES_256;
length = 256;
}
PDFIO_DEBUG("_pdfioCryptoUnlock: encryption=%d, length=%d\n", pdf->encryption, length);
@ -788,6 +793,8 @@ _pdfioCryptoUnlock(
else
{
// TODO: Implement AES-256 security handler
_pdfioFileError(pdf, "Unable to unlock AES-256 encrypted file at this time.");
return (false);
}
// If we get here we need to try another password...

View File

@ -1,7 +1,7 @@
//
// PDF page functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -14,6 +14,13 @@
#include "pdfio-private.h"
//
// Local functions...
//
static _pdfio_value_t *get_contents(pdfio_obj_t *page);
//
// 'pdfioPageCopy()' - Copy a page to a PDF file.
//
@ -47,3 +54,74 @@ pdfioPageCopy(pdfio_file_t *pdf, // I - PDF file
else
return (_pdfioFileAddPage(pdf, dstpage));
}
//
// 'pdfioPageGetNumStreams()' - Get the number of content streams for a page object.
//
size_t                                  // O - Number of streams
pdfioPageGetNumStreams(
    pdfio_obj_t *page)                  // I - Page object
{
  _pdfio_value_t *val;                  // Page's "Contents" value, if any


  // No Contents value could be obtained for this page, so report no streams...
  if ((val = get_contents(page)) == NULL)
    return (0);

  // Anything other than an array is counted as exactly one stream...
  if (val->type != PDFIO_VALTYPE_ARRAY)
    return (1);

  // Otherwise the count is the number of array elements...
  return (pdfioArrayGetSize(val->value.array));
}
//
// 'pdfioPageOpenStream()' - Open a content stream for a page.
//
pdfio_stream_t *                        // O - Stream or `NULL` if it cannot be opened
pdfioPageOpenStream(
    pdfio_obj_t *page,                  // I - Page object
    size_t      n,                      // I - Stream index (0-based)
    bool        decode)                 // I - `true` to decode/decompress stream
{
  _pdfio_value_t *contents = get_contents(page);
                                        // Contents value


  // No Contents value means there is nothing to open...
  if (!contents)
    return (NULL);

  if (contents->type == PDFIO_VALTYPE_ARRAY)
  {
    // Array of content streams: the index must be inside the array.  This
    // case is resolved completely here so that an out-of-range index (for
    // example n == 0 with an empty array) can never reach the single-stream
    // code below and read an array value through "value.indirect"...
    if (n >= pdfioArrayGetSize(contents->value.array))
      return (NULL);

    return (pdfioObjOpenStream(pdfioArrayGetObj(contents->value.array, n), decode));
  }

  // A non-array Contents value provides a single stream, so only index 0
  // is valid...
  if (n)
    return (NULL);

  // NOTE(review): assumes a non-array Contents value is an indirect object
  // reference - confirm against the value types the parser can produce.
  return (pdfioObjOpenStream(pdfioFileFindObj(page->pdf, contents->value.indirect.number), decode));
}
//
// 'get_contents()' - Get a page's Contents value.
//
static _pdfio_value_t *                 // O - Value or NULL on error
get_contents(pdfio_obj_t *page)         // I - Page object
{
  // Nothing to look up without a page object...
  if (page == NULL)
    return (NULL);

  // A value with no type yet has to be loaded first; give up if the load
  // fails...
  if (page->value.type == PDFIO_VALTYPE_NONE && !_pdfioObjLoad(page))
    return (NULL);

  // Only a dictionary value is searched for the "Contents" key...
  return (page->value.type == PDFIO_VALTYPE_DICT ? _pdfioDictGetValue(page->value.value.dict, "Contents") : NULL);
}

View File

@ -1,7 +1,7 @@
//
// PDF stream functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -372,6 +372,7 @@ pdfioStreamGetToken(
size_t bufsize) // I - Size of string buffer
{
_pdfio_token_t tb; // Token buffer/stack
bool ret; // Return value
// Range check input...
@ -381,7 +382,10 @@ pdfioStreamGetToken(
// Read using the token engine...
_pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
return (_pdfioTokenRead(&tb, buffer, bufsize));
ret = _pdfioTokenRead(&tb, buffer, bufsize);
_pdfioTokenFlush(&tb);
return (ret);
}

View File

@ -87,7 +87,7 @@
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0.0";WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>PDFIO_VERSION="1.0.1";WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
@ -101,7 +101,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0.0";WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>PDFIO_VERSION="1.0.1";WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
@ -115,7 +115,7 @@
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0.0";_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>PDFIO_VERSION="1.0.1";_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
@ -130,7 +130,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0.0";NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>PDFIO_VERSION="1.0.1";NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>

View File

@ -372,7 +372,7 @@
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.0.0;
CURRENT_PROJECT_VERSION = 1.0.1;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
@ -450,7 +450,7 @@
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.0.0;
CURRENT_PROJECT_VERSION = 1.0.1;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_NS_ASSERTIONS = NO;

View File

@ -219,6 +219,8 @@ pdfioPageCopy
pdfioPageDictAddColorSpace
pdfioPageDictAddFont
pdfioPageDictAddImage
pdfioPageGetNumStreams
pdfioPageOpenStream
pdfioStreamClose
pdfioStreamConsume
pdfioStreamGetToken

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native</id>
<title>PDFio Library for VS2019+</title>
<version>1.0.0</version>
<version>1.0.1</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pdfio</projectUrl>
@ -12,11 +12,11 @@
<readme>build/native/README.md</readme>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2021 by Michael R Sweet</copyright>
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2022 by Michael R Sweet</copyright>
<tags>pdf file native</tags>
<dependencies>
<dependency id="pdfio_native.redist" version="1.0.0" />
<dependency id="pdfio_native.redist" version="1.0.1" />
<dependency id="zlib_native.redist" version="1.2.11" />
</dependencies>
</metadata>

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native.redist</id>
<title>PDFio Library for VS2019+</title>
<version>1.0.0</version>
<version>1.0.1</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pdfio</projectUrl>
@ -12,8 +12,8 @@
<readme>build/native/README.md</readme>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2021 by Michael R Sweet</copyright>
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2022 by Michael R Sweet</copyright>
<tags>pdf file native</tags>
</metadata>
<files>

95
pdfiototext.c Normal file
View File

@ -0,0 +1,95 @@
//
// PDF to text program for PDFio.
//
// Copyright © 2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
//
// Usage:
//
// ./pdfiototext FILENAME.pdf > FILENAME.txt
//
#include "pdfio.h"
#include <string.h>
//
// 'main()' - Main entry.
//
int                                     // O - Exit status
main(int  argc,                         // I - Number of command-line arguments
     char *argv[])                      // I - Command-line arguments
{
  pdfio_file_t   *file;                 // PDF file
  size_t         i, j,                  // Looping vars
                 num_pages,             // Number of pages
                 num_streams;           // Number of streams for page
  pdfio_obj_t    *obj;                  // Current page object
  pdfio_stream_t *st;                   // Current page content stream
  char           buffer[1024];          // String buffer
  bool           first;                 // First string token?


  // Verify command-line arguments...
  if (argc != 2)
  {
    // Usage goes to stderr: stdout is where the extracted text is written
    // and is normally redirected to a file, so a message printed there
    // would end up in the output instead of in front of the user...
    fputs("Usage: pdfiototext FILENAME.pdf > FILENAME.txt\n", stderr);
    return (1);
  }

  // Open the PDF file...
  if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
    return (1);

  // Try grabbing content from all of the pages...
  for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
  {
    // Skip pages that cannot be retrieved...
    if ((obj = pdfioFileGetPage(file, i)) == NULL)
      continue;

    num_streams = pdfioPageGetNumStreams(obj);

    for (j = 0; j < num_streams; j ++)
    {
      // Skip content streams that cannot be opened...
      if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
        continue;

      // "first" tracks whether the next string starts a new output line
      // (no leading space needed)...
      first = true;

      while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
      {
        if (buffer[0] == '(')
        {
          // Token starting with '(' is treated as a text string: separate
          // it from the previous string on the same line with a space and
          // write everything after the '('...
          if (first)
            first = false;
          else
            putchar(' ');

          fputs(buffer + 1, stdout);
        }
        else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
        {
          // These operators are treated as moving to a new line of text,
          // so end the current output line...
          putchar('\n');
          first = true;
        }
      }

      // Terminate a partially written line at the end of the stream...
      if (!first)
        putchar('\n');

      pdfioStreamClose(st);
    }
  }

  pdfioFileClose(file);

  return (0);
}