mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2025-07-13 14:34:28 +02:00
Compare commits
14 Commits
Author | SHA1 | Date | |
---|---|---|---|
6378047026 | |||
54578144a0 | |||
f7f2969e3a | |||
93a3fcea6c | |||
fa20982e5d | |||
44d20eba1b | |||
c0b7925cdf | |||
68dcf021b2 | |||
b0a8e60968 | |||
9d47745e43 | |||
b0bf2e04b9 | |||
f030112372 | |||
79c4b6f8a8 | |||
bd2f9d44d4 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -8,6 +8,7 @@
|
||||
/doc/pdfio.epub
|
||||
/packages
|
||||
/pdfio.xcodeproj/xcshareddata
|
||||
/pdfiototext
|
||||
/testpdfio
|
||||
/testpdfio-*.pdf
|
||||
/x64
|
||||
|
18
CHANGES.md
18
CHANGES.md
@ -2,8 +2,22 @@ Changes in PDFio
|
||||
================
|
||||
|
||||
|
||||
v1.0rc1 (Month DD, YYYY)
|
||||
------------------------
|
||||
v1.0.1 (Month DD, YYYY)
|
||||
-----------------------
|
||||
|
||||
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
|
||||
- Added demo pdfiototext utility.
|
||||
- Fixed bug in `pdfioStreamGetToken`.
|
||||
|
||||
|
||||
v1.0.0 (December 14, 2021)
|
||||
--------------------------
|
||||
|
||||
- First stable release.
|
||||
|
||||
|
||||
v1.0rc1 (November 30, 2021)
|
||||
---------------------------
|
||||
|
||||
- Fixed a few stack/buffer overflow bugs discovered via fuzzing.
|
||||
|
||||
|
16
Makefile
16
Makefile
@ -1,7 +1,7 @@
|
||||
#
|
||||
# Makefile for PDFio.
|
||||
#
|
||||
# Copyright © 2021 by Michael R Sweet.
|
||||
# Copyright © 2021-2022 by Michael R Sweet.
|
||||
#
|
||||
# Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
# information.
|
||||
@ -26,7 +26,7 @@ DSONAME =
|
||||
LDFLAGS =
|
||||
LIBS = -lm -lz
|
||||
RANLIB = ranlib
|
||||
VERSION = 1.0b2
|
||||
VERSION = 1.0.1
|
||||
prefix = /usr/local
|
||||
|
||||
|
||||
@ -62,10 +62,12 @@ LIBOBJS = \
|
||||
ttf.o
|
||||
OBJS = \
|
||||
$(LIBOBJS) \
|
||||
pdfiototext.o \
|
||||
testpdfio.o
|
||||
TARGETS = \
|
||||
$(DSONAME) \
|
||||
libpdfio.a \
|
||||
pdfiototext \
|
||||
testpdfio
|
||||
|
||||
|
||||
@ -82,6 +84,9 @@ all-shared:
|
||||
debug:
|
||||
$(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-g -fsanitize=address -DDEBUG=1" clean all
|
||||
|
||||
macos:
|
||||
$(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" clean all
|
||||
|
||||
|
||||
# Clean everything
|
||||
clean:
|
||||
@ -154,6 +159,11 @@ pdfio1.def: $(LIBOBJS) Makefile
|
||||
grep -v '^_ttf' | sed -e '1,$$s/^_//' | sort >>$@
|
||||
|
||||
|
||||
# pdfio text extraction (demo, doesn't handle a lot of things yet)
|
||||
pdfiototext: pdfiototext.o libpdfio.a
|
||||
$(CC) $(LDFLAGS) $(COMMONFLAGS) -o $@ pdfiototext.o libpdfio.a $(LIBS)
|
||||
|
||||
|
||||
# pdfio test program
|
||||
testpdfio: testpdfio.o libpdfio.a
|
||||
$(CC) $(LDFLAGS) $(COMMONFLAGS) -o $@ testpdfio.o libpdfio.a $(LIBS)
|
||||
@ -167,7 +177,7 @@ ttf.o: ttf.h
|
||||
# Make documentation using Codedoc <https://www.msweet.org/codedoc>
|
||||
DOCFLAGS = \
|
||||
--author "Michael R Sweet" \
|
||||
--copyright "Copyright (c) 2021 by Michael R Sweet" \
|
||||
--copyright "Copyright (c) 2021-2022 by Michael R Sweet" \
|
||||
--docversion $(VERSION)
|
||||
|
||||
.PHONY: doc
|
||||
|
49
doc/pdfio.3
49
doc/pdfio.3
@ -1,4 +1,4 @@
|
||||
.TH pdfio 3 "pdf read/write library" "2021-10-25" "pdf read/write library"
|
||||
.TH pdfio 3 "pdf read/write library" "2022-03-02" "pdf read/write library"
|
||||
.SH NAME
|
||||
pdfio \- pdf read/write library
|
||||
.SH Introduction
|
||||
@ -34,7 +34,7 @@ PDFio is
|
||||
.I not
|
||||
concerned with rendering or viewing a PDF file, although a PDF RIP or viewer could be written using it.
|
||||
.PP
|
||||
PDFio is Copyright \[co] 2021 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
|
||||
PDFio is Copyright \[co] 2021\-2022 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
|
||||
.SS Requirements
|
||||
.PP
|
||||
PDFio requires the following to build the software:
|
||||
@ -156,7 +156,7 @@ There is also an Xcode project ("pdfio.xcodeproj") you can use on macOS which ge
|
||||
You can reproduce this with the makefile using:
|
||||
.nf
|
||||
|
||||
sudo make COMMONFLAGS="\-Os \-mmacosx\-version\-min=10.14 \-arch x86_64 \-arch arm64" install
|
||||
sudo make macos install
|
||||
.fi
|
||||
.SS Detecting PDFio
|
||||
.PP
|
||||
@ -254,7 +254,7 @@ Each PDF file contains one or more pages. The pdfioFileGetNumPages function retu
|
||||
}
|
||||
.fi
|
||||
.PP
|
||||
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page.
|
||||
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page. Use the pdfioPageGetNumStreams and pdfioPageOpenStream functions to access the content streams for each page.
|
||||
.PP
|
||||
The pdfioFileClose function closes a PDF file and frees all memory that was used for it:
|
||||
.nf
|
||||
@ -324,6 +324,14 @@ Some PDF objects have an associated data stream, such as for pages, images, ICC
|
||||
.PP
|
||||
The first argument is the object pointer. The second argument is a boolean value that specifies whether you want to decode (typically decompress) the stream data or return it as\-is.
|
||||
.PP
|
||||
When reading a page stream you'll use the pdfioPageOpenStream function instead:
|
||||
.nf
|
||||
|
||||
pdfio_file_t *pdf = pdfioFileOpen(...);
|
||||
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
|
||||
pdfio_stream_t *st = pdfioPageOpenStream(obj, 0, true);
|
||||
.fi
|
||||
.PP
|
||||
Once you have the stream open, you can use one of several functions to read from it:
|
||||
.IP \(bu 5
|
||||
.PP
|
||||
@ -353,12 +361,21 @@ To create a stream for a new object, call the pdfioObjCreateStream function:
|
||||
.nf
|
||||
|
||||
pdfio_file_t *pdf = pdfioFileCreate(...);
|
||||
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
|
||||
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
|
||||
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
|
||||
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
|
||||
.fi
|
||||
.PP
|
||||
The first argument is the newly created object. The second argument is either PDFIO_FILTER_NONE to specify that any encoding is done by your program or PDFIO_FILTER_FLATE to specify that PDFio should Flate compress the stream.
|
||||
.PP
|
||||
To create a page content stream call the pdfioFileCreatePage function:
|
||||
.nf
|
||||
|
||||
pdfio_file_t *pdf = pdfioFileCreate(...);
|
||||
pdfio_dict_t *dict = pdfioDictCreate(pdf);
|
||||
\... set page dictionary keys and values ...
|
||||
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
|
||||
.fi
|
||||
.PP
|
||||
Once you have created the stream, use any of the following functions to write to the stream:
|
||||
.IP \(bu 5
|
||||
.PP
|
||||
@ -2693,6 +2710,24 @@ bool pdfioPageDictAddImage (
|
||||
pdfio_obj_t *obj
|
||||
);
|
||||
.fi
|
||||
.SS pdfioPageGetNumStreams
|
||||
Get the number of content streams for a page object.
|
||||
.PP
|
||||
.nf
|
||||
size_t pdfioPageGetNumStreams (
|
||||
pdfio_obj_t *page
|
||||
);
|
||||
.fi
|
||||
.SS pdfioPageOpenStream
|
||||
Open a content stream for a page.
|
||||
.PP
|
||||
.nf
|
||||
pdfio_stream_t * pdfioPageOpenStream (
|
||||
pdfio_obj_t *page,
|
||||
size_t n,
|
||||
bool decode
|
||||
);
|
||||
.fi
|
||||
.SS pdfioStreamClose
|
||||
Close a (data) stream in a PDF file.
|
||||
.PP
|
||||
@ -2947,4 +2982,4 @@ typedef uint8_t state_t[4][4];
|
||||
Michael R Sweet
|
||||
.SH COPYRIGHT
|
||||
.PP
|
||||
Copyright (c) 2021 by Michael R Sweet
|
||||
Copyright (c) 2021-2022 by Michael R Sweet
|
||||
|
@ -1,13 +1,13 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en-US">
|
||||
<head>
|
||||
<title>PDFio Programming Manual v1.0.0</title>
|
||||
<title>PDFio Programming Manual v1.0.1</title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
|
||||
<meta name="generator" content="codedoc v3.7">
|
||||
<meta name="author" content="Michael R Sweet">
|
||||
<meta name="language" content="en-US">
|
||||
<meta name="copyright" content="Copyright © 2021 by Michael R Sweet">
|
||||
<meta name="version" content="1.0.0">
|
||||
<meta name="copyright" content="Copyright © 2021-2022 by Michael R Sweet">
|
||||
<meta name="version" content="1.0.1">
|
||||
<style type="text/css"><!--
|
||||
body {
|
||||
background: white;
|
||||
@ -245,9 +245,9 @@ span.string {
|
||||
<body>
|
||||
<div class="header">
|
||||
<p><img class="title" src="pdfio-512.png"></p>
|
||||
<h1 class="title">PDFio Programming Manual v1.0.0</h1>
|
||||
<h1 class="title">PDFio Programming Manual v1.0.1</h1>
|
||||
<p>Michael R Sweet</p>
|
||||
<p>Copyright © 2021 by Michael R Sweet</p>
|
||||
<p>Copyright © 2021-2022 by Michael R Sweet</p>
|
||||
</div>
|
||||
<div class="contents">
|
||||
<h2 class="title">Contents</h2>
|
||||
@ -425,6 +425,8 @@ span.string {
|
||||
<li><a href="#pdfioPageDictAddColorSpace">pdfioPageDictAddColorSpace</a></li>
|
||||
<li><a href="#pdfioPageDictAddFont">pdfioPageDictAddFont</a></li>
|
||||
<li><a href="#pdfioPageDictAddImage">pdfioPageDictAddImage</a></li>
|
||||
<li><a href="#pdfioPageGetNumStreams">pdfioPageGetNumStreams</a></li>
|
||||
<li><a href="#pdfioPageOpenStream">pdfioPageOpenStream</a></li>
|
||||
<li><a href="#pdfioStreamClose">pdfioStreamClose</a></li>
|
||||
<li><a href="#pdfioStreamConsume">pdfioStreamConsume</a></li>
|
||||
<li><a href="#pdfioStreamGetToken">pdfioStreamGetToken</a></li>
|
||||
@ -491,7 +493,7 @@ span.string {
|
||||
</li>
|
||||
</ul>
|
||||
<p>PDFio is <em>not</em> concerned with rendering or viewing a PDF file, although a PDF RIP or viewer could be written using it.</p>
|
||||
<p>PDFio is Copyright © 2021 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.</p>
|
||||
<p>PDFio is Copyright © 2021-2022 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.</p>
|
||||
<h3 class="title" id="requirements">Requirements</h3>
|
||||
<p>PDFio requires the following to build the software:</p>
|
||||
<ul>
|
||||
@ -557,7 +559,7 @@ make install-shared
|
||||
<pre><code>sudo xcodebuild install
|
||||
</code></pre>
|
||||
<p>You can reproduce this with the makefile using:</p>
|
||||
<pre><code>sudo make COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" install
|
||||
<pre><code>sudo make macos install
|
||||
</code></pre>
|
||||
<h3 class="title" id="detecting-pdfio">Detecting PDFio</h3>
|
||||
<p>PDFio can be detected using the <code>pkg-config</code> command, for example:</p>
|
||||
@ -621,7 +623,7 @@ pdfio_obj_t *page; <span class="comment">// Current page</span>
|
||||
<span class="comment">// do something with page</span>
|
||||
}
|
||||
</code></pre>
|
||||
<p>Each page is represented by a "page tree" object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page.</p>
|
||||
<p>Each page is represented by a "page tree" object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page. Use the <a href="#pdfioPageGetNumStreams"><code>pdfioPageGetNumStreams</code></a> and <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> functions to access the content streams for each page.</p>
|
||||
<p>The <a href="#pdfioFileClose"><code>pdfioFileClose</code></a> function closes a PDF file and frees all memory that was used for it:</p>
|
||||
<pre><code class="language-c">pdfioFileClose(pdf);
|
||||
</code></pre>
|
||||
@ -663,6 +665,11 @@ pdfio_obj_t *obj = pdfioFileFindObj(pdf, number);
|
||||
pdfio_stream_t *st = pdfioObjOpenStream(obj, <span class="reserved">true</span>);
|
||||
</code></pre>
|
||||
<p>The first argument is the object pointer. The second argument is a boolean value that specifies whether you want to decode (typically decompress) the stream data or return it as-is.</p>
|
||||
<p>When reading a page stream you'll use the <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> function instead:</p>
|
||||
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileOpen(...);
|
||||
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
|
||||
pdfio_stream_t *st = pdfioPageOpenStream(obj, <span class="number">0</span>, <span class="reserved">true</span>);
|
||||
</code></pre>
|
||||
<p>Once you have the stream open, you can use one of several functions to read from it:</p>
|
||||
<ul>
|
||||
<li><p><a href="#pdfioStreamConsume"><code>pdfioStreamConsume</code></a> reads and discards a number of bytes in the stream</p>
|
||||
@ -679,10 +686,16 @@ pdfio_stream_t *st = pdfioObjOpenStream(obj, <span class="reserved">true</span>)
|
||||
</code></pre>
|
||||
<p>To create a stream for a new object, call the <a href="#pdfioObjCreateStream"><code>pdfioObjCreateStream</code></a> function:</p>
|
||||
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
|
||||
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
|
||||
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
|
||||
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
|
||||
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
|
||||
</code></pre>
|
||||
<p>The first argument is the newly created object. The second argument is either <code>PDFIO_FILTER_NONE</code> to specify that any encoding is done by your program or <code>PDFIO_FILTER_FLATE</code> to specify that PDFio should Flate compress the stream.</p>
|
||||
<p>To create a page content stream call the <a href="#pdfioFileCreatePage"><code>pdfioFileCreatePage</code></a> function:</p>
|
||||
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
|
||||
pdfio_dict_t *dict = pdfioDictCreate(pdf);
|
||||
... set page dictionary keys <span class="reserved">and</span> values ...
|
||||
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
|
||||
</code></pre>
|
||||
<p>Once you have created the stream, use any of the following functions to write to the stream:</p>
|
||||
<ul>
|
||||
<li><p><a href="#pdfioStreamPrintf"><code>pdfioStreamPrintf</code></a> writes a formatted string to the stream</p>
|
||||
@ -3264,6 +3277,32 @@ bool pdfioPageDictAddImage(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, const
|
||||
</tbody></table>
|
||||
<h4 class="returnvalue">Return Value</h4>
|
||||
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
|
||||
<h3 class="function"><a id="pdfioPageGetNumStreams">pdfioPageGetNumStreams</a></h3>
|
||||
<p class="description">Get the number of content streams for a page object.</p>
|
||||
<p class="code">
|
||||
size_t pdfioPageGetNumStreams(<a href="#pdfio_obj_t">pdfio_obj_t</a> *page);</p>
|
||||
<h4 class="parameters">Parameters</h4>
|
||||
<table class="list"><tbody>
|
||||
<tr><th>page</th>
|
||||
<td class="description">Page object</td></tr>
|
||||
</tbody></table>
|
||||
<h4 class="returnvalue">Return Value</h4>
|
||||
<p class="description">Number of streams</p>
|
||||
<h3 class="function"><a id="pdfioPageOpenStream">pdfioPageOpenStream</a></h3>
|
||||
<p class="description">Open a content stream for a page.</p>
|
||||
<p class="code">
|
||||
<a href="#pdfio_stream_t">pdfio_stream_t</a> *pdfioPageOpenStream(<a href="#pdfio_obj_t">pdfio_obj_t</a> *page, size_t n, bool decode);</p>
|
||||
<h4 class="parameters">Parameters</h4>
|
||||
<table class="list"><tbody>
|
||||
<tr><th>page</th>
|
||||
<td class="description">Page object</td></tr>
|
||||
<tr><th>n</th>
|
||||
<td class="description">Stream index (0-based)</td></tr>
|
||||
<tr><th>decode</th>
|
||||
<td class="description"><code>true</code> to decode/decompress stream</td></tr>
|
||||
</tbody></table>
|
||||
<h4 class="returnvalue">Return Value</h4>
|
||||
<p class="description">Stream</p>
|
||||
<h3 class="function"><a id="pdfioStreamClose">pdfioStreamClose</a></h3>
|
||||
<p class="description">Close a (data) stream in a PDF file.</p>
|
||||
<p class="code">
|
||||
|
31
doc/pdfio.md
31
doc/pdfio.md
@ -15,8 +15,8 @@ goals of pdfio are:
|
||||
PDFio is *not* concerned with rendering or viewing a PDF file, although a PDF
|
||||
RIP or viewer could be written using it.
|
||||
|
||||
PDFio is Copyright © 2021 by Michael R Sweet and is licensed under the Apache
|
||||
License Version 2.0 with an (optional) exception to allow linking against
|
||||
PDFio is Copyright © 2021-2022 by Michael R Sweet and is licensed under the
|
||||
Apache License Version 2.0 with an (optional) exception to allow linking against
|
||||
GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
|
||||
|
||||
|
||||
@ -104,7 +104,7 @@ generates a static library that will be installed under "/usr/local" with:
|
||||
|
||||
You can reproduce this with the makefile using:
|
||||
|
||||
sudo make COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" install
|
||||
sudo make macos install
|
||||
|
||||
|
||||
Detecting PDFio
|
||||
@ -209,7 +209,8 @@ for (i = 0, count = pdfioFileGetNumPages(pdf); i < count; i ++)
|
||||
Each page is represented by a "page tree" object (what [`pdfioFileGetPage`](@@)
|
||||
returns) that specifies information about the page and one or more "content"
|
||||
objects that contain the images, fonts, text, and graphics that appear on the
|
||||
page.
|
||||
page. Use the [`pdfioPageGetNumStreams`](@@) and [`pdfioPageOpenStream`](@@)
|
||||
functions to access the content streams for each page.
|
||||
|
||||
The [`pdfioFileClose`](@@) function closes a PDF file and frees all memory that
|
||||
was used for it:
|
||||
@ -294,6 +295,15 @@ The first argument is the object pointer. The second argument is a boolean
|
||||
value that specifies whether you want to decode (typically decompress) the
|
||||
stream data or return it as-is.
|
||||
|
||||
When reading a page stream you'll use the [`pdfioPageOpenStream`](@@) function
|
||||
instead:
|
||||
|
||||
```c
|
||||
pdfio_file_t *pdf = pdfioFileOpen(...);
|
||||
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
|
||||
pdfio_stream_t *st = pdfioPageOpenStream(obj, 0, true);
|
||||
```
|
||||
|
||||
Once you have the stream open, you can use one of several functions to read
|
||||
from it:
|
||||
|
||||
@ -315,14 +325,23 @@ function:
|
||||
|
||||
```c
|
||||
pdfio_file_t *pdf = pdfioFileCreate(...);
|
||||
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
|
||||
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
|
||||
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
|
||||
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
|
||||
```
|
||||
|
||||
The first argument is the newly created object. The second argument is either
|
||||
`PDFIO_FILTER_NONE` to specify that any encoding is done by your program or
|
||||
`PDFIO_FILTER_FLATE` to specify that PDFio should Flate compress the stream.
|
||||
|
||||
To create a page content stream call the [`pdfioFileCreatePage`](@@) function:
|
||||
|
||||
```c
|
||||
pdfio_file_t *pdf = pdfioFileCreate(...);
|
||||
pdfio_dict_t *dict = pdfioDictCreate(pdf);
|
||||
... set page dictionary keys and values ...
|
||||
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
|
||||
```
|
||||
|
||||
Once you have created the stream, use any of the following functions to write
|
||||
to the stream:
|
||||
|
||||
|
@ -663,7 +663,12 @@ _pdfioCryptoUnlock(
|
||||
length = 128;
|
||||
}
|
||||
}
|
||||
// TODO: Implement AES-256 - V6 R6
|
||||
else if (version == 6 && revision == 6)
|
||||
{
|
||||
// TODO: Implement AES-256 - V6 R6
|
||||
pdf->encryption = PDFIO_ENCRYPTION_AES_256;
|
||||
length = 256;
|
||||
}
|
||||
|
||||
PDFIO_DEBUG("_pdfioCryptoUnlock: encryption=%d, length=%d\n", pdf->encryption, length);
|
||||
|
||||
@ -788,6 +793,8 @@ _pdfioCryptoUnlock(
|
||||
else
|
||||
{
|
||||
// TODO: Implement AES-256 security handler
|
||||
_pdfioFileError(pdf, "Unable to unlock AES-256 encrypted file at this time.");
|
||||
return (false);
|
||||
}
|
||||
|
||||
// If we get here we need to try another password...
|
||||
|
80
pdfio-page.c
80
pdfio-page.c
@ -1,7 +1,7 @@
|
||||
//
|
||||
// PDF page functions for PDFio.
|
||||
//
|
||||
// Copyright © 2021 by Michael R Sweet.
|
||||
// Copyright © 2021-2022 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
@ -14,6 +14,13 @@
|
||||
#include "pdfio-private.h"
|
||||
|
||||
|
||||
//
|
||||
// Local functions...
|
||||
//
|
||||
|
||||
static _pdfio_value_t *get_contents(pdfio_obj_t *page);
|
||||
|
||||
|
||||
//
|
||||
// 'pdfioPageCopy()' - Copy a page to a PDF file.
|
||||
//
|
||||
@ -47,3 +54,74 @@ pdfioPageCopy(pdfio_file_t *pdf, // I - PDF file
|
||||
else
|
||||
return (_pdfioFileAddPage(pdf, dstpage));
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'pdfioPageGetNumStreams()' - Get the number of content streams for a page object.
|
||||
//
|
||||
|
||||
size_t // O - Number of streams
|
||||
pdfioPageGetNumStreams(
|
||||
pdfio_obj_t *page) // I - Page object
|
||||
{
|
||||
_pdfio_value_t *contents = get_contents(page);
|
||||
// Contents value
|
||||
|
||||
|
||||
if (!contents)
|
||||
return (0);
|
||||
else if (contents->type == PDFIO_VALTYPE_ARRAY)
|
||||
return (pdfioArrayGetSize(contents->value.array));
|
||||
else
|
||||
return (1);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'pdfioPageOpenStream()' - Open a content stream for a page.
|
||||
//
|
||||
|
||||
pdfio_stream_t * // O - Stream
|
||||
pdfioPageOpenStream(
|
||||
pdfio_obj_t *page, // I - Page object
|
||||
size_t n, // I - Stream index (0-based)
|
||||
bool decode) // I - `true` to decode/decompress stream
|
||||
{
|
||||
_pdfio_value_t *contents = get_contents(page);
|
||||
// Contents value
|
||||
|
||||
|
||||
if (!contents)
|
||||
return (NULL);
|
||||
else if (contents->type == PDFIO_VALTYPE_ARRAY && n < pdfioArrayGetSize(contents->value.array))
|
||||
return (pdfioObjOpenStream(pdfioArrayGetObj(contents->value.array, n), decode));
|
||||
else if (n)
|
||||
return (NULL);
|
||||
else
|
||||
return (pdfioObjOpenStream(pdfioFileFindObj(page->pdf, contents->value.indirect.number), decode));
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// 'get_contents()' - Get a page's Contents value.
|
||||
//
|
||||
|
||||
static _pdfio_value_t * // O - Value or NULL on error
|
||||
get_contents(pdfio_obj_t *page) // I - Page object
|
||||
{
|
||||
// Range check input...
|
||||
if (!page)
|
||||
return (NULL);
|
||||
|
||||
// Load the page object as needed...
|
||||
if (page->value.type == PDFIO_VALTYPE_NONE)
|
||||
{
|
||||
if (!_pdfioObjLoad(page))
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
if (page->value.type != PDFIO_VALTYPE_DICT)
|
||||
return (NULL);
|
||||
|
||||
return (_pdfioDictGetValue(page->value.value.dict, "Contents"));
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
//
|
||||
// PDF stream functions for PDFio.
|
||||
//
|
||||
// Copyright © 2021 by Michael R Sweet.
|
||||
// Copyright © 2021-2022 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
@ -372,6 +372,7 @@ pdfioStreamGetToken(
|
||||
size_t bufsize) // I - Size of string buffer
|
||||
{
|
||||
_pdfio_token_t tb; // Token buffer/stack
|
||||
bool ret; // Return value
|
||||
|
||||
|
||||
// Range check input...
|
||||
@ -381,7 +382,10 @@ pdfioStreamGetToken(
|
||||
// Read using the token engine...
|
||||
_pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
|
||||
|
||||
return (_pdfioTokenRead(&tb, buffer, bufsize));
|
||||
ret = _pdfioTokenRead(&tb, buffer, bufsize);
|
||||
_pdfioTokenFlush(&tb);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
|
||||
|
@ -87,7 +87,7 @@
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>PDFIO_VERSION="1.0.1";WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -101,7 +101,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>PDFIO_VERSION="1.0.1";WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -115,7 +115,7 @@
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>PDFIO_VERSION="1.0.1";_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -130,7 +130,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>PDFIO_VERSION="1.0.1";NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
|
@ -372,7 +372,7 @@
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
CODE_SIGN_IDENTITY = "Apple Development";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
CURRENT_PROJECT_VERSION = 1.0.0;
|
||||
CURRENT_PROJECT_VERSION = 1.0.1;
|
||||
DEBUG_INFORMATION_FORMAT = dwarf;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_TESTABILITY = YES;
|
||||
@ -450,7 +450,7 @@
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
CODE_SIGN_IDENTITY = "Apple Development";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
CURRENT_PROJECT_VERSION = 1.0.0;
|
||||
CURRENT_PROJECT_VERSION = 1.0.1;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
ENABLE_NS_ASSERTIONS = NO;
|
||||
|
@ -219,6 +219,8 @@ pdfioPageCopy
|
||||
pdfioPageDictAddColorSpace
|
||||
pdfioPageDictAddFont
|
||||
pdfioPageDictAddImage
|
||||
pdfioPageGetNumStreams
|
||||
pdfioPageOpenStream
|
||||
pdfioStreamClose
|
||||
pdfioStreamConsume
|
||||
pdfioStreamGetToken
|
||||
|
@ -3,7 +3,7 @@
|
||||
<metadata>
|
||||
<id>pdfio_native</id>
|
||||
<title>PDFio Library for VS2019+</title>
|
||||
<version>1.0.0-b7</version>
|
||||
<version>1.0.1</version>
|
||||
<authors>Michael R Sweet</authors>
|
||||
<owners>michaelrsweet</owners>
|
||||
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
|
||||
@ -12,11 +12,11 @@
|
||||
<readme>build/native/README.md</readme>
|
||||
<requireLicenseAcceptance>false</requireLicenseAcceptance>
|
||||
<description>PDFio Library for VS2019+</description>
|
||||
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an exception to allow linking against GNU GPL2-only software.</summary>
|
||||
<copyright>Copyright © 2019-2021 by Michael R Sweet</copyright>
|
||||
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
|
||||
<copyright>Copyright © 2019-2022 by Michael R Sweet</copyright>
|
||||
<tags>pdf file native</tags>
|
||||
<dependencies>
|
||||
<dependency id="pdfio_native.redist" version="1.0.0-b7" />
|
||||
<dependency id="pdfio_native.redist" version="1.0.1" />
|
||||
<dependency id="zlib_native.redist" version="1.2.11" />
|
||||
</dependencies>
|
||||
</metadata>
|
||||
|
@ -3,7 +3,7 @@
|
||||
<metadata>
|
||||
<id>pdfio_native.redist</id>
|
||||
<title>PDFio Library for VS2019+</title>
|
||||
<version>1.0.0-b7</version>
|
||||
<version>1.0.1</version>
|
||||
<authors>Michael R Sweet</authors>
|
||||
<owners>michaelrsweet</owners>
|
||||
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
|
||||
@ -12,8 +12,8 @@
|
||||
<readme>build/native/README.md</readme>
|
||||
<requireLicenseAcceptance>false</requireLicenseAcceptance>
|
||||
<description>PDFio Library for VS2019+</description>
|
||||
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an exception to allow linking against GNU GPL2-only software.</summary>
|
||||
<copyright>Copyright © 2019-2021 by Michael R Sweet</copyright>
|
||||
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
|
||||
<copyright>Copyright © 2019-2022 by Michael R Sweet</copyright>
|
||||
<tags>pdf file native</tags>
|
||||
</metadata>
|
||||
<files>
|
||||
|
95
pdfiototext.c
Normal file
95
pdfiototext.c
Normal file
@ -0,0 +1,95 @@
|
||||
//
|
||||
// PDF to text program for PDFio.
|
||||
//
|
||||
// Copyright © 2022 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// ./pdfiototext FILENAME.pdf > FILENAME.txt
|
||||
//
|
||||
|
||||
#include "pdfio.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
//
|
||||
// 'main()' - Main entry.
|
||||
//
|
||||
|
||||
int // O - Exit status
|
||||
main(int argc, // I - Number of command-line arguments
|
||||
char *argv[]) // I - Command-line arguments
|
||||
{
|
||||
pdfio_file_t *file; // PDF file
|
||||
size_t i, j, // Looping vars
|
||||
num_pages, // Number of pages
|
||||
num_streams; // Number of streams for page
|
||||
pdfio_obj_t *obj; // Current page object
|
||||
pdfio_stream_t *st; // Current page content stream
|
||||
char buffer[1024]; // String buffer
|
||||
bool first; // First string token?
|
||||
|
||||
|
||||
// Verify command-line arguments...
|
||||
if (argc != 2)
|
||||
{
|
||||
puts("Usage: pdfiototext FILENAME.pdf > FILENAME.txt");
|
||||
return (1);
|
||||
}
|
||||
|
||||
// Open the PDF file...
|
||||
if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
|
||||
return (1);
|
||||
|
||||
// printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
|
||||
|
||||
// Try grabbing content from all of the pages...
|
||||
for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
|
||||
{
|
||||
if ((obj = pdfioFileGetPage(file, i)) == NULL)
|
||||
continue;
|
||||
|
||||
num_streams = pdfioPageGetNumStreams(obj);
|
||||
|
||||
// printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
|
||||
|
||||
for (j = 0; j < num_streams; j ++)
|
||||
{
|
||||
if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
|
||||
continue;
|
||||
|
||||
// printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
|
||||
|
||||
first = true;
|
||||
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
|
||||
{
|
||||
if (buffer[0] == '(')
|
||||
{
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
putchar(' ');
|
||||
|
||||
fputs(buffer + 1, stdout);
|
||||
}
|
||||
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
|
||||
{
|
||||
putchar('\n');
|
||||
first = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!first)
|
||||
putchar('\n');
|
||||
|
||||
pdfioStreamClose(st);
|
||||
}
|
||||
}
|
||||
|
||||
pdfioFileClose(file);
|
||||
|
||||
return (0);
|
||||
}
|
Reference in New Issue
Block a user