52 Commits

Author SHA1 Message Date
d4c594cec4 Bump copyright in readme. 2023-03-20 10:22:19 -04:00
35c674b633 Fix another build issue. 2023-03-20 10:11:05 -04:00
97d4955666 Fix potential denial-of-service in flate stream code. 2023-03-20 09:27:19 -04:00
e138232a93 Fix build error due to mismatched function declarations. 2023-03-20 08:19:31 -04:00
8d8225f4a1 Fix release date. 2023-02-07 17:10:48 -05:00
7045d9dad9 Bump Windows version numbers and update exports file. 2023-02-06 17:36:54 -05:00
4f10021e7e Fix denial-of-service attack when reading corrupt PDF files. 2023-02-03 20:39:04 -05:00
57d5894f33 Update pdfioStreamGetToken documentation (Issue #37) 2023-01-11 17:13:58 -05:00
2b8a1c8481 Fix CodeQL config file syntax. 2022-12-09 11:31:56 -05:00
948ee16b06 Fix the one "value" complaint from CodeQL and suppress all useless queries. 2022-12-09 11:22:10 -05:00
c7101ae9dd Add CodeQL scanning. 2022-12-09 11:09:34 -05:00
599640eda1 Update makefile to be silent with basic progress reporting. 2022-08-02 09:41:13 -04:00
a3f3bbfe11 Fix pdfioFileGetAuthor, etc. APIs (Issue #33) 2022-07-12 18:36:08 -04:00
26d485cfc5 Update Windows DLL exports file. 2022-07-06 15:25:45 -04:00
64d306a322 Cleanup. 2022-07-06 08:47:52 -04:00
067683cbcd Add some protection against opening multiple streams in the same file at the same time. 2022-07-04 13:03:11 -04:00
50f27974cf Update documentation. 2022-07-03 10:01:20 -04:00
ae9a91719b Add pdfioContentPathEnd function. 2022-07-03 10:01:10 -04:00
1a17933635 Fix pdfioContentMatrixRotate function. 2022-07-01 20:30:40 -04:00
acea6fdbed Changelog. 2022-06-27 17:17:44 -04:00
66fa12f928 Update Windows DLL exports file. 2022-06-27 10:17:21 -04:00
f4b8983c61 Implement pdfioDictIterateKeys API (Issue #31) 2022-06-27 10:17:00 -04:00
ed4e2fc38a Merge pull request #32 from ire4ever1190/patch-1
Fix `install-shared` Make task
2022-06-09 09:46:23 -04:00
1ed7f0089c Update Makefile 2022-06-09 14:33:53 +10:00
563d53edd4 Update Windows DLL exports file. 2022-05-24 19:16:20 -04:00
316b0ad559 Add pdfioFileCreateTemporary function (Issue #29) 2022-05-15 22:52:53 -04:00
f8b471acfd Update README and NOTICE files... 2022-03-02 09:50:14 -05:00
cedd7d104f Changelog update. 2022-03-02 09:47:14 -05:00
6378047026 Update VC project. 2022-03-02 09:31:33 -05:00
54578144a0 Update documentation and prep for 1.0.1 release. 2022-03-02 09:30:01 -05:00
f7f2969e3a Fix pdfioStreamGetToken implementation (wasn't flushing input), update
pdfiototext code to better handle different text operators that affect the
location of the text.
2022-03-01 09:18:56 -05:00
93a3fcea6c Add missing pdfioPageGetNumStreams and pdfioPageOpenStream functions.
Add initial version of pdfiototext text extraction utility.
2022-02-28 15:00:25 -05:00
fa20982e5d Coverity certs are fixed. 2021-12-15 18:20:54 -05:00
44d20eba1b Add stub code for AES-256 to force Coverity to re-analyze... 2021-12-15 07:35:55 -05:00
c0b7925cdf Fix typo. 2021-12-15 07:28:17 -05:00
68dcf021b2 Download Entrust root cert for validation. 2021-12-15 07:25:44 -05:00
b0a8e60968 Also allow posts to coverity.com while we wait for Ubuntu to pick up the new Entrust root certificate. 2021-12-15 07:10:13 -05:00
9d47745e43 Prep for 1.0rc1. 2021-12-15 06:53:09 -05:00
b0bf2e04b9 Coverity's certificate has expired. 2021-12-14 16:26:57 -05:00
f030112372 See what is happening when downloading Coverity build tool (drop quiet option). 2021-12-14 16:21:49 -05:00
79c4b6f8a8 See what is happening when downloading Coverity build tool. 2021-12-14 16:20:34 -05:00
bd2f9d44d4 Prep for 1.0.0 release. 2021-12-14 12:36:33 -05:00
3c7a980a0b Don't include AFL files in source archives. 2021-11-30 08:46:43 -05:00
019c05d04a Fix AFL target, remove excess PDF test files. 2021-11-30 08:13:41 -05:00
7ab550254a Add AFL make target (runs for 10 minutes). 2021-11-29 20:59:30 -05:00
fa8e54cca2 Add some files to use for AFL++. 2021-11-29 18:54:40 -05:00
d92fcb7bfb Add AFL++ PDF dictionary. 2021-11-29 18:47:04 -05:00
001dcbb123 Fix testpdfio build - dependencies on pdfio-private.h were missing. 2021-11-29 17:57:49 -05:00
a431d7806f Fix a few stack/buffer overflow bugs discovered by Bart, Steffan, and Mark from
the Radboud University NL (thanks!)

- Add depth argument to all value read functions that recurse
- Add depth argument to page tree loading code
- Validate xref stream sizes individually to avoid out-of-bounds access to local
  xref buffer.
2021-11-29 17:46:56 -05:00
ec8e900ea5 Add math library to libs. 2021-11-18 19:23:42 -05:00
c73aa7ae20 Add link for builds. 2021-11-11 06:52:23 -05:00
c53786e0e1 Changelog. 2021-11-07 11:29:18 -05:00
60 changed files with 2792 additions and 161 deletions

2
.gitattributes vendored
View File

@ -1,2 +1,4 @@
.git* export-ignore
afl-pdf.dict export-ignore
afl-input export-ignore
makesrcdist export-ignore

22
.github/codeql.yml vendored Normal file
View File

@ -0,0 +1,22 @@
paths-ignore:
- testpdfio.c
query-filters:
- exclude:
id: cpp/commented-out-code
- exclude:
id: cpp/toctou-race-condition
- exclude:
id: cpp/weak-cryptographic-algorithm
- exclude:
id: cpp/world-writable-file-creation
- exclude:
id: cpp/uncontrolled-allocation-size
- exclude:
id: cpp/path-injection
- exclude:
id: cpp/stack-address-escape
- exclude:
id: cpp/loop-variable-changed
- exclude:
id: cpp/long-switch

50
.github/workflows/codeql.yml vendored Normal file
View File

@ -0,0 +1,50 @@
name: "CodeQL"
on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]
schedule:
- cron: "46 3 * * 0"
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ cpp ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Update build environment
run: sudo apt-get update --fix-missing -y
- name: Install prerequisites
run: sudo apt-get install -y zlib1g-dev
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql.yml
queries: +security-and-quality
- name: Autobuild
uses: github/codeql-action/autobuild@v2
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
with:
category: "/language:${{ matrix.language }}"

2
.gitignore vendored
View File

@ -4,9 +4,11 @@
*.o
*.so.1
/.vs
/afl-output
/doc/pdfio.epub
/packages
/pdfio.xcodeproj/xcshareddata
/pdfiototext
/testpdfio
/testpdfio-*.pdf
/x64

View File

@ -1,2 +0,0 @@
queries:
- exclude: cpp/toctou-race-condition

View File

@ -2,6 +2,48 @@ Changes in PDFio
================
v1.1.1 (March 20, 2023)
-----------------------
- CVE-2023-28428: Fixed a potential denial-of-service with corrupt PDF files.
- Fixed a few build issues.
v1.1.0 (February 6, 2023)
-------------------------
- CVE-2023-24808: Fixed a potential denial-of-service with corrupt PDF files.
- Added `pdfioFileCreateTemporary` function (Issue #29)
- Added `pdfioDictIterateKeys` function (Issue #31)
- Added `pdfioContentPathEnd` function.
- Added protection against opening multiple streams in the same file at the
same time.
- Documentation updates (Issue #37)
- Fixed "install-shared" target (Issue #32)
- Fixed `pdfioFileGet...` metadata APIs (Issue #33)
- Fixed `pdfioContentMatrixRotate` function.
v1.0.1 (March 2, 2022)
----------------------
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
- Added demo pdfiototext utility.
- Fixed bug in `pdfioStreamGetToken`.
v1.0.0 (December 14, 2021)
--------------------------
- First stable release.
v1.0rc1 (November 30, 2021)
---------------------------
- Fixed a few stack/buffer overflow bugs discovered via fuzzing.
v1.0b2 (November 7, 2021)
-------------------------
@ -10,6 +52,7 @@ v1.0b2 (November 7, 2021)
- Fixed `all-shared` target (Issue #22)
- Fixed memory leaks (Issue #23)
- Updated `pdfioContentSetDashPattern` to accept `double` values (Issue #25)
- Added support for reading and writing encrypted PDFs (Issue #26)
- Fixed some issues identified by a Coverity scan.

View File

@ -1,7 +1,7 @@
#
# Makefile for PDFio.
#
# Copyright © 2021 by Michael R Sweet.
# Copyright © 2021-2023 by Michael R Sweet.
#
# Licensed under Apache License v2.0. See the file "LICENSE" for more
# information.
@ -10,14 +10,17 @@
# POSIX makefile
.POSIX:
# Variables...
# Build silently
.SILENT:
# Variables
AR = ar
ARFLAGS = cr
CC = cc
CFLAGS =
CODESIGN_IDENTITY = Developer ID
#COMMONFLAGS = -Os -g
COMMONFLAGS = -O0 -g
COMMONFLAGS = -Os -g
#COMMONFLAGS = -O0 -g -fsanitize=address
CPPFLAGS = '-DPDFIO_VERSION="$(VERSION)"'
DESTDIR = $(DSTROOT)
DSO = cc
@ -26,13 +29,14 @@ DSONAME =
LDFLAGS =
LIBS = -lm -lz
RANLIB = ranlib
VERSION = 1.0b2
VERSION = 1.1.1
prefix = /usr/local
# Base rules
.SUFFIXES: .c .h .o
.c.o:
echo Compiling $<...
$(CC) $(CFLAGS) $(CPPFLAGS) $(COMMONFLAGS) -c $<
@ -62,10 +66,12 @@ LIBOBJS = \
ttf.o
OBJS = \
$(LIBOBJS) \
pdfiototext.o \
testpdfio.o
TARGETS = \
$(DSONAME) \
libpdfio.a \
pdfiototext \
testpdfio
@ -82,6 +88,9 @@ all-shared:
debug:
$(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-g -fsanitize=address -DDEBUG=1" clean all
macos:
$(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" clean all
# Clean everything
clean:
@ -90,8 +99,10 @@ clean:
# Install everything
install: $(TARGETS)
echo Installing header files to $(DESTDIR)$(prefix)/include...
-mkdir -p $(DESTDIR)$(prefix)/include
cp $(PUBHEADERS) $(DESTDIR)$(prefix)/include
echo Installing library files to $(DESTDIR)$(prefix)/lib...
-mkdir -p $(DESTDIR)$(prefix)/lib
cp libpdfio.a $(DESTDIR)$(prefix)/lib
$(RANLIB) $(DESTDIR)$(prefix)/lib/libpdfio.a
@ -103,19 +114,22 @@ install: $(TARGETS)
codesign -s "$(CODESIGN_IDENTITY)" -o runtime --timestamp $(DESTDIR)$(prefix)/lib/libpdfio.1.dylib; \
ln -sf libpdfio.1.dylib $(DESTDIR)$(prefix)/lib/libpdfio.dylib; \
fi
echo Installing pkg-config files to $(DESTDIR)$(prefix)/lib/pkgconfig...
-mkdir -p $(DESTDIR)$(prefix)/lib/pkgconfig
echo 'prefix="$(prefix)"' >$(DESTDIR)$(prefix)/lib/pkgconfig/pdfio.pc
echo 'Version: $(VERSION)' >>$(DESTDIR)$(prefix)/lib/pkgconfig/pdfio.pc
cat pdfio.pc.in >>$(DESTDIR)$(prefix)/lib/pkgconfig/pdfio.pc
echo Installing documentation to $(DESTDIR)$(prefix)/share/doc/pdfio...
-mkdir -p $(DESTDIR)$(prefix)/share/doc/pdfio
cp doc/pdfio.html doc/pdfio-512.png LICENSE NOTICE $(DESTDIR)$(prefix)/share/doc/pdfio
echo Installing man page to $(DESTDIR)$(prefix)/share/man/man3...
-mkdir -p $(DESTDIR)$(prefix)/share/man/man3
cp doc/pdfio.3 $(DESTDIR)$(prefix)/share/man/man3
install-shared:
if test `uname` = Darwin; then \
$(MAKE) DSONAME="libpdfio.1.dylib" -$(MAKEFLAGS) install; \
else
else \
$(MAKE) DSONAME="libpdfio.so.1" -$(MAKEFLAGS) install; \
fi
@ -130,13 +144,16 @@ valgrind: testpdfio
# pdfio library
libpdfio.a: $(LIBOBJS)
echo Archiving $@...
$(AR) $(ARFLAGS) $@ $(LIBOBJS)
$(RANLIB) $@
libpdfio.so.1: $(LIBOBJS)
echo Linking $@...
$(CC) $(DSOFLAGS) $(COMMONFLAGS) -shared -o $@ -Wl,-soname,$@ $(LIBOBJS) $(LIBS)
libpdfio.1.dylib: $(LIBOBJS)
echo Linking $@...
$(CC) $(DSOFLAGS) $(COMMONFLAGS) -dynamiclib -o $@ -install_name $(prefix)/lib/$@ -current_version $(VERSION) -compatibility_version 1.0 $(LIBOBJS) $(LIBS)
@ -154,31 +171,46 @@ pdfio1.def: $(LIBOBJS) Makefile
grep -v '^_ttf' | sed -e '1,$$s/^_//' | sort >>$@
# pdfio text extraction (demo, doesn't handle a lot of things yet)
pdfiototext: pdfiototext.o libpdfio.a
echo Linking $@...
$(CC) $(LDFLAGS) $(COMMONFLAGS) -o $@ pdfiototext.o libpdfio.a $(LIBS)
# pdfio test program
testpdfio: testpdfio.o libpdfio.a
echo Linking $@...
$(CC) $(LDFLAGS) $(COMMONFLAGS) -o $@ testpdfio.o libpdfio.a $(LIBS)
# Dependencies
$(OBJS): pdfio.h Makefile
$(LIBOBJS): pdfio-private.h
$(OBJS): pdfio.h pdfio-private.h Makefile
pdfio-content.o: pdfio-content.h ttf.h
ttf.o: ttf.h
# Make documentation using Codedoc <https://www.msweet.org/codedoc>
DOCFLAGS = \
--author "Michael R Sweet" \
--copyright "Copyright (c) 2021 by Michael R Sweet" \
--copyright "Copyright (c) 2021-2022 by Michael R Sweet" \
--docversion $(VERSION)
.PHONY: doc
doc:
echo Generating documentation...
codedoc $(DOCFLAGS) --title "PDFio Programming Manual v$(VERSION)" $(PUBHEADERS) $(PUBOBJS:.o=.c) --body doc/pdfio.md --coverimage doc/pdfio-512.png pdfio.xml >doc/pdfio.html
codedoc $(DOCFLAGS) --title "PDFio Programming Manual v$(VERSION)" --body doc/pdfio.md --coverimage doc/pdfio-epub.png pdfio.xml --epub doc/pdfio.epub
codedoc $(DOCFLAGS) --title "pdf read/write library" --man pdfio --section 3 --body doc/pdfio.md pdfio.xml >doc/pdfio.3
rm -f pdfio.xml
# Fuzz-test the library with AFL++ <https://aflplus.plus>
.PHONY: afl
afl:
$(MAKE) -$(MAKEFLAGS) CC="afl-clang-fast" COMMONFLAGS="-g" clean all
test afl-output || rm -rf afl-output
afl-fuzz -x afl-pdf.dict -i afl-input -o afl-output -V 600 -e pdf -t 5000 ./testpdfio @@
# Analyze code with the Clang static analyzer <https://clang-analyzer.llvm.org>
clang:
clang $(CPPFLAGS) --analyze $(OBJS:.o=.c) 2>clang.log

2
NOTICE
View File

@ -1,6 +1,6 @@
PDFio - PDF Read/Write Library
Copyright © 2021 by Michael R Sweet.
Copyright © 2021-2023 by Michael R Sweet.
(Optional) Exceptions to the Apache 2.0 License:
================================================

View File

@ -3,7 +3,7 @@ pdfio - PDF Read/Write Library
![Version](https://img.shields.io/github/v/release/michaelrsweet/pdfio?include_prereleases)
![Apache 2.0](https://img.shields.io/github/license/michaelrsweet/pdfio)
![Build](https://github.com/michaelrsweet/pdfio/workflows/Build/badge.svg)
[![Build Status](https://img.shields.io/github/workflow/status/michaelrsweet/pdfio/Build)](https://github.com/michaelrsweet/pdfio/actions/workflows/build.yml)
[![Coverity Scan Status](https://img.shields.io/coverity/scan/22385.svg)](https://scan.coverity.com/projects/michaelrsweet-pdfio)
[![LGTM Grade](https://img.shields.io/lgtm/grade/cpp/github/michaelrsweet/pdfio)](https://lgtm.com/projects/g/michaelrsweet/pdfio/context:cpp)
[![LGTM Alerts](https://img.shields.io/lgtm/alerts/github/michaelrsweet/pdfio)](https://lgtm.com/projects/g/michaelrsweet/pdfio/)
@ -115,13 +115,13 @@ generates a static library that will be installed under "/usr/local" with:
You can reproduce this with the makefile using:
sudo make COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" install
sudo make macos install
Legal Stuff
-----------
PDFio is Copyright © 2021 by Michael R Sweet.
PDFio is Copyright © 2021-2023 by Michael R Sweet.
This software is licensed under the Apache License Version 2.0 with an
(optional) exception to allow linking against GPL2/LGPL2 software. See the

BIN
afl-input/PDFBOX-1010-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1018-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1023-2.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1029-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1036-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1036-2.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,55 @@
%PDF-1.3
1 0 obj<</Type/Catalog/Pages 5 0 R>>
endobj
3 0 obj<</ModDate(D:20110505091515-05'00')/CreationDate(2011/05/05 09:15)/Creator(PaperPort 11.0)/Producer(PaperPort 11.0)/Subject()/Author()/Keywords()/Title()>>
endobj
4 0 obj<</Type/Page/MediaBox[0 0 622.0799 756]/Parent 5 0 R/CropBox[0 0 622.0799 756]/Contents 7 0 R/Resources<</ProcSet[/PDF/Text/ImageB/ImageC/ImageI]/XObject<</Z_Im0 6 0 R>>>>>>
endobj
5 0 obj<</Count 1/Type/Pages/Kids[ 4 0 R]>>
endobj
6 0 obj<</Type/XObject/Subtype/Image/Name/XImg/Width 1728/Height 2100/BitsPerComponent 1/ColorSpace/DeviceGray/Intent//Filter[/CCITTFaxDecode]/DecodeParms[<</Colors 1/Columns 1728/Rows 2100/K -1>>]/Length 81592>>stream
endstream
endobj
7 0 obj<</Length 72>>stream
q
622.07996 0 0 756 0 0 cm
0 g
[]0 d 1 w 10 M 0 i 0 J 0 j
/Z_Im0 Do
Q
endstream
endobj
xref
0 8
0000000002 65535 f
0000000010 00000 n
0000000000 00000 f
0000000054 00000 n
0000000224 00000 n
0000000412 00000 n
0000000463 00000 n
0000082294 00000 n
trailer
<</Size 8/Info 3 0 R/Root 1 0 R/ID[<c48c2a5922382dc456a05f8e3ccbb9f8><94a076a2f82a754598b70200e827ac8b>]>>
startxref
82414
%%EOF
%PaperPortPDFversion3 0 obj<</ModDate(D:20110505091515-05'00')/CreationDate(2011/05/05 09:15)/Creator(PaperPort 11.0)/Producer(PaperPort 11.0)/Subject()/Author()/Keywords()/Title()>>
endobj
5 0 obj<</Count 2/Type/Pages/Kids[ 4 0 R 8 0 R]>>
endobj
8 0 obj<</Type/Page/MediaBox[0 0 622.0799 757.4399]/Parent 5 0 R/CropBox[0 0 622.0799 757.4399]/Contents 10 0 R/Resources<</ProcSet[/PDF/Text/ImageB/ImageC/ImageI]/XObject<</Z_Im0 9 0 R>>>>>>
endobj
9 0 obj<</Type/XObject/Subtype/Image/Name/XImg/Width 1728/Height 2104/BitsPerComponent 1/ColorSpace/DeviceGray/Intent//Filter[/CCITTFaxDecode]/DecodeParms[<</Colors 1/Columns 1728/Rows 2104/K -1>>]/Length 78404>>stream
endstream
endobj
10 0 obj<</Length 78>>stream
q
622.07996 0 0 757.44001 0 0 cm
0 g
[]0 d 1 w 10 M 0 i 0 J 0 j
/Z_Im0 Do
Q
endstream

BIN
afl-input/PDFBOX-1039-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1047-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1048-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1065-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1065-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1067-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1068-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1074-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1074-3.pdf Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
afl-input/PDFBOX-1094-3.pdf Normal file

Binary file not shown.

Binary file not shown.

BIN
afl-input/PDFBOX-1094-4.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1095-2.pdf Normal file

Binary file not shown.

1466
afl-pdf.dict Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
.TH pdfio 3 "pdf read/write library" "2021-10-25" "pdf read/write library"
.TH pdfio 3 "pdf read/write library" "2022-07-03" "pdf read/write library"
.SH NAME
pdfio \- pdf read/write library
.SH Introduction
@ -34,7 +34,7 @@ PDFio is
.I not
concerned with rendering or viewing a PDF file, although a PDF RIP or viewer could be written using it.
.PP
PDFio is Copyright \[co] 2021 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
PDFio is Copyright \[co] 2021\-2022 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
.SS Requirements
.PP
PDFio requires the following to build the software:
@ -156,7 +156,7 @@ There is also an Xcode project ("pdfio.xcodeproj") you can use on macOS which ge
You can reproduce this with the makefile using:
.nf
sudo make COMMONFLAGS="\-Os \-mmacosx\-version\-min=10.14 \-arch x86_64 \-arch arm64" install
sudo make macos install
.fi
.SS Detecting PDFio
.PP
@ -254,7 +254,7 @@ Each PDF file contains one or more pages. The pdfioFileGetNumPages function retu
}
.fi
.PP
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page.
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page. Use the pdfioPageGetNumStreams and pdfioPageOpenStream functions to access the content streams for each page.
.PP
The pdfioFileClose function closes a PDF file and frees all memory that was used for it:
.nf
@ -324,6 +324,14 @@ Some PDF objects have an associated data stream, such as for pages, images, ICC
.PP
The first argument is the object pointer. The second argument is a boolean value that specifies whether you want to decode (typically decompress) the stream data or return it as\-is.
.PP
When reading a page stream you'll use the pdfioPageOpenStream function instead:
.nf
pdfio_file_t *pdf = pdfioFileOpen(...);
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
pdfio_stream_t *st = pdfioPageOpenStream(obj, 0, true);
.fi
.PP
Once you have the stream open, you can use one of several functions to read from it:
.IP \(bu 5
.PP
@ -353,12 +361,21 @@ To create a stream for a new object, call the pdfioObjCreateStream function:
.nf
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
.fi
.PP
The first argument is the newly created object. The second argument is either PDFIO_FILTER_NONE to specify that any encoding is done by your program or PDFIO_FILTER_FLATE to specify that PDFio should Flate compress the stream.
.PP
To create a page content stream call the pdfioFileCreatePage function:
.nf
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_dict_t *dict = pdfioDictCreate(pdf);
\... set page dictionary keys and values ...
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
.fi
.PP
Once you have created the stream, use any of the following functions to write to the stream:
.IP \(bu 5
.PP
@ -1423,6 +1440,14 @@ bool pdfioContentPathCurve23 (
double y3
);
.fi
.SS pdfioContentPathEnd
Clear the current path.
.PP
.nf
bool pdfioContentPathEnd (
pdfio_stream_t *st
);
.fi
.SS pdfioContentPathLineTo
Add a straight line to the current path.
.PP
@ -1933,6 +1958,31 @@ pdfio_valtype_t pdfioDictGetType (
const char *key
);
.fi
.SS pdfioDictIterateKeys
Iterate the keys in a dictionary.
.PP
.nf
void pdfioDictIterateKeys (
pdfio_dict_t *dict,
pdfio_dict_cb_t cb,
void *cb_data
);
.fi
.PP
This function iterates the keys in a dictionary, calling the supplied
function "cb":
.PP
.nf
bool
my_dict_cb(pdfio_dict_t *dict, const char *key, void *cb_data)
{
... "key" contains the dictionary key ...
... return true to continue or false to stop ...
}
.fi
The iteration continues as long as the callback returns \fBtrue\fR and stops
once all keys have been iterated.
.SS pdfioDictSetArray
Set a key array in a dictionary.
.PP
@ -2285,6 +2335,20 @@ pdfio_stream_t * pdfioFileCreatePage (
pdfio_dict_t *dict
);
.fi
.SS pdfioFileCreateTemporary
.PP
.nf
pdfio_file_t * pdfioFileCreateTemporary (
char *buffer,
size_t bufsize,
const char *version,
pdfio_rect_t *media_box,
pdfio_rect_t *crop_box,
pdfio_error_cb_t error_cb,
void *error_data
);
.fi
.SS pdfioFileFindObj
Find an object using its object number.
.PP
@ -2693,6 +2757,24 @@ bool pdfioPageDictAddImage (
pdfio_obj_t *obj
);
.fi
.SS pdfioPageGetNumStreams
Get the number of content streams for a page object.
.PP
.nf
size_t pdfioPageGetNumStreams (
pdfio_obj_t *page
);
.fi
.SS pdfioPageOpenStream
Open a content stream for a page.
.PP
.nf
pdfio_stream_t * pdfioPageOpenStream (
pdfio_obj_t *page,
size_t n,
bool decode
);
.fi
.SS pdfioStreamClose
Close a (data) stream in a PDF file.
.PP
@ -2840,6 +2922,12 @@ Standard color spaces
.nf
typedef enum pdfio_cs_e pdfio_cs_t;
.fi
.SS pdfio_dict_cb_t
Dictionary iterator callback
.PP
.nf
typedef bool(*)(pdfio_dict_t *dict, const char *key, void *cb_data) pdfio_dict_cb_t;
.fi
.SS pdfio_dict_t
Key/value dictionary
.PP
@ -2947,4 +3035,4 @@ typedef uint8_t state_t[4][4];
Michael R Sweet
.SH COPYRIGHT
.PP
Copyright (c) 2021 by Michael R Sweet
Copyright (c) 2021-2022 by Michael R Sweet

View File

@ -1,13 +1,13 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>PDFio Programming Manual v1.0.0</title>
<title>PDFio Programming Manual v1.1</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="generator" content="codedoc v3.7">
<meta name="author" content="Michael R Sweet">
<meta name="language" content="en-US">
<meta name="copyright" content="Copyright © 2021 by Michael R Sweet">
<meta name="version" content="1.0.0">
<meta name="copyright" content="Copyright © 2021-2022 by Michael R Sweet">
<meta name="version" content="1.1">
<style type="text/css"><!--
body {
background: white;
@ -245,9 +245,9 @@ span.string {
<body>
<div class="header">
<p><img class="title" src="pdfio-512.png"></p>
<h1 class="title">PDFio Programming Manual v1.0.0</h1>
<h1 class="title">PDFio Programming Manual v1.1</h1>
<p>Michael R Sweet</p>
<p>Copyright © 2021 by Michael R Sweet</p>
<p>Copyright © 2021-2022 by Michael R Sweet</p>
</div>
<div class="contents">
<h2 class="title">Contents</h2>
@ -307,6 +307,7 @@ span.string {
<li><a href="#pdfioContentPathCurve">pdfioContentPathCurve</a></li>
<li><a href="#pdfioContentPathCurve13">pdfioContentPathCurve13</a></li>
<li><a href="#pdfioContentPathCurve23">pdfioContentPathCurve23</a></li>
<li><a href="#pdfioContentPathEnd">pdfioContentPathEnd</a></li>
<li><a href="#pdfioContentPathLineTo">pdfioContentPathLineTo</a></li>
<li><a href="#pdfioContentPathMoveTo">pdfioContentPathMoveTo</a></li>
<li><a href="#pdfioContentPathRect">pdfioContentPathRect</a></li>
@ -360,6 +361,7 @@ span.string {
<li><a href="#pdfioDictGetRect">pdfioDictGetRect</a></li>
<li><a href="#pdfioDictGetString">pdfioDictGetString</a></li>
<li><a href="#pdfioDictGetType">pdfioDictGetType</a></li>
<li><a href="#pdfioDictIterateKeys">pdfioDictIterateKeys</a></li>
<li><a href="#pdfioDictSetArray">pdfioDictSetArray</a></li>
<li><a href="#pdfioDictSetBinary">pdfioDictSetBinary</a></li>
<li><a href="#pdfioDictSetBoolean">pdfioDictSetBoolean</a></li>
@ -383,6 +385,7 @@ span.string {
<li><a href="#pdfioFileCreateObj">pdfioFileCreateObj</a></li>
<li><a href="#pdfioFileCreateOutput">pdfioFileCreateOutput</a></li>
<li><a href="#pdfioFileCreatePage">pdfioFileCreatePage</a></li>
<li><a href="#pdfioFileCreateTemporary">pdfioFileCreateTemporary</a></li>
<li><a href="#pdfioFileFindObj">pdfioFileFindObj</a></li>
<li><a href="#pdfioFileGetAuthor">pdfioFileGetAuthor</a></li>
<li><a href="#pdfioFileGetCreationDate">pdfioFileGetCreationDate</a></li>
@ -425,6 +428,8 @@ span.string {
<li><a href="#pdfioPageDictAddColorSpace">pdfioPageDictAddColorSpace</a></li>
<li><a href="#pdfioPageDictAddFont">pdfioPageDictAddFont</a></li>
<li><a href="#pdfioPageDictAddImage">pdfioPageDictAddImage</a></li>
<li><a href="#pdfioPageGetNumStreams">pdfioPageGetNumStreams</a></li>
<li><a href="#pdfioPageOpenStream">pdfioPageOpenStream</a></li>
<li><a href="#pdfioStreamClose">pdfioStreamClose</a></li>
<li><a href="#pdfioStreamConsume">pdfioStreamConsume</a></li>
<li><a href="#pdfioStreamGetToken">pdfioStreamGetToken</a></li>
@ -440,6 +445,7 @@ span.string {
<li><a href="#TYPES">Data Types</a><ul class="subcontents">
<li><a href="#pdfio_array_t">pdfio_array_t</a></li>
<li><a href="#pdfio_cs_t">pdfio_cs_t</a></li>
<li><a href="#pdfio_dict_cb_t">pdfio_dict_cb_t</a></li>
<li><a href="#pdfio_dict_t">pdfio_dict_t</a></li>
<li><a href="#pdfio_encryption_t">pdfio_encryption_t</a></li>
<li><a href="#pdfio_error_cb_t">pdfio_error_cb_t</a></li>
@ -491,7 +497,7 @@ span.string {
</li>
</ul>
<p>PDFio is <em>not</em> concerned with rendering or viewing a PDF file, although a PDF RIP or viewer could be written using it.</p>
<p>PDFio is Copyright © 2021 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files &quot;LICENSE&quot; and &quot;NOTICE&quot; for more information.</p>
<p>PDFio is Copyright © 2021-2022 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files &quot;LICENSE&quot; and &quot;NOTICE&quot; for more information.</p>
<h3 class="title" id="requirements">Requirements</h3>
<p>PDFio requires the following to build the software:</p>
<ul>
@ -557,7 +563,7 @@ make install-shared
<pre><code>sudo xcodebuild install
</code></pre>
<p>You can reproduce this with the makefile using:</p>
<pre><code>sudo make COMMONFLAGS=&quot;-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64&quot; install
<pre><code>sudo make macos install
</code></pre>
<h3 class="title" id="detecting-pdfio">Detecting PDFio</h3>
<p>PDFio can be detected using the <code>pkg-config</code> command, for example:</p>
@ -621,7 +627,7 @@ pdfio_obj_t *page; <span class="comment">// Current page</span>
<span class="comment">// do something with page</span>
}
</code></pre>
<p>Each page is represented by a &quot;page tree&quot; object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more &quot;content&quot; objects that contain the images, fonts, text, and graphics that appear on the page.</p>
<p>Each page is represented by a &quot;page tree&quot; object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more &quot;content&quot; objects that contain the images, fonts, text, and graphics that appear on the page. Use the <a href="#pdfioPageGetNumStreams"><code>pdfioPageGetNumStreams</code></a> and <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> functions to access the content streams for each page.</p>
<p>The <a href="#pdfioFileClose"><code>pdfioFileClose</code></a> function closes a PDF file and frees all memory that was used for it:</p>
<pre><code class="language-c">pdfioFileClose(pdf);
</code></pre>
@ -663,6 +669,11 @@ pdfio_obj_t *obj = pdfioFileFindObj(pdf, number);
pdfio_stream_t *st = pdfioObjOpenStream(obj, <span class="reserved">true</span>);
</code></pre>
<p>The first argument is the object pointer. The second argument is a boolean value that specifies whether you want to decode (typically decompress) the stream data or return it as-is.</p>
<p>When reading a page stream you'll use the <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> function instead:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileOpen(...);
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
pdfio_stream_t *st = pdfioPageOpenStream(obj, <span class="number">0</span>, <span class="reserved">true</span>);
</code></pre>
<p>Once you have the stream open, you can use one of several functions to read from it:</p>
<ul>
<li><p><a href="#pdfioStreamConsume"><code>pdfioStreamConsume</code></a> reads and discards a number of bytes in the stream</p>
@ -679,10 +690,16 @@ pdfio_stream_t *st = pdfioObjOpenStream(obj, <span class="reserved">true</span>)
</code></pre>
<p>To create a stream for a new object, call the <a href="#pdfioObjCreateStream"><code>pdfioObjCreateStream</code></a> function:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
</code></pre>
<p>The first argument is the newly created object. The second argument is either <code>PDFIO_FILTER_NONE</code> to specify that any encoding is done by your program or <code>PDFIO_FILTER_FLATE</code> to specify that PDFio should Flate compress the stream.</p>
<p>To create a page content stream call the <a href="#pdfioFileCreatePage"><code>pdfioFileCreatePage</code></a> function:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_dict_t *dict = pdfioDictCreate(pdf);
... set page dictionary keys <span class="reserved">and</span> values ...
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
</code></pre>
<p>Once you have created the stream, use any of the following functions to write to the stream:</p>
<ul>
<li><p><a href="#pdfioStreamPrintf"><code>pdfioStreamPrintf</code></a> writes a formatted string to the stream</p>
@ -1498,6 +1515,17 @@ bool pdfioContentPathCurve23(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st, d
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioContentPathEnd">pdfioContentPathEnd</a></h3>
<p class="description">Clear the current path.</p>
<p class="code">
bool pdfioContentPathEnd(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>st</th>
<td class="description">Stream</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioContentPathLineTo">pdfioContentPathLineTo</a></h3>
<p class="description">Add a straight line to the current path.</p>
<p class="code">
@ -2248,6 +2276,33 @@ const char *pdfioDictGetString(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, c
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Value type</p>
<h3 class="function"><a id="pdfioDictIterateKeys">pdfioDictIterateKeys</a></h3>
<p class="description">Iterate the keys in a dictionary.</p>
<p class="code">
void pdfioDictIterateKeys(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, <a href="#pdfio_dict_cb_t">pdfio_dict_cb_t</a> cb, void *cb_data);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>dict</th>
<td class="description">Dictionary</td></tr>
<tr><th>cb</th>
<td class="description">Callback function</td></tr>
<tr><th>cb_data</th>
<td class="description">Callback data</td></tr>
</tbody></table>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function iterates the keys in a dictionary, calling the supplied
function &quot;cb&quot;:
<pre>
bool
my_dict_cb(pdfio_dict_t *dict, const char *key, void *cb_data)
{
... &quot;key&quot; contains the dictionary key ...
... return true to continue or false to stop ...
}
</pre>
The iteration continues until the callback returns <code>false</code> or all keys
have been iterated.</p>
<h3 class="function"><a id="pdfioDictSetArray">pdfioDictSetArray</a></h3>
<p class="description">Set a key array in a dictionary.</p>
<p class="code">
@ -2717,6 +2772,35 @@ stored as indirect object references.</blockquote>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Contents stream</p>
<h3 class="function"><a id="pdfioFileCreateTemporary">pdfioFileCreateTemporary</a></h3>
<p class="description">Create a temporary PDF file.</p>
<p class="code">
<a href="#pdfio_file_t">pdfio_file_t</a> *pdfioFileCreateTemporary(char *buffer, size_t bufsize, const char *version, <a href="#pdfio_rect_t">pdfio_rect_t</a> *media_box, <a href="#pdfio_rect_t">pdfio_rect_t</a> *crop_box, <a href="#pdfio_error_cb_t">pdfio_error_cb_t</a> error_cb, void *error_data);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>buffer</th>
<td class="description">Filename buffer</td></tr>
<tr><th>bufsize</th>
<td class="description">Size of filename buffer</td></tr>
<tr><th>version</th>
<td class="description">PDF version number or <code>NULL</code> for default (2.0)</td></tr>
<tr><th>media_box</th>
<td class="description">Default MediaBox for pages</td></tr>
<tr><th>crop_box</th>
<td class="description">Default CropBox for pages</td></tr>
<tr><th>error_cb</th>
<td class="description">Error callback or <code>NULL</code> for default</td></tr>
<tr><th>error_data</th>
<td class="description">Error callback data, if any</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">PDF file or <code>NULL</code> on error</p>
<p class="discussion">This function creates a temporary PDF file with a unique filename in the current
temporary directory. The temporary filename is stored in the string &quot;buffer&quot; and
will have a &quot;.pdf&quot; extension. Otherwise, this function works the same as
the <a href="#pdfioFileCreate"><code>pdfioFileCreate</code></a> function.
</p>
<h3 class="function"><a id="pdfioFileFindObj">pdfioFileFindObj</a></h3>
<p class="description">Find an object using its object number.</p>
<p class="code">
@ -3264,6 +3348,32 @@ bool pdfioPageDictAddImage(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, const
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioPageGetNumStreams">pdfioPageGetNumStreams</a></h3>
<p class="description">Get the number of content streams for a page object.</p>
<p class="code">
size_t pdfioPageGetNumStreams(<a href="#pdfio_obj_t">pdfio_obj_t</a> *page);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>page</th>
<td class="description">Page object</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Number of streams</p>
<h3 class="function"><a id="pdfioPageOpenStream">pdfioPageOpenStream</a></h3>
<p class="description">Open a content stream for a page.</p>
<p class="code">
<a href="#pdfio_stream_t">pdfio_stream_t</a> *pdfioPageOpenStream(<a href="#pdfio_obj_t">pdfio_obj_t</a> *page, size_t n, bool decode);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>page</th>
<td class="description">Page object</td></tr>
<tr><th>n</th>
<td class="description">Stream index (0-based)</td></tr>
<tr><th>decode</th>
<td class="description"><code>true</code> to decode/decompress stream</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Stream</p>
<h3 class="function"><a id="pdfioStreamClose">pdfioStreamClose</a></h3>
<p class="description">Close a (data) stream in a PDF file.</p>
<p class="code">
@ -3445,6 +3555,11 @@ typedef struct _pdfio_array_s pdfio_array_t;
<p class="code">
typedef enum <a href="#pdfio_cs_e">pdfio_cs_e</a> pdfio_cs_t;
</p>
<h3 class="typedef"><a id="pdfio_dict_cb_t">pdfio_dict_cb_t</a></h3>
<p class="description">Dictionary iterator callback</p>
<p class="code">
typedef bool (*pdfio_dict_cb_t)(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, const char *key, void *cb_data);
</p>
<h3 class="typedef"><a id="pdfio_dict_t">pdfio_dict_t</a></h3>
<p class="description">Key/value dictionary</p>
<p class="code">

View File

@ -15,8 +15,8 @@ goals of pdfio are:
PDFio is *not* concerned with rendering or viewing a PDF file, although a PDF
RIP or viewer could be written using it.
PDFio is Copyright © 2021 by Michael R Sweet and is licensed under the Apache
License Version 2.0 with an (optional) exception to allow linking against
PDFio is Copyright © 2021-2022 by Michael R Sweet and is licensed under the
Apache License Version 2.0 with an (optional) exception to allow linking against
GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
@ -104,7 +104,7 @@ generates a static library that will be installed under "/usr/local" with:
You can reproduce this with the makefile using:
sudo make COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" install
sudo make macos install
Detecting PDFio
@ -209,7 +209,8 @@ for (i = 0, count = pdfioFileGetNumPages(pdf); i < count; i ++)
Each page is represented by a "page tree" object (what [`pdfioFileGetPage`](@@)
returns) that specifies information about the page and one or more "content"
objects that contain the images, fonts, text, and graphics that appear on the
page.
page. Use the [`pdfioPageGetNumStreams`](@@) and [`pdfioPageOpenStream`](@@)
functions to access the content streams for each page.
The [`pdfioFileClose`](@@) function closes a PDF file and frees all memory that
was used for it:
@ -294,6 +295,15 @@ The first argument is the object pointer. The second argument is a boolean
value that specifies whether you want to decode (typically decompress) the
stream data or return it as-is.
When reading a page stream you'll use the [`pdfioPageOpenStream`](@@) function
instead:
```c
pdfio_file_t *pdf = pdfioFileOpen(...);
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
pdfio_stream_t *st = pdfioPageOpenStream(obj, 0, true);
```
Once you have the stream open, you can use one of several functions to read
from it:
@ -315,14 +325,23 @@ function:
```c
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
```
The first argument is the newly created object. The second argument is either
`PDFIO_FILTER_NONE` to specify that any encoding is done by your program or
`PDFIO_FILTER_FLATE` to specify that PDFio should Flate compress the stream.
To create a page content stream call the [`pdfioFileCreatePage`](@@) function:
```c
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_dict_t *dict = pdfioDictCreate(pdf);
... set page dictionary keys and values ...
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
```
Once you have created the stream, use any of the following functions to write
to the stream:

View File

@ -575,7 +575,8 @@ _pdfioArrayGetValue(pdfio_array_t *a, // I - Array
pdfio_array_t * // O - New array
_pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
pdfio_obj_t *obj, // I - Object, if any
_pdfio_token_t *tb) // I - Token buffer/stack
_pdfio_token_t *tb, // I - Token buffer/stack
size_t depth) // I - Depth of array
{
pdfio_array_t *array; // New array
char token[8192]; // Token from file
@ -599,7 +600,7 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
// Push the token and decode the value...
_pdfioTokenPush(tb, token);
if (!_pdfioValueRead(pdf, obj, tb, &value))
if (!_pdfioValueRead(pdf, obj, tb, &value, depth))
break;
// PDFIO_DEBUG("_pdfioArrayRead(%p): Appending ", (void *)array);

View File

@ -1,7 +1,7 @@
//
// Common support functions for pdfio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -38,6 +38,8 @@ _pdfioFileConsume(pdfio_file_t *pdf, // I - PDF file
else if (_pdfioFileSeek(pdf, (off_t)bytes, SEEK_CUR) < 0)
return (false);
PDFIO_DEBUG("_pdfioFileConsume: pos=%ld\n", (long)(pdf->bufpos + pdf->bufptr - pdf->buffer));
return (true);
}
@ -525,7 +527,7 @@ read_buffer(pdfio_file_t *pdf, // I - PDF file
return (rbytes);
}
//
// 'write_buffer()' - Write a buffer to a PDF file.
//

View File

@ -489,8 +489,11 @@ pdfioContentMatrixRotate(
pdfio_stream_t *st, // I - Stream
double degrees) // I - Rotation angle in degrees counter-clockwise
{
double dcos = cos(degrees / M_PI); // Cosine
double dsin = sin(degrees / M_PI); // Sine
double dcos = cos(M_PI * degrees / 180.0);
// Cosine
double dsin = sin(M_PI * degrees / 180.0);
// Sine
return (pdfioStreamPrintf(st, "%g %g %g %g 0 0 cm\n", dcos, -dsin, dsin, dcos));
}
@ -586,6 +589,17 @@ pdfioContentPathCurve23(
}
//
// 'pdfioContentPathEnd()' - Clear the current path.
//
bool                                    // O - `true` on success, `false` on failure
pdfioContentPathEnd(pdfio_stream_t *st) // I - Stream
{
  // Operator text written to the content stream: "n" followed by a newline
  static const char end_path_op[] = "n\n";

  bool written = pdfioStreamPuts(st, end_path_op);

  return (written);
}
//
// 'pdfioContentPathLineTo()' - Add a straight line to the current path.
//

View File

@ -91,6 +91,7 @@ extern bool pdfioContentPathClose(pdfio_stream_t *st) _PDFIO_PUBLIC;
extern bool pdfioContentPathCurve(pdfio_stream_t *st, double x1, double y1, double x2, double y2, double x3, double y3) _PDFIO_PUBLIC;
extern bool pdfioContentPathCurve13(pdfio_stream_t *st, double x1, double y1, double x3, double y3) _PDFIO_PUBLIC;
extern bool pdfioContentPathCurve23(pdfio_stream_t *st, double x2, double y2, double x3, double y3) _PDFIO_PUBLIC;
extern bool pdfioContentPathEnd(pdfio_stream_t *st) _PDFIO_PUBLIC;
extern bool pdfioContentPathLineTo(pdfio_stream_t *st, double x, double y) _PDFIO_PUBLIC;
extern bool pdfioContentPathMoveTo(pdfio_stream_t *st, double x, double y) _PDFIO_PUBLIC;
extern bool pdfioContentPathRect(pdfio_stream_t *st, double x, double y, double width, double height) _PDFIO_PUBLIC;

View File

@ -663,7 +663,12 @@ _pdfioCryptoUnlock(
length = 128;
}
}
// TODO: Implement AES-256 - V6 R6
else if (version == 6 && revision == 6)
{
// TODO: Implement AES-256 - V6 R6
pdf->encryption = PDFIO_ENCRYPTION_AES_256;
length = 256;
}
PDFIO_DEBUG("_pdfioCryptoUnlock: encryption=%d, length=%d\n", pdf->encryption, length);
@ -788,6 +793,8 @@ _pdfioCryptoUnlock(
else
{
// TODO: Implement AES-256 security handler
_pdfioFileError(pdf, "Unable to unlock AES-256 encrypted file at this time.");
return (false);
}
// If we get here we need to try another password...

View File

@ -1,7 +1,7 @@
//
// PDF dictionary functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -464,6 +464,47 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary
}
//
// 'pdfioDictIterateKeys()' - Iterate the keys in a dictionary.
//
// This function visits the key/value pairs of "dict" in the order they are
// stored and calls the supplied function "cb" once for each key:
//
// ```
// bool
// my_dict_cb(pdfio_dict_t *dict, const char *key, void *cb_data)
// {
// ... "key" contains the dictionary key ...
// ... return true to continue or false to stop ...
// }
// ```
//
// The iteration stops as soon as the callback returns `false` or once all keys
// have been iterated.  Nothing happens when "dict" or "cb" is `NULL`.
//

void
pdfioDictIterateKeys(
    pdfio_dict_t    *dict,              // I - Dictionary
    pdfio_dict_cb_t cb,                 // I - Callback function
    void            *cb_data)           // I - Callback data
{
  size_t        idx,                    // Index of the current pair
                count;                  // Number of pairs captured before iterating
  _pdfio_pair_t *pairs;                 // Pair array captured before iterating


  // Nothing to iterate without both a dictionary and a callback...
  if (dict == NULL || cb == NULL)
    return;

  // Capture the array and its length once, then hand each key to the callback
  // until it asks to stop...
  count = dict->num_pairs;
  pairs = dict->pairs;

  for (idx = 0; idx < count; idx ++)
  {
    if (!cb(dict, pairs[idx].key, cb_data))
      return;
  }
}
//
// '_pdfioDictRead()' - Read a dictionary from a PDF file.
//
@ -473,7 +514,8 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary
pdfio_dict_t * // O - New dictionary
_pdfioDictRead(pdfio_file_t *pdf, // I - PDF file
pdfio_obj_t *obj, // I - Object, if any
_pdfio_token_t *tb) // I - Token buffer/stack
_pdfio_token_t *tb, // I - Token buffer/stack
size_t depth) // I - Depth of dictionary
{
pdfio_dict_t *dict; // New dictionary
char key[256]; // Dictionary key
@ -499,9 +541,16 @@ _pdfioDictRead(pdfio_file_t *pdf, // I - PDF file
_pdfioFileError(pdf, "Invalid dictionary contents.");
break;
}
else if (_pdfioDictGetValue(dict, key + 1))
{
_pdfioFileError(pdf, "Duplicate dictionary key '%s'.", key + 1);
return (NULL);
}
// Then get the next value...
if (!_pdfioValueRead(pdf, obj, tb, &value))
PDFIO_DEBUG("_pdfioDictRead: Reading value for '%s'.\n", key + 1);
if (!_pdfioValueRead(pdf, obj, tb, &value, depth))
{
_pdfioFileError(pdf, "Missing value for dictionary key.");
break;
@ -706,7 +755,7 @@ pdfioDictSetNull(pdfio_dict_t *dict, // I - Dictionary
bool // O - `true` on success, `false` on failure
pdfioDictSetNumber(pdfio_dict_t *dict, // I - Dictionary
const char *key, // I - Key
double value) // I - Value
double value) // I - Value
{
_pdfio_value_t temp; // New value
@ -890,9 +939,9 @@ _pdfioDictSetValue(
#ifdef DEBUG
PDFIO_DEBUG("_pdfioDictSetValue(%p): %lu pairs\n", (void *)dict, (unsigned long)dict->num_pairs);
PDFIO_DEBUG("_pdfioDictSetValue(%p): ", (void *)dict);
PDFIO_DEBUG_DICT(dict);
PDFIO_DEBUG("\n");
// PDFIO_DEBUG("_pdfioDictSetValue(%p): ", (void *)dict);
// PDFIO_DEBUG_DICT(dict);
// PDFIO_DEBUG("\n");
#endif // DEBUG
return (true);

View File

@ -1,7 +1,7 @@
//
// PDF file functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -24,8 +24,9 @@
static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b);
static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b);
static const char *get_info_string(pdfio_file_t *pdf, const char *key);
static bool load_obj_stream(pdfio_obj_t *obj);
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj);
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data);
static bool write_catalog(pdfio_file_t *pdf);
static bool write_pages(pdfio_file_t *pdf);
@ -264,46 +265,26 @@ pdfioFileCreate(
// Write a standard PDF header...
if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version))
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
// Create the pages object...
if ((dict = pdfioDictCreate(pdf)) == NULL)
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
pdfioDictSetName(dict, "Type", "Pages");
if ((pdf->pages_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
// Create the info object...
if ((info_dict = pdfioDictCreate(pdf)) == NULL)
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
pdfioDictSetDate(info_dict, "CreationDate", time(NULL));
pdfioDictSetString(info_dict, "Producer", "pdfio/" PDFIO_VERSION);
if ((pdf->info_obj = pdfioFileCreateObj(pdf, info_dict)) == NULL)
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
// Create random file ID values...
_pdfioCryptoMakeRandom(id_value, sizeof(id_value));
@ -315,6 +296,15 @@ pdfioFileCreate(
}
return (pdf);
// Common error handling code...
error:
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
@ -535,41 +525,26 @@ pdfioFileCreateOutput(
// Write a standard PDF header...
if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version))
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
// Create the pages object...
if ((dict = pdfioDictCreate(pdf)) == NULL)
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
pdfioDictSetName(dict, "Type", "Pages");
if ((pdf->pages_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
// Create the info object...
if ((info_dict = pdfioDictCreate(pdf)) == NULL)
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
pdfioDictSetDate(info_dict, "CreationDate", time(NULL));
pdfioDictSetString(info_dict, "Producer", "pdfio/" PDFIO_VERSION);
if ((pdf->info_obj = pdfioFileCreateObj(pdf, info_dict)) == NULL)
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
// Create random file ID values...
_pdfioCryptoMakeRandom(id_value, sizeof(id_value));
@ -581,6 +556,13 @@ pdfioFileCreateOutput(
}
return (pdf);
// Common error handling code...
error:
pdfioFileClose(pdf);
return (NULL);
}
@ -657,6 +639,194 @@ pdfioFileCreatePage(pdfio_file_t *pdf, // I - PDF file
}
//
// 'pdfioFileCreateTemporary()' - Create a temporary PDF file.
//
// This function creates a PDF file with a unique filename in the current
// temporary directory. The temporary file is stored in the string "buffer" an
// will have a ".pdf" extension. Otherwise, this function works the same as
// the @link pdfioFileCreate@ function.
//
// @since PDFio v1.1@
//
pdfio_file_t *
pdfioFileCreateTemporary(
char *buffer, // I - Filename buffer
size_t bufsize, // I - Size of filename buffer
const char *version, // I - PDF version number or `NULL` for default (2.0)
pdfio_rect_t *media_box, // I - Default MediaBox for pages
pdfio_rect_t *crop_box, // I - Default CropBox for pages
pdfio_error_cb_t error_cb, // I - Error callback or `NULL` for default
void *error_data) // I - Error callback data, if any
{
pdfio_file_t *pdf; // PDF file
pdfio_dict_t *dict; // Dictionary for pages object
pdfio_dict_t *info_dict; // Dictionary for information object
unsigned char id_value[16]; // File ID value
int i; // Looping var
const char *tmpdir; // Temporary directory
#if _WIN32 || defined(__APPLE__)
char tmppath[256]; // Temporary directory path
#endif // _WIN32 || __APPLE__
unsigned tmpnum; // Temporary filename number
// Range check input...
if (!buffer || bufsize < 32)
{
if (buffer)
*buffer = '\0';
return (NULL);
}
if (!version)
version = "2.0";
if (!error_cb)
{
error_cb = _pdfioFileDefaultError;
error_data = NULL;
}
// Allocate a PDF file structure...
if ((pdf = (pdfio_file_t *)calloc(1, sizeof(pdfio_file_t))) == NULL)
{
pdfio_file_t temp; // Dummy file
char message[8192]; // Message string
temp.filename = (char *)"temporary.pdf";
snprintf(message, sizeof(message), "Unable to allocate memory for PDF file - %s", strerror(errno));
(error_cb)(&temp, message, error_data);
*buffer = '\0';
return (NULL);
}
// Create the file...
#if _WIN32
if ((tmpdir = getenv("TEMP")) == NULL)
{
GetTempPathA(sizeof(tmppath), tmppath);
tmpdir = tmppath;
}
#elif defined(__APPLE__)
if ((tmpdir = getenv("TMPDIR")) != NULL && access(tmpdir, W_OK))
tmpdir = NULL;
if (!tmpdir)
{
// Grab the per-process temporary directory for sandboxed apps...
# ifdef _CS_DARWIN_USER_TEMP_DIR
if (confstr(_CS_DARWIN_USER_TEMP_DIR, tmppath, sizeof(tmppath)))
tmpdir = tmppath;
else
# endif // _CS_DARWIN_USER_TEMP_DIR
tmpdir = "/private/tmp";
}
#else
if ((tmpdir = getenv("TMPDIR")) == NULL || access(tmpdir, W_OK))
tmpdir = "/tmp";
#endif // _WIN32
for (i = 0; i < 1000; i ++)
{
_pdfioCryptoMakeRandom((uint8_t *)&tmpnum, sizeof(tmpnum));
snprintf(buffer, bufsize, "%s/%08x.pdf", tmpdir, tmpnum);
if ((pdf->fd = open(buffer, O_WRONLY | O_BINARY | O_CREAT | O_TRUNC | O_EXCL, 0666)) >= 0)
break;
}
pdf->filename = strdup(buffer);
if (i >= 1000)
{
_pdfioFileError(pdf, "Unable to create file - %s", strerror(errno));
free(pdf->filename);
free(pdf);
*buffer = '\0';
return (NULL);
}
pdf->version = strdup(version);
pdf->mode = _PDFIO_MODE_WRITE;
pdf->error_cb = error_cb;
pdf->error_data = error_data;
pdf->permissions = PDFIO_PERMISSION_ALL;
pdf->bufptr = pdf->buffer;
pdf->bufend = pdf->buffer + sizeof(pdf->buffer);
if (media_box)
{
pdf->media_box = *media_box;
}
else
{
// Default to "universal" size (intersection of A4 and US Letter)
pdf->media_box.x2 = 210.0 * 72.0f / 25.4f;
pdf->media_box.y2 = 11.0f * 72.0f;
}
if (crop_box)
{
pdf->crop_box = *crop_box;
}
else
{
// Default to "universal" size (intersection of A4 and US Letter)
pdf->crop_box.x2 = 210.0 * 72.0f / 25.4f;
pdf->crop_box.y2 = 11.0f * 72.0f;
}
// Write a standard PDF header...
if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version))
goto error;
// Create the pages object...
if ((dict = pdfioDictCreate(pdf)) == NULL)
goto error;
pdfioDictSetName(dict, "Type", "Pages");
if ((pdf->pages_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
goto error;
// Create the info object...
if ((info_dict = pdfioDictCreate(pdf)) == NULL)
goto error;
pdfioDictSetDate(info_dict, "CreationDate", time(NULL));
pdfioDictSetString(info_dict, "Producer", "pdfio/" PDFIO_VERSION);
if ((pdf->info_obj = pdfioFileCreateObj(pdf, info_dict)) == NULL)
goto error;
// Create random file ID values...
_pdfioCryptoMakeRandom(id_value, sizeof(id_value));
if ((pdf->id_array = pdfioArrayCreate(pdf)) != NULL)
{
pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value));
pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value));
}
return (pdf);
// Common error handling code...
error:
pdfioFileClose(pdf);
unlink(buffer);
*buffer = '\0';
return (NULL);
}
//
// '_pdfioFileFindMappedObj()' - Find a mapped object.
//
@ -723,7 +893,7 @@ pdfioFileFindObj(
const char * // O - Author or `NULL` for none
pdfioFileGetAuthor(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Author") : NULL);
return (get_info_string(pdf, "Author"));
}
@ -735,7 +905,7 @@ time_t // O - Creation date or `0` for none
pdfioFileGetCreationDate(
pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetDate(pdf->info_obj->value.value.dict, "CreationDate") : 0);
return (pdf && pdf->info_obj ? pdfioDictGetDate(pdfioObjGetDict(pdf->info_obj), "CreationDate") : 0);
}
@ -746,7 +916,7 @@ pdfioFileGetCreationDate(
const char * // O - Creator string or `NULL` for none
pdfioFileGetCreator(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Creator") : NULL);
return (get_info_string(pdf, "Creator"));
}
@ -768,7 +938,7 @@ pdfioFileGetID(pdfio_file_t *pdf) // I - PDF file
const char * // O - Keywords string or `NULL` for none
pdfioFileGetKeywords(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Keywords") : NULL);
return (get_info_string(pdf, "Keywords"));
}
@ -872,7 +1042,7 @@ pdfioFileGetPermissions(
const char * // O - Producer string or `NULL` for none
pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Producer") : NULL);
return (get_info_string(pdf, "Producer"));
}
@ -883,7 +1053,7 @@ pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file
const char * // O - Subject or `NULL` for none
pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Subject") : NULL);
return (get_info_string(pdf, "Subject"));
}
@ -894,7 +1064,7 @@ pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file
const char * // O - Title or `NULL` for none
pdfioFileGetTitle(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Title") : NULL);
return (get_info_string(pdf, "Title"));
}
@ -1237,6 +1407,51 @@ compare_objs(pdfio_obj_t **a, // I - First object
}
//
// 'get_info_string()' - Get a string value from the Info dictionary.
//
// Name and string values are returned as-is.  Binary strings shorter than
// 4096 bytes, which occur in encrypted PDF files, are converted to regular
// C strings; the converted string replaces the binary data in the dictionary
// value, so later lookups of the same key return it directly.
//

static const char *                     // O - String or `NULL` if not found
get_info_string(pdfio_file_t *pdf,      // I - PDF file
                const char   *key)      // I - Dictionary key
{
  pdfio_dict_t   *info;                 // Info dictionary
  _pdfio_value_t *entry;                // Value stored under "key"
  char           text[4096];            // NUL-terminated copy of a binary string
  size_t         len;                   // Length of the binary string


  // Bail out when there is no file, no Info object/dictionary, or no such key...
  if (!pdf || !pdf->info_obj)
    return (NULL);

  if ((info = pdfioObjGetDict(pdf->info_obj)) == NULL)
    return (NULL);

  if ((entry = _pdfioDictGetValue(info, key)) == NULL)
    return (NULL);

  switch (entry->type)
  {
    case PDFIO_VALTYPE_NAME :
    case PDFIO_VALTYPE_STRING :
        // Already usable as a C string...
        return (entry->value.string);

    case PDFIO_VALTYPE_BINARY :
        // Only convert data that fits in the temporary buffer with its
        // terminating NUL...
        if (!(entry->value.binary.datalen < 4096))
          return (NULL);

        len = entry->value.binary.datalen;

        memcpy(text, entry->value.binary.data, len);
        text[len] = '\0';

        // Release the binary buffer and store the string in its place...
        free(entry->value.binary.data);

        entry->type         = PDFIO_VALTYPE_STRING;
        entry->value.string = pdfioStringCreate(pdf, text);

        return (entry->value.string);

    default :
        // Something else that is not a string...
        return (NULL);
  }
}
//
// 'load_obj_stream()' - Load an object stream.
//
@ -1312,7 +1527,7 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
// Read the objects themselves...
for (cur_obj = 0; cur_obj < num_objs; cur_obj ++)
{
if (!_pdfioValueRead(obj->pdf, obj, &tb, &(objs[cur_obj]->value)))
if (!_pdfioValueRead(obj->pdf, obj, &tb, &(objs[cur_obj]->value), 0))
{
pdfioStreamClose(st);
return (false);
@ -1332,7 +1547,8 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
static bool // O - `true` on success, `false` on error
load_pages(pdfio_file_t *pdf, // I - PDF file
pdfio_obj_t *obj) // I - Page object
pdfio_obj_t *obj, // I - Page object
size_t depth) // I - Depth of page tree
{
pdfio_dict_t *dict; // Page object dictionary
const char *type; // Node type
@ -1364,9 +1580,15 @@ load_pages(pdfio_file_t *pdf, // I - PDF file
size_t i, // Looping var
num_kids; // Number of elements in array
if (depth >= PDFIO_MAX_DEPTH)
{
_pdfioFileError(pdf, "Depth of pages objects too great to load.");
return (false);
}
for (i = 0, num_kids = pdfioArrayGetSize(kids); i < num_kids; i ++)
{
if (!load_pages(pdf, pdfioArrayGetObj(kids, i)))
if (!load_pages(pdf, pdfioArrayGetObj(kids, i), depth + 1))
return (false);
}
}
@ -1496,7 +1718,7 @@ load_xref(
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, obj, &tb, &trailer))
if (!_pdfioValueRead(pdf, obj, &tb, &trailer, 0))
{
_pdfioFileError(pdf, "Unable to read cross-reference stream dictionary.");
return (false);
@ -1537,7 +1759,7 @@ load_xref(
w_2 = w[0];
w_3 = w[0] + w[1];
if (w[1] == 0 || w[2] > 2 || w_total > sizeof(buffer))
if (w[1] == 0 || w[2] > 2 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
{
_pdfioFileError(pdf, "Cross-reference stream has invalid W key.");
return (false);
@ -1751,7 +1973,7 @@ load_xref(
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, NULL, &tb, &trailer))
if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0))
{
_pdfioFileError(pdf, "Unable to read trailer dictionary.");
return (false);
@ -1762,6 +1984,8 @@ load_xref(
return (false);
}
PDFIO_DEBUG("load_xref: Got trailer dict.\n");
_pdfioTokenFlush(&tb);
if (!pdf->trailer_dict)
@ -1803,7 +2027,7 @@ load_xref(
PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);
return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages")));
return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages"), 0));
}

View File

@ -1,7 +1,7 @@
//
// PDF object functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -33,8 +33,14 @@ pdfioObjClose(pdfio_obj_t *obj) // I - Object
if (!obj)
return (false);
// Clear the current object pointer...
obj->pdf->current_obj = NULL;
if (obj->pdf->mode != _PDFIO_MODE_WRITE)
return (true); // Nothing to do when reading
{
// Nothing to do when reading
return (true);
}
// Write what remains for the object...
if (!obj->offset)
@ -165,6 +171,12 @@ pdfioObjCreateStream(
return (NULL);
}
if (obj->pdf->current_obj)
{
_pdfioFileError(obj->pdf, "Another object (%u) is already open.", (unsigned)obj->pdf->current_obj->number);
return (NULL);
}
// Write the header...
if (!_pdfioDictGetValue(obj->value.value.dict, "Length"))
{
@ -193,7 +205,8 @@ pdfioObjCreateStream(
if (!_pdfioFilePuts(obj->pdf, "stream\n"))
return (NULL);
obj->stream_offset = _pdfioFileTell(obj->pdf);
obj->stream_offset = _pdfioFileTell(obj->pdf);
obj->pdf->current_obj = obj;
// Return the new stream...
return (_pdfioStreamCreate(obj, length_obj, filter));
@ -412,7 +425,7 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object
// Then grab the object value...
_pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, obj->pdf);
if (!_pdfioValueRead(obj->pdf, obj, &tb, &obj->value))
if (!_pdfioValueRead(obj->pdf, obj, &tb, &obj->value, 0))
{
_pdfioFileError(obj->pdf, "Unable to read value for object %lu.", (unsigned long)obj->number);
return (false);
@ -454,6 +467,12 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
if (!obj)
return (NULL);
if (obj->pdf->current_obj)
{
_pdfioFileError(obj->pdf, "Another object (%u) is already open.", (unsigned)obj->pdf->current_obj->number);
return (NULL);
}
// Make sure we've loaded the object dictionary...
if (!obj->value.type)
{
@ -466,6 +485,8 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
return (NULL);
// Open the stream...
obj->pdf->current_obj = obj;
return (_pdfioStreamOpen(obj, decode));
}

View File

@ -1,7 +1,7 @@
//
// PDF page functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -14,6 +14,13 @@
#include "pdfio-private.h"
//
// Local functions...
//
static _pdfio_value_t *get_contents(pdfio_obj_t *page);
//
// 'pdfioPageCopy()' - Copy a page to a PDF file.
//
@ -47,3 +54,74 @@ pdfioPageCopy(pdfio_file_t *pdf, // I - PDF file
else
return (_pdfioFileAddPage(pdf, dstpage));
}
//
// 'pdfioPageGetNumStreams()' - Get the number of content streams for a page object.
//
// The count is derived from the page's "Contents" value: the array size when
// the value is an array, `1` for any other value, and `0` when no value is
// available.
//

size_t					// O - Number of streams
pdfioPageGetNumStreams(
    pdfio_obj_t *page)			// I - Page object
{
  size_t		count = 0;	// Number of streams to report
  _pdfio_value_t	*value = get_contents(page);
					// Contents value


  if (value)
  {
    // An array holds one entry per stream, anything else counts as one stream
    count = (value->type == PDFIO_VALTYPE_ARRAY) ? pdfioArrayGetSize(value->value.array) : 1;
  }

  return (count);
}
//
// 'pdfioPageOpenStream()' - Open a content stream for a page.
//
pdfio_stream_t * // O - Stream
pdfioPageOpenStream(
pdfio_obj_t *page, // I - Page object
size_t n, // I - Stream index (0-based)
bool decode) // I - `true` to decode/decompress stream
{
_pdfio_value_t *contents = get_contents(page);
// Contents value
if (!contents)
return (NULL);
else if (contents->type == PDFIO_VALTYPE_ARRAY && n < pdfioArrayGetSize(contents->value.array))
return (pdfioObjOpenStream(pdfioArrayGetObj(contents->value.array, n), decode));
else if (n)
return (NULL);
else
return (pdfioObjOpenStream(pdfioFileFindObj(page->pdf, contents->value.indirect.number), decode));
}
//
// 'get_contents()' - Get a page's Contents value.
//
// Loads the page object's value first when it has not been read yet, then
// looks up the "Contents" key in the page dictionary.
//

static _pdfio_value_t *			// O - Value or NULL on error
get_contents(pdfio_obj_t *page)		// I - Page object
{
  // Reject a missing page, or one whose value still has to be loaded and
  // cannot be...
  if (!page || (page->value.type == PDFIO_VALTYPE_NONE && !_pdfioObjLoad(page)))
    return (NULL);

  // Only a dictionary value can carry a "Contents" entry...
  return (page->value.type == PDFIO_VALTYPE_DICT ? _pdfioDictGetValue(page->value.value.dict, "Contents") : NULL);
}

View File

@ -1,7 +1,7 @@
//
// Private header file for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -116,6 +116,8 @@
// Types and constants...
//
# define PDFIO_MAX_DEPTH 32 // Maximum nesting depth for values
typedef enum _pdfio_mode_e // Read/write mode
{
_PDFIO_MODE_READ, // Read a PDF file
@ -287,7 +289,8 @@ struct _pdfio_file_s // PDF file structure
pdfio_dict_t **dicts; // Dictionaries
size_t num_objs, // Number of objects
alloc_objs; // Allocated objects
pdfio_obj_t **objs; // Objects
pdfio_obj_t **objs, // Objects
*current_obj; // Current object being written/read
size_t num_objmaps, // Number of object maps
alloc_objmaps; // Allocated object maps
_pdfio_objmap_t *objmaps; // Object maps
@ -341,7 +344,7 @@ struct _pdfio_stream_s // Stream
extern void _pdfioArrayDebug(pdfio_array_t *a, FILE *fp) _PDFIO_INTERNAL;
extern void _pdfioArrayDelete(pdfio_array_t *a) _PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioArrayGetValue(pdfio_array_t *a, size_t n) _PDFIO_INTERNAL;
extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts) _PDFIO_INTERNAL;
extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts, size_t depth) _PDFIO_INTERNAL;
extern bool _pdfioArrayWrite(pdfio_array_t *a, pdfio_obj_t *obj) _PDFIO_INTERNAL;
extern void _pdfioCryptoAESInit(_pdfio_aes_t *ctx, const uint8_t *key, size_t keylen, const uint8_t *iv) _PDFIO_INTERNAL;
@ -365,7 +368,7 @@ extern void _pdfioDictClear(pdfio_dict_t *dict, const char *key) _PDFIO_INTERNA
extern void _pdfioDictDebug(pdfio_dict_t *dict, FILE *fp) _PDFIO_INTERNAL;
extern void _pdfioDictDelete(pdfio_dict_t *dict) _PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioDictGetValue(pdfio_dict_t *dict, const char *key) _PDFIO_INTERNAL;
extern pdfio_dict_t *_pdfioDictRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts) _PDFIO_INTERNAL;
extern pdfio_dict_t *_pdfioDictRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts, size_t depth) _PDFIO_INTERNAL;
extern bool _pdfioDictSetValue(pdfio_dict_t *dict, const char *key, _pdfio_value_t *value) _PDFIO_INTERNAL;
extern bool _pdfioDictWrite(pdfio_dict_t *dict, pdfio_obj_t *obj, off_t *length) _PDFIO_INTERNAL;
@ -405,7 +408,7 @@ extern bool _pdfioTokenRead(_pdfio_token_t *tb, char *buffer, size_t bufsize);
extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) _PDFIO_INTERNAL;
extern void _pdfioValueDebug(_pdfio_value_t *v, FILE *fp) _PDFIO_INTERNAL;
extern void _pdfioValueDelete(_pdfio_value_t *v) _PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts, _pdfio_value_t *v) _PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts, _pdfio_value_t *v, size_t depth) _PDFIO_INTERNAL;
extern bool _pdfioValueWrite(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_value_t *v, off_t *length) _PDFIO_INTERNAL;
#endif // !PDFIO_PRIVATE_H

View File

@ -1,7 +1,7 @@
//
// SHA-256 functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
// Copyright © 2011 IETF Trust and the persons identified as authors of the
// code. All rights reserved.
//
@ -217,7 +217,7 @@ _pdfioCryptoSHA256Append(_pdfio_sha256_t *context, const uint8_t *message_array,
*/
void
_pdfioCryptoSHA256Finish(_pdfio_sha256_t *context,
uint8_t Message_Digest[SHA256HashSize])
uint8_t *Message_Digest)
{
SHA224_256ResultN(context, Message_Digest, SHA256HashSize);
}

View File

@ -1,7 +1,7 @@
//
// PDF stream functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -174,6 +174,8 @@ pdfioStreamClose(pdfio_stream_t *st) // I - Stream
done:
st->pdf->current_obj = NULL;
free(st->prbuffer);
free(st->psbuffer);
free(st);
@ -364,6 +366,13 @@ pdfioStreamConsume(pdfio_stream_t *st, // I - Stream
//
// 'pdfioStreamGetToken()' - Read a single PDF token from a stream.
//
// This function reads a single PDF token from a stream. Operator tokens,
// boolean values, and numbers are returned as-is in the provided string buffer.
// String values start with the opening parenthesis ('(') but have all escaping
// resolved and the terminating parenthesis removed. Hexadecimal string values
// start with the opening angle bracket ('<') and have all whitespace and the
// terminating angle bracket removed.
//
bool // O - `true` on success, `false` on EOF
pdfioStreamGetToken(
@ -372,6 +381,7 @@ pdfioStreamGetToken(
size_t bufsize) // I - Size of string buffer
{
_pdfio_token_t tb; // Token buffer/stack
bool ret; // Return value
// Range check input...
@ -381,7 +391,10 @@ pdfioStreamGetToken(
// Read using the token engine...
_pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
return (_pdfioTokenRead(&tb, buffer, bufsize));
ret = _pdfioTokenRead(&tb, buffer, bufsize);
_pdfioTokenFlush(&tb);
return (ret);
}
@ -995,6 +1008,7 @@ stream_read(pdfio_stream_t *st, // I - Stream
size_t bytes) // I - Number of bytes to read
{
ssize_t rbytes; // Bytes read
uInt avail_in, avail_out; // Previous flate values
if (st->filter == PDFIO_FILTER_NONE)
@ -1047,11 +1061,19 @@ stream_read(pdfio_stream_t *st, // I - Stream
st->flate.next_out = (Bytef *)buffer;
st->flate.avail_out = (uInt)bytes;
avail_in = st->flate.avail_in;
avail_out = st->flate.avail_out;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data: %s", zstrerror(status));
return (-1);
}
else if (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out)
{
_pdfioFileError(st->pdf, "Corrupt stream data.");
return (-1);
}
return (st->flate.next_out - (Bytef *)buffer);
}
@ -1100,12 +1122,15 @@ stream_read(pdfio_stream_t *st, // I - Stream
st->flate.avail_in = (uInt)rbytes;
}
avail_in = st->flate.avail_in;
avail_out = st->flate.avail_out;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data: %s", zstrerror(status));
return (-1);
}
else if (status == Z_STREAM_END)
else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
break;
}
@ -1167,12 +1192,15 @@ stream_read(pdfio_stream_t *st, // I - Stream
st->flate.avail_in = (uInt)rbytes;
}
avail_in = st->flate.avail_in;
avail_out = st->flate.avail_out;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data: %s", zstrerror(status));
return (-1);
}
else if (status == Z_STREAM_END)
else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
break;
}

View File

@ -1,7 +1,7 @@
//
// PDF token parsing functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -129,9 +129,20 @@ _pdfioTokenGet(_pdfio_token_t *tb, // I - Token buffer/stack
if (tb->num_tokens > 0)
{
// Yes, return it...
size_t len; // Length of token
tb->num_tokens --;
strncpy(buffer, tb->tokens[tb->num_tokens], bufsize - 1);
buffer[bufsize - 1] = '\0';
if ((len = strlen(tb->tokens[tb->num_tokens])) > (bufsize - 1))
{
// Value too large...
PDFIO_DEBUG("_pdfioTokenGet(tb=%p, buffer=%p, bufsize=%u): Token '%s' from stack too large.\n", tb, buffer, (unsigned)bufsize, tb->tokens[tb->num_tokens]);
*buffer = '\0';
return (false);
}
memcpy(buffer, tb->tokens[tb->num_tokens], len);
buffer[len] = '\0';
PDFIO_DEBUG("_pdfioTokenGet(tb=%p, buffer=%p, bufsize=%u): Popping '%s' from stack.\n", tb, buffer, (unsigned)bufsize, buffer);
@ -536,7 +547,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr = '\0';
PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
// PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
return (bufptr > buffer);
}
@ -573,6 +584,7 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + bytes;
#if 0
#ifdef DEBUG
unsigned char *ptr; // Pointer into buffer
@ -586,6 +598,7 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
}
PDFIO_DEBUG("'\n");
#endif // DEBUG
#endif // 0
}
// Return the next character...

View File

@ -1,7 +1,7 @@
//
// PDF value functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -196,7 +196,8 @@ _pdfio_value_t * // O - Value or `NULL` on error/EOF
_pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
pdfio_obj_t *obj, // I - Object, if any
_pdfio_token_t *tb, // I - Token buffer/stack
_pdfio_value_t *v) // I - Value
_pdfio_value_t *v, // I - Value
size_t depth) // I - Depth of value
{
char token[32768]; // Token buffer
#ifdef DEBUG
@ -218,7 +219,6 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
PDFIO_DEBUG("_pdfioValueRead(pdf=%p, obj=%p, v=%p)\n", pdf, obj, v);
(void)obj; // TODO: Implement decryption
if (!_pdfioTokenGet(tb, token, sizeof(token)))
return (NULL);
@ -226,15 +226,27 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
if (!strcmp(token, "["))
{
// Start of array
if (depth >= PDFIO_MAX_DEPTH)
{
_pdfioFileError(pdf, "Too many nested arrays.");
return (NULL);
}
v->type = PDFIO_VALTYPE_ARRAY;
if ((v->value.array = _pdfioArrayRead(pdf, obj, tb)) == NULL)
if ((v->value.array = _pdfioArrayRead(pdf, obj, tb, depth + 1)) == NULL)
return (NULL);
}
else if (!strcmp(token, "<<"))
{
// Start of dictionary
if (depth >= PDFIO_MAX_DEPTH)
{
_pdfioFileError(pdf, "Too many nested dictionaries.");
return (NULL);
}
v->type = PDFIO_VALTYPE_DICT;
if ((v->value.dict = _pdfioDictRead(pdf, obj, tb)) == NULL)
if ((v->value.dict = _pdfioDictRead(pdf, obj, tb, depth + 1)) == NULL)
return (NULL);
}
else if (!strncmp(token, "(D:", 3))
@ -271,6 +283,7 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
}
}
}
if (token[i])
{
// Just a string...
@ -354,6 +367,33 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
*dataptr++ = (unsigned char)d;
}
if (obj && pdf->encryption)
{
// Decrypt the string...
_pdfio_crypto_ctx_t ctx; // Decryption context
_pdfio_crypto_cb_t cb; // Decryption callback
size_t ivlen; // Number of initialization vector bytes
uint8_t temp[32768]; // Temporary buffer for decryption
size_t templen; // Number of actual data bytes
if (v->value.binary.datalen > (sizeof(temp) - 32))
{
_pdfioFileError(pdf, "Unable to read encrypted binary string - too long.");
return (false);
}
cb = _pdfioCryptoMakeReader(pdf, obj, &ctx, v->value.binary.data, &ivlen);
templen = (cb)(&ctx, temp, v->value.binary.data + ivlen, v->value.binary.datalen - ivlen);
// Copy the decrypted string back to the value and adjust the length...
memcpy(v->value.binary.data, temp, templen);
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128)
v->value.binary.datalen = templen - temp[templen - 1];
else
v->value.binary.datalen = templen;
}
}
else if (strchr("0123456789-+.", token[0]) != NULL)
{

View File

@ -1,7 +1,7 @@
//
// Public header file for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -55,6 +55,8 @@ typedef struct _pdfio_array_s pdfio_array_t;
// Array of PDF values
typedef struct _pdfio_dict_s pdfio_dict_t;
// Key/value dictionary
typedef bool (*pdfio_dict_cb_t)(pdfio_dict_t *dict, const char *key, void *cb_data);
// Dictionary iterator callback
typedef struct _pdfio_file_s pdfio_file_t;
// PDF file
typedef bool (*pdfio_error_cb_t)(pdfio_file_t *pdf, const char *message, void *data);
@ -165,6 +167,7 @@ extern pdfio_obj_t *pdfioDictGetObj(pdfio_dict_t *dict, const char *key) _PDFIO_
extern pdfio_rect_t *pdfioDictGetRect(pdfio_dict_t *dict, const char *key, pdfio_rect_t *rect) _PDFIO_PUBLIC;
extern const char *pdfioDictGetString(pdfio_dict_t *dict, const char *key) _PDFIO_PUBLIC;
extern pdfio_valtype_t pdfioDictGetType(pdfio_dict_t *dict, const char *key) _PDFIO_PUBLIC;
extern void pdfioDictIterateKeys(pdfio_dict_t *dict, pdfio_dict_cb_t cb, void *cb_data) _PDFIO_PUBLIC;
extern bool pdfioDictSetArray(pdfio_dict_t *dict, const char *key, pdfio_array_t *value) _PDFIO_PUBLIC;
extern bool pdfioDictSetBinary(pdfio_dict_t *dict, const char *key, const unsigned char *value, size_t valuelen) _PDFIO_PUBLIC;
extern bool pdfioDictSetBoolean(pdfio_dict_t *dict, const char *key, bool value) _PDFIO_PUBLIC;
@ -185,6 +188,7 @@ extern pdfio_obj_t *pdfioFileCreateObj(pdfio_file_t *pdf, pdfio_dict_t *dict) _P
extern pdfio_file_t *pdfioFileCreateOutput(pdfio_output_cb_t output_cb, void *output_ctx, const char *version, pdfio_rect_t *media_box, pdfio_rect_t *crop_box, pdfio_error_cb_t error_cb, void *error_data) _PDFIO_PUBLIC;
// TODO: Add number, array, string, etc. versions of pdfioFileCreateObject?
extern pdfio_stream_t *pdfioFileCreatePage(pdfio_file_t *pdf, pdfio_dict_t *dict) _PDFIO_PUBLIC;
extern pdfio_file_t *pdfioFileCreateTemporary(char *buffer, size_t bufsize, const char *version, pdfio_rect_t *media_box, pdfio_rect_t *crop_box, pdfio_error_cb_t error_cb, void *error_data) _PDFIO_PUBLIC;
extern pdfio_obj_t *pdfioFileFindObj(pdfio_file_t *pdf, size_t number) _PDFIO_PUBLIC;
extern const char *pdfioFileGetAuthor(pdfio_file_t *pdf) _PDFIO_PUBLIC;
extern time_t pdfioFileGetCreationDate(pdfio_file_t *pdf) _PDFIO_PUBLIC;

View File

@ -2,5 +2,5 @@ Name: pdfio
Description: PDF read/write library
URL: https://www.msweet.org/pdfio
Requires: zlib >= 1.0
Libs: -L${prefix}/lib -lpdfio
Libs: -L${prefix}/lib -lpdfio -lm
Cflags: -I${prefix}/include

View File

@ -87,7 +87,7 @@
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>PDFIO_VERSION="1.1.0";WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
@ -101,7 +101,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>PDFIO_VERSION="1.1.0";WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
@ -115,7 +115,7 @@
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>PDFIO_VERSION="1.1.0";_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
@ -130,7 +130,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>PDFIO_VERSION="1.1.0";NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>

View File

@ -372,7 +372,7 @@
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.0.0;
CURRENT_PROJECT_VERSION = 1.1;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
@ -450,7 +450,7 @@
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.0.0;
CURRENT_PROJECT_VERSION = 1.1;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_NS_ASSERTIONS = NO;

View File

@ -101,6 +101,7 @@ pdfioContentPathClose
pdfioContentPathCurve
pdfioContentPathCurve13
pdfioContentPathCurve23
pdfioContentPathEnd
pdfioContentPathLineTo
pdfioContentPathMoveTo
pdfioContentPathRect
@ -154,6 +155,7 @@ pdfioDictGetObj
pdfioDictGetRect
pdfioDictGetString
pdfioDictGetType
pdfioDictIterateKeys
pdfioDictSetArray
pdfioDictSetBinary
pdfioDictSetBoolean
@ -177,6 +179,7 @@ pdfioFileCreateImageObjFromFile
pdfioFileCreateObj
pdfioFileCreateOutput
pdfioFileCreatePage
pdfioFileCreateTemporary
pdfioFileFindObj
pdfioFileGetAuthor
pdfioFileGetCreationDate
@ -219,6 +222,8 @@ pdfioPageCopy
pdfioPageDictAddColorSpace
pdfioPageDictAddFont
pdfioPageDictAddImage
pdfioPageGetNumStreams
pdfioPageOpenStream
pdfioStreamClose
pdfioStreamConsume
pdfioStreamGetToken

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native</id>
<title>PDFio Library for VS2019+</title>
<version>1.0.0-b7</version>
<version>1.1.0</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
@ -12,11 +12,11 @@
<readme>build/native/README.md</readme>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2021 by Michael R Sweet</copyright>
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2023 by Michael R Sweet</copyright>
<tags>pdf file native</tags>
<dependencies>
<dependency id="pdfio_native.redist" version="1.0.0-b7" />
<dependency id="pdfio_native.redist" version="1.1.0" />
<dependency id="zlib_native.redist" version="1.2.11" />
</dependencies>
</metadata>

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native.redist</id>
<title>PDFio Library for VS2019+</title>
<version>1.0.0-b7</version>
<version>1.1.0</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
@ -12,8 +12,8 @@
<readme>build/native/README.md</readme>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2021 by Michael R Sweet</copyright>
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2023 by Michael R Sweet</copyright>
<tags>pdf file native</tags>
</metadata>
<files>

95
pdfiototext.c Normal file
View File

@ -0,0 +1,95 @@
//
// PDF to text program for PDFio.
//
// Copyright © 2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
//
// Usage:
//
// ./pdfiototext FILENAME.pdf > FILENAME.txt
//
#include "pdfio.h"
#include <string.h>
//
// 'main()' - Main entry.
//
// Opens the PDF file named on the command-line and walks every content stream
// of every page.  String tokens are written to the standard output separated
// by spaces, and a newline is written for each "Td", "TD", "T*", "'", and '"'
// operator.  Pages and streams that cannot be opened are skipped.
//
// The exit status is 1 when the arguments are wrong or the file cannot be
// opened, and 0 otherwise.
//

int					// O - Exit status
main(int  argc,				// I - Number of command-line arguments
     char *argv[])			// I - Command-line arguments
{
  pdfio_file_t		*file;		// PDF file
  size_t		i, j,		// Looping vars
			num_pages,	// Number of pages
			num_streams;	// Number of streams for page
  pdfio_obj_t		*obj;		// Current page object
  pdfio_stream_t	*st;		// Current page content stream
  char			buffer[1024];	// String buffer
  bool			first;		// First string token?


  // Verify command-line arguments...
  if (argc != 2)
  {
    // Report usage on stderr: stdout is normally redirected to the text file,
    // so a message written there would end up in the output instead of being
    // seen by the user.
    fputs("Usage: pdfiototext FILENAME.pdf > FILENAME.txt\n", stderr);
    return (1);
  }

  // Open the PDF file...
  if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
    return (1);

  // Try grabbing content from all of the pages...
  for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
  {
    if ((obj = pdfioFileGetPage(file, i)) == NULL)
      continue;

    num_streams = pdfioPageGetNumStreams(obj);

    for (j = 0; j < num_streams; j ++)
    {
      if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
        continue;

      first = true;
      while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
      {
        if (buffer[0] == '(')
        {
          // String token: skip the leading '(' and separate strings on the
          // same line with a single space...
          if (first)
            first = false;
          else
            putchar(' ');

          fputs(buffer + 1, stdout);
        }
        else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
        {
          // These operators end the current output line...
          putchar('\n');
          first = true;
        }
      }

      // Terminate a partially written line before moving to the next stream...
      if (!first)
        putchar('\n');

      pdfioStreamClose(st);
    }
  }

  pdfioFileClose(file);

  return (0);
}

View File

@ -34,6 +34,7 @@ static int do_test_file(const char *filename, int objnum, bool verbose);
static int do_unit_tests(void);
static int draw_image(pdfio_stream_t *st, const char *name, double x, double y, double w, double h, const char *label);
static bool error_cb(pdfio_file_t *pdf, const char *message, bool *error);
static bool iterate_cb(pdfio_dict_t *dict, const char *key, void *cb_data);
static ssize_t output_cb(int *fd, const void *buffer, size_t bytes);
static const char *password_cb(void *data, const char *filename);
static int read_unit_file(const char *filename, size_t num_pages, size_t first_image, bool is_output);
@ -405,7 +406,7 @@ do_test_file(const char *filename, // I - PDF filename
filter = pdfioDictGetName(dict, "Filter");
if ((st = pdfioObjOpenStream(obj, (filter && !strcmp(filter, "FlateDecode")) ? PDFIO_FILTER_FLATE : PDFIO_FILTER_NONE)) == NULL)
if ((st = pdfioObjOpenStream(obj, filter && !strcmp(filter, "FlateDecode"))) == NULL)
{
_pdfioValueDebug(&obj->value, stdout);
putchar('\n');
@ -506,6 +507,9 @@ do_unit_tests(void)
_pdfio_value_t value; // Value
size_t first_image, // First image object
num_pages; // Number of pages written
char temppdf[1024]; // Temporary PDF file
pdfio_dict_t *dict; // Test dictionary
int count = 0; // Number of key/value pairs
static const char *complex_dict = // Complex dictionary value
"<</Annots 5457 0 R/Contents 5469 0 R/CropBox[0 0 595.4 842]/Group 725 0 R"
"/MediaBox[0 0 595.4 842]/Parent 23513 0 R/Resources<</ColorSpace<<"
@ -967,11 +971,46 @@ do_unit_tests(void)
// TODO: Test for known values in this test file.
// Test dictionary APIs
fputs("pdfioDictCreate: ", stdout);
if ((dict = pdfioDictCreate(inpdf)) != NULL)
{
puts("PASS");
fputs("pdfioDictSet*: ", stdout);
if (pdfioDictSetBoolean(dict, "Boolean", true) && pdfioDictSetName(dict, "Name", "Name") && pdfioDictSetNumber(dict, "Number", 42.0) && pdfioDictSetString(dict, "String", "String"))
{
puts("PASS");
}
else
{
puts("FAIL");
return (1);
}
fputs("pdfioDictIterateKeys: ", stdout);
pdfioDictIterateKeys(dict, iterate_cb, &count);
if (count == 4)
{
puts("PASS");
}
else
{
printf("FAIL (got %d, expected 4)\n", count);
return (1);
}
}
else
{
puts("FAIL");
return (1);
}
// Test the value parsers for edge cases...
fputs("_pdfioValueRead(complex_dict): ", stdout);
s = complex_dict;
_pdfioTokenInit(&tb, inpdf, (_pdfio_tconsume_cb_t)token_consume_cb, (_pdfio_tpeek_cb_t)token_peek_cb, (void *)&s);
if (_pdfioValueRead(inpdf, NULL, &tb, &value))
if (_pdfioValueRead(inpdf, NULL, &tb, &value, 0))
{
// TODO: Check value...
fputs("PASS: ", stdout);
@ -985,7 +1024,7 @@ do_unit_tests(void)
fputs("_pdfioValueRead(cid_dict): ", stdout);
s = cid_dict;
_pdfioTokenInit(&tb, inpdf, (_pdfio_tconsume_cb_t)token_consume_cb, (_pdfio_tpeek_cb_t)token_peek_cb, (void *)&s);
if (_pdfioValueRead(inpdf, NULL, &tb, &value))
if (_pdfioValueRead(inpdf, NULL, &tb, &value, 0))
{
// TODO: Check value...
fputs("PASS: ", stdout);
@ -1107,6 +1146,18 @@ do_unit_tests(void)
if (read_unit_file("testpdfio-aesp.pdf", num_pages, first_image, false))
return (1);
fputs("pdfioFileCreateTemporary: ", stdout);
if ((outpdf = pdfioFileCreateTemporary(temppdf, sizeof(temppdf), NULL, NULL, NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL)
printf("PASS (%s)\n", temppdf);
else
return (1);
if (write_unit_file(inpdf, outpdf, &num_pages, &first_image))
return (1);
if (read_unit_file(temppdf, num_pages, first_image, false))
return (1);
pdfioFileClose(inpdf);
return (0);
@ -1199,6 +1250,27 @@ error_cb(pdfio_file_t *pdf, // I - PDF file
}
//
// 'iterate_cb()' - Test pdfioDictIterateKeys function.
//
// Adds one to the `int` counter that `cb_data` points to for each call that
// has a dictionary, a key, and callback data.
//

static bool				// O - `true` to continue, `false` to stop
iterate_cb(pdfio_dict_t *dict,		// I - Dictionary
           const char   *key,		// I - Key
           void         *cb_data)	// I - Callback data
{
  int	*counter;			// Counter to bump


  if (dict && key && cb_data)
  {
    // All arguments present, count this key and keep going...
    counter = (int *)cb_data;
    ++*counter;

    return (true);
  }

  // Missing argument, stop iterating...
  return (false);
}
//
// 'output_cb()' - Write output to a file.
//
@ -1238,6 +1310,7 @@ read_unit_file(const char *filename, // I - File to read
{
pdfio_file_t *pdf; // PDF file
size_t i; // Looping var
const char *s; // String
bool error = false; // Error callback data
@ -1248,6 +1321,87 @@ read_unit_file(const char *filename, // I - File to read
else
return (1);
// Verify metadata...
fputs("pdfioFileGetAuthor: ", stdout);
if ((s = pdfioFileGetAuthor(pdf)) != NULL && !strcmp(s, "Michael R Sweet"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'Michael R Sweet')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'Michael R Sweet')");
return (1);
}
fputs("pdfioFileGetCreator: ", stdout);
if ((s = pdfioFileGetCreator(pdf)) != NULL && !strcmp(s, "testpdfio"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'testpdfio')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'testpdfio')");
return (1);
}
fputs("pdfioFileGetKeywords: ", stdout);
if ((s = pdfioFileGetKeywords(pdf)) != NULL && !strcmp(s, "one fish,two fish,red fish,blue fish"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'one fish,two fish,red fish,blue fish')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'one fish,two fish,red fish,blue fish')");
return (1);
}
fputs("pdfioFileGetSubject: ", stdout);
if ((s = pdfioFileGetSubject(pdf)) != NULL && !strcmp(s, "Unit test document"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'Unit test document')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'Unit test document')");
return (1);
}
fputs("pdfioFileGetTitle: ", stdout);
if ((s = pdfioFileGetTitle(pdf)) != NULL && !strcmp(s, "Test Document"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'Test Document')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'Test Document')");
return (1);
}
// Verify the number of pages is the same...
fputs("pdfioFileGetNumPages: ", stdout);
if (num_pages == pdfioFileGetNumPages(pdf))
@ -3167,13 +3321,13 @@ write_unit_file(
// Create some image objects...
fputs("pdfioFileCreateImageObjFromFile(\"testfiles/color.jpg\"): ", stdout);
if ((color_jpg = pdfioFileCreateImageObjFromFile(outpdf, "testfiles/color.jpg", true)) != NULL)
puts("PASS");
printf("PASS (%u)\n", (unsigned)pdfioObjGetNumber(color_jpg));
else
return (1);
fputs("pdfioFileCreateImageObjFromFile(\"testfiles/gray.jpg\"): ", stdout);
if ((gray_jpg = pdfioFileCreateImageObjFromFile(outpdf, "testfiles/gray.jpg", true)) != NULL)
puts("PASS");
printf("PASS (%u)\n", (unsigned)pdfioObjGetNumber(gray_jpg));
else
return (1);

6
ttf.c
View File

@ -420,7 +420,7 @@ ttfCreate(const char *filename, // I - Filename
if (read_os_2(font, &os_2))
{
// Copy key values from OS/2 table...
static const ttf_stretch_t widths[] =
static const ttf_stretch_t stretches[] =
{
TTF_STRETCH_ULTRA_CONDENSED, // ultra-condensed
TTF_STRETCH_EXTRA_CONDENSED, // extra-condensed
@ -433,8 +433,8 @@ ttfCreate(const char *filename, // I - Filename
TTF_STRETCH_ULTRA_EXPANDED // ultra-expanded
};
if (os_2.usWidthClass >= 1 && os_2.usWidthClass <= (int)(sizeof(widths) / sizeof(widths[0])))
font->stretch = widths[os_2.usWidthClass - 1];
if (os_2.usWidthClass >= 1 && os_2.usWidthClass <= (int)(sizeof(stretches) / sizeof(stretches[0])))
font->stretch = stretches[os_2.usWidthClass - 1];
font->weight = (short)os_2.usWeightClass;
font->cap_height = os_2.sCapHeight;