80 Commits

Author SHA1 Message Date
9fec2195d0 Update changelog. 2023-11-15 10:14:02 -05:00
8ccbdaed94 Update documentation. 2023-11-15 09:28:57 -05:00
4804db38a5 Fix missing ivlen initializer for 40-bit RC4 (Issue #51) 2023-11-15 08:43:07 -05:00
ddd984215a Save work (debug printfs, etc.) 2023-11-15 08:38:47 -05:00
efe7c01015 Fix typo and 'make debug'. 2023-11-14 18:38:26 -05:00
600fa4ce59 Fix Unicode font handling (Issue #16) 2023-11-14 18:19:34 -05:00
688810f143 Save work. 2023-11-13 16:18:02 -05:00
858cc101b6 Save work. 2023-11-13 13:39:06 -05:00
43114f43bf Bump version. 2023-10-10 10:22:27 -04:00
c4abceef79 Make Visual Studio compiler happy. 2023-10-10 07:24:27 -04:00
2e91e05d7b Allow "compound" filters that consist of a single named filter (Issue #47) 2023-10-10 07:14:12 -04:00
7e3db6b639 Merge pull request #48 from crystalidea/master
added windows.h header for GetTempPathA
2023-10-10 07:07:46 -04:00
acb6b66bdb added windows.h header for GetTempPathA 2023-10-10 09:12:03 +02:00
b0a66eef78 Fix reading of PDF files from Crystal Reports (Issue #45) 2023-10-09 10:04:20 -04:00
ed88322496 Debug logging, work in progress for Unicode text support. 2023-10-07 12:05:18 -04:00
59959bf0e5 Merge TTF changes to fix off-by-one error. 2023-10-06 16:44:20 -04:00
19c45871fa Update pdfioContentSetDashPattern to support setting solid line styles (Issue #41) 2023-10-06 15:47:27 -04:00
b0e4646f9d Rework CR/LF skip code to be more consistent. 2023-10-06 14:41:55 -04:00
12ef2fe2c3 Remove LGTM badges. 2023-10-06 14:40:40 -04:00
4630060ee7 Update security reporting and contribution text. 2023-10-06 14:40:28 -04:00
74a6fb1860 Get rid of superfluous comments. 2023-10-06 14:40:08 -04:00
a3ea0a99ff Cleanup spacing and comments. 2023-10-06 14:39:42 -04:00
fdfa700442 Update ignored files. 2023-10-06 14:39:10 -04:00
d759baf11e Bump version and put PDFIO_VERSION definition in the pdfio.h header. 2023-10-06 14:38:38 -04:00
7f6ffcda22 Fix a couple issues with parsing PDF files produced by Microsoft Reporting
Services (Issue #46)

- Odd cross-reference stream containing 3-byte generation number field for this
  16-bit value
- Odd empty hex strings
2023-10-06 10:46:30 -04:00
87ca4db73f Clean up private header. 2023-10-02 05:27:40 -04:00
a83f7f50ff Allow extra whitespace/newlines after stream tokens (Issue #40) 2023-10-02 05:06:33 -04:00
6a4ce57d09 Bump versions for Mac/Windows project files. 2023-03-20 10:40:25 -04:00
d4c594cec4 Bump copyright in readme. 2023-03-20 10:22:19 -04:00
35c674b633 Fix another build issue. 2023-03-20 10:11:05 -04:00
97d4955666 Fix potential denial-of-service in flate stream code. 2023-03-20 09:27:19 -04:00
e138232a93 Fix build error due to mismatched function declarations. 2023-03-20 08:19:31 -04:00
8d8225f4a1 Fix release date. 2023-02-07 17:10:48 -05:00
7045d9dad9 Bump Windows version numbers and update exports file. 2023-02-06 17:36:54 -05:00
4f10021e7e Fix denial-of-service attack when reading corrupt PDF files. 2023-02-03 20:39:04 -05:00
57d5894f33 Update pdfioStreamGetToken documentation (Issue #37) 2023-01-11 17:13:58 -05:00
2b8a1c8481 Fix CodeQL config file syntax. 2022-12-09 11:31:56 -05:00
948ee16b06 Fix the one "value" complaint from CodeQL and suppress all useless queries. 2022-12-09 11:22:10 -05:00
c7101ae9dd Add CodeQL scanning. 2022-12-09 11:09:34 -05:00
599640eda1 Update makefile to be silent with basic progress reporting. 2022-08-02 09:41:13 -04:00
a3f3bbfe11 Fix pdfioFileGetAuthor, etc. APIs (Issue #33) 2022-07-12 18:36:08 -04:00
26d485cfc5 Update Windows DLL exports file. 2022-07-06 15:25:45 -04:00
64d306a322 Cleanup. 2022-07-06 08:47:52 -04:00
067683cbcd Add some protection against opening multiple streams in the same file at the same time. 2022-07-04 13:03:11 -04:00
50f27974cf Update documentation. 2022-07-03 10:01:20 -04:00
ae9a91719b Add pdfioContentPathEnd function. 2022-07-03 10:01:10 -04:00
1a17933635 Fix pdfioContentMatrixRotate function. 2022-07-01 20:30:40 -04:00
acea6fdbed Changelog. 2022-06-27 17:17:44 -04:00
66fa12f928 Update Windows DLL exports file. 2022-06-27 10:17:21 -04:00
f4b8983c61 Implement pdfioDictIterateKeys API (Issue #31) 2022-06-27 10:17:00 -04:00
ed4e2fc38a Merge pull request #32 from ire4ever1190/patch-1
Fix `install-shared` Make task
2022-06-09 09:46:23 -04:00
1ed7f0089c Update Makefile 2022-06-09 14:33:53 +10:00
563d53edd4 Update Windows DLL exports file. 2022-05-24 19:16:20 -04:00
316b0ad559 Add pdfioFileCreateTemporary function (Issue #29) 2022-05-15 22:52:53 -04:00
f8b471acfd Update README and NOTICE files... 2022-03-02 09:50:14 -05:00
cedd7d104f Changelog update. 2022-03-02 09:47:14 -05:00
6378047026 Update VC project. 2022-03-02 09:31:33 -05:00
54578144a0 Update documentation and prep for 1.0.1 release. 2022-03-02 09:30:01 -05:00
f7f2969e3a Fix pdfioStreamGetToken implementation (wasn't flushing input), update
pdfiototext code to better handle different text operators that affect the
location of the text.
2022-03-01 09:18:56 -05:00
93a3fcea6c Add missing pdfioPageGetNumStreams and pdfioPageOpenStream functions.
Add initial version of pdfiototext text extraction utility.
2022-02-28 15:00:25 -05:00
fa20982e5d Coverity certs are fixed. 2021-12-15 18:20:54 -05:00
44d20eba1b Add stub code for AES-256 to force Coverity to re-analyze... 2021-12-15 07:35:55 -05:00
c0b7925cdf Fix typo. 2021-12-15 07:28:17 -05:00
68dcf021b2 Download Entrust root cert for validation. 2021-12-15 07:25:44 -05:00
b0a8e60968 Also allow posts to coverity.com while we wait for Ubuntu to pick up the new Entrust root certificate. 2021-12-15 07:10:13 -05:00
9d47745e43 Prep for 1.0rc1. 2021-12-15 06:53:09 -05:00
b0bf2e04b9 Coverity's certificate has expired. 2021-12-14 16:26:57 -05:00
f030112372 See what is happening when downloading Coverity build tool (drop quiet option). 2021-12-14 16:21:49 -05:00
79c4b6f8a8 See what is happening when downloading Coverity build tool. 2021-12-14 16:20:34 -05:00
bd2f9d44d4 Prep for 1.0.0 release. 2021-12-14 12:36:33 -05:00
3c7a980a0b Don't include AFL files in source archives. 2021-11-30 08:46:43 -05:00
019c05d04a Fix AFL target, remove excess PDF test files. 2021-11-30 08:13:41 -05:00
7ab550254a Add AFL make target (runs for 10 minutes). 2021-11-29 20:59:30 -05:00
fa8e54cca2 Add some files to use for AFL++. 2021-11-29 18:54:40 -05:00
d92fcb7bfb Add AFL++ PDF dictionary. 2021-11-29 18:47:04 -05:00
001dcbb123 Fix testpdfio build - dependencies on pdfio-private.h were missing. 2021-11-29 17:57:49 -05:00
a431d7806f Fix a few stack/buffer overflow bugs discovered by Bart, Steffan, and Mark from
the Radboud University NL (thanks!)

- Add depth argument to all value read functions that recurse
- Add depth argument to page tree loading code
- Validate xref stream sizes individually to avoid out-of-bounds access to local
  xref buffer.
2021-11-29 17:46:56 -05:00
ec8e900ea5 Add math library to libs. 2021-11-18 19:23:42 -05:00
c73aa7ae20 Add link for builds. 2021-11-11 06:52:23 -05:00
c53786e0e1 Changelog. 2021-11-07 11:29:18 -05:00
64 changed files with 3383 additions and 327 deletions

2
.gitattributes vendored
View File

@ -1,2 +1,4 @@
.git* export-ignore
afl-pdf.dict export-ignore
afl-input export-ignore
makesrcdist export-ignore

22
.github/codeql.yml vendored Normal file
View File

@ -0,0 +1,22 @@
paths-ignore:
- testpdfio.c
query-filters:
- exclude:
id: cpp/commented-out-code
- exclude:
id: cpp/toctou-race-condition
- exclude:
id: cpp/weak-cryptographic-algorithm
- exclude:
id: cpp/world-writable-file-creation
- exclude:
id: cpp/uncontrolled-allocation-size
- exclude:
id: cpp/path-injection
- exclude:
id: cpp/stack-address-escape
- exclude:
id: cpp/loop-variable-changed
- exclude:
id: cpp/long-switch

50
.github/workflows/codeql.yml vendored Normal file
View File

@ -0,0 +1,50 @@
name: "CodeQL"
on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]
schedule:
- cron: "46 3 * * 0"
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ cpp ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Update build environment
run: sudo apt-get update --fix-missing -y
- name: Install prerequisites
run: sudo apt-get install -y zlib1g-dev
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
config-file: ./.github/codeql.yml
queries: +security-and-quality
- name: Autobuild
uses: github/codeql-action/autobuild@v2
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
with:
category: "/language:${{ matrix.language }}"

5
.gitignore vendored
View File

@ -4,9 +4,14 @@
*.o
*.so.1
/.vs
/afl-output
/doc/pdfio.epub
/packages
/pdfio.xcodeproj/xcshareddata
/pdfio-*.tar.gz*
/pdfio-*.zip*
/pdfiototext
/testpdfio
/testpdfio-*.pdf
/testttf
/x64

View File

@ -1,2 +0,0 @@
queries:
- exclude: cpp/toctou-race-condition

View File

@ -2,6 +2,71 @@ Changes in PDFio
================
v1.1.3 (November 15, 2023)
--------------------------
- Fixed Unicode font support (Issue #16)
- Fixed missing initializer for 40-bit RC4 encryption (Issue #51)
v1.1.2 (October 10, 2023)
-------------------------
- Updated `pdfioContentSetDashPattern` to support setting a solid (0 length)
dash pattern (Issue #41)
- Fixed an issue with broken PDF files containing extra CR and/or LF separators
after the object stream token (Issue #40)
- Fixed an issue with PDF files produced by Crystal Reports (Issue #45)
- Fixed an issue with PDF files produced by Microsoft Reporting Services
(Issue #46)
- Fixed support for compound filters where the filter array consists of a
single named filter (Issue #47)
- Fixed builds on Windows - needed windows.h header for temporary files
(Issue #48)
v1.1.1 (March 20, 2023)
-----------------------
- CVE-2023-28428: Fixed a potential denial-of-service with corrupt PDF files.
- Fixed a few build issues.
v1.1.0 (February 6, 2023)
-------------------------
- CVE-2023-24808: Fixed a potential denial-of-service with corrupt PDF files.
- Added `pdfioFileCreateTemporary` function (Issue #29)
- Added `pdfioDictIterateKeys` function (Issue #31)
- Added `pdfioContentPathEnd` function.
- Added protection against opening multiple streams in the same file at the
same time.
- Documentation updates (Issue #37)
- Fixed "install-shared" target (Issue #32)
- Fixed `pdfioFileGet...` metadata APIs (Issue #33)
- Fixed `pdfioContentMatrixRotate` function.
v1.0.1 (March 2, 2022)
----------------------
- Added missing `pdfioPageGetNumStreams` and `pdfioPageOpenStream` functions.
- Added demo pdfiototext utility.
- Fixed bug in `pdfioStreamGetToken`.
v1.0.0 (December 14, 2021)
--------------------------
- First stable release.
v1.0rc1 (November 30, 2021)
---------------------------
- Fixed a few stack/buffer overflow bugs discovered via fuzzing.
v1.0b2 (November 7, 2021)
-------------------------
@ -10,6 +75,7 @@ v1.0b2 (November 7, 2021)
- Fixed `all-shared` target (Issue #22)
- Fixed memory leaks (Issue #23)
- Updated `pdfioContentSetDashPattern` to accept `double` values (Issue #25)
- Added support for reading and writing encrypted PDFs (Issue #26)
- Fixed some issues identified by a Coverity scan.

View File

@ -118,7 +118,7 @@ the source file and the copyright and licensing notice:
//
// Description of file contents.
//
// Copyright YYYY by AUTHOR.
// Copyright © YYYY by AUTHOR.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -330,7 +330,7 @@ typedef, for example:
All constant names are uppercase with underscores between words, e.g.,
`PDFIO_THIS_CONSTANT`, `PDFIO_THAT_CONSTANT`, etc. Constants begin with the
"PDFio\_" prefix to avoid conflicts with system constants. Private constants
"PDFIO\_" prefix to avoid conflicts with system constants. Private constants
start with an underscore, e.g., `_PDFIO_THIS_CONSTANT`,
`_PDFIO_THAT_CONSTANT`, etc.
@ -369,11 +369,12 @@ extensions MUST NOT be used.
The following variables are defined in the makefile:
- `AR`; the static library archiver command,
- `ARFLAGS`; options for the static library archiver command,
- `ARFLAGS`; options for the static library archiver,
- `CC`; the C compiler command,
- `CFLAGS`; options for the C compiler command,
- `CFLAGS`; options for the C compiler,
- `CODESIGN_IDENTITY`; the code signing identity,
- `COMMONFLAGS`; common compiler optimization options,
- `CPPFLAGS`; options for the C preprocessor,
- `DESTDIR`/`DSTROOT`; the destination root directory when installing,
- `DSO`; the shared library building command,
- `DSOFLAGS`; options for the shared library building command,
@ -395,4 +396,5 @@ The following standard targets are defined in the makefile:
with debug printfs and the clang address sanitizer enabled.
- `install`; installs all distribution files in their corresponding locations.
- `install-shared`; same as `install` but also installs the shared library.
- `macos`; same as `all` but creates a Universal Binary (X64 + ARM64).
- `test`; runs the unit test program, building it as needed.

View File

@ -1,7 +1,7 @@
#
# Makefile for PDFio.
#
# Copyright © 2021 by Michael R Sweet.
# Copyright © 2021-2023 by Michael R Sweet.
#
# Licensed under Apache License v2.0. See the file "LICENSE" for more
# information.
@ -10,15 +10,18 @@
# POSIX makefile
.POSIX:
# Variables...
# Build silently
.SILENT:
# Variables
AR = ar
ARFLAGS = cr
CC = cc
CFLAGS =
CODESIGN_IDENTITY = Developer ID
#COMMONFLAGS = -Os -g
COMMONFLAGS = -O0 -g
CPPFLAGS = '-DPDFIO_VERSION="$(VERSION)"'
COMMONFLAGS = -Os -g
#COMMONFLAGS = -O0 -g -fsanitize=address
CPPFLAGS =
DESTDIR = $(DSTROOT)
DSO = cc
DSOFLAGS =
@ -26,13 +29,14 @@ DSONAME =
LDFLAGS =
LIBS = -lm -lz
RANLIB = ranlib
VERSION = 1.0b2
VERSION = 1.1.3
prefix = /usr/local
# Base rules
.SUFFIXES: .c .h .o
.c.o:
echo Compiling $<...
$(CC) $(CFLAGS) $(CPPFLAGS) $(COMMONFLAGS) -c $<
@ -62,11 +66,15 @@ LIBOBJS = \
ttf.o
OBJS = \
$(LIBOBJS) \
testpdfio.o
pdfiototext.o \
testpdfio.o \
testttf.o
TARGETS = \
$(DSONAME) \
libpdfio.a \
testpdfio
pdfiototext \
testpdfio \
testttf
# Make everything
@ -82,6 +90,9 @@ all-shared:
debug:
$(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-g -fsanitize=address -DDEBUG=1" clean all
macos:
$(MAKE) -$(MAKEFLAGS) COMMONFLAGS="-Os -mmacosx-version-min=11 -arch x86_64 -arch arm64" clean all
# Clean everything
clean:
@ -90,8 +101,10 @@ clean:
# Install everything
install: $(TARGETS)
echo Installing header files to $(DESTDIR)$(prefix)/include...
-mkdir -p $(DESTDIR)$(prefix)/include
cp $(PUBHEADERS) $(DESTDIR)$(prefix)/include
echo Installing library files to $(DESTDIR)$(prefix)/lib...
-mkdir -p $(DESTDIR)$(prefix)/lib
cp libpdfio.a $(DESTDIR)$(prefix)/lib
$(RANLIB) $(DESTDIR)$(prefix)/lib/libpdfio.a
@ -103,26 +116,30 @@ install: $(TARGETS)
codesign -s "$(CODESIGN_IDENTITY)" -o runtime --timestamp $(DESTDIR)$(prefix)/lib/libpdfio.1.dylib; \
ln -sf libpdfio.1.dylib $(DESTDIR)$(prefix)/lib/libpdfio.dylib; \
fi
echo Installing pkg-config files to $(DESTDIR)$(prefix)/lib/pkgconfig...
-mkdir -p $(DESTDIR)$(prefix)/lib/pkgconfig
echo 'prefix="$(prefix)"' >$(DESTDIR)$(prefix)/lib/pkgconfig/pdfio.pc
echo 'Version: $(VERSION)' >>$(DESTDIR)$(prefix)/lib/pkgconfig/pdfio.pc
cat pdfio.pc.in >>$(DESTDIR)$(prefix)/lib/pkgconfig/pdfio.pc
echo Installing documentation to $(DESTDIR)$(prefix)/share/doc/pdfio...
-mkdir -p $(DESTDIR)$(prefix)/share/doc/pdfio
cp doc/pdfio.html doc/pdfio-512.png LICENSE NOTICE $(DESTDIR)$(prefix)/share/doc/pdfio
echo Installing man page to $(DESTDIR)$(prefix)/share/man/man3...
-mkdir -p $(DESTDIR)$(prefix)/share/man/man3
cp doc/pdfio.3 $(DESTDIR)$(prefix)/share/man/man3
install-shared:
if test `uname` = Darwin; then \
$(MAKE) DSONAME="libpdfio.1.dylib" -$(MAKEFLAGS) install; \
else
else \
$(MAKE) DSONAME="libpdfio.so.1" -$(MAKEFLAGS) install; \
fi
# Test everything
test: testpdfio
./testpdfio
./testttf 2>test.log
./testpdfio 2>test.log
valgrind: testpdfio
valgrind --leak-check=full ./testpdfio
@ -130,13 +147,16 @@ valgrind: testpdfio
# pdfio library
libpdfio.a: $(LIBOBJS)
echo Archiving $@...
$(AR) $(ARFLAGS) $@ $(LIBOBJS)
$(RANLIB) $@
libpdfio.so.1: $(LIBOBJS)
echo Linking $@...
$(CC) $(DSOFLAGS) $(COMMONFLAGS) -shared -o $@ -Wl,-soname,$@ $(LIBOBJS) $(LIBS)
libpdfio.1.dylib: $(LIBOBJS)
echo Linking $@...
$(CC) $(DSOFLAGS) $(COMMONFLAGS) -dynamiclib -o $@ -install_name $(prefix)/lib/$@ -current_version $(VERSION) -compatibility_version 1.0 $(LIBOBJS) $(LIBS)
@ -154,31 +174,52 @@ pdfio1.def: $(LIBOBJS) Makefile
grep -v '^_ttf' | sed -e '1,$$s/^_//' | sort >>$@
# pdfio text extraction (demo, doesn't handle a lot of things yet)
pdfiototext: pdfiototext.o libpdfio.a
echo Linking $@...
$(CC) $(LDFLAGS) $(COMMONFLAGS) -o $@ pdfiototext.o libpdfio.a $(LIBS)
# pdfio test program
testpdfio: testpdfio.o libpdfio.a
echo Linking $@...
$(CC) $(LDFLAGS) $(COMMONFLAGS) -o $@ testpdfio.o libpdfio.a $(LIBS)
# TTF test program
testttf: ttf.o testttf.o
echo Linking $@...
$(CC) $(LDFLAGS) $(COMMONFLAGS) -o testttf ttf.o testttf.o $(LIBS)
# Dependencies
$(OBJS): pdfio.h Makefile
$(LIBOBJS): pdfio-private.h
$(OBJS): pdfio.h pdfio-private.h Makefile
pdfio-content.o: pdfio-content.h ttf.h
testttf.o: ttf.h
ttf.o: ttf.h
# Make documentation using Codedoc <https://www.msweet.org/codedoc>
DOCFLAGS = \
--author "Michael R Sweet" \
--copyright "Copyright (c) 2021 by Michael R Sweet" \
--copyright "Copyright (c) 2021-2023 by Michael R Sweet" \
--docversion $(VERSION)
.PHONY: doc
doc:
echo Generating documentation...
codedoc $(DOCFLAGS) --title "PDFio Programming Manual v$(VERSION)" $(PUBHEADERS) $(PUBOBJS:.o=.c) --body doc/pdfio.md --coverimage doc/pdfio-512.png pdfio.xml >doc/pdfio.html
codedoc $(DOCFLAGS) --title "PDFio Programming Manual v$(VERSION)" --body doc/pdfio.md --coverimage doc/pdfio-epub.png pdfio.xml --epub doc/pdfio.epub
codedoc $(DOCFLAGS) --title "pdf read/write library" --man pdfio --section 3 --body doc/pdfio.md pdfio.xml >doc/pdfio.3
rm -f pdfio.xml
# Fuzz-test the library using AFL++ <https://aflplus.plus>
.PHONY: afl
afl:
$(MAKE) -$(MAKEFLAGS) CC="afl-clang-fast" COMMONFLAGS="-g" clean all
test afl-output || rm -rf afl-output
afl-fuzz -x afl-pdf.dict -i afl-input -o afl-output -V 600 -e pdf -t 5000 ./testpdfio @@
# Analyze code with the Clang static analyzer <https://clang-analyzer.llvm.org>
clang:
clang $(CPPFLAGS) --analyze $(OBJS:.o=.c) 2>clang.log

2
NOTICE
View File

@ -1,6 +1,6 @@
PDFio - PDF Read/Write Library
Copyright © 2021 by Michael R Sweet.
Copyright © 2021-2023 by Michael R Sweet.
(Optional) Exceptions to the Apache 2.0 License:
================================================

View File

@ -3,10 +3,8 @@ pdfio - PDF Read/Write Library
![Version](https://img.shields.io/github/v/release/michaelrsweet/pdfio?include_prereleases)
![Apache 2.0](https://img.shields.io/github/license/michaelrsweet/pdfio)
![Build](https://github.com/michaelrsweet/pdfio/workflows/Build/badge.svg)
[![Build Status](https://img.shields.io/github/workflow/status/michaelrsweet/pdfio/Build)](https://github.com/michaelrsweet/pdfio/actions/workflows/build.yml)
[![Coverity Scan Status](https://img.shields.io/coverity/scan/22385.svg)](https://scan.coverity.com/projects/michaelrsweet-pdfio)
[![LGTM Grade](https://img.shields.io/lgtm/grade/cpp/github/michaelrsweet/pdfio)](https://lgtm.com/projects/g/michaelrsweet/pdfio/context:cpp)
[![LGTM Alerts](https://img.shields.io/lgtm/alerts/github/michaelrsweet/pdfio)](https://lgtm.com/projects/g/michaelrsweet/pdfio/)
PDFio is a simple C library for reading and writing PDF files. The primary
goals of PDFio are:
@ -115,13 +113,13 @@ generates a static library that will be installed under "/usr/local" with:
You can reproduce this with the makefile using:
sudo make COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" install
sudo make macos install
Legal Stuff
-----------
PDFio is Copyright © 2021 by Michael R Sweet.
PDFio is Copyright © 2021-2023 by Michael R Sweet.
This software is licensed under the Apache License Version 2.0 with an
(optional) exception to allow linking against GPL2/LGPL2 software. See the

View File

@ -5,6 +5,25 @@ This file describes how security issues are reported and handled, and what the
expectations are for security issues reported to this project.
Reporting a Security Bug
------------------------
For the purposes of this project, a security bug is a software defect that
allows a *local or remote user* to gain unauthorized access or privileges on the
host computer or to cause the software to crash. Such defects should be
reported to the project security advisory page at
<https://github.com/michaelrsweet/pdfio/security/advisories>.
Alternately, security bugs can be reported to "security AT msweet.org" using the
PGP public key below. Expect a response within 5 business days. Any proposed
embargo date should be at least 30 days and no more than 90 days in the future.
> *Note:* If you've found a software defect that allows a *program* to gain
> unauthorized access or privileges on the host computer or causes the program
> to crash, that defect should be reported as an ordinary project issue at
> <https://github.com/michaelrsweet/pdfio/issues>.
Responsible Disclosure
----------------------
@ -50,14 +69,6 @@ example:
1.0rc1
Reporting a Vulnerability
-------------------------
Report all security issues to "security AT msweet.org". Expect a response
within 5 business days. Any proposed embargo date should be at least 30 days
and no more than 90 days in the future.
PGP Public Key
--------------

BIN
afl-input/PDFBOX-1010-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1018-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1023-2.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1029-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1036-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1036-2.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,55 @@
%PDF-1.3
1 0 obj<</Type/Catalog/Pages 5 0 R>>
endobj
3 0 obj<</ModDate(D:20110505091515-05'00')/CreationDate(2011/05/05 09:15)/Creator(PaperPort 11.0)/Producer(PaperPort 11.0)/Subject()/Author()/Keywords()/Title()>>
endobj
4 0 obj<</Type/Page/MediaBox[0 0 622.0799 756]/Parent 5 0 R/CropBox[0 0 622.0799 756]/Contents 7 0 R/Resources<</ProcSet[/PDF/Text/ImageB/ImageC/ImageI]/XObject<</Z_Im0 6 0 R>>>>>>
endobj
5 0 obj<</Count 1/Type/Pages/Kids[ 4 0 R]>>
endobj
6 0 obj<</Type/XObject/Subtype/Image/Name/XImg/Width 1728/Height 2100/BitsPerComponent 1/ColorSpace/DeviceGray/Intent//Filter[/CCITTFaxDecode]/DecodeParms[<</Colors 1/Columns 1728/Rows 2100/K -1>>]/Length 81592>>stream
endstream
endobj
7 0 obj<</Length 72>>stream
q
622.07996 0 0 756 0 0 cm
0 g
[]0 d 1 w 10 M 0 i 0 J 0 j
/Z_Im0 Do
Q
endstream
endobj
xref
0 8
0000000002 65535 f
0000000010 00000 n
0000000000 00000 f
0000000054 00000 n
0000000224 00000 n
0000000412 00000 n
0000000463 00000 n
0000082294 00000 n
trailer
<</Size 8/Info 3 0 R/Root 1 0 R/ID[<c48c2a5922382dc456a05f8e3ccbb9f8><94a076a2f82a754598b70200e827ac8b>]>>
startxref
82414
%%EOF
%PaperPortPDFversion3 0 obj<</ModDate(D:20110505091515-05'00')/CreationDate(2011/05/05 09:15)/Creator(PaperPort 11.0)/Producer(PaperPort 11.0)/Subject()/Author()/Keywords()/Title()>>
endobj
5 0 obj<</Count 2/Type/Pages/Kids[ 4 0 R 8 0 R]>>
endobj
8 0 obj<</Type/Page/MediaBox[0 0 622.0799 757.4399]/Parent 5 0 R/CropBox[0 0 622.0799 757.4399]/Contents 10 0 R/Resources<</ProcSet[/PDF/Text/ImageB/ImageC/ImageI]/XObject<</Z_Im0 9 0 R>>>>>>
endobj
9 0 obj<</Type/XObject/Subtype/Image/Name/XImg/Width 1728/Height 2104/BitsPerComponent 1/ColorSpace/DeviceGray/Intent//Filter[/CCITTFaxDecode]/DecodeParms[<</Colors 1/Columns 1728/Rows 2104/K -1>>]/Length 78404>>stream
endstream
endobj
10 0 obj<</Length 78>>stream
q
622.07996 0 0 757.44001 0 0 cm
0 g
[]0 d 1 w 10 M 0 i 0 J 0 j
/Z_Im0 Do
Q
endstream

BIN
afl-input/PDFBOX-1039-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1047-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1048-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1065-0.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1065-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1067-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1068-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1074-1.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1074-3.pdf Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
afl-input/PDFBOX-1094-3.pdf Normal file

Binary file not shown.

Binary file not shown.

BIN
afl-input/PDFBOX-1094-4.pdf Normal file

Binary file not shown.

BIN
afl-input/PDFBOX-1095-2.pdf Normal file

Binary file not shown.

1466
afl-pdf.dict Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
.TH pdfio 3 "pdf read/write library" "2021-10-25" "pdf read/write library"
.TH pdfio 3 "pdf read/write library" "2023-11-15" "pdf read/write library"
.SH NAME
pdfio \- pdf read/write library
.SH Introduction
@ -34,7 +34,7 @@ PDFio is
.I not
concerned with rendering or viewing a PDF file, although a PDF RIP or viewer could be written using it.
.PP
PDFio is Copyright \[co] 2021 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
PDFio is Copyright \[co] 2021\-2023 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
.SS Requirements
.PP
PDFio requires the following to build the software:
@ -156,7 +156,7 @@ There is also an Xcode project ("pdfio.xcodeproj") you can use on macOS which ge
You can reproduce this with the makefile using:
.nf
sudo make COMMONFLAGS="\-Os \-mmacosx\-version\-min=10.14 \-arch x86_64 \-arch arm64" install
sudo make macos install
.fi
.SS Detecting PDFio
.PP
@ -254,7 +254,7 @@ Each PDF file contains one or more pages. The pdfioFileGetNumPages function retu
}
.fi
.PP
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page.
Each page is represented by a "page tree" object (what pdfioFileGetPage returns) that specifies information about the page and one or more "content" objects that contain the images, fonts, text, and graphics that appear on the page. Use the pdfioPageGetNumStreams and pdfioPageOpenStream functions to access the content streams for each page.
.PP
The pdfioFileClose function closes a PDF file and frees all memory that was used for it:
.nf
@ -324,6 +324,14 @@ Some PDF objects have an associated data stream, such as for pages, images, ICC
.PP
The first argument is the object pointer. The second argument is a boolean value that specifies whether you want to decode (typically decompress) the stream data or return it as\-is.
.PP
When reading a page stream you'll use the pdfioPageOpenStream function instead:
.nf
pdfio_file_t *pdf = pdfioFileOpen(...);
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
pdfio_stream_t *st = pdfioPageOpenStream(obj, 0, true);
.fi
.PP
Once you have the stream open, you can use one of several functions to read from it:
.IP \(bu 5
.PP
@ -353,12 +361,21 @@ To create a stream for a new object, call the pdfioObjCreateStream function:
.nf
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
.fi
.PP
The first argument is the newly created object. The second argument is either PDFIO_FILTER_NONE to specify that any encoding is done by your program or PDFIO_FILTER_FLATE to specify that PDFio should Flate compress the stream.
.PP
To create a page content stream call the pdfioFileCreatePage function:
.nf
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_dict_t *dict = pdfioDictCreate(pdf);
\... set page dictionary keys and values ...
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
.fi
.PP
Once you have created the stream, use any of the following functions to write to the stream:
.IP \(bu 5
.PP
@ -1340,7 +1357,7 @@ bool pdfioContentFillAndStroke (
.fi
.SS pdfioContentMatrixConcat
Concatenate a matrix to the current graphics
state.
state.
.PP
.nf
bool pdfioContentMatrixConcat (
@ -1423,6 +1440,14 @@ bool pdfioContentPathCurve23 (
double y3
);
.fi
.SS pdfioContentPathEnd
Clear the current path.
.PP
.nf
bool pdfioContentPathEnd (
pdfio_stream_t *st
);
.fi
.SS pdfioContentPathLineTo
Add a straight line to the current path.
.PP
@ -1482,6 +1507,9 @@ bool pdfioContentSetDashPattern (
double off
);
.fi
.PP
This function sets the stroke pattern when drawing lines. If "on" and "off"
are 0, a solid line is drawn.
.SS pdfioContentSetFillColorDeviceCMYK
Set device CMYK fill color.
.PP
@ -1933,6 +1961,32 @@ pdfio_valtype_t pdfioDictGetType (
const char *key
);
.fi
.SS pdfioDictIterateKeys
Iterate the keys in a dictionary.
.PP
.nf
void pdfioDictIterateKeys (
pdfio_dict_t *dict,
pdfio_dict_cb_t cb,
void *cb_data
);
.fi
.PP
This function iterates the keys in a dictionary, calling the supplied
function "cb":
.PP
.nf
bool
my_dict_cb(pdfio_dict_t *dict, const char *key, void *cb_data)
{
... "key" contains the dictionary key ...
... return true to continue or false to stop ...
}
.fi
The iteration continues as long as the callback returns \fBtrue\fR and stops
once all keys have been iterated.
.SS pdfioDictSetArray
Set a key array in a dictionary.
.PP
@ -2255,10 +2309,11 @@ written:
ssize_t
output_cb(void *output_ctx, const void *buffer, size_t bytes)
{
// Write buffer to output and return the number of bytes written
// Write buffer to output and return the number of bytes written
}
.fi
The "version" argument specifies the PDF version number for the file or
\fBNULL\fR for the default ("2.0").
.PP
@ -2285,6 +2340,20 @@ pdfio_stream_t * pdfioFileCreatePage (
pdfio_dict_t *dict
);
.fi
.SS pdfioFileCreateTemporary
Create a temporary PDF file.
.PP
.nf
pdfio_file_t * pdfioFileCreateTemporary (
char *buffer,
size_t bufsize,
const char *version,
pdfio_rect_t *media_box,
pdfio_rect_t *crop_box,
pdfio_error_cb_t error_cb,
void *error_data
);
.fi
.SS pdfioFileFindObj
Find an object using its object number.
.PP
@ -2552,7 +2621,7 @@ double pdfioImageGetWidth (
.fi
.SS pdfioObjClose
Close an object, writing any data as needed to the PDF
file.
file.
.PP
.nf
bool pdfioObjClose (
@ -2693,6 +2762,24 @@ bool pdfioPageDictAddImage (
pdfio_obj_t *obj
);
.fi
.SS pdfioPageGetNumStreams
Get the number of content streams for a page object.
.PP
.nf
size_t pdfioPageGetNumStreams (
pdfio_obj_t *page
);
.fi
.SS pdfioPageOpenStream
Open a content stream for a page.
.PP
.nf
pdfio_stream_t * pdfioPageOpenStream (
pdfio_obj_t *page,
size_t n,
bool decode
);
.fi
.SS pdfioStreamClose
Close a (data) stream in a PDF file.
.PP
@ -2720,6 +2807,13 @@ bool pdfioStreamGetToken (
size_t bufsize
);
.fi
.PP
This function reads a single PDF token from a stream. Operator tokens,
boolean values, and numbers are returned as-is in the provided string buffer.
String values start with the opening parenthesis ('(') but have all escaping
resolved and the terminating parenthesis removed. Hexadecimal string values
start with the opening angle bracket ('<') and have all whitespace and the
terminating angle bracket removed.
.SS pdfioStreamPeek
Peek at data in a stream.
.PP
@ -2840,6 +2934,12 @@ Standard color spaces
.nf
typedef enum pdfio_cs_e pdfio_cs_t;
.fi
.SS pdfio_dict_cb_t
Dictionary iterator callback
.PP
.nf
typedef bool(*)(pdfio_dict_t *dict, const char *key, void *cb_data) pdfio_dict_cb_t;
.fi
.SS pdfio_dict_t
Key/value dictionary
.PP
@ -2947,4 +3047,4 @@ typedef uint8_t state_t[4][4];
Michael R Sweet
.SH COPYRIGHT
.PP
Copyright (c) 2021 by Michael R Sweet
Copyright (c) 2021-2023 by Michael R Sweet

View File

@ -1,13 +1,13 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<title>PDFio Programming Manual v1.0.0</title>
<title>PDFio Programming Manual v1.1.3</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="generator" content="codedoc v3.7">
<meta name="author" content="Michael R Sweet">
<meta name="language" content="en-US">
<meta name="copyright" content="Copyright © 2021 by Michael R Sweet">
<meta name="version" content="1.0.0">
<meta name="copyright" content="Copyright © 2021-2023 by Michael R Sweet">
<meta name="version" content="1.1.3">
<style type="text/css"><!--
body {
background: white;
@ -245,9 +245,9 @@ span.string {
<body>
<div class="header">
<p><img class="title" src="pdfio-512.png"></p>
<h1 class="title">PDFio Programming Manual v1.0.0</h1>
<h1 class="title">PDFio Programming Manual v1.1.3</h1>
<p>Michael R Sweet</p>
<p>Copyright © 2021 by Michael R Sweet</p>
<p>Copyright © 2021-2023 by Michael R Sweet</p>
</div>
<div class="contents">
<h2 class="title">Contents</h2>
@ -307,6 +307,7 @@ span.string {
<li><a href="#pdfioContentPathCurve">pdfioContentPathCurve</a></li>
<li><a href="#pdfioContentPathCurve13">pdfioContentPathCurve13</a></li>
<li><a href="#pdfioContentPathCurve23">pdfioContentPathCurve23</a></li>
<li><a href="#pdfioContentPathEnd">pdfioContentPathEnd</a></li>
<li><a href="#pdfioContentPathLineTo">pdfioContentPathLineTo</a></li>
<li><a href="#pdfioContentPathMoveTo">pdfioContentPathMoveTo</a></li>
<li><a href="#pdfioContentPathRect">pdfioContentPathRect</a></li>
@ -360,6 +361,7 @@ span.string {
<li><a href="#pdfioDictGetRect">pdfioDictGetRect</a></li>
<li><a href="#pdfioDictGetString">pdfioDictGetString</a></li>
<li><a href="#pdfioDictGetType">pdfioDictGetType</a></li>
<li><a href="#pdfioDictIterateKeys">pdfioDictIterateKeys</a></li>
<li><a href="#pdfioDictSetArray">pdfioDictSetArray</a></li>
<li><a href="#pdfioDictSetBinary">pdfioDictSetBinary</a></li>
<li><a href="#pdfioDictSetBoolean">pdfioDictSetBoolean</a></li>
@ -383,6 +385,7 @@ span.string {
<li><a href="#pdfioFileCreateObj">pdfioFileCreateObj</a></li>
<li><a href="#pdfioFileCreateOutput">pdfioFileCreateOutput</a></li>
<li><a href="#pdfioFileCreatePage">pdfioFileCreatePage</a></li>
<li><a href="#pdfioFileCreateTemporary">pdfioFileCreateTemporary</a></li>
<li><a href="#pdfioFileFindObj">pdfioFileFindObj</a></li>
<li><a href="#pdfioFileGetAuthor">pdfioFileGetAuthor</a></li>
<li><a href="#pdfioFileGetCreationDate">pdfioFileGetCreationDate</a></li>
@ -425,6 +428,8 @@ span.string {
<li><a href="#pdfioPageDictAddColorSpace">pdfioPageDictAddColorSpace</a></li>
<li><a href="#pdfioPageDictAddFont">pdfioPageDictAddFont</a></li>
<li><a href="#pdfioPageDictAddImage">pdfioPageDictAddImage</a></li>
<li><a href="#pdfioPageGetNumStreams">pdfioPageGetNumStreams</a></li>
<li><a href="#pdfioPageOpenStream">pdfioPageOpenStream</a></li>
<li><a href="#pdfioStreamClose">pdfioStreamClose</a></li>
<li><a href="#pdfioStreamConsume">pdfioStreamConsume</a></li>
<li><a href="#pdfioStreamGetToken">pdfioStreamGetToken</a></li>
@ -440,6 +445,7 @@ span.string {
<li><a href="#TYPES">Data Types</a><ul class="subcontents">
<li><a href="#pdfio_array_t">pdfio_array_t</a></li>
<li><a href="#pdfio_cs_t">pdfio_cs_t</a></li>
<li><a href="#pdfio_dict_cb_t">pdfio_dict_cb_t</a></li>
<li><a href="#pdfio_dict_t">pdfio_dict_t</a></li>
<li><a href="#pdfio_encryption_t">pdfio_encryption_t</a></li>
<li><a href="#pdfio_error_cb_t">pdfio_error_cb_t</a></li>
@ -491,7 +497,7 @@ span.string {
</li>
</ul>
<p>PDFio is <em>not</em> concerned with rendering or viewing a PDF file, although a PDF RIP or viewer could be written using it.</p>
<p>PDFio is Copyright © 2021 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files &quot;LICENSE&quot; and &quot;NOTICE&quot; for more information.</p>
<p>PDFio is Copyright © 2021-2023 by Michael R Sweet and is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GPL2/LGPL2 software. See the files &quot;LICENSE&quot; and &quot;NOTICE&quot; for more information.</p>
<h3 class="title" id="requirements">Requirements</h3>
<p>PDFio requires the following to build the software:</p>
<ul>
@ -557,7 +563,7 @@ make install-shared
<pre><code>sudo xcodebuild install
</code></pre>
<p>You can reproduce this with the makefile using:</p>
<pre><code>sudo make COMMONFLAGS=&quot;-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64&quot; install
<pre><code>sudo make macos install
</code></pre>
<h3 class="title" id="detecting-pdfio">Detecting PDFio</h3>
<p>PDFio can be detected using the <code>pkg-config</code> command, for example:</p>
@ -621,7 +627,7 @@ pdfio_obj_t *page; <span class="comment">// Current page</span>
<span class="comment">// do something with page</span>
}
</code></pre>
<p>Each page is represented by a &quot;page tree&quot; object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more &quot;content&quot; objects that contain the images, fonts, text, and graphics that appear on the page.</p>
<p>Each page is represented by a &quot;page tree&quot; object (what <a href="#pdfioFileGetPage"><code>pdfioFileGetPage</code></a> returns) that specifies information about the page and one or more &quot;content&quot; objects that contain the images, fonts, text, and graphics that appear on the page. Use the <a href="#pdfioPageGetNumStreams"><code>pdfioPageGetNumStreams</code></a> and <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> functions to access the content streams for each page.</p>
<p>The <a href="#pdfioFileClose"><code>pdfioFileClose</code></a> function closes a PDF file and frees all memory that was used for it:</p>
<pre><code class="language-c">pdfioFileClose(pdf);
</code></pre>
@ -663,6 +669,11 @@ pdfio_obj_t *obj = pdfioFileFindObj(pdf, number);
pdfio_stream_t *st = pdfioObjOpenStream(obj, <span class="reserved">true</span>);
</code></pre>
<p>The first argument is the object pointer. The second argument is a boolean value that specifies whether you want to decode (typically decompress) the stream data or return it as-is.</p>
<p>When reading a page stream you'll use the <a href="#pdfioPageOpenStream"><code>pdfioPageOpenStream</code></a> function instead:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileOpen(...);
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
pdfio_stream_t *st = pdfioPageOpenStream(obj, <span class="number">0</span>, <span class="reserved">true</span>);
</code></pre>
<p>Once you have the stream open, you can use one of several functions to read from it:</p>
<ul>
<li><p><a href="#pdfioStreamConsume"><code>pdfioStreamConsume</code></a> reads and discards a number of bytes in the stream</p>
@ -679,10 +690,16 @@ pdfio_stream_t *st = pdfioObjOpenStream(obj, <span class="reserved">true</span>)
</code></pre>
<p>To create a stream for a new object, call the <a href="#pdfioObjCreateStream"><code>pdfioObjCreateStream</code></a> function:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
</code></pre>
<p>The first argument is the newly created object. The second argument is either <code>PDFIO_FILTER_NONE</code> to specify that any encoding is done by your program or <code>PDFIO_FILTER_FLATE</code> to specify that PDFio should Flate compress the stream.</p>
<p>To create a page content stream call the <a href="#pdfioFileCreatePage"><code>pdfioFileCreatePage</code></a> function:</p>
<pre><code class="language-c">pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_dict_t *dict = pdfioDictCreate(pdf);
... set page dictionary keys <span class="reserved">and</span> values ...
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
</code></pre>
<p>Once you have created the stream, use any of the following functions to write to the stream:</p>
<ul>
<li><p><a href="#pdfioStreamPrintf"><code>pdfioStreamPrintf</code></a> writes a formatted string to the stream</p>
@ -1371,7 +1388,7 @@ bool pdfioContentFillAndStroke(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st,
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioContentMatrixConcat">pdfioContentMatrixConcat</a></h3>
<p class="description">Concatenate a matrix to the current graphics
state.</p>
state.</p>
<p class="code">
bool pdfioContentMatrixConcat(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st, pdfio_matrix_t m);</p>
<h4 class="parameters">Parameters</h4>
@ -1498,6 +1515,17 @@ bool pdfioContentPathCurve23(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st, d
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioContentPathEnd">pdfioContentPathEnd</a></h3>
<p class="description">Clear the current path.</p>
<p class="code">
bool pdfioContentPathEnd(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>st</th>
<td class="description">Stream</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioContentPathLineTo">pdfioContentPathLineTo</a></h3>
<p class="description">Add a straight line to the current path.</p>
<p class="code">
@ -1586,6 +1614,9 @@ bool pdfioContentSetDashPattern(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function sets the stroke pattern when drawing lines. If &quot;on&quot; and &quot;off&quot;
are 0, a solid line is drawn.</p>
<h3 class="function"><a id="pdfioContentSetFillColorDeviceCMYK">pdfioContentSetFillColorDeviceCMYK</a></h3>
<p class="description">Set device CMYK fill color.</p>
<p class="code">
@ -2248,6 +2279,34 @@ const char *pdfioDictGetString(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, c
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Value type</p>
<h3 class="function"><a id="pdfioDictIterateKeys">pdfioDictIterateKeys</a></h3>
<p class="description">Iterate the keys in a dictionary.</p>
<p class="code">
void pdfioDictIterateKeys(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, <a href="#pdfio_dict_cb_t">pdfio_dict_cb_t</a> cb, void *cb_data);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>dict</th>
<td class="description">Dictionary</td></tr>
<tr><th>cb</th>
<td class="description">Callback function</td></tr>
<tr><th>cb_data</th>
<td class="description">Callback data</td></tr>
</tbody></table>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function iterates the keys in a dictionary, calling the supplied
function &quot;cb&quot;:
<pre>
bool
my_dict_cb(pdfio_dict_t *dict, const char *key, void *cb_data)
{
... &quot;key&quot; contains the dictionary key ...
... return true to continue or false to stop ...
}
</pre>
The iteration continues until the callback returns <code>false</code> or all keys
have been iterated.</p>
<h3 class="function"><a id="pdfioDictSetArray">pdfioDictSetArray</a></h3>
<p class="description">Set a key array in a dictionary.</p>
<p class="code">
@ -2686,9 +2745,10 @@ written:
ssize_t
output_cb(void *output_ctx, const void *buffer, size_t bytes)
{
// Write buffer to output and return the number of bytes written
// Write buffer to output and return the number of bytes written
}
</pre>
The &quot;version&quot; argument specifies the PDF version number for the file or
<code>NULL</code> for the default (&quot;2.0&quot;).<br>
<br>
@ -2717,6 +2777,35 @@ stored as indirect object references.</blockquote>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Contents stream</p>
<h3 class="function"><a id="pdfioFileCreateTemporary">pdfioFileCreateTemporary</a></h3>
<p class="description">Create a temporary PDF file.</p>
<p class="code">
<a href="#pdfio_file_t">pdfio_file_t</a> *pdfioFileCreateTemporary(char *buffer, size_t bufsize, const char *version, <a href="#pdfio_rect_t">pdfio_rect_t</a> *media_box, <a href="#pdfio_rect_t">pdfio_rect_t</a> *crop_box, <a href="#pdfio_error_cb_t">pdfio_error_cb_t</a> error_cb, void *error_data);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>buffer</th>
<td class="description">Filename buffer</td></tr>
<tr><th>bufsize</th>
<td class="description">Size of filename buffer</td></tr>
<tr><th>version</th>
<td class="description">PDF version number or <code>NULL</code> for default (2.0)</td></tr>
<tr><th>media_box</th>
<td class="description">Default MediaBox for pages</td></tr>
<tr><th>crop_box</th>
<td class="description">Default CropBox for pages</td></tr>
<tr><th>error_cb</th>
<td class="description">Error callback or <code>NULL</code> for default</td></tr>
<tr><th>error_data</th>
<td class="description">Error callback data, if any</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">PDF file or <code>NULL</code> on error</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function creates a PDF file with a unique filename in the current
temporary directory. The temporary filename is stored in the string &quot;buffer&quot; and
will have a &quot;.pdf&quot; extension. Otherwise, this function works the same as
the <a href="#pdfioFileCreate"><code>pdfioFileCreate</code></a> function.
</p>
<h3 class="function"><a id="pdfioFileFindObj">pdfioFileFindObj</a></h3>
<p class="description">Find an object using its object number.</p>
<p class="code">
@ -3068,7 +3157,7 @@ double pdfioImageGetWidth(<a href="#pdfio_obj_t">pdfio_obj_t</a> *obj);</p>
<p class="description">Width in columns</p>
<h3 class="function"><a id="pdfioObjClose">pdfioObjClose</a></h3>
<p class="description">Close an object, writing any data as needed to the PDF
file.</p>
file.</p>
<p class="code">
bool pdfioObjClose(<a href="#pdfio_obj_t">pdfio_obj_t</a> *obj);</p>
<h4 class="parameters">Parameters</h4>
@ -3264,6 +3353,32 @@ bool pdfioPageDictAddImage(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, const
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on failure</p>
<h3 class="function"><a id="pdfioPageGetNumStreams">pdfioPageGetNumStreams</a></h3>
<p class="description">Get the number of content streams for a page object.</p>
<p class="code">
size_t pdfioPageGetNumStreams(<a href="#pdfio_obj_t">pdfio_obj_t</a> *page);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>page</th>
<td class="description">Page object</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Number of streams</p>
<h3 class="function"><a id="pdfioPageOpenStream">pdfioPageOpenStream</a></h3>
<p class="description">Open a content stream for a page.</p>
<p class="code">
<a href="#pdfio_stream_t">pdfio_stream_t</a> *pdfioPageOpenStream(<a href="#pdfio_obj_t">pdfio_obj_t</a> *page, size_t n, bool decode);</p>
<h4 class="parameters">Parameters</h4>
<table class="list"><tbody>
<tr><th>page</th>
<td class="description">Page object</td></tr>
<tr><th>n</th>
<td class="description">Stream index (0-based)</td></tr>
<tr><th>decode</th>
<td class="description"><code>true</code> to decode/decompress stream</td></tr>
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description">Stream</p>
<h3 class="function"><a id="pdfioStreamClose">pdfioStreamClose</a></h3>
<p class="description">Close a (data) stream in a PDF file.</p>
<p class="code">
@ -3303,6 +3418,13 @@ bool pdfioStreamGetToken(<a href="#pdfio_stream_t">pdfio_stream_t</a> *st, char
</tbody></table>
<h4 class="returnvalue">Return Value</h4>
<p class="description"><code>true</code> on success, <code>false</code> on EOF</p>
<h4 class="discussion">Discussion</h4>
<p class="discussion">This function reads a single PDF token from a stream. Operator tokens,
boolean values, and numbers are returned as-is in the provided string buffer.
String values start with the opening parenthesis ('(') but have all escaping
resolved and the terminating parenthesis removed. Hexadecimal string values
start with the opening angle bracket ('&lt;') and have all whitespace and the
terminating angle bracket removed.</p>
<h3 class="function"><a id="pdfioStreamPeek">pdfioStreamPeek</a></h3>
<p class="description">Peek at data in a stream.</p>
<p class="code">
@ -3445,6 +3567,11 @@ typedef struct _pdfio_array_s pdfio_array_t;
<p class="code">
typedef enum <a href="#pdfio_cs_e">pdfio_cs_e</a> pdfio_cs_t;
</p>
<h3 class="typedef"><a id="pdfio_dict_cb_t">pdfio_dict_cb_t</a></h3>
<p class="description">Dictionary iterator callback</p>
<p class="code">
typedef bool (*pdfio_dict_cb_t)(<a href="#pdfio_dict_t">pdfio_dict_t</a> *dict, const char *key, void *cb_data);
</p>
<h3 class="typedef"><a id="pdfio_dict_t">pdfio_dict_t</a></h3>
<p class="description">Key/value dictionary</p>
<p class="code">

View File

@ -15,8 +15,8 @@ goals of pdfio are:
PDFio is *not* concerned with rendering or viewing a PDF file, although a PDF
RIP or viewer could be written using it.
PDFio is Copyright © 2021 by Michael R Sweet and is licensed under the Apache
License Version 2.0 with an (optional) exception to allow linking against
PDFio is Copyright © 2021-2023 by Michael R Sweet and is licensed under the
Apache License Version 2.0 with an (optional) exception to allow linking against
GPL2/LGPL2 software. See the files "LICENSE" and "NOTICE" for more information.
@ -104,7 +104,7 @@ generates a static library that will be installed under "/usr/local" with:
You can reproduce this with the makefile using:
sudo make COMMONFLAGS="-Os -mmacosx-version-min=10.14 -arch x86_64 -arch arm64" install
sudo make macos install
Detecting PDFio
@ -209,7 +209,8 @@ for (i = 0, count = pdfioFileGetNumPages(pdf); i < count; i ++)
Each page is represented by a "page tree" object (what [`pdfioFileGetPage`](@@)
returns) that specifies information about the page and one or more "content"
objects that contain the images, fonts, text, and graphics that appear on the
page.
page. Use the [`pdfioPageGetNumStreams`](@@) and [`pdfioPageOpenStream`](@@)
functions to access the content streams for each page.
The [`pdfioFileClose`](@@) function closes a PDF file and frees all memory that
was used for it:
@ -294,6 +295,15 @@ The first argument is the object pointer. The second argument is a boolean
value that specifies whether you want to decode (typically decompress) the
stream data or return it as-is.
When reading a page stream you'll use the [`pdfioPageOpenStream`](@@) function
instead:
```c
pdfio_file_t *pdf = pdfioFileOpen(...);
pdfio_obj_t *obj = pdfioFileGetPage(pdf, number);
pdfio_stream_t *st = pdfioPageOpenStream(obj, 0, true);
```
Once you have the stream open, you can use one of several functions to read
from it:
@ -315,14 +325,23 @@ function:
```c
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
pdfio_obj_t *obj = pdfioFileCreateObj(pdf, ...);
pdfio_stream_t *st = pdfioObjCreateStream(obj, PDFIO_FILTER_FLATE);
```
The first argument is the newly created object. The second argument is either
`PDFIO_FILTER_NONE` to specify that any encoding is done by your program or
`PDFIO_FILTER_FLATE` to specify that PDFio should Flate compress the stream.
To create a page content stream call the [`pdfioFileCreatePage`](@@) function:
```c
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_dict_t *dict = pdfioDictCreate(pdf);
... set page dictionary keys and values ...
pdfio_stream_t *st = pdfioFileCreatePage(pdf, dict);
```
Once you have created the stream, use any of the following functions to write
to the stream:

View File

@ -575,7 +575,8 @@ _pdfioArrayGetValue(pdfio_array_t *a, // I - Array
pdfio_array_t * // O - New array
_pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
pdfio_obj_t *obj, // I - Object, if any
_pdfio_token_t *tb) // I - Token buffer/stack
_pdfio_token_t *tb, // I - Token buffer/stack
size_t depth) // I - Depth of array
{
pdfio_array_t *array; // New array
char token[8192]; // Token from file
@ -599,7 +600,7 @@ _pdfioArrayRead(pdfio_file_t *pdf, // I - PDF file
// Push the token and decode the value...
_pdfioTokenPush(tb, token);
if (!_pdfioValueRead(pdf, obj, tb, &value))
if (!_pdfioValueRead(pdf, obj, tb, &value, depth))
break;
// PDFIO_DEBUG("_pdfioArrayRead(%p): Appending ", (void *)array);

View File

@ -1,7 +1,7 @@
//
// Common support functions for pdfio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -38,6 +38,8 @@ _pdfioFileConsume(pdfio_file_t *pdf, // I - PDF file
else if (_pdfioFileSeek(pdf, (off_t)bytes, SEEK_CUR) < 0)
return (false);
PDFIO_DEBUG("_pdfioFileConsume: pos=%ld\n", (long)(pdf->bufpos + pdf->bufptr - pdf->buffer));
return (true);
}
@ -373,9 +375,9 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file
if (whence != SEEK_END && offset >= pdf->bufpos && offset < (pdf->bufpos + pdf->bufend - pdf->buffer))
{
// Yes, seek within existing buffer...
pdf->bufptr = pdf->buffer + offset - pdf->bufpos;
pdf->bufptr = pdf->buffer + (offset - pdf->bufpos);
PDFIO_DEBUG("_pdfioFileSeek: Seek within buffer, bufpos=%ld.\n", (long)pdf->bufpos);
PDFIO_DEBUG("_pdfioFileSeek: buffer=%p, bufptr=%p, bufend=%p\n", pdf->buffer, pdf->bufptr, pdf->bufend);
PDFIO_DEBUG("_pdfioFileSeek: buffer=%p, bufptr=%p(<%02X%02X...>), bufend=%p\n", pdf->buffer, pdf->bufptr, pdf->bufptr[0] & 255, pdf->bufptr[1] & 255, pdf->bufend);
return (offset);
}
@ -525,7 +527,7 @@ read_buffer(pdfio_file_t *pdf, // I - PDF file
return (rbytes);
}
//
// 'write_buffer()' - Write a buffer to a PDF file.
//

View File

@ -1,7 +1,7 @@
//
// Content helper functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -489,8 +489,11 @@ pdfioContentMatrixRotate(
pdfio_stream_t *st, // I - Stream
double degrees) // I - Rotation angle in degrees counter-clockwise
{
double dcos = cos(degrees / M_PI); // Cosine
double dsin = sin(degrees / M_PI); // Sine
double dcos = cos(M_PI * degrees / 180.0);
// Cosine
double dsin = sin(M_PI * degrees / 180.0);
// Sine
return (pdfioStreamPrintf(st, "%g %g %g %g 0 0 cm\n", dcos, -dsin, dsin, dcos));
}
@ -586,6 +589,17 @@ pdfioContentPathCurve23(
}
//
// 'pdfioContentPathEnd()' - Clear the current path.
//
// This function writes the "n" operator, followed by a newline, to the
// content stream "st".
//

bool					// O - `true` on success, `false` on failure
pdfioContentPathEnd(pdfio_stream_t *st)	// I - Stream
{
  bool	status;				// Result of writing the operator


  // Emit the path-ending operator on its own line...
  status = pdfioStreamPuts(st, "n\n");

  return (status);
}
//
// 'pdfioContentPathLineTo()' - Add a straight line to the current path.
//
@ -656,6 +670,9 @@ pdfioContentSave(pdfio_stream_t *st) // I - Stream
//
// 'pdfioContentSetDashPattern()' - Set the stroke pattern.
//
// This function sets the stroke pattern when drawing lines. If "on" and "off"
// are 0, a solid line is drawn.
//
bool // O - `true` on success, `false` on failure
pdfioContentSetDashPattern(
@ -664,7 +681,12 @@ pdfioContentSetDashPattern(
double on, // I - On length
double off) // I - Off length
{
return (pdfioStreamPrintf(st, "[%g %g] %g d\n", on, off, phase));
if (on <= 0.0 && off <= 0.0)
return (pdfioStreamPrintf(st, "[] %g d\n", phase));
else if (fabs(on - off) < 0.001)
return (pdfioStreamPrintf(st, "[%g] %g d\n", on, phase));
else
return (pdfioStreamPrintf(st, "[%g %g] %g d\n", on, off, phase));
}
@ -1388,6 +1410,7 @@ pdfioFileCreateFontObjFromFile(
pdfio_dict_t *cid2gid; // CIDToGIDMap dictionary
pdfio_obj_t *cid2gid_obj; // CIDToGIDMap object
size_t i, // Looping var
start, // Start character
num_cmap; // Number of CMap entries
const int *cmap; // CMap entries
unsigned char *bufptr, // Pointer into buffer
@ -1396,6 +1419,9 @@ pdfioFileCreateFontObjFromFile(
pdfio_obj_t *type2_obj; // CIDFontType2 font object
pdfio_array_t *descendants; // Decendant font list
pdfio_dict_t *sidict; // CIDSystemInfo dictionary
pdfio_array_t *w_array, // Width array
*temp_array; // Temporary width sub-array
int w0, w1; // Widths
// Create a CIDToGIDMap object for the Unicode font...
if ((cid2gid = pdfioDictCreate(pdf)) == NULL)
@ -1426,8 +1452,11 @@ pdfioFileCreateFontObjFromFile(
cmap = ttfGetCMap(font, &num_cmap);
PDFIO_DEBUG("pdfioFileCreateFontObjFromFile: num_cmap=%u\n", (unsigned)num_cmap);
for (i = 0, bufptr = buffer, bufend = buffer + sizeof(buffer); i < num_cmap; i ++)
{
PDFIO_DEBUG("pdfioFileCreateFontObjFromFile: cmap[%u]=%d\n", (unsigned)i, cmap[i]);
if (cmap[i] < 0)
{
// Map undefined glyph to .notdef...
@ -1481,6 +1510,54 @@ pdfioFileCreateFontObjFromFile(
return (NULL);
}
// Width array
if ((w_array = pdfioArrayCreate(pdf)) == NULL)
{
ttfDelete(font);
return (NULL);
}
for (start = 0, w0 = ttfGetWidth(font, 0), i = 1; i < 65536; start = i, w0 = w1, i ++)
{
while (i < 65536 && (w1 = ttfGetWidth(font, i)) == w0)
i ++;
if ((i - start) > 1)
{
// Encode a repeating sequence...
pdfioArrayAppendNumber(w_array, start);
pdfioArrayAppendNumber(w_array, i - 1);
pdfioArrayAppendNumber(w_array, w0);
}
else
{
// Encode a non-repeating sequence...
pdfioArrayAppendNumber(w_array, start);
if ((temp_array = pdfioArrayCreate(pdf)) == NULL)
{
ttfDelete(font);
return (NULL);
}
pdfioArrayAppendNumber(temp_array, w0);
for (w0 = w1, i ++; i < 65536; w0 = w1, i ++)
{
if ((w1 = ttfGetWidth(font, i)) == w0 && i < 65535)
break;
pdfioArrayAppendNumber(temp_array, w0);
}
if (i == 65536)
pdfioArrayAppendNumber(temp_array, w0);
else
i --;
pdfioArrayAppendArray(w_array, temp_array);
}
}
// CIDSystemInfo mapping to Adobe UCS2 v0 (Unicode)
pdfioDictSetString(sidict, "Registry", "Adobe");
pdfioDictSetString(sidict, "Ordering", "Identity");
@ -1493,6 +1570,7 @@ pdfioFileCreateFontObjFromFile(
pdfioDictSetDict(type2, "CIDSystemInfo", sidict);
pdfioDictSetObj(type2, "CIDToGIDMap", cid2gid_obj);
pdfioDictSetObj(type2, "FontDescriptor", desc_obj);
pdfioDictSetArray(type2, "W", w_array);
if ((type2_obj = pdfioFileCreateObj(pdf, type2)) == NULL)
{
@ -2975,7 +3053,7 @@ write_string(pdfio_stream_t *st, // I - Stream
// Start the string...
if (!pdfioStreamPuts(st, unicode ? "<" : "("))
if (!pdfioStreamPuts(st, unicode ? "<FEFF" : "("))
return (false);
// Loop through the string, handling UTF-8 as needed...

View File

@ -1,7 +1,7 @@
//
// Public content header file for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -9,18 +9,7 @@
#ifndef PDFIO_CONTENT_H
# define PDFIO_CONTENT_H
//
// Include necessary headers...
//
# include "pdfio.h"
//
// C++ magic...
//
# ifdef __cplusplus
extern "C" {
# endif // __cplusplus
@ -91,6 +80,7 @@ extern bool pdfioContentPathClose(pdfio_stream_t *st) _PDFIO_PUBLIC;
extern bool pdfioContentPathCurve(pdfio_stream_t *st, double x1, double y1, double x2, double y2, double x3, double y3) _PDFIO_PUBLIC;
extern bool pdfioContentPathCurve13(pdfio_stream_t *st, double x1, double y1, double x3, double y3) _PDFIO_PUBLIC;
extern bool pdfioContentPathCurve23(pdfio_stream_t *st, double x2, double y2, double x3, double y3) _PDFIO_PUBLIC;
extern bool pdfioContentPathEnd(pdfio_stream_t *st) _PDFIO_PUBLIC;
extern bool pdfioContentPathLineTo(pdfio_stream_t *st, double x, double y) _PDFIO_PUBLIC;
extern bool pdfioContentPathMoveTo(pdfio_stream_t *st, double x, double y) _PDFIO_PUBLIC;
extern bool pdfioContentPathRect(pdfio_stream_t *st, double x, double y, double width, double height) _PDFIO_PUBLIC;
@ -150,10 +140,6 @@ extern bool pdfioPageDictAddFont(pdfio_dict_t *dict, const char *name, pdfio_ob
extern bool pdfioPageDictAddImage(pdfio_dict_t *dict, const char *name, pdfio_obj_t *obj) _PDFIO_PUBLIC;
//
// C++ magic...
//
# ifdef __cplusplus
}
# endif // __cplusplus

View File

@ -1,7 +1,7 @@
//
// Cryptographic support functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -446,6 +446,7 @@ _pdfio_crypto_cb_t // O - Decryption callback or `NULL` for none
// Initialize the RC4 context using 40 bits of the digest...
_pdfioCryptoRC4Init(&ctx->rc4, digest, 5);
*ivlen = 0;
return ((_pdfio_crypto_cb_t)_pdfioCryptoRC4Crypt);
case PDFIO_ENCRYPTION_RC4_128 :
@ -663,7 +664,12 @@ _pdfioCryptoUnlock(
length = 128;
}
}
// TODO: Implement AES-256 - V6 R6
else if (version == 6 && revision == 6)
{
// TODO: Implement AES-256 - V6 R6
pdf->encryption = PDFIO_ENCRYPTION_AES_256;
length = 256;
}
PDFIO_DEBUG("_pdfioCryptoUnlock: encryption=%d, length=%d\n", pdf->encryption, length);
@ -788,6 +794,8 @@ _pdfioCryptoUnlock(
else
{
// TODO: Implement AES-256 security handler
_pdfioFileError(pdf, "Unable to unlock AES-256 encrypted file at this time.");
return (false);
}
// If we get here we need to try another password...

View File

@ -1,7 +1,7 @@
//
// PDF dictionary functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -464,6 +464,47 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary
}
//
// 'pdfioDictIterateKeys()' - Iterate the keys in a dictionary.
//
// This function visits each key stored in "dict" in storage order, passing it
// to the supplied callback function "cb" along with "cb_data":
//
// ```
// bool
// my_dict_cb(pdfio_dict_t *dict, const char *key, void *cb_data)
// {
//   ... "key" contains the dictionary key ...
//   ... return true to continue or false to stop ...
// }
// ```
//
// Iteration stops when the callback returns `false` or after every key has
// been visited.  Nothing is done if "dict" or "cb" is `NULL`.
//

void
pdfioDictIterateKeys(
    pdfio_dict_t    *dict,		// I - Dictionary
    pdfio_dict_cb_t cb,			// I - Callback function
    void            *cb_data)		// I - Callback data
{
  size_t	remaining;		// Number of pairs left to visit
  _pdfio_pair_t	*current;		// Pair being visited


  // Nothing to do without both a dictionary and a callback...
  if (dict == NULL || cb == NULL)
    return;

  // Snapshot the pair count and starting pair, then walk forward until the
  // callback asks to stop or every pair has been visited...
  remaining = dict->num_pairs;
  current   = dict->pairs;

  while (remaining > 0)
  {
    if (!cb(dict, current->key, cb_data))
      return;

    current ++;
    remaining --;
  }
}
//
// '_pdfioDictRead()' - Read a dictionary from a PDF file.
//
@ -473,7 +514,8 @@ _pdfioDictGetValue(pdfio_dict_t *dict, // I - Dictionary
pdfio_dict_t * // O - New dictionary
_pdfioDictRead(pdfio_file_t *pdf, // I - PDF file
pdfio_obj_t *obj, // I - Object, if any
_pdfio_token_t *tb) // I - Token buffer/stack
_pdfio_token_t *tb, // I - Token buffer/stack
size_t depth) // I - Depth of dictionary
{
pdfio_dict_t *dict; // New dictionary
char key[256]; // Dictionary key
@ -499,9 +541,16 @@ _pdfioDictRead(pdfio_file_t *pdf, // I - PDF file
_pdfioFileError(pdf, "Invalid dictionary contents.");
break;
}
else if (_pdfioDictGetValue(dict, key + 1))
{
_pdfioFileError(pdf, "Duplicate dictionary key '%s'.", key + 1);
return (NULL);
}
// Then get the next value...
if (!_pdfioValueRead(pdf, obj, tb, &value))
PDFIO_DEBUG("_pdfioDictRead: Reading value for '%s'.\n", key + 1);
if (!_pdfioValueRead(pdf, obj, tb, &value, depth))
{
_pdfioFileError(pdf, "Missing value for dictionary key.");
break;
@ -706,7 +755,7 @@ pdfioDictSetNull(pdfio_dict_t *dict, // I - Dictionary
bool // O - `true` on success, `false` on failure
pdfioDictSetNumber(pdfio_dict_t *dict, // I - Dictionary
const char *key, // I - Key
double value) // I - Value
double value) // I - Value
{
_pdfio_value_t temp; // New value
@ -890,9 +939,9 @@ _pdfioDictSetValue(
#ifdef DEBUG
PDFIO_DEBUG("_pdfioDictSetValue(%p): %lu pairs\n", (void *)dict, (unsigned long)dict->num_pairs);
PDFIO_DEBUG("_pdfioDictSetValue(%p): ", (void *)dict);
PDFIO_DEBUG_DICT(dict);
PDFIO_DEBUG("\n");
// PDFIO_DEBUG("_pdfioDictSetValue(%p): ", (void *)dict);
// PDFIO_DEBUG_DICT(dict);
// PDFIO_DEBUG("\n");
#endif // DEBUG
return (true);

View File

@ -1,7 +1,7 @@
//
// PDF file functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -24,8 +24,9 @@
static pdfio_obj_t *add_obj(pdfio_file_t *pdf, size_t number, unsigned short generation, off_t offset);
static int compare_objmaps(_pdfio_objmap_t *a, _pdfio_objmap_t *b);
static int compare_objs(pdfio_obj_t **a, pdfio_obj_t **b);
static const char *get_info_string(pdfio_file_t *pdf, const char *key);
static bool load_obj_stream(pdfio_obj_t *obj);
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj);
static bool load_pages(pdfio_file_t *pdf, pdfio_obj_t *obj, size_t depth);
static bool load_xref(pdfio_file_t *pdf, off_t xref_offset, pdfio_password_cb_t password_cb, void *password_data);
static bool write_catalog(pdfio_file_t *pdf);
static bool write_pages(pdfio_file_t *pdf);
@ -264,46 +265,26 @@ pdfioFileCreate(
// Write a standard PDF header...
if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version))
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
// Create the pages object...
if ((dict = pdfioDictCreate(pdf)) == NULL)
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
pdfioDictSetName(dict, "Type", "Pages");
if ((pdf->pages_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
// Create the info object...
if ((info_dict = pdfioDictCreate(pdf)) == NULL)
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
pdfioDictSetDate(info_dict, "CreationDate", time(NULL));
pdfioDictSetString(info_dict, "Producer", "pdfio/" PDFIO_VERSION);
if ((pdf->info_obj = pdfioFileCreateObj(pdf, info_dict)) == NULL)
{
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
goto error;
// Create random file ID values...
_pdfioCryptoMakeRandom(id_value, sizeof(id_value));
@ -315,6 +296,15 @@ pdfioFileCreate(
}
return (pdf);
// Common error handling code...
error:
pdfioFileClose(pdf);
unlink(filename);
return (NULL);
}
@ -535,41 +525,26 @@ pdfioFileCreateOutput(
// Write a standard PDF header...
if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version))
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
// Create the pages object...
if ((dict = pdfioDictCreate(pdf)) == NULL)
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
pdfioDictSetName(dict, "Type", "Pages");
if ((pdf->pages_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
// Create the info object...
if ((info_dict = pdfioDictCreate(pdf)) == NULL)
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
pdfioDictSetDate(info_dict, "CreationDate", time(NULL));
pdfioDictSetString(info_dict, "Producer", "pdfio/" PDFIO_VERSION);
if ((pdf->info_obj = pdfioFileCreateObj(pdf, info_dict)) == NULL)
{
pdfioFileClose(pdf);
return (NULL);
}
goto error;
// Create random file ID values...
_pdfioCryptoMakeRandom(id_value, sizeof(id_value));
@ -581,6 +556,13 @@ pdfioFileCreateOutput(
}
return (pdf);
// Common error handling code...
error:
pdfioFileClose(pdf);
return (NULL);
}
@ -657,6 +639,194 @@ pdfioFileCreatePage(pdfio_file_t *pdf, // I - PDF file
}
//
// 'pdfioFileCreateTemporary()' - Create a temporary PDF file.
//
// This function creates a PDF file with a unique filename in the current
// temporary directory. The temporary file is stored in the string "buffer" an
// will have a ".pdf" extension. Otherwise, this function works the same as
// the @link pdfioFileCreate@ function.
//
// @since PDFio v1.1@
//
pdfio_file_t *
pdfioFileCreateTemporary(
char *buffer, // I - Filename buffer
size_t bufsize, // I - Size of filename buffer
const char *version, // I - PDF version number or `NULL` for default (2.0)
pdfio_rect_t *media_box, // I - Default MediaBox for pages
pdfio_rect_t *crop_box, // I - Default CropBox for pages
pdfio_error_cb_t error_cb, // I - Error callback or `NULL` for default
void *error_data) // I - Error callback data, if any
{
pdfio_file_t *pdf; // PDF file
pdfio_dict_t *dict; // Dictionary for pages object
pdfio_dict_t *info_dict; // Dictionary for information object
unsigned char id_value[16]; // File ID value
int i; // Looping var
const char *tmpdir; // Temporary directory
#if _WIN32 || defined(__APPLE__)
char tmppath[256]; // Temporary directory path
#endif // _WIN32 || __APPLE__
unsigned tmpnum; // Temporary filename number
// Range check input...
if (!buffer || bufsize < 32)
{
if (buffer)
*buffer = '\0';
return (NULL);
}
if (!version)
version = "2.0";
if (!error_cb)
{
error_cb = _pdfioFileDefaultError;
error_data = NULL;
}
// Allocate a PDF file structure...
if ((pdf = (pdfio_file_t *)calloc(1, sizeof(pdfio_file_t))) == NULL)
{
pdfio_file_t temp; // Dummy file
char message[8192]; // Message string
temp.filename = (char *)"temporary.pdf";
snprintf(message, sizeof(message), "Unable to allocate memory for PDF file - %s", strerror(errno));
(error_cb)(&temp, message, error_data);
*buffer = '\0';
return (NULL);
}
// Create the file...
#if _WIN32
if ((tmpdir = getenv("TEMP")) == NULL)
{
GetTempPathA(sizeof(tmppath), tmppath);
tmpdir = tmppath;
}
#elif defined(__APPLE__)
if ((tmpdir = getenv("TMPDIR")) != NULL && access(tmpdir, W_OK))
tmpdir = NULL;
if (!tmpdir)
{
// Grab the per-process temporary directory for sandboxed apps...
# ifdef _CS_DARWIN_USER_TEMP_DIR
if (confstr(_CS_DARWIN_USER_TEMP_DIR, tmppath, sizeof(tmppath)))
tmpdir = tmppath;
else
# endif // _CS_DARWIN_USER_TEMP_DIR
tmpdir = "/private/tmp";
}
#else
if ((tmpdir = getenv("TMPDIR")) == NULL || access(tmpdir, W_OK))
tmpdir = "/tmp";
#endif // _WIN32
for (i = 0; i < 1000; i ++)
{
_pdfioCryptoMakeRandom((uint8_t *)&tmpnum, sizeof(tmpnum));
snprintf(buffer, bufsize, "%s/%08x.pdf", tmpdir, tmpnum);
if ((pdf->fd = open(buffer, O_WRONLY | O_BINARY | O_CREAT | O_TRUNC | O_EXCL, 0666)) >= 0)
break;
}
pdf->filename = strdup(buffer);
if (i >= 1000)
{
_pdfioFileError(pdf, "Unable to create file - %s", strerror(errno));
free(pdf->filename);
free(pdf);
*buffer = '\0';
return (NULL);
}
pdf->version = strdup(version);
pdf->mode = _PDFIO_MODE_WRITE;
pdf->error_cb = error_cb;
pdf->error_data = error_data;
pdf->permissions = PDFIO_PERMISSION_ALL;
pdf->bufptr = pdf->buffer;
pdf->bufend = pdf->buffer + sizeof(pdf->buffer);
if (media_box)
{
pdf->media_box = *media_box;
}
else
{
// Default to "universal" size (intersection of A4 and US Letter)
pdf->media_box.x2 = 210.0 * 72.0f / 25.4f;
pdf->media_box.y2 = 11.0f * 72.0f;
}
if (crop_box)
{
pdf->crop_box = *crop_box;
}
else
{
// Default to "universal" size (intersection of A4 and US Letter)
pdf->crop_box.x2 = 210.0 * 72.0f / 25.4f;
pdf->crop_box.y2 = 11.0f * 72.0f;
}
// Write a standard PDF header...
if (!_pdfioFilePrintf(pdf, "%%PDF-%s\n%%\342\343\317\323\n", version))
goto error;
// Create the pages object...
if ((dict = pdfioDictCreate(pdf)) == NULL)
goto error;
pdfioDictSetName(dict, "Type", "Pages");
if ((pdf->pages_obj = pdfioFileCreateObj(pdf, dict)) == NULL)
goto error;
// Create the info object...
if ((info_dict = pdfioDictCreate(pdf)) == NULL)
goto error;
pdfioDictSetDate(info_dict, "CreationDate", time(NULL));
pdfioDictSetString(info_dict, "Producer", "pdfio/" PDFIO_VERSION);
if ((pdf->info_obj = pdfioFileCreateObj(pdf, info_dict)) == NULL)
goto error;
// Create random file ID values...
_pdfioCryptoMakeRandom(id_value, sizeof(id_value));
if ((pdf->id_array = pdfioArrayCreate(pdf)) != NULL)
{
pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value));
pdfioArrayAppendBinary(pdf->id_array, id_value, sizeof(id_value));
}
return (pdf);
// Common error handling code...
error:
pdfioFileClose(pdf);
unlink(buffer);
*buffer = '\0';
return (NULL);
}
//
// '_pdfioFileFindMappedObj()' - Find a mapped object.
//
@ -723,7 +893,7 @@ pdfioFileFindObj(
const char * // O - Author or `NULL` for none
pdfioFileGetAuthor(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Author") : NULL);
return (get_info_string(pdf, "Author"));
}
@ -735,7 +905,7 @@ time_t // O - Creation date or `0` for none
pdfioFileGetCreationDate(
pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetDate(pdf->info_obj->value.value.dict, "CreationDate") : 0);
return (pdf && pdf->info_obj ? pdfioDictGetDate(pdfioObjGetDict(pdf->info_obj), "CreationDate") : 0);
}
@ -746,7 +916,7 @@ pdfioFileGetCreationDate(
const char * // O - Creator string or `NULL` for none
pdfioFileGetCreator(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Creator") : NULL);
return (get_info_string(pdf, "Creator"));
}
@ -768,7 +938,7 @@ pdfioFileGetID(pdfio_file_t *pdf) // I - PDF file
const char * // O - Keywords string or `NULL` for none
pdfioFileGetKeywords(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Keywords") : NULL);
return (get_info_string(pdf, "Keywords"));
}
@ -872,7 +1042,7 @@ pdfioFileGetPermissions(
const char * // O - Producer string or `NULL` for none
pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Producer") : NULL);
return (get_info_string(pdf, "Producer"));
}
@ -883,7 +1053,7 @@ pdfioFileGetProducer(pdfio_file_t *pdf) // I - PDF file
const char * // O - Subject or `NULL` for none
pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Subject") : NULL);
return (get_info_string(pdf, "Subject"));
}
@ -894,7 +1064,7 @@ pdfioFileGetSubject(pdfio_file_t *pdf) // I - PDF file
const char * // O - Title or `NULL` for none
pdfioFileGetTitle(pdfio_file_t *pdf) // I - PDF file
{
return (pdf && pdf->info_obj ? pdfioDictGetString(pdf->info_obj->value.value.dict, "Title") : NULL);
return (get_info_string(pdf, "Title"));
}
@ -1237,6 +1407,51 @@ compare_objs(pdfio_obj_t **a, // I - First object
}
//
// 'get_info_string()' - Get a string value from the Info dictionary.
//
// Name and string values are returned directly.  Binary string values, which
// occur in encrypted PDF files, are converted in place to regular C strings
// the first time they are requested, provided they are shorter than 4096
// bytes.  `NULL` is returned when there is no Info dictionary, the key is
// missing, the value has any other type, or the conversion fails.
//

static const char *			// O - String or `NULL` if not found
get_info_string(pdfio_file_t *pdf,	// I - PDF file
                const char   *key)	// I - Dictionary key
{
  pdfio_dict_t	*dict;			// Info dictionary
  _pdfio_value_t *value;		// Value


  // Range check input...
  if (!pdf || !pdf->info_obj || (dict = pdfioObjGetDict(pdf->info_obj)) == NULL || (value = _pdfioDictGetValue(dict, key)) == NULL)
    return (NULL);

  // If we already have a value, return it...
  if (value->type == PDFIO_VALTYPE_NAME || value->type == PDFIO_VALTYPE_STRING)
  {
    return (value->value.string);
  }
  else if (value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096)
  {
    // Convert binary string to regular string...
    char	temp[4096];		// Temporary string
    const char	*converted;		// Converted string

    memcpy(temp, value->value.binary.data, value->value.binary.datalen);
    temp[value->value.binary.datalen] = '\0';

    // Create the replacement first so that a failure leaves the original
    // binary value untouched instead of a STRING value holding NULL...
    if ((converted = pdfioStringCreate(pdf, temp)) == NULL)
      return (NULL);

    free(value->value.binary.data);
    value->type         = PDFIO_VALTYPE_STRING;
    value->value.string = converted;

    return (value->value.string);
  }
  else
  {
    // Something else that is not a string...
    return (NULL);
  }
}
//
// 'load_obj_stream()' - Load an object stream.
//
@ -1299,6 +1514,7 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
// Skip offset
_pdfioTokenGet(&tb, buffer, sizeof(buffer));
PDFIO_DEBUG("load_obj_stream: %ld at offset %s\n", (long)number, buffer);
}
if (!buffer[0])
@ -1312,7 +1528,7 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
// Read the objects themselves...
for (cur_obj = 0; cur_obj < num_objs; cur_obj ++)
{
if (!_pdfioValueRead(obj->pdf, obj, &tb, &(objs[cur_obj]->value)))
if (!_pdfioValueRead(obj->pdf, obj, &tb, &(objs[cur_obj]->value), 0))
{
pdfioStreamClose(st);
return (false);
@ -1332,7 +1548,8 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
static bool // O - `true` on success, `false` on error
load_pages(pdfio_file_t *pdf, // I - PDF file
pdfio_obj_t *obj) // I - Page object
pdfio_obj_t *obj, // I - Page object
size_t depth) // I - Depth of page tree
{
pdfio_dict_t *dict; // Page object dictionary
const char *type; // Node type
@ -1364,9 +1581,15 @@ load_pages(pdfio_file_t *pdf, // I - PDF file
size_t i, // Looping var
num_kids; // Number of elements in array
if (depth >= PDFIO_MAX_DEPTH)
{
_pdfioFileError(pdf, "Depth of pages objects too great to load.");
return (false);
}
for (i = 0, num_kids = pdfioArrayGetSize(kids); i < num_kids; i ++)
{
if (!load_pages(pdf, pdfioArrayGetObj(kids, i)))
if (!load_pages(pdf, pdfioArrayGetObj(kids, i), depth + 1))
return (false);
}
}
@ -1496,7 +1719,7 @@ load_xref(
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, obj, &tb, &trailer))
if (!_pdfioValueRead(pdf, obj, &tb, &trailer, 0))
{
_pdfioFileError(pdf, "Unable to read cross-reference stream dictionary.");
return (false);
@ -1515,6 +1738,10 @@ load_xref(
return (false);
}
PDFIO_DEBUG("load_xref: tb.bufptr=%p, tb.bufend=%p, tb.bufptr[0]=0x%02x, tb.bufptr[0]=0x%02x\n", tb.bufptr, tb.bufend, tb.bufptr[0], tb.bufptr[1]);
if (tb.bufptr && tb.bufptr < tb.bufend && (tb.bufptr[0] == 0x0d || tb.bufptr[0] == 0x0a))
tb.bufptr ++; // Skip trailing CR or LF after token
_pdfioTokenFlush(&tb);
obj->stream_offset = _pdfioFileTell(pdf);
@ -1537,9 +1764,9 @@ load_xref(
w_2 = w[0];
w_3 = w[0] + w[1];
if (w[1] == 0 || w[2] > 2 || w_total > sizeof(buffer))
if (w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
{
_pdfioFileError(pdf, "Cross-reference stream has invalid W key.");
_pdfioFileError(pdf, "Cross-reference stream has invalid W key [%u %u %u].", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2]);
return (false);
}
@ -1579,9 +1806,11 @@ load_xref(
}
}
// Offset
for (i = 1, offset = buffer[w_2]; i < w[1]; i ++)
offset = (offset << 8) | buffer[w_2 + i];
// Generation number
switch (w[2])
{
default :
@ -1593,6 +1822,19 @@ load_xref(
case 2 :
generation = (buffer[w_3] << 8) | buffer[w_3 + 1];
break;
case 3 :
// Issue #46: Stupid Microsoft PDF generator using 3 bytes to
// encode 16-bit generation numbers == 0 (probably a lazy coder
// stuffing things into an array of 64-bit unsigned integers)
generation = (buffer[w_3] << 16) | (buffer[w_3 + 1] << 8) | buffer[w_3 + 2];
if (generation > 65535)
generation = 65535;
break;
case 4 : // Even stupider :)
generation = (buffer[w_3] << 24) | (buffer[w_3 + 1] << 16) | (buffer[w_3 + 2] << 8) | buffer[w_3 + 3];
if (generation > 65535)
generation = 65535;
break;
}
// Create a placeholder for the object in memory...
@ -1624,8 +1866,18 @@ load_xref(
break;
}
if (i >= num_sobjs && num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
sobjs[num_sobjs ++] = (size_t)offset;
if (i >= num_sobjs)
{
if (num_sobjs < (sizeof(sobjs) / sizeof(sobjs[0])))
{
sobjs[num_sobjs ++] = (size_t)offset;
}
else
{
_pdfioFileError(pdf, "Too many object streams.");
return (false);
}
}
}
else if (!current)
{
@ -1673,12 +1925,12 @@ load_xref(
}
}
}
else if (!strcmp(line, "xref"))
else if (!strncmp(line, "xref", 4) && !line[4] || isspace(line[4] & 255))
{
// Read the xref tables
while (_pdfioFileGets(pdf, line, sizeof(line)))
{
if (!strcmp(line, "trailer"))
if (!strncmp(line, "trailer", 7) && !line[7] || isspace(line[7] & 255))
break;
else if (!line[0])
continue;
@ -1743,7 +1995,7 @@ load_xref(
}
}
if (strcmp(line, "trailer"))
if (strncmp(line, "trailer", 7))
{
_pdfioFileError(pdf, "Missing trailer.");
return (false);
@ -1751,7 +2003,7 @@ load_xref(
_pdfioTokenInit(&tb, pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, pdf);
if (!_pdfioValueRead(pdf, NULL, &tb, &trailer))
if (!_pdfioValueRead(pdf, NULL, &tb, &trailer, 0))
{
_pdfioFileError(pdf, "Unable to read trailer dictionary.");
return (false);
@ -1762,6 +2014,8 @@ load_xref(
return (false);
}
PDFIO_DEBUG("load_xref: Got trailer dict.\n");
_pdfioTokenFlush(&tb);
if (!pdf->trailer_dict)
@ -1803,7 +2057,7 @@ load_xref(
PDFIO_DEBUG("load_xref: Root=%p(%lu)\n", pdf->root_obj, (unsigned long)pdf->root_obj->number);
return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages")));
return (load_pages(pdf, pdfioDictGetObj(pdfioObjGetDict(pdf->root_obj), "Pages"), 0));
}

View File

@ -1,7 +1,7 @@
//
// PDF object functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -33,8 +33,14 @@ pdfioObjClose(pdfio_obj_t *obj) // I - Object
if (!obj)
return (false);
// Clear the current object pointer...
obj->pdf->current_obj = NULL;
if (obj->pdf->mode != _PDFIO_MODE_WRITE)
return (true); // Nothing to do when reading
{
// Nothing to do when reading
return (true);
}
// Write what remains for the object...
if (!obj->offset)
@ -165,6 +171,12 @@ pdfioObjCreateStream(
return (NULL);
}
if (obj->pdf->current_obj)
{
_pdfioFileError(obj->pdf, "Another object (%u) is already open.", (unsigned)obj->pdf->current_obj->number);
return (NULL);
}
// Write the header...
if (!_pdfioDictGetValue(obj->value.value.dict, "Length"))
{
@ -193,7 +205,8 @@ pdfioObjCreateStream(
if (!_pdfioFilePuts(obj->pdf, "stream\n"))
return (NULL);
obj->stream_offset = _pdfioFileTell(obj->pdf);
obj->stream_offset = _pdfioFileTell(obj->pdf);
obj->pdf->current_obj = obj;
// Return the new stream...
return (_pdfioStreamCreate(obj, length_obj, filter));
@ -407,12 +420,15 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object
}
ptr += 3;
while (*ptr && isspace(*ptr & 255))
ptr ++;
_pdfioFileConsume(obj->pdf, (size_t)(ptr - line));
// Then grab the object value...
_pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)_pdfioFileConsume, (_pdfio_tpeek_cb_t)_pdfioFilePeek, obj->pdf);
if (!_pdfioValueRead(obj->pdf, obj, &tb, &obj->value))
if (!_pdfioValueRead(obj->pdf, obj, &tb, &obj->value, 0))
{
_pdfioFileError(obj->pdf, "Unable to read value for object %lu.", (unsigned long)obj->number);
return (false);
@ -425,11 +441,15 @@ _pdfioObjLoad(pdfio_obj_t *obj) // I - Object
return (false);
}
PDFIO_DEBUG("_pdfioObjLoad: tb.bufptr=%p, tb.bufend=%p, tb.bufptr[0]=0x%02x, tb.bufptr[0]=0x%02x\n", tb.bufptr, tb.bufend, tb.bufptr[0], tb.bufptr[1]);
if (tb.bufptr && tb.bufptr < tb.bufend && (tb.bufptr[0] == 0x0d || tb.bufptr[0] == 0x0a))
tb.bufptr ++; // Skip trailing CR or LF after token
_pdfioTokenFlush(&tb);
if (!strcmp(line, "stream"))
{
// Yes, save its location...
// Yes, this is an embedded stream so save its location...
obj->stream_offset = _pdfioFileTell(obj->pdf);
PDFIO_DEBUG("_pdfioObjLoad: stream_offset=%lu.\n", (unsigned long)obj->stream_offset);
}
@ -454,6 +474,12 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
if (!obj)
return (NULL);
if (obj->pdf->current_obj)
{
_pdfioFileError(obj->pdf, "Another object (%u) is already open.", (unsigned)obj->pdf->current_obj->number);
return (NULL);
}
// Make sure we've loaded the object dictionary...
if (!obj->value.type)
{
@ -466,6 +492,8 @@ pdfioObjOpenStream(pdfio_obj_t *obj, // I - Object
return (NULL);
// Open the stream...
obj->pdf->current_obj = obj;
return (_pdfioStreamOpen(obj, decode));
}

View File

@ -1,7 +1,7 @@
//
// PDF page functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -14,6 +14,13 @@
#include "pdfio-private.h"
//
// Local functions...
//
static _pdfio_value_t *get_contents(pdfio_obj_t *page);
//
// 'pdfioPageCopy()' - Copy a page to a PDF file.
//
@ -47,3 +54,74 @@ pdfioPageCopy(pdfio_file_t *pdf, // I - PDF file
else
return (_pdfioFileAddPage(pdf, dstpage));
}
//
// 'pdfioPageGetNumStreams()' - Count the content streams of a page object.
//
// Returns 0 when no Contents value can be obtained for the page, the array
// size when Contents is an array, and 1 for any other Contents value.
//

size_t					// O - Number of streams
pdfioPageGetNumStreams(
    pdfio_obj_t *page)			// I - Page object
{
  _pdfio_value_t *page_contents;	// Contents value for the page


  page_contents = get_contents(page);

  if (page_contents == NULL)
    return (0);

  // Anything that is not an array counts as a single stream
  if (page_contents->type != PDFIO_VALTYPE_ARRAY)
    return (1);

  return (pdfioArrayGetSize(page_contents->value.array));
}
//
// 'pdfioPageOpenStream()' - Open a content stream for a page.
//
// "n" is the 0-based index of the content stream to open.  When the page's
// Contents value is an array, "n" selects an element of that array; otherwise
// only index 0 is valid and refers to the single Contents object.  `NULL` is
// returned when the page has no Contents value or "n" is out of range.
//

pdfio_stream_t *			// O - Stream
pdfioPageOpenStream(
    pdfio_obj_t *page,			// I - Page object
    size_t      n,			// I - Stream index (0-based)
    bool        decode)			// I - `true` to decode/decompress stream
{
  _pdfio_value_t *contents = get_contents(page);
					// Contents value


  if (!contents)
    return (NULL);

  if (contents->type == PDFIO_VALTYPE_ARRAY)
  {
    // Array of streams: an out-of-range index (including index 0 of an empty
    // array) must not fall through and read the value as an indirect
    // reference...
    if (n >= pdfioArrayGetSize(contents->value.array))
      return (NULL);

    return (pdfioObjOpenStream(pdfioArrayGetObj(contents->value.array, n), decode));
  }

  // Single Contents value: only index 0 exists...
  if (n)
    return (NULL);

  // NOTE(review): assumes a non-array Contents value is an indirect
  // reference - confirm whether other value types can reach this point.
  return (pdfioObjOpenStream(pdfioFileFindObj(page->pdf, contents->value.indirect.number), decode));
}
//
// 'get_contents()' - Look up the Contents value of a page object.
//
// The page object is loaded first when its value has not been read yet.  `NULL`
// is returned for a `NULL` page, a failed load, or a page value that is not a
// dictionary.
//

static _pdfio_value_t *			// O - Value or NULL on error
get_contents(pdfio_obj_t *page)		// I - Page object
{
  // Reject a missing page object...
  if (page == NULL)
    return (NULL);

  // Load the page object on demand, giving up if that fails...
  if (page->value.type == PDFIO_VALTYPE_NONE && !_pdfioObjLoad(page))
    return (NULL);

  // Only a dictionary can carry a Contents entry...
  if (page->value.type == PDFIO_VALTYPE_DICT)
    return (_pdfioDictGetValue(page->value.value.dict, "Contents"));

  return (NULL);
}

View File

@ -1,7 +1,7 @@
//
// Private header file for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -9,41 +9,22 @@
#ifndef PDFIO_PRIVATE_H
# define PDFIO_PRIVATE_H
//
// Include necessary headers...
//
# ifdef _WIN32
/*
* Disable bogus VS warnings/errors...
*/
# define _CRT_SECURE_NO_WARNINGS
# define _CRT_SECURE_NO_WARNINGS // Disable bogus VS warnings/errors...
# endif // _WIN32
# include "pdfio.h"
# include <stdarg.h>
# include <stdint.h>
# include <string.h>
# include <ctype.h>
# include <errno.h>
# include <inttypes.h>
# include <fcntl.h>
# ifdef _WIN32
# include <io.h>
# include <direct.h>
/*
* Microsoft renames the POSIX functions to _name, and introduces
* a broken compatibility layer using the original names. As a result,
* random crashes can occur when, for example, strdup() allocates memory
* from a different heap than used by malloc() and free().
*
* To avoid moronic problems like this, we #define the POSIX function
* names to the corresponding non-standard Microsoft names.
*/
# define access _access
# include <windows.h> // GetTempPathA
# define access _access // Map standard POSIX/C99 names
# define close _close
# define fileno _fileno
# define lseek _lseek
@ -56,25 +37,18 @@
# define unlink _unlink
# define vsnprintf _vsnprintf
# define write _write
/*
* Map various parameters for POSIX...
*/
# define F_OK 00
# define F_OK 00 // POSIX parameters/flags
# define W_OK 02
# define R_OK 04
# define O_RDONLY _O_RDONLY
# define O_RDONLY _O_RDONLY // Map standard POSIX open flags
# define O_WRONLY _O_WRONLY
# define O_CREAT _O_CREAT
# define O_TRUNC _O_TRUNC
# define O_BINARY _O_BINARY
# else // !_WIN32
# include <unistd.h>
# define O_BINARY 0
# define O_BINARY 0 // Map Windows-specific open flag
# endif // _WIN32
# include <string.h>
# include <ctype.h>
# include <zlib.h>
@ -116,6 +90,8 @@
// Types and constants...
//
# define PDFIO_MAX_DEPTH 32 // Maximum nesting depth for values
typedef enum _pdfio_mode_e // Read/write mode
{
_PDFIO_MODE_READ, // Read a PDF file
@ -287,7 +263,8 @@ struct _pdfio_file_s // PDF file structure
pdfio_dict_t **dicts; // Dictionaries
size_t num_objs, // Number of objects
alloc_objs; // Allocated objects
pdfio_obj_t **objs; // Objects
pdfio_obj_t **objs, // Objects
*current_obj; // Current object being written/read
size_t num_objmaps, // Number of object maps
alloc_objmaps; // Allocated object maps
_pdfio_objmap_t *objmaps; // Object maps
@ -341,7 +318,7 @@ struct _pdfio_stream_s // Stream
extern void _pdfioArrayDebug(pdfio_array_t *a, FILE *fp) _PDFIO_INTERNAL;
extern void _pdfioArrayDelete(pdfio_array_t *a) _PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioArrayGetValue(pdfio_array_t *a, size_t n) _PDFIO_INTERNAL;
extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts) _PDFIO_INTERNAL;
extern pdfio_array_t *_pdfioArrayRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts, size_t depth) _PDFIO_INTERNAL;
extern bool _pdfioArrayWrite(pdfio_array_t *a, pdfio_obj_t *obj) _PDFIO_INTERNAL;
extern void _pdfioCryptoAESInit(_pdfio_aes_t *ctx, const uint8_t *key, size_t keylen, const uint8_t *iv) _PDFIO_INTERNAL;
@ -365,7 +342,7 @@ extern void _pdfioDictClear(pdfio_dict_t *dict, const char *key) _PDFIO_INTERNA
extern void _pdfioDictDebug(pdfio_dict_t *dict, FILE *fp) _PDFIO_INTERNAL;
extern void _pdfioDictDelete(pdfio_dict_t *dict) _PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioDictGetValue(pdfio_dict_t *dict, const char *key) _PDFIO_INTERNAL;
extern pdfio_dict_t *_pdfioDictRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts) _PDFIO_INTERNAL;
extern pdfio_dict_t *_pdfioDictRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts, size_t depth) _PDFIO_INTERNAL;
extern bool _pdfioDictSetValue(pdfio_dict_t *dict, const char *key, _pdfio_value_t *value) _PDFIO_INTERNAL;
extern bool _pdfioDictWrite(pdfio_dict_t *dict, pdfio_obj_t *obj, off_t *length) _PDFIO_INTERNAL;
@ -405,7 +382,8 @@ extern bool _pdfioTokenRead(_pdfio_token_t *tb, char *buffer, size_t bufsize);
extern _pdfio_value_t *_pdfioValueCopy(pdfio_file_t *pdfdst, _pdfio_value_t *vdst, pdfio_file_t *pdfsrc, _pdfio_value_t *vsrc) _PDFIO_INTERNAL;
extern void _pdfioValueDebug(_pdfio_value_t *v, FILE *fp) _PDFIO_INTERNAL;
extern void _pdfioValueDelete(_pdfio_value_t *v) _PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts, _pdfio_value_t *v) _PDFIO_INTERNAL;
extern _pdfio_value_t *_pdfioValueRead(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_token_t *ts, _pdfio_value_t *v, size_t depth) _PDFIO_INTERNAL;
extern bool _pdfioValueWrite(pdfio_file_t *pdf, pdfio_obj_t *obj, _pdfio_value_t *v, off_t *length) _PDFIO_INTERNAL;
#endif // !PDFIO_PRIVATE_H

View File

@ -1,7 +1,7 @@
//
// SHA-256 functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
// Copyright © 2011 IETF Trust and the persons identified as authors of the
// code. All rights reserved.
//
@ -217,7 +217,7 @@ _pdfioCryptoSHA256Append(_pdfio_sha256_t *context, const uint8_t *message_array,
*/
void
_pdfioCryptoSHA256Finish(_pdfio_sha256_t *context,
uint8_t Message_Digest[SHA256HashSize])
uint8_t *Message_Digest)
{
SHA224_256ResultN(context, Message_Digest, SHA256HashSize);
}

View File

@ -1,7 +1,7 @@
//
// PDF stream functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -174,6 +174,8 @@ pdfioStreamClose(pdfio_stream_t *st) // I - Stream
done:
st->pdf->current_obj = NULL;
free(st->prbuffer);
free(st->psbuffer);
free(st);
@ -364,6 +366,13 @@ pdfioStreamConsume(pdfio_stream_t *st, // I - Stream
//
// 'pdfioStreamGetToken()' - Read a single PDF token from a stream.
//
// This function reads a single PDF token from a stream. Operator tokens,
// boolean values, and numbers are returned as-is in the provided string buffer.
// String values start with the opening parenthesis ('(') but have all escaping
// resolved and the terminating parenthesis removed. Hexadecimal string values
// start with the opening angle bracket ('<') and have all whitespace and the
// terminating angle bracket removed.
//
bool // O - `true` on success, `false` on EOF
pdfioStreamGetToken(
@ -372,6 +381,7 @@ pdfioStreamGetToken(
size_t bufsize) // I - Size of string buffer
{
_pdfio_token_t tb; // Token buffer/stack
bool ret; // Return value
// Range check input...
@ -381,7 +391,10 @@ pdfioStreamGetToken(
// Read using the token engine...
_pdfioTokenInit(&tb, st->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
return (_pdfioTokenRead(&tb, buffer, bufsize));
ret = _pdfioTokenRead(&tb, buffer, bufsize);
_pdfioTokenFlush(&tb);
return (ret);
}
@ -439,6 +452,7 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
return (NULL);
}
PDFIO_DEBUG("_pdfioStreamOpen: ivlen=%d\n", (int)ivlen);
if (ivlen > 0)
_pdfioFileConsume(st->pdf, ivlen);
@ -451,11 +465,19 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
// Try to decode/decompress the contents of this object...
const char *filter = pdfioDictGetName(dict, "Filter");
// Filter value
pdfio_array_t *fa = pdfioDictGetArray(dict, "Filter");
// Filter array
if (!filter && fa && pdfioArrayGetSize(fa) == 1)
{
// Support single-valued arrays...
filter = pdfioArrayGetName(fa, 0);
}
if (!filter)
{
// No single filter name, do we have a compound filter?
if (pdfioDictGetArray(dict, "Filter"))
if (fa)
{
// TODO: Implement compound filters...
_pdfioFileError(st->pdf, "Unsupported compound stream filter.");
@ -546,6 +568,7 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
else
st->predictor = _PDFIO_PREDICTOR_NONE;
PDFIO_DEBUG("_pdfioStreamOpen: pos=%ld\n", (long)_pdfioFileTell(st->pdf));
if (sizeof(st->cbuffer) > st->remaining)
rbytes = _pdfioFileRead(st->pdf, st->cbuffer, st->remaining);
else
@ -566,12 +589,6 @@ _pdfioStreamOpen(pdfio_obj_t *obj, // I - Object
st->flate.next_in = (Bytef *)st->cbuffer;
st->flate.avail_in = (uInt)rbytes;
if (st->cbuffer[0] == 0x0a)
{
st->flate.next_in ++; // Skip newline
st->flate.avail_in --;
}
PDFIO_DEBUG("_pdfioStreamOpen: avail_in=%u, cbuffer=<%02X%02X%02X%02X%02X%02X%02X%02X...>\n", st->flate.avail_in, st->cbuffer[0], st->cbuffer[1], st->cbuffer[2], st->cbuffer[3], st->cbuffer[4], st->cbuffer[5], st->cbuffer[6], st->cbuffer[7]);
if ((status = inflateInit(&(st->flate))) != Z_OK)
@ -995,6 +1012,7 @@ stream_read(pdfio_stream_t *st, // I - Stream
size_t bytes) // I - Number of bytes to read
{
ssize_t rbytes; // Bytes read
uInt avail_in, avail_out; // Previous flate values
if (st->filter == PDFIO_FILTER_NONE)
@ -1047,11 +1065,19 @@ stream_read(pdfio_stream_t *st, // I - Stream
st->flate.next_out = (Bytef *)buffer;
st->flate.avail_out = (uInt)bytes;
avail_in = st->flate.avail_in;
avail_out = st->flate.avail_out;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data: %s", zstrerror(status));
return (-1);
}
else if (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out)
{
_pdfioFileError(st->pdf, "Corrupt stream data.");
return (-1);
}
return (st->flate.next_out - (Bytef *)buffer);
}
@ -1100,12 +1126,15 @@ stream_read(pdfio_stream_t *st, // I - Stream
st->flate.avail_in = (uInt)rbytes;
}
avail_in = st->flate.avail_in;
avail_out = st->flate.avail_out;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data: %s", zstrerror(status));
return (-1);
}
else if (status == Z_STREAM_END)
else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
break;
}
@ -1167,12 +1196,15 @@ stream_read(pdfio_stream_t *st, // I - Stream
st->flate.avail_in = (uInt)rbytes;
}
avail_in = st->flate.avail_in;
avail_out = st->flate.avail_out;
if ((status = inflate(&(st->flate), Z_NO_FLUSH)) < Z_OK)
{
_pdfioFileError(st->pdf, "Unable to decompress stream data: %s", zstrerror(status));
return (-1);
}
else if (status == Z_STREAM_END)
else if (status == Z_STREAM_END || (avail_in == st->flate.avail_in && avail_out == st->flate.avail_out))
break;
}

View File

@ -1,7 +1,7 @@
//
// PDF token parsing functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -129,9 +129,20 @@ _pdfioTokenGet(_pdfio_token_t *tb, // I - Token buffer/stack
if (tb->num_tokens > 0)
{
// Yes, return it...
size_t len; // Length of token
tb->num_tokens --;
strncpy(buffer, tb->tokens[tb->num_tokens], bufsize - 1);
buffer[bufsize - 1] = '\0';
if ((len = strlen(tb->tokens[tb->num_tokens])) > (bufsize - 1))
{
// Value too large...
PDFIO_DEBUG("_pdfioTokenGet(tb=%p, buffer=%p, bufsize=%u): Token '%s' from stack too large.\n", tb, buffer, (unsigned)bufsize, tb->tokens[tb->num_tokens]);
*buffer = '\0';
return (false);
}
memcpy(buffer, tb->tokens[tb->num_tokens], len);
buffer[len] = '\0';
PDFIO_DEBUG("_pdfioTokenGet(tb=%p, buffer=%p, bufsize=%u): Popping '%s' from stack.\n", tb, buffer, (unsigned)bufsize, buffer);
@ -484,6 +495,13 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr++ = (char)ch;
break;
}
else if (ch == '>')
{
// Issue #46: Empty hex string from Microsoft PDF generator; treat as
// empty literal string...
*buffer = '(';
break;
}
else if (!isspace(ch & 255) && !isxdigit(ch & 255))
{
_pdfioFileError(tb->pdf, "Syntax error: '<%c'", ch);
@ -536,7 +554,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr = '\0';
PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
// PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
return (bufptr > buffer);
}
@ -573,6 +591,7 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + bytes;
#if 0
#ifdef DEBUG
unsigned char *ptr; // Pointer into buffer
@ -586,6 +605,7 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
}
PDFIO_DEBUG("'\n");
#endif // DEBUG
#endif // 0
}
// Return the next character...

View File

@ -1,7 +1,7 @@
//
// PDF value functions for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -196,7 +196,8 @@ _pdfio_value_t * // O - Value or `NULL` on error/EOF
_pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
pdfio_obj_t *obj, // I - Object, if any
_pdfio_token_t *tb, // I - Token buffer/stack
_pdfio_value_t *v) // I - Value
_pdfio_value_t *v, // I - Value
size_t depth) // I - Depth of value
{
char token[32768]; // Token buffer
#ifdef DEBUG
@ -218,7 +219,6 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
PDFIO_DEBUG("_pdfioValueRead(pdf=%p, obj=%p, v=%p)\n", pdf, obj, v);
(void)obj; // TODO: Implement decryption
if (!_pdfioTokenGet(tb, token, sizeof(token)))
return (NULL);
@ -226,15 +226,27 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
if (!strcmp(token, "["))
{
// Start of array
if (depth >= PDFIO_MAX_DEPTH)
{
_pdfioFileError(pdf, "Too many nested arrays.");
return (NULL);
}
v->type = PDFIO_VALTYPE_ARRAY;
if ((v->value.array = _pdfioArrayRead(pdf, obj, tb)) == NULL)
if ((v->value.array = _pdfioArrayRead(pdf, obj, tb, depth + 1)) == NULL)
return (NULL);
}
else if (!strcmp(token, "<<"))
{
// Start of dictionary
if (depth >= PDFIO_MAX_DEPTH)
{
_pdfioFileError(pdf, "Too many nested dictionaries.");
return (NULL);
}
v->type = PDFIO_VALTYPE_DICT;
if ((v->value.dict = _pdfioDictRead(pdf, obj, tb)) == NULL)
if ((v->value.dict = _pdfioDictRead(pdf, obj, tb, depth + 1)) == NULL)
return (NULL);
}
else if (!strncmp(token, "(D:", 3))
@ -271,6 +283,7 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
}
}
}
if (token[i])
{
// Just a string...
@ -354,6 +367,33 @@ _pdfioValueRead(pdfio_file_t *pdf, // I - PDF file
*dataptr++ = (unsigned char)d;
}
if (obj && pdf->encryption)
{
// Decrypt the string...
_pdfio_crypto_ctx_t ctx; // Decryption context
_pdfio_crypto_cb_t cb; // Decryption callback
size_t ivlen; // Number of initialization vector bytes
uint8_t temp[32768]; // Temporary buffer for decryption
size_t templen; // Number of actual data bytes
if (v->value.binary.datalen > (sizeof(temp) - 32))
{
_pdfioFileError(pdf, "Unable to read encrypted binary string - too long.");
return (false);
}
cb = _pdfioCryptoMakeReader(pdf, obj, &ctx, v->value.binary.data, &ivlen);
templen = (cb)(&ctx, temp, v->value.binary.data + ivlen, v->value.binary.datalen - ivlen);
// Copy the decrypted string back to the value and adjust the length...
memcpy(v->value.binary.data, temp, templen);
if (pdf->encryption >= PDFIO_ENCRYPTION_AES_128)
v->value.binary.datalen = templen - temp[templen - 1];
else
v->value.binary.datalen = templen;
}
}
else if (strchr("0123456789-+.", token[0]) != NULL)
{

28
pdfio.h
View File

@ -1,7 +1,7 @@
//
// Public header file for PDFio.
//
// Copyright © 2021 by Michael R Sweet.
// Copyright © 2021-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -9,27 +9,23 @@
#ifndef PDFIO_H
# define PDFIO_H
//
// Include necessary headers...
//
# include <stdio.h>
# include <stdlib.h>
# include <stdbool.h>
# include <sys/types.h>
# include <time.h>
//
// C++ magic...
//
# ifdef __cplusplus
extern "C" {
# endif // __cplusplus
//
// Version number...
//
# define PDFIO_VERSION "1.1.3"
//
// Visibility and other annotations...
//
@ -55,6 +51,8 @@ typedef struct _pdfio_array_s pdfio_array_t;
// Array of PDF values
typedef struct _pdfio_dict_s pdfio_dict_t;
// Key/value dictionary
typedef bool (*pdfio_dict_cb_t)(pdfio_dict_t *dict, const char *key, void *cb_data);
// Dictionary iterator callback
typedef struct _pdfio_file_s pdfio_file_t;
// PDF file
typedef bool (*pdfio_error_cb_t)(pdfio_file_t *pdf, const char *message, void *data);
@ -165,6 +163,7 @@ extern pdfio_obj_t *pdfioDictGetObj(pdfio_dict_t *dict, const char *key) _PDFIO_
extern pdfio_rect_t *pdfioDictGetRect(pdfio_dict_t *dict, const char *key, pdfio_rect_t *rect) _PDFIO_PUBLIC;
extern const char *pdfioDictGetString(pdfio_dict_t *dict, const char *key) _PDFIO_PUBLIC;
extern pdfio_valtype_t pdfioDictGetType(pdfio_dict_t *dict, const char *key) _PDFIO_PUBLIC;
extern void pdfioDictIterateKeys(pdfio_dict_t *dict, pdfio_dict_cb_t cb, void *cb_data) _PDFIO_PUBLIC;
extern bool pdfioDictSetArray(pdfio_dict_t *dict, const char *key, pdfio_array_t *value) _PDFIO_PUBLIC;
extern bool pdfioDictSetBinary(pdfio_dict_t *dict, const char *key, const unsigned char *value, size_t valuelen) _PDFIO_PUBLIC;
extern bool pdfioDictSetBoolean(pdfio_dict_t *dict, const char *key, bool value) _PDFIO_PUBLIC;
@ -185,6 +184,7 @@ extern pdfio_obj_t *pdfioFileCreateObj(pdfio_file_t *pdf, pdfio_dict_t *dict) _P
extern pdfio_file_t *pdfioFileCreateOutput(pdfio_output_cb_t output_cb, void *output_ctx, const char *version, pdfio_rect_t *media_box, pdfio_rect_t *crop_box, pdfio_error_cb_t error_cb, void *error_data) _PDFIO_PUBLIC;
// TODO: Add number, array, string, etc. versions of pdfioFileCreateObject?
extern pdfio_stream_t *pdfioFileCreatePage(pdfio_file_t *pdf, pdfio_dict_t *dict) _PDFIO_PUBLIC;
extern pdfio_file_t *pdfioFileCreateTemporary(char *buffer, size_t bufsize, const char *version, pdfio_rect_t *media_box, pdfio_rect_t *crop_box, pdfio_error_cb_t error_cb, void *error_data) _PDFIO_PUBLIC;
extern pdfio_obj_t *pdfioFileFindObj(pdfio_file_t *pdf, size_t number) _PDFIO_PUBLIC;
extern const char *pdfioFileGetAuthor(pdfio_file_t *pdf) _PDFIO_PUBLIC;
extern time_t pdfioFileGetCreationDate(pdfio_file_t *pdf) _PDFIO_PUBLIC;
@ -240,10 +240,6 @@ extern char *pdfioStringCreate(pdfio_file_t *pdf, const char *s) _PDFIO_PUBLIC
extern char *pdfioStringCreatef(pdfio_file_t *pdf, const char *format, ...) _PDFIO_FORMAT(2,3) _PDFIO_PUBLIC;
//
// C++ magic...
//
# ifdef __cplusplus
}
# endif // __cplusplus

View File

@ -2,5 +2,5 @@ Name: pdfio
Description: PDF read/write library
URL: https://www.msweet.org/pdfio
Requires: zlib >= 1.0
Libs: -L${prefix}/lib -lpdfio
Libs: -L${prefix}/lib -lpdfio -lm
Cflags: -I${prefix}/include

View File

@ -87,7 +87,7 @@
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
@ -101,7 +101,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
@ -115,7 +115,7 @@
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
@ -130,7 +130,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>PDFIO_VERSION="1.0b2";NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>

View File

@ -372,7 +372,7 @@
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.0.0;
CURRENT_PROJECT_VERSION = 1.1.2;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
@ -381,7 +381,6 @@
GCC_PREPROCESSOR_DEFINITIONS = (
"$(inherited)",
"DEBUG=1",
"'PDFIO_VERSION=\"$(CURRENT_PROJECT_VERSION)\"'",
);
GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
@ -397,7 +396,7 @@
GCC_WARN_UNUSED_LABEL = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MACOSX_DEPLOYMENT_TARGET = 10.14;
MACOSX_DEPLOYMENT_TARGET = 11.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
@ -450,7 +449,7 @@
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.0.0;
CURRENT_PROJECT_VERSION = 1.1.2;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_NS_ASSERTIONS = NO;
@ -459,7 +458,6 @@
GCC_NO_COMMON_BLOCKS = YES;
GCC_PREPROCESSOR_DEFINITIONS = (
"$(inherited)",
"'PDFIO_VERSION=\"$(CURRENT_PROJECT_VERSION)\"'",
);
GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
@ -475,7 +473,7 @@
GCC_WARN_UNUSED_LABEL = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MACOSX_DEPLOYMENT_TARGET = 10.14;
MACOSX_DEPLOYMENT_TARGET = 11.0;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
RUN_CLANG_STATIC_ANALYZER = YES;
@ -515,7 +513,7 @@
ENABLE_HARDENED_RUNTIME = YES;
GCC_DYNAMIC_NO_PIC = NO;
GCC_OPTIMIZATION_LEVEL = 0;
MACOSX_DEPLOYMENT_TARGET = 10.14;
MACOSX_DEPLOYMENT_TARGET = 11.0;
PRODUCT_BUNDLE_IDENTIFIER = org.msweet.testpdfio;
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
@ -529,7 +527,7 @@
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = "";
ENABLE_HARDENED_RUNTIME = YES;
MACOSX_DEPLOYMENT_TARGET = 10.14;
MACOSX_DEPLOYMENT_TARGET = 11.0;
PRODUCT_BUNDLE_IDENTIFIER = org.msweet.testpdfio;
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";

View File

@ -101,6 +101,7 @@ pdfioContentPathClose
pdfioContentPathCurve
pdfioContentPathCurve13
pdfioContentPathCurve23
pdfioContentPathEnd
pdfioContentPathLineTo
pdfioContentPathMoveTo
pdfioContentPathRect
@ -154,6 +155,7 @@ pdfioDictGetObj
pdfioDictGetRect
pdfioDictGetString
pdfioDictGetType
pdfioDictIterateKeys
pdfioDictSetArray
pdfioDictSetBinary
pdfioDictSetBoolean
@ -177,6 +179,7 @@ pdfioFileCreateImageObjFromFile
pdfioFileCreateObj
pdfioFileCreateOutput
pdfioFileCreatePage
pdfioFileCreateTemporary
pdfioFileFindObj
pdfioFileGetAuthor
pdfioFileGetCreationDate
@ -219,6 +222,8 @@ pdfioPageCopy
pdfioPageDictAddColorSpace
pdfioPageDictAddFont
pdfioPageDictAddImage
pdfioPageGetNumStreams
pdfioPageOpenStream
pdfioStreamClose
pdfioStreamConsume
pdfioStreamGetToken

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native</id>
<title>PDFio Library for VS2019+</title>
<version>1.0.0-b7</version>
<version>1.1.3</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
@ -12,11 +12,11 @@
<readme>build/native/README.md</readme>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2021 by Michael R Sweet</copyright>
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2023 by Michael R Sweet</copyright>
<tags>pdf file native</tags>
<dependencies>
<dependency id="pdfio_native.redist" version="1.0.0-b7" />
<dependency id="pdfio_native.redist" version="1.1.3" />
<dependency id="zlib_native.redist" version="1.2.11" />
</dependencies>
</metadata>

View File

@ -3,7 +3,7 @@
<metadata>
<id>pdfio_native.redist</id>
<title>PDFio Library for VS2019+</title>
<version>1.0.0-b7</version>
<version>1.1.3</version>
<authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
@ -12,9 +12,12 @@
<readme>build/native/README.md</readme>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2021 by Michael R Sweet</copyright>
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2023 by Michael R Sweet</copyright>
<tags>pdf file native</tags>
<dependencies>
<dependency id="zlib_native.redist" version="1.2.11" />
</dependencies>
</metadata>
<files>
<file src="doc\pdfio-128.png" target="build\native" />

95
pdfiototext.c Normal file
View File

@ -0,0 +1,95 @@
//
// PDF to text program for PDFio.
//
// Copyright © 2022 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
//
// Usage:
//
// ./pdfiototext FILENAME.pdf > FILENAME.txt
//
#include "pdfio.h"
#include <string.h>
//
// 'main()' - Main entry.
//
int                                     // O - Exit status
main(int  argc,                         // I - Number of command-line arguments
     char *argv[])                      // I - Command-line arguments
{
  pdfio_file_t   *file;                 // PDF file
  size_t         i, j,                  // Looping vars
                 num_pages,             // Number of pages
                 num_streams;           // Number of streams for page
  pdfio_obj_t    *obj;                  // Current page object
  pdfio_stream_t *st;                   // Current page content stream
  char           buffer[1024];          // String buffer
  bool           first;                 // First string token?


  // Verify command-line arguments...
  if (argc != 2)
  {
    // The documented invocation redirects stdout into the text file, so the
    // usage message is written to stderr where the user will actually see it.
    fputs("Usage: pdfiototext FILENAME.pdf > FILENAME.txt\n", stderr);
    return (1);
  }

  // Open the PDF file...
  if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
    return (1);

  // Try grabbing content from all of the pages; pages and streams that
  // cannot be opened are skipped rather than treated as fatal...
  for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
  {
    if ((obj = pdfioFileGetPage(file, i)) == NULL)
      continue;

    num_streams = pdfioPageGetNumStreams(obj);

    for (j = 0; j < num_streams; j ++)
    {
      // NOTE(review): the `true` argument presumably asks for decoded
      // (uncompressed) stream data - confirm against pdfioPageOpenStream.
      if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
        continue;

      // "first" tracks whether anything has been written on the current
      // output line so that strings are separated by single spaces...
      first = true;

      while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
      {
        if (buffer[0] == '(')
        {
          // String token: emit the text after the leading '(' marker...
          if (first)
            first = false;
          else
            putchar(' ');

          fputs(buffer + 1, stdout);
        }
        else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
        {
          // PDF text operators that reposition or advance to the next line
          // are rendered as a line break in the output...
          putchar('\n');
          first = true;
        }
      }

      // Terminate a partially written line before moving to the next stream...
      if (!first)
        putchar('\n');

      pdfioStreamClose(st);
    }
  }

  pdfioFileClose(file);

  return (0);
}

View File

@ -34,6 +34,7 @@ static int do_test_file(const char *filename, int objnum, bool verbose);
static int do_unit_tests(void);
static int draw_image(pdfio_stream_t *st, const char *name, double x, double y, double w, double h, const char *label);
static bool error_cb(pdfio_file_t *pdf, const char *message, bool *error);
static bool iterate_cb(pdfio_dict_t *dict, const char *key, void *cb_data);
static ssize_t output_cb(int *fd, const void *buffer, size_t bytes);
static const char *password_cb(void *data, const char *filename);
static int read_unit_file(const char *filename, size_t num_pages, size_t first_image, bool is_output);
@ -405,7 +406,7 @@ do_test_file(const char *filename, // I - PDF filename
filter = pdfioDictGetName(dict, "Filter");
if ((st = pdfioObjOpenStream(obj, (filter && !strcmp(filter, "FlateDecode")) ? PDFIO_FILTER_FLATE : PDFIO_FILTER_NONE)) == NULL)
if ((st = pdfioObjOpenStream(obj, filter && !strcmp(filter, "FlateDecode"))) == NULL)
{
_pdfioValueDebug(&obj->value, stdout);
putchar('\n');
@ -454,7 +455,7 @@ do_test_file(const char *filename, // I - PDF filename
}
}
printf(" Page #%d is %gx%g.\n", (int)n + 1, media_box.x2, media_box.y2);
printf(" Page #%d (obj %d) is %gx%g.\n", (int)n + 1, (int)pdfioObjGetNumber(obj), media_box.x2, media_box.y2);
}
}
@ -506,6 +507,9 @@ do_unit_tests(void)
_pdfio_value_t value; // Value
size_t first_image, // First image object
num_pages; // Number of pages written
char temppdf[1024]; // Temporary PDF file
pdfio_dict_t *dict; // Test dictionary
int count = 0; // Number of key/value pairs
static const char *complex_dict = // Complex dictionary value
"<</Annots 5457 0 R/Contents 5469 0 R/CropBox[0 0 595.4 842]/Group 725 0 R"
"/MediaBox[0 0 595.4 842]/Parent 23513 0 R/Resources<</ColorSpace<<"
@ -967,11 +971,46 @@ do_unit_tests(void)
// TODO: Test for known values in this test file.
// Test dictionary APIs
fputs("pdfioDictCreate: ", stdout);
if ((dict = pdfioDictCreate(inpdf)) != NULL)
{
puts("PASS");
fputs("pdfioDictSet*: ", stdout);
if (pdfioDictSetBoolean(dict, "Boolean", true) && pdfioDictSetName(dict, "Name", "Name") && pdfioDictSetNumber(dict, "Number", 42.0) && pdfioDictSetString(dict, "String", "String"))
{
puts("PASS");
}
else
{
puts("FAIL");
return (1);
}
fputs("pdfioDictIterateKeys: ", stdout);
pdfioDictIterateKeys(dict, iterate_cb, &count);
if (count == 4)
{
puts("PASS");
}
else
{
printf("FAIL (got %d, expected 4)\n", count);
return (1);
}
}
else
{
puts("FAIL");
return (1);
}
// Test the value parsers for edge cases...
fputs("_pdfioValueRead(complex_dict): ", stdout);
s = complex_dict;
_pdfioTokenInit(&tb, inpdf, (_pdfio_tconsume_cb_t)token_consume_cb, (_pdfio_tpeek_cb_t)token_peek_cb, (void *)&s);
if (_pdfioValueRead(inpdf, NULL, &tb, &value))
if (_pdfioValueRead(inpdf, NULL, &tb, &value, 0))
{
// TODO: Check value...
fputs("PASS: ", stdout);
@ -985,7 +1024,7 @@ do_unit_tests(void)
fputs("_pdfioValueRead(cid_dict): ", stdout);
s = cid_dict;
_pdfioTokenInit(&tb, inpdf, (_pdfio_tconsume_cb_t)token_consume_cb, (_pdfio_tpeek_cb_t)token_peek_cb, (void *)&s);
if (_pdfioValueRead(inpdf, NULL, &tb, &value))
if (_pdfioValueRead(inpdf, NULL, &tb, &value, 0))
{
// TODO: Check value...
fputs("PASS: ", stdout);
@ -1107,6 +1146,18 @@ do_unit_tests(void)
if (read_unit_file("testpdfio-aesp.pdf", num_pages, first_image, false))
return (1);
fputs("pdfioFileCreateTemporary: ", stdout);
if ((outpdf = pdfioFileCreateTemporary(temppdf, sizeof(temppdf), NULL, NULL, NULL, (pdfio_error_cb_t)error_cb, &error)) != NULL)
printf("PASS (%s)\n", temppdf);
else
return (1);
if (write_unit_file(inpdf, outpdf, &num_pages, &first_image))
return (1);
if (read_unit_file(temppdf, num_pages, first_image, false))
return (1);
pdfioFileClose(inpdf);
return (0);
@ -1199,6 +1250,27 @@ error_cb(pdfio_file_t *pdf, // I - PDF file
}
//
// 'iterate_cb()' - Test pdfioDictIterateKeys function.
//
static bool                             // O - `true` to continue, `false` to stop
iterate_cb(pdfio_dict_t *dict,          // I - Dictionary
           const char   *key,           // I - Key
           void         *cb_data)       // I - Callback data
{
  // Stop the iteration when any argument is missing...
  if (dict == NULL || key == NULL || cb_data == NULL)
    return (false);

  // cb_data points at the caller's int counter; tally one more key...
  *(int *)cb_data += 1;

  return (true);
}
//
// 'output_cb()' - Write output to a file.
//
@ -1238,6 +1310,7 @@ read_unit_file(const char *filename, // I - File to read
{
pdfio_file_t *pdf; // PDF file
size_t i; // Looping var
const char *s; // String
bool error = false; // Error callback data
@ -1248,6 +1321,87 @@ read_unit_file(const char *filename, // I - File to read
else
return (1);
// Verify metadata...
fputs("pdfioFileGetAuthor: ", stdout);
if ((s = pdfioFileGetAuthor(pdf)) != NULL && !strcmp(s, "Michael R Sweet"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'Michael R Sweet')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'Michael R Sweet')");
return (1);
}
fputs("pdfioFileGetCreator: ", stdout);
if ((s = pdfioFileGetCreator(pdf)) != NULL && !strcmp(s, "testpdfio"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'testpdfio')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'testpdfio')");
return (1);
}
fputs("pdfioFileGetKeywords: ", stdout);
if ((s = pdfioFileGetKeywords(pdf)) != NULL && !strcmp(s, "one fish,two fish,red fish,blue fish"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'one fish,two fish,red fish,blue fish')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'one fish,two fish,red fish,blue fish')");
return (1);
}
fputs("pdfioFileGetSubject: ", stdout);
if ((s = pdfioFileGetSubject(pdf)) != NULL && !strcmp(s, "Unit test document"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'Unit test document')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'Unit test document')");
return (1);
}
fputs("pdfioFileGetTitle: ", stdout);
if ((s = pdfioFileGetTitle(pdf)) != NULL && !strcmp(s, "Test Document"))
{
puts("PASS");
}
else if (s)
{
printf("FAIL (got '%s', expected 'Test Document')\n", s);
return (1);
}
else
{
puts("FAIL (got NULL, expected 'Test Document')");
return (1);
}
// Verify the number of pages is the same...
fputs("pdfioFileGetNumPages: ", stdout);
if (num_pages == pdfioFileGetNumPages(pdf))
@ -2237,24 +2391,11 @@ write_font_test(pdfio_file_t *pdf, // I - PDF file
};
#if 0
if (unicode)
{
fputs("pdfioFileCreateFontObjFromFile(NotoSansJP-Regular.otf): ", stdout);
if ((opensans = pdfioFileCreateFontObjFromFile(pdf, "testfiles/NotoSansJP-Regular.otf", true)) != NULL)
puts("PASS");
else
return (1);
}
fputs("pdfioFileCreateFontObjFromFile(OpenSans-Regular.ttf): ", stdout);
if ((opensans = pdfioFileCreateFontObjFromFile(pdf, "testfiles/OpenSans-Regular.ttf", unicode)) != NULL)
puts("PASS");
else
#endif // 0
{
fputs("pdfioFileCreateFontObjFromFile(OpenSans-Regular.ttf): ", stdout);
if ((opensans = pdfioFileCreateFontObjFromFile(pdf, "testfiles/OpenSans-Regular.ttf", unicode)) != NULL)
puts("PASS");
else
return (1);
}
return (1);
fputs("pdfioDictCreate: ", stdout);
if ((dict = pdfioDictCreate(pdf)) != NULL)
@ -3167,13 +3308,13 @@ write_unit_file(
// Create some image objects...
fputs("pdfioFileCreateImageObjFromFile(\"testfiles/color.jpg\"): ", stdout);
if ((color_jpg = pdfioFileCreateImageObjFromFile(outpdf, "testfiles/color.jpg", true)) != NULL)
puts("PASS");
printf("PASS (%u)\n", (unsigned)pdfioObjGetNumber(color_jpg));
else
return (1);
fputs("pdfioFileCreateImageObjFromFile(\"testfiles/gray.jpg\"): ", stdout);
if ((gray_jpg = pdfioFileCreateImageObjFromFile(outpdf, "testfiles/gray.jpg", true)) != NULL)
puts("PASS");
printf("PASS (%u)\n", (unsigned)pdfioObjGetNumber(gray_jpg));
else
return (1);

305
testttf.c Normal file
View File

@ -0,0 +1,305 @@
//
// Unit test program for TTF library
//
// https://github.com/michaelrsweet/ttf
//
// Copyright © 2018-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
//
// Usage:
//
// ./testttf [FILENAME]
//
#include <stdio.h>
#include "ttf.h"
//
// Local functions...
//
static void error_cb(void *data, const char *message);
static int test_font(const char *filename);
//
// 'main()' - Main entry for unit tests.
//
int                                     // O - Exit status
main(int  argc,                         // I - Number of command-line arguments
     char *argv[])                      // I - Command-line arguments
{
  int failures = 0;                     // Running total of failed checks
  int n;                                // Index of the font being tested
  static const char * const bundled[] = // Fonts tested when no arguments are given
  {
    "testfiles/OpenSans-Bold.ttf",
    "testfiles/OpenSans-Regular.ttf",
    "testfiles/NotoSansJP-Regular.otf"
  };


  if (argc <= 1)
  {
    // No filenames supplied: test with the bundled TrueType files...
    for (n = 0; n < (int)(sizeof(bundled) / sizeof(bundled[0])); n ++)
      failures += test_font(bundled[n]);
  }
  else
  {
    // Test every font named on the command-line...
    for (n = 1; n < argc; n ++)
      failures += test_font(argv[n]);
  }

  // Summarize; the failure count doubles as the exit status...
  if (failures)
    printf("\n%d TEST(S) FAILED\n", failures);
  else
    puts("\nALL TESTS PASSED");

  return (failures);
}
//
// 'error_cb()' - Error callback.
//
static void
error_cb(void       *data,              // I - User data (not used)
         const char *message)           // I - Message string
{
  // The callback signature requires a user data pointer, but this test
  // program has no state to pass; reference it so that builds with unused
  // parameter warnings enabled stay clean.
  (void)data;

  // Report the failure on stderr in the same "FAIL (...)" form the tests use...
  fprintf(stderr, "FAIL (%s)\n", message);
}
//
// 'test_font()' - Test a font file.
//
static int                              // O - Number of errors
test_font(const char *filename)         // I - Font filename
{
  int           i,                      // Looping var
                errors = 0;             // Number of errors
  ttf_t         *font;                  // Font
  const char    *value;                 // Font (string) value
  int           intvalue;               // Font (integer) value
  float         realvalue;              // Font (real) value
  ttf_rect_t    bounds;                 // Bounds
  ttf_rect_t    extents;                // Extents
  size_t        num_fonts;              // Number of fonts
  ttf_style_t   style;                  // Font style
  ttf_weight_t  weight;                 // Font weight
  static const char * const stretches[] =
  {                                     // Font stretch strings
    "TTF_STRETCH_NORMAL",               // normal
    "TTF_STRETCH_ULTRA_CONDENSED",      // ultra-condensed
    "TTF_STRETCH_EXTRA_CONDENSED",      // extra-condensed
    "TTF_STRETCH_CONDENSED",            // condensed
    "TTF_STRETCH_SEMI_CONDENSED",       // semi-condensed
    "TTF_STRETCH_SEMI_EXPANDED",        // semi-expanded
    "TTF_STRETCH_EXPANDED",             // expanded
    "TTF_STRETCH_EXTRA_EXPANDED",       // extra-expanded
    "TTF_STRETCH_ULTRA_EXPANDED"        // ultra-expanded
  };
  static const char * const strings[] = // Test strings
  {
    "Hello, World!",                    // English
    "مرحبا بالعالم!",                   // Arabic
    "Bonjour le monde!",                // French
    "Γειά σου Κόσμε!",                  // Greek
    "שלום עולם!",                       // Hebrew
    "Привет мир!",                      // Russian
    "こんにちは世界!"                   // Japanese
  };
  static const char * const styles[] =  // Font style names
  {
    "TTF_STYLE_NORMAL",
    "TTF_STYLE_ITALIC",
    "TTF_STYLE_OBLIQUE"
  };


  // Each check below prints "NAME: " followed by PASS/FAIL and bumps the
  // error count on failure; the total is returned to the caller.
  printf("ttfCreate(\"%s\"): ", filename);
  if ((font = ttfCreate(filename, 0, error_cb, NULL)) == NULL)
  {
    // Without a font every remaining check would only report a derived
    // failure, so count the load failure once and stop here.  error_cb was
    // passed to ttfCreate and is expected to have reported the reason.
    return (1);
  }

  puts("PASS");

  fputs("ttfGetAscent: ", stdout);
  if ((intvalue = ttfGetAscent(font)) > 0)
  {
    printf("PASS (%d)\n", intvalue);
  }
  else
  {
    printf("FAIL (%d)\n", intvalue);
    errors ++;
  }

  fputs("ttfGetBounds: ", stdout);
  if (ttfGetBounds(font, &bounds))
  {
    printf("PASS (%g %g %g %g)\n", bounds.left, bounds.bottom, bounds.right, bounds.top);
  }
  else
  {
    puts("FAIL");
    errors ++;
  }

  fputs("ttfGetCapHeight: ", stdout);
  if ((intvalue = ttfGetCapHeight(font)) > 0)
  {
    printf("PASS (%d)\n", intvalue);
  }
  else
  {
    printf("FAIL (%d)\n", intvalue);
    errors ++;
  }

  fputs("ttfGetCopyright: ", stdout);
  if ((value = ttfGetCopyright(font)) != NULL)
  {
    printf("PASS (%s)\n", value);
  }
  else
  {
    puts("FAIL");
    errors ++;
  }

  // Measure every sample string at 12 units to exercise several scripts...
  for (i = 0; i < (int)(sizeof(strings) / sizeof(strings[0])); i ++)
  {
    printf("ttfGetExtents(\"%s\"): ", strings[i]);
    if (ttfGetExtents(font, 12.0f, strings[i], &extents))
    {
      printf("PASS (%.1f %.1f %.1f %.1f)\n", extents.left, extents.bottom, extents.right, extents.top);
    }
    else
    {
      puts("FAIL");
      errors ++;
    }
  }

  fputs("ttfGetFamily: ", stdout);
  if ((value = ttfGetFamily(font)) != NULL)
  {
    printf("PASS (%s)\n", value);
  }
  else
  {
    puts("FAIL");
    errors ++;
  }

  fputs("ttfGetItalicAngle: ", stdout);
  if ((realvalue = ttfGetItalicAngle(font)) >= 0.0)
  {
    printf("PASS (%g)\n", realvalue);
  }
  else
  {
    printf("FAIL (%g)\n", realvalue);
    errors ++;
  }

  fputs("ttfGetNumFonts: ", stdout);
  if ((num_fonts = ttfGetNumFonts(font)) > 0)
  {
    printf("PASS (%u)\n", (unsigned)num_fonts);
  }
  else
  {
    puts("FAIL");
    errors ++;
  }

  fputs("ttfGetPostScriptName: ", stdout);
  if ((value = ttfGetPostScriptName(font)) != NULL)
  {
    printf("PASS (%s)\n", value);
  }
  else
  {
    puts("FAIL");
    errors ++;
  }

  // The stretch value doubles as the index into the name table above...
  fputs("ttfGetStretch: ", stdout);
  if ((intvalue = (int)ttfGetStretch(font)) >= TTF_STRETCH_NORMAL && intvalue <= TTF_STRETCH_ULTRA_EXPANDED)
  {
    printf("PASS (%s)\n", stretches[intvalue]);
  }
  else
  {
    printf("FAIL (%d)\n", intvalue);
    errors ++;
  }

  // Accept every style that has a name in the table (including oblique);
  // bounding by the table size also keeps the lookup in range.
  fputs("ttfGetStyle: ", stdout);
  if ((style = ttfGetStyle(font)) >= TTF_STYLE_NORMAL && (size_t)style < (sizeof(styles) / sizeof(styles[0])))
  {
    printf("PASS (%s)\n", styles[style]);
  }
  else
  {
    puts("FAIL");
    errors ++;
  }

  fputs("ttfGetVersion: ", stdout);
  if ((value = ttfGetVersion(font)) != NULL)
  {
    printf("PASS (%s)\n", value);
  }
  else
  {
    puts("FAIL");
    errors ++;
  }

  fputs("ttfGetWeight: ", stdout);
  if ((weight = ttfGetWeight(font)) >= 0)
  {
    printf("PASS (%u)\n", (unsigned)weight);
  }
  else
  {
    puts("FAIL");
    errors ++;
  }

  fputs("ttfGetWidth(' '): ", stdout);
  if ((intvalue = ttfGetWidth(font, ' ')) > 0)
  {
    printf("PASS (%d)\n", intvalue);
  }
  else
  {
    printf("FAIL (%d)\n", intvalue);
    errors ++;
  }

  fputs("ttfGetXHeight: ", stdout);
  if ((intvalue = ttfGetXHeight(font)) > 0)
  {
    printf("PASS (%d)\n", intvalue);
  }
  else
  {
    printf("FAIL (%d)\n", intvalue);
    errors ++;
  }

  // Either answer is valid here; the call is only exercised and reported...
  fputs("ttfIsFixedPitch: ", stdout);
  if (ttfIsFixedPitch(font))
    puts("PASS (true)");
  else
    puts("PASS (false)");

  ttfDelete(font);

  return (errors);
}

58
ttf.c
View File

@ -3,16 +3,12 @@
//
// https://github.com/michaelrsweet/ttf
//
// Copyright © 2018-2021 by Michael R Sweet.
// Copyright © 2018-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
//
//
// Include necessary headers...
//
#ifdef _WIN32
# define _CRT_SECURE_NO_WARNINGS
#endif // _WIN32
@ -75,7 +71,7 @@ typedef __int64 ssize_t; // POSIX type not present on Windows...
//
// DEBUG is typically defined for debug builds. TTF_DEBUG maps to printf when
// DEBUG is typically defined for debug builds. TTF_DEBUG maps to fprintf when
// DEBUG is defined and is a no-op otherwise...
//
@ -420,7 +416,7 @@ ttfCreate(const char *filename, // I - Filename
if (read_os_2(font, &os_2))
{
// Copy key values from OS/2 table...
static const ttf_stretch_t widths[] =
static const ttf_stretch_t stretches[] =
{
TTF_STRETCH_ULTRA_CONDENSED, // ultra-condensed
TTF_STRETCH_EXTRA_CONDENSED, // extra-condensed
@ -433,8 +429,8 @@ ttfCreate(const char *filename, // I - Filename
TTF_STRETCH_ULTRA_EXPANDED // ultra-expanded
};
if (os_2.usWidthClass >= 1 && os_2.usWidthClass <= (int)(sizeof(widths) / sizeof(widths[0])))
font->stretch = widths[os_2.usWidthClass - 1];
if (os_2.usWidthClass >= 1 && os_2.usWidthClass <= (int)(sizeof(stretches) / sizeof(stretches[0])))
font->stretch = stretches[os_2.usWidthClass - 1];
font->weight = (short)os_2.usWeightClass;
font->cap_height = os_2.sCapHeight;
@ -452,7 +448,7 @@ ttfCreate(const char *filename, // I - Filename
font->cap_height = font->ascent;
if (font->x_height == 0)
font->x_height = 3 * font->ascent / 5;
font->x_height = 3 * font->ascent / 5;
// Build a sparse glyph widths table...
font->min_char = -1;
@ -481,6 +477,11 @@ ttfCreate(const char *filename, // I - Filename
else
font->widths[bin][i & 255] = widths[glyph];
}
#ifdef DEBUG
if (i >= ' ' && i < 127)
TTF_DEBUG("ttfCreate: width['%c']=%d(%d)\n", (char)i, font->widths[0][i].width, font->widths[0][i].left_bearing);
#endif // DEBUG
}
// Cleanup and return the font...
@ -737,17 +738,6 @@ ttfGetFamily(ttf_t *font) // I - Font
}
//
// 'ttfIsFixedPitch()' - Determine whether a font is fixed pitch.
//
bool                                    // O - `true` when the font is fixed pitch, `false` otherwise
ttfIsFixedPitch(ttf_t *font)            // I - Font (may be `NULL`)
{
  // A missing font is reported as not fixed pitch...
  if (!font)
    return (false);

  // Otherwise report the flag stored in the font object...
  return (font->is_fixed);
}
//
// 'ttfGetItalicAngle()' - Get the italic angle.
//
@ -856,13 +846,14 @@ int // O - Width in 1000ths
ttfGetWidth(ttf_t *font, // I - Font
int ch) // I - Unicode character
{
int bin = ch >> 8; // Bin in widths array
int bin = ch >> 8; // Bin in widths array
// Range check input...
if (!font || ch < ' ' || ch == 0x7f)
return (0);
else if (font->widths[bin])
if (font->widths[bin])
return ((int)(1000.0f * font->widths[bin][ch & 255].width / font->units));
else if (font->widths[0]) // .notdef
return ((int)(1000.0f * font->widths[0][0].width / font->units));
@ -882,6 +873,17 @@ ttfGetXHeight(ttf_t *font) // I - Font
}
//
// 'ttfIsFixedPitch()' - Determine whether a font is fixed pitch.
//
bool                                    // O - `true` when the font is fixed pitch, `false` otherwise
ttfIsFixedPitch(ttf_t *font)            // I - Font (may be `NULL`)
{
  // A missing font is reported as not fixed pitch...
  if (!font)
    return (false);

  // Otherwise report the flag stored in the font object...
  return (font->is_fixed);
}
//
// 'copy_name()' - Copy a name string from a font.
//
@ -1305,17 +1307,17 @@ read_cmap(ttf_t *font) // I - Font
{
// Use an "obscure indexing trick" (words from the spec, not
// mine) to look up the glyph index...
temp = segment->idRangeOffset / 2 + ch - segment->startCode + seg - segCount - 1;
temp = segment->idRangeOffset / 2 + ch - segment->startCode + seg - segCount;
if (temp < 0 || temp >= numGlyphIdArray || !glyphIdArray[temp])
if (temp < 0 || temp >= numGlyphIdArray)
glyph = -1;
else
glyph = ((glyphIdArray[temp] + segment->idDelta) & 65535) - 1;
glyph = (glyphIdArray[temp] + segment->idDelta) & 65535;
}
else
{
// Just use idDelta to compute a glyph index...
glyph = ((ch + segment->idDelta) & 65535) - 1;
glyph = (ch + segment->idDelta) & 65535;
}
cmapptr[ch] = glyph;
@ -1582,6 +1584,8 @@ read_hmtx(ttf_t *font, // I - Font
{
widths[i].width = (short)read_ushort(font);
widths[i].left_bearing = (short)read_short(font);
TTF_DEBUG("read_hmtx: widths[%d].width=%d, .left_bearing=%d\n", i, widths[i].width, widths[i].left_bearing);
}
return (widths);

13
ttf.h
View File

@ -3,7 +3,7 @@
//
// https://github.com/michaelrsweet/ttf
//
// Copyright © 2018-2021 by Michael R Sweet.
// Copyright © 2018-2023 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
@ -11,17 +11,11 @@
#ifndef TTF_H
# define TTF_H
//
// Include necessary headers...
//
# include <stdbool.h>
# include <sys/types.h>
# ifdef __cplusplus
extern "C" {
# endif //
# endif // __cplusplus
//
@ -89,8 +83,8 @@ extern ttf_t *ttfCreate(const char *filename, size_t idx, ttf_err_cb_t err_cb,
extern void ttfDelete(ttf_t *font);
extern int ttfGetAscent(ttf_t *font);
extern ttf_rect_t *ttfGetBounds(ttf_t *font, ttf_rect_t *bounds);
extern int ttfGetCapHeight(ttf_t *font);
extern const int *ttfGetCMap(ttf_t *font, size_t *num_cmap);
extern int ttfGetCapHeight(ttf_t *font);
extern const char *ttfGetCopyright(ttf_t *font);
extern int ttfGetDescent(ttf_t *font);
extern ttf_rect_t *ttfGetExtents(ttf_t *font, float size, const char *s, ttf_rect_t *extents);
@ -112,5 +106,4 @@ extern bool ttfIsFixedPitch(ttf_t *font);
# ifdef __cplusplus
}
# endif // __cplusplus
#endif // !TTF_H