22 Commits

Author SHA1 Message Date
48fe8d1bc9 Bump version. 2025-01-24 15:31:31 -05:00
a4026bfe00 Prep for release. 2025-01-24 15:30:59 -05:00
1e945cb750 Add LICENSE files to example install list. 2025-01-24 14:44:44 -05:00
4cb4ceaadd Update docos with fixed codedoc. 2025-01-24 14:42:41 -05:00
cca7383c73 Fix support for UTF-16 string values in dictionaries (Issue #92)
Specifically to support Unicode Title and Author values.
2025-01-24 10:43:41 -05:00
6c68b9fa5a Add URLs and copyrights for Code 128 font and ProPhoto ICC profile (Issue #91) 2025-01-24 09:56:51 -05:00
dd7ed67ec1 Update makesrcdist to validate CHANGES.md. 2025-01-23 15:34:43 -05:00
9e2f3aba10 Fix reading of compressed object streams (Issue #92) 2025-01-23 15:27:22 -05:00
fca4dbd395 Make sure we have license files for the example fonts (Issue #91) 2025-01-23 13:03:23 -05:00
41ac7a0b4b Changelog. 2025-01-18 09:45:29 -05:00
5fc571b711 Merge pull request #89 from vlasovsoft1979/master
Fix undefined behavior in _pdfioFileSeek
2025-01-18 09:42:58 -05:00
acf27d29c6 Fix undefined behavior 2025-01-18 13:56:25 +03:00
026f653e07 Fix loading of last 1024 bytes for small PDF files (Issue #87) 2025-01-17 16:58:33 -05:00
3bc041e6d3 Delay loading of the Info object and clean up the pdfioinfo example (Issue #87) 2025-01-17 16:50:30 -05:00
fbd61d1fe9 Bump copyright and version, changelog for example makefile fix. 2025-01-10 14:54:11 -05:00
ee2794199c Merge pull request #86 from tlaronde/master
examples/Makefile: libm is not added by default by all
2025-01-10 14:50:44 -05:00
31c3400f23 examples/Makefile: libm is not added by default by all
-lm has to be added for system/compilers that don't add the lib by
default (the case on NetBSD).
2025-01-10 20:18:06 +01:00
6d65a609e5 Update documentation and examples makefile. 2024-12-26 15:12:56 -05:00
e96f9bfa6b Fix compiler warning and update Xcode project. 2024-12-23 15:07:32 -05:00
10c15fc281 Bump NuGet package versions. 2024-12-22 21:33:35 -05:00
fd8427d68a Add pdf2text example docos, install examples to doc directory. 2024-12-22 21:29:32 -05:00
ed1421287f Move pdfiototext to examples. 2024-12-22 19:00:17 -05:00
26 changed files with 911 additions and 115 deletions

2
.gitignore vendored
View File

@ -14,6 +14,7 @@
/examples/code128 /examples/code128
/examples/image2pdf /examples/image2pdf
/examples/md2pdf /examples/md2pdf
/examples/pdf2text
/examples/pdfioinfo /examples/pdfioinfo
/Makefile /Makefile
/packages /packages
@ -21,7 +22,6 @@
/pdfio.xcodeproj/xcshareddata /pdfio.xcodeproj/xcshareddata
/pdfio-*.tar.gz* /pdfio-*.tar.gz*
/pdfio-*.zip* /pdfio-*.zip*
/pdfiototext
/testpdfio /testpdfio
/testpdfio-*.pdf /testpdfio-*.pdf
/testttf /testttf

View File

@ -1,8 +1,19 @@
Changes in PDFio Changes in PDFio
================ ================
v1.4.1 - 2025-01-24
-------------------
v1.4.0 - YYYY-MM-DD - Added license files for the example fonts now bundled with PDFio (Issue #91)
- Fixed the link libraries for the example source code (Issue #86)
- Fixed handling of the Info object (Issue #87)
- Fixed opening of PDF files less than 1024 bytes in length (Issue #87)
- Fixed potential `NULL` dereference when reading (Issue #89)
- Fixed reading of compressed object streams (Issue #92)
- Fixed reading of UTF-16 string values (Issue #92)
v1.4.0 - 2024-12-26
------------------- -------------------
- Added new `pdfioDictGetKey` and `pdfioDictGetNumPairs` APIs (Issue #63) - Added new `pdfioDictGetKey` and `pdfioDictGetNumPairs` APIs (Issue #63)

View File

@ -1,7 +1,7 @@
# #
# Makefile for PDFio. # Makefile for PDFio.
# #
# Copyright © 2021-2024 by Michael R Sweet. # Copyright © 2021-2025 by Michael R Sweet.
# #
# Licensed under Apache License v2.0. See the file "LICENSE" for more # Licensed under Apache License v2.0. See the file "LICENSE" for more
# information. # information.
@ -103,15 +103,35 @@ LIBOBJS = \
ttf.o ttf.o
OBJS = \ OBJS = \
$(LIBOBJS) \ $(LIBOBJS) \
pdfiototext.o \
testpdfio.o \ testpdfio.o \
testttf.o testttf.o
TARGETS = \ TARGETS = \
$(LIBPDFIO) \ $(LIBPDFIO) \
$(LIBPDFIO_STATIC) \ $(LIBPDFIO_STATIC) \
pdfiototext \
testpdfio \ testpdfio \
testttf testttf
DOCFILES = \
doc/pdfio.html \
doc/pdfio-512.png \
LICENSE \
NOTICE
EXAMPLES = \
examples/Makefile \
examples/Roboto-LICENSE.txt \
examples/Roboto-Bold.ttf \
examples/Roboto-Italic.ttf \
examples/Roboto-Regular.ttf \
examples/RobotoMono-Regular.ttf \
examples/code128.c \
examples/code128.ttf \
examples/code128-LICENSE.txt \
examples/image2pdf.c \
examples/md2pdf.c \
examples/md2pdf.md \
examples/mmd.c \
examples/mmd.h \
examples/pdf2text.c \
examples/pdfioinfo.c
# Make everything # Make everything
@ -152,9 +172,14 @@ install: $(TARGETS)
$(INSTALL) -c -m 644 pdfio.pc $(BUILDROOT)$(libdir)/pkgconfig $(INSTALL) -c -m 644 pdfio.pc $(BUILDROOT)$(libdir)/pkgconfig
echo Installing documentation to $(BUILDROOT)$(datadir)/doc/pdfio... echo Installing documentation to $(BUILDROOT)$(datadir)/doc/pdfio...
$(INSTALL) -d -m 755 $(BUILDROOT)$(datadir)/doc/pdfio $(INSTALL) -d -m 755 $(BUILDROOT)$(datadir)/doc/pdfio
for file in doc/pdfio.html doc/pdfio-512.png LICENSE NOTICE; do \ for file in $(DOCFILES); do \
$(INSTALL) -c -m 644 $$file $(BUILDROOT)$(datadir)/doc/pdfio; \ $(INSTALL) -c -m 644 $$file $(BUILDROOT)$(datadir)/doc/pdfio; \
done done
echo Installing examples to $(BUILDROOT)$(datadir)/doc/pdfio/examples...
$(INSTALL) -d -m 755 $(BUILDROOT)$(datadir)/doc/pdfio/examples
for file in $(EXAMPLES); do \
$(INSTALL) -c -m 644 $$file $(BUILDROOT)$(datadir)/doc/pdfio/examples; \
done
echo Installing man page to $(BUILDROOT)$(mandir)/man3... echo Installing man page to $(BUILDROOT)$(mandir)/man3...
$(INSTALL) -d -m 755 $(BUILDROOT)$(mandir)/man3 $(INSTALL) -d -m 755 $(BUILDROOT)$(mandir)/man3
$(INSTALL) -c -m 644 doc/pdfio.3 $(BUILDROOT)$(mandir)/man3 $(INSTALL) -c -m 644 doc/pdfio.3 $(BUILDROOT)$(mandir)/man3
@ -201,12 +226,6 @@ pdfio1.def: $(LIBOBJS) Makefile
grep -v '^_ttf' | sed -e '1,$$s/^_//' | sort >>$@ grep -v '^_ttf' | sed -e '1,$$s/^_//' | sort >>$@
# pdfio text extraction (demo, doesn't handle a lot of things yet)
pdfiototext: pdfiototext.o libpdfio.a
echo Linking $@...
$(CC) $(LDFLAGS) -o $@ pdfiototext.o libpdfio.a $(LIBS)
# pdfio test program # pdfio test program
testpdfio: testpdfio.o libpdfio.a testpdfio: testpdfio.o libpdfio.a
echo Linking $@... echo Linking $@...
@ -229,7 +248,7 @@ ttf.o: ttf.h
# Make documentation using Codedoc <https://www.msweet.org/codedoc> # Make documentation using Codedoc <https://www.msweet.org/codedoc>
DOCFLAGS = \ DOCFLAGS = \
--author "Michael R Sweet" \ --author "Michael R Sweet" \
--copyright "Copyright (c) 2021-2024 by Michael R Sweet" \ --copyright "Copyright (c) 2021-2025 by Michael R Sweet" \
--docversion $(PDFIO_VERSION) --docversion $(PDFIO_VERSION)
.PHONY: doc .PHONY: doc

2
NOTICE
View File

@ -1,6 +1,6 @@
PDFio - PDF Read/Write Library PDFio - PDF Read/Write Library
Copyright © 2021-2024 by Michael R Sweet. Copyright © 2021-2025 by Michael R Sweet.
(Optional) Exceptions to the Apache 2.0 License: (Optional) Exceptions to the Apache 2.0 License:
================================================ ================================================

View File

@ -89,7 +89,7 @@ generates a static library that will be installed under "/usr/local" with:
Legal Stuff Legal Stuff
----------- -----------
PDFio is Copyright © 2021-2024 by Michael R Sweet. PDFio is Copyright © 2021-2025 by Michael R Sweet.
This software is licensed under the Apache License Version 2.0 with an This software is licensed under the Apache License Version 2.0 with an
(optional) exception to allow linking against GPL2/LGPL2 software. See the (optional) exception to allow linking against GPL2/LGPL2 software. See the

24
configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for pdfio 1.4.0. # Generated by GNU Autoconf 2.71 for pdfio 1.4.1.
# #
# Report bugs to <https://github.com/michaelrsweet/pdfio/issues>. # Report bugs to <https://github.com/michaelrsweet/pdfio/issues>.
# #
@ -610,8 +610,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='pdfio' PACKAGE_NAME='pdfio'
PACKAGE_TARNAME='pdfio' PACKAGE_TARNAME='pdfio'
PACKAGE_VERSION='1.4.0' PACKAGE_VERSION='1.4.1'
PACKAGE_STRING='pdfio 1.4.0' PACKAGE_STRING='pdfio 1.4.1'
PACKAGE_BUGREPORT='https://github.com/michaelrsweet/pdfio/issues' PACKAGE_BUGREPORT='https://github.com/michaelrsweet/pdfio/issues'
PACKAGE_URL='https://www.msweet.org/pdfio' PACKAGE_URL='https://www.msweet.org/pdfio'
@ -1293,7 +1293,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures pdfio 1.4.0 to adapt to many kinds of systems. \`configure' configures pdfio 1.4.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1359,7 +1359,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of pdfio 1.4.0:";; short | recursive ) echo "Configuration of pdfio 1.4.1:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@ -1456,7 +1456,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
pdfio configure 1.4.0 pdfio configure 1.4.1
generated by GNU Autoconf 2.71 generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc. Copyright (C) 2021 Free Software Foundation, Inc.
@ -1612,7 +1612,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by pdfio $as_me 1.4.0, which was It was created by pdfio $as_me 1.4.1, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw $ $0$ac_configure_args_raw
@ -2368,9 +2368,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
PDFIO_VERSION="1.4.0" PDFIO_VERSION="1.4.1"
PDFIO_VERSION_MAJOR="`echo 1.4.0 | awk -F. '{print $1}'`" PDFIO_VERSION_MAJOR="`echo 1.4.1 | awk -F. '{print $1}'`"
PDFIO_VERSION_MINOR="`echo 1.4.0 | awk -F. '{printf("%d\n",$2);}'`" PDFIO_VERSION_MINOR="`echo 1.4.1 | awk -F. '{printf("%d\n",$2);}'`"
@ -4935,7 +4935,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by pdfio $as_me 1.4.0, which was This file was extended by pdfio $as_me 1.4.1, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@ -4991,7 +4991,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped' ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\ ac_cs_version="\\
pdfio config.status 1.4.0 pdfio config.status 1.4.1
configured by $0, generated by GNU Autoconf 2.71, configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@ -21,7 +21,7 @@ AC_PREREQ([2.70])
dnl Package name and version... dnl Package name and version...
AC_INIT([pdfio], [1.4.0], [https://github.com/michaelrsweet/pdfio/issues], [pdfio], [https://www.msweet.org/pdfio]) AC_INIT([pdfio], [1.4.1], [https://github.com/michaelrsweet/pdfio/issues], [pdfio], [https://www.msweet.org/pdfio])
PDFIO_VERSION="AC_PACKAGE_VERSION" PDFIO_VERSION="AC_PACKAGE_VERSION"
PDFIO_VERSION_MAJOR="`echo AC_PACKAGE_VERSION | awk -F. '{print $1}'`" PDFIO_VERSION_MAJOR="`echo AC_PACKAGE_VERSION | awk -F. '{print $1}'`"

View File

@ -1,4 +1,4 @@
.TH pdfio 3 "pdf read/write library" "2024-12-22" "pdf read/write library" .TH pdfio 3 "pdf read/write library" "2025-01-24" "pdf read/write library"
.SH NAME .SH NAME
pdfio \- pdf read/write library pdfio \- pdf read/write library
.SH Introduction .SH Introduction
@ -1028,6 +1028,8 @@ pdfioContentTextShowJustified draws an array of literal strings with offsets bet
.SH Examples .SH Examples
.PP
PDFio includes several example programs that are typically installed to the /usr/share/doc/pdfio/examples or /usr/local/share/doc/pdfio/examples directories. A makefile is included to build them.
.SS Read PDF Metadata .SS Read PDF Metadata
.PP .PP
The pdfioinfo.c example program opens a PDF file and prints the title, author, creation date, and number of pages: The pdfioinfo.c example program opens a PDF file and prints the title, author, creation date, and number of pages:
@ -1043,9 +1045,11 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c
{ {
const char *filename; // PDF filename const char *filename; // PDF filename
pdfio_file_t *pdf; // PDF file pdfio_file_t *pdf; // PDF file
const char *author; // Author name
time_t creation_date; // Creation date time_t creation_date; // Creation date
struct tm *creation_tm; // Creation date/time information struct tm *creation_tm; // Creation date/time information
char creation_text[256]; // Creation date/time as a string char creation_text[256]; // Creation date/time as a string
const char *title; // Title
// Get the filename from the command\-line... // Get the filename from the command\-line...
@ -1063,15 +1067,25 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c
if (pdf == NULL) if (pdf == NULL)
return (1); return (1);
// Get the title and author...
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
// Get the creation date and convert to a string... // Get the creation date and convert to a string...
creation_date = pdfioFileGetCreationDate(pdf); if ((creation_date = pdfioFileGetCreationDate(pdf)) > 0)
{
creation_tm = localtime(&creation_date); creation_tm = localtime(&creation_date);
strftime(creation_text, sizeof(creation_text), "%c", creation_tm); strftime(creation_text, sizeof(creation_text), "%c", creation_tm);
}
else
{
snprintf(creation_text, sizeof(creation_text), "\-\- not set \-\-");
}
// Print file information to stdout... // Print file information to stdout...
printf("%s:\\n", filename); printf("%s:\\n", filename);
printf(" Title: %s\\n", pdfioFileGetTitle(pdf)); printf(" Title: %s\\n", title ? title : "\-\- not set \-\-");
printf(" Author: %s\\n", pdfioFileGetAuthor(pdf)); printf(" Author: %s\\n", author ? author : "\-\- not set \-\-");
printf(" Created On: %s\\n", creation_text); printf(" Created On: %s\\n", creation_text);
printf(" Number Pages: %u\\n", (unsigned)pdfioFileGetNumPages(pdf)); printf(" Number Pages: %u\\n", (unsigned)pdfioFileGetNumPages(pdf));
@ -1081,7 +1095,43 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c
return (0); return (0);
} }
.fi .fi
.SS Create PDF File With Text and Image .SS Extract Text from PDF File
.PP
The pdf2text.c example code extracts non\-Unicode text from a PDF file by scanning each page for strings and text drawing commands. Since it doesn't look at the font encoding or support Unicode text, it is really only useful to extract plain ASCII text from a PDF file. And since it writes text in the order it appears in the page stream, it may not come out in the same order as it appears on the page.
.PP
The pdfioStreamGetToken function is used to read individual tokens from the page streams. Tokens starting with the open parenthesis are text strings, while PDF operators are left as\-is. We use some simple logic to make sure that we include spaces between text strings and add newlines for the text operators that start a new line in a text block:
.nf
pdfio_stream_t *st; // Page stream
bool first = true; // First string on line?
char buffer[1024]; // Token buffer
// Read PDF tokens from the page stream...
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
{
if (buffer[0] == '(')
{
// Text string using an 8\-bit encoding
if (first)
first = false;
else if (buffer[1] != ' ')
putchar(' ');
fputs(buffer + 1, stdout);
}
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") ||
!strcmp(buffer, "\\'") || !strcmp(buffer, "\\""))
{
// Text operators that advance to the next line in the block
putchar('\\n');
first = true;
}
}
if (!first)
putchar('\\n');
.fi
.SS Create a PDF File With Text and an Image
.PP .PP
The image2pdf.c example code creates a PDF file containing a JPEG or PNG image file and optional caption on a single page. The create_pdf_image_file function creates the PDF file, embeds a base font and the named JPEG or PNG image file, and then creates a page with the image centered on the page with any text centered below: The image2pdf.c example code creates a PDF file containing a JPEG or PNG image file and optional caption on a single page. The create_pdf_image_file function creates the PDF file, embeds a base font and the named JPEG or PNG image file, and then creates a page with the image centered on the page with any text centered below:
.nf .nf
@ -2024,7 +2074,7 @@ The render_line function adds content from the linefrag_t array to a PDF page. I
} }
.fi .fi
.PP .PP
We then loops through the fragments for the current line, drawing checkboxes, images, and text as needed. When a hyperlink is present, we add the link to the links array in the docdata_t structure, mapping "@" and "@@" to an internal link corresponding to the linked text: We then loop through the fragments for the current line, drawing checkboxes, images, and text as needed. When a hyperlink is present, we add the link to the links array in the docdata_t structure, mapping "@" and "@@" to an internal link corresponding to the linked text:
.nf .nf
if (frag\->url && dd\->num_links < DOCLINK_MAX) if (frag\->url && dd\->num_links < DOCLINK_MAX)
@ -2099,7 +2149,7 @@ Then it formats each cell using the format_block function described previously.
for (col = 0; col < num_cols; col ++) for (col = 0; col < num_cols; col ++)
{ {
dd|>y = row_y; dd\->y = row_y;
format_block(dd, row\->cells[col], deffont, SIZE_TABLE, cols[col].left, format_block(dd, row\->cells[col], deffont, SIZE_TABLE, cols[col].left,
cols[col].right, /*leader*/NULL); cols[col].right, /*leader*/NULL);
@ -4517,4 +4567,4 @@ typedef enum pdfio_valtype_e pdfio_valtype_t;
Michael R Sweet Michael R Sweet
.SH COPYRIGHT .SH COPYRIGHT
.PP .PP
Copyright (c) 2021-2024 by Michael R Sweet Copyright (c) 2021-2025 by Michael R Sweet

View File

@ -1,13 +1,13 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en-US"> <html lang="en-US">
<head> <head>
<title>PDFio Programming Manual v1.4.0</title> <title>PDFio Programming Manual v1.4.1</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"> <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="generator" content="codedoc v3.8"> <meta name="generator" content="codedoc v3.8">
<meta name="author" content="Michael R Sweet"> <meta name="author" content="Michael R Sweet">
<meta name="language" content="en-US"> <meta name="language" content="en-US">
<meta name="copyright" content="Copyright © 2021-2024 by Michael R Sweet"> <meta name="copyright" content="Copyright © 2021-2025 by Michael R Sweet">
<meta name="version" content="1.4.0"> <meta name="version" content="1.4.1">
<style type="text/css"><!-- <style type="text/css"><!--
body { body {
background: white; background: white;
@ -251,9 +251,9 @@ span.string {
<body> <body>
<div class="header"> <div class="header">
<p><img class="title" src="pdfio-512.png"></p> <p><img class="title" src="pdfio-512.png"></p>
<h1 class="title">PDFio Programming Manual v1.4.0</h1> <h1 class="title">PDFio Programming Manual v1.4.1</h1>
<p>Michael R Sweet</p> <p>Michael R Sweet</p>
<p>Copyright © 2021-2024 by Michael R Sweet</p> <p>Copyright © 2021-2025 by Michael R Sweet</p>
</div> </div>
<div class="contents"> <div class="contents">
<h2 class="title">Contents</h2> <h2 class="title">Contents</h2>
@ -276,7 +276,8 @@ span.string {
</ul></li> </ul></li>
<li><a href="#examples">Examples</a><ul class="subcontents"> <li><a href="#examples">Examples</a><ul class="subcontents">
<li><a href="#read-pdf-metadata">Read PDF Metadata</a></li> <li><a href="#read-pdf-metadata">Read PDF Metadata</a></li>
<li><a href="#create-pdf-file-with-text-and-image">Create PDF File With Text and Image</a></li> <li><a href="#extract-text-from-pdf-file">Extract Text from PDF File</a></li>
<li><a href="#create-a-pdf-file-with-text-and-an-image">Create a PDF File With Text and an Image</a></li>
<li><a href="#generate-a-code-128-barcode">Generate a Code 128 Barcode</a></li> <li><a href="#generate-a-code-128-barcode">Generate a Code 128 Barcode</a></li>
<li><a href="#convert-markdown-to-pdf">Convert Markdown to PDF</a></li> <li><a href="#convert-markdown-to-pdf">Convert Markdown to PDF</a></li>
</ul></li> </ul></li>
@ -1147,6 +1148,7 @@ pdfio_obj_t *img =
</li> </li>
</ul> </ul>
<h2 class="title" id="examples">Examples</h2> <h2 class="title" id="examples">Examples</h2>
<p>PDFio includes several example programs that are typically installed to the <code>/usr/share/doc/pdfio/examples</code> or <code>/usr/local/share/doc/pdfio/examples</code> directories. A makefile is included to build them.</p>
<h3 class="title" id="read-pdf-metadata">Read PDF Metadata</h3> <h3 class="title" id="read-pdf-metadata">Read PDF Metadata</h3>
<p>The <code>pdfioinfo.c</code> example program opens a PDF file and prints the title, author, creation date, and number of pages:</p> <p>The <code>pdfioinfo.c</code> example program opens a PDF file and prints the title, author, creation date, and number of pages:</p>
<pre><code class="language-c"><span class="directive">#include &lt;pdfio.h&gt;</span> <pre><code class="language-c"><span class="directive">#include &lt;pdfio.h&gt;</span>
@ -1159,9 +1161,11 @@ main(<span class="reserved">int</span> argc, <span clas
{ {
<span class="reserved">const</span> <span class="reserved">char</span> *filename; <span class="comment">// PDF filename</span> <span class="reserved">const</span> <span class="reserved">char</span> *filename; <span class="comment">// PDF filename</span>
pdfio_file_t *pdf; <span class="comment">// PDF file</span> pdfio_file_t *pdf; <span class="comment">// PDF file</span>
<span class="reserved">const</span> <span class="reserved">char</span> *author; <span class="comment">// Author name</span>
time_t creation_date; <span class="comment">// Creation date</span> time_t creation_date; <span class="comment">// Creation date</span>
<span class="reserved">struct</span> tm *creation_tm; <span class="comment">// Creation date/time information</span> <span class="reserved">struct</span> tm *creation_tm; <span class="comment">// Creation date/time information</span>
<span class="reserved">char</span> creation_text[<span class="number">256</span>]; <span class="comment">// Creation date/time as a string</span> <span class="reserved">char</span> creation_text[<span class="number">256</span>]; <span class="comment">// Creation date/time as a string</span>
<span class="reserved">const</span> <span class="reserved">char</span> *title; <span class="comment">// Title</span>
<span class="comment">// Get the filename from the command-line...</span> <span class="comment">// Get the filename from the command-line...</span>
@ -1179,15 +1183,25 @@ main(<span class="reserved">int</span> argc, <span clas
<span class="reserved">if</span> (pdf == NULL) <span class="reserved">if</span> (pdf == NULL)
<span class="reserved">return</span> (<span class="number">1</span>); <span class="reserved">return</span> (<span class="number">1</span>);
<span class="comment">// Get the title and author...</span>
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
<span class="comment">// Get the creation date and convert to a string...</span> <span class="comment">// Get the creation date and convert to a string...</span>
creation_date = pdfioFileGetCreationDate(pdf); <span class="reserved">if</span> ((creation_date = pdfioFileGetCreationDate(pdf)) &gt; <span class="number">0</span>)
{
creation_tm = localtime(&amp;creation_date); creation_tm = localtime(&amp;creation_date);
strftime(creation_text, <span class="reserved">sizeof</span>(creation_text), <span class="string">&quot;%c&quot;</span>, creation_tm); strftime(creation_text, <span class="reserved">sizeof</span>(creation_text), <span class="string">&quot;%c&quot;</span>, creation_tm);
}
<span class="reserved">else</span>
{
snprintf(creation_text, <span class="reserved">sizeof</span>(creation_text), <span class="string">&quot;-- not set --&quot;</span>);
}
<span class="comment">// Print file information to stdout...</span> <span class="comment">// Print file information to stdout...</span>
printf(<span class="string">&quot;%s:\n&quot;</span>, filename); printf(<span class="string">&quot;%s:\n&quot;</span>, filename);
printf(<span class="string">&quot; Title: %s\n&quot;</span>, pdfioFileGetTitle(pdf)); printf(<span class="string">&quot; Title: %s\n&quot;</span>, title ? title : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Author: %s\n&quot;</span>, pdfioFileGetAuthor(pdf)); printf(<span class="string">&quot; Author: %s\n&quot;</span>, author ? author : <span class="string">&quot;-- not set --&quot;</span>);
printf(<span class="string">&quot; Created On: %s\n&quot;</span>, creation_text); printf(<span class="string">&quot; Created On: %s\n&quot;</span>, creation_text);
printf(<span class="string">&quot; Number Pages: %u\n&quot;</span>, (<span class="reserved">unsigned</span>)pdfioFileGetNumPages(pdf)); printf(<span class="string">&quot; Number Pages: %u\n&quot;</span>, (<span class="reserved">unsigned</span>)pdfioFileGetNumPages(pdf));
@ -1197,7 +1211,39 @@ main(<span class="reserved">int</span> argc, <span clas
<span class="reserved">return</span> (<span class="number">0</span>); <span class="reserved">return</span> (<span class="number">0</span>);
} }
</code></pre> </code></pre>
<h3 class="title" id="create-pdf-file-with-text-and-image">Create PDF File With Text and Image</h3> <h3 class="title" id="extract-text-from-pdf-file">Extract Text from PDF File</h3>
<p>The <code>pdf2text.c</code> example code extracts non-Unicode text from a PDF file by scanning each page for strings and text drawing commands. Since it doesn't look at the font encoding or support Unicode text, it is really only useful to extract plain ASCII text from a PDF file. And since it writes text in the order it appears in the page stream, it may not come out in the same order as it appears on the page.</p>
<p>The <a href="#pdfioStreamGetToken"><code>pdfioStreamGetToken</code></a> function is used to read individual tokens from the page streams. Tokens starting with the open parenthesis are text strings, while PDF operators are left as-is. We use some simple logic to make sure that we include spaces between text strings and add newlines for the text operators that start a new line in a text block:</p>
<pre><code class="language-c">pdfio_stream_t *st; <span class="comment">// Page stream</span>
<span class="reserved">bool</span> first = <span class="reserved">true</span>; <span class="comment">// First string on line?</span>
<span class="reserved">char</span> buffer[<span class="number">1024</span>]; <span class="comment">// Token buffer</span>
<span class="comment">// Read PDF tokens from the page stream...</span>
<span class="reserved">while</span> (pdfioStreamGetToken(st, buffer, <span class="reserved">sizeof</span>(buffer)))
{
<span class="reserved">if</span> (buffer[<span class="number">0</span>] == <span class="string">'('</span>)
{
<span class="comment">// Text string using an 8-bit encoding</span>
<span class="reserved">if</span> (first)
first = <span class="reserved">false</span>;
<span class="reserved">else</span> <span class="reserved">if</span> (buffer[<span class="number">1</span>] != <span class="string">' '</span>)
putchar(<span class="string">' '</span>);
fputs(buffer + <span class="number">1</span>, stdout);
}
<span class="reserved">else</span> <span class="reserved">if</span> (!strcmp(buffer, <span class="string">&quot;Td&quot;</span>) || !strcmp(buffer, <span class="string">&quot;TD&quot;</span>) || !strcmp(buffer, <span class="string">&quot;T*&quot;</span>) ||
!strcmp(buffer, <span class="string">&quot;\'&quot;</span>) || !strcmp(buffer, <span class="string">&quot;\&quot;&quot;</span>))
{
<span class="comment">// Text operators that advance to the next line in the block</span>
putchar(<span class="string">'\n'</span>);
first = <span class="reserved">true</span>;
}
}
<span class="reserved">if</span> (!first)
putchar(<span class="string">'\n'</span>);
</code></pre>
<h3 class="title" id="create-a-pdf-file-with-text-and-an-image">Create a PDF File With Text and an Image</h3>
<p>The <code>image2pdf.c</code> example code creates a PDF file containing a JPEG or PNG image file and optional caption on a single page. The <code>create_pdf_image_file</code> function creates the PDF file, embeds a base font and the named JPEG or PNG image file, and then creates a page with the image centered on the page with any text centered below:</p> <p>The <code>image2pdf.c</code> example code creates a PDF file containing a JPEG or PNG image file and optional caption on a single page. The <code>create_pdf_image_file</code> function creates the PDF file, embeds a base font and the named JPEG or PNG image file, and then creates a page with the image centered on the page with any text centered below:</p>
<pre><code class="language-c"><span class="directive">#include &lt;pdfio.h&gt;</span> <pre><code class="language-c"><span class="directive">#include &lt;pdfio.h&gt;</span>
<span class="directive">#include &lt;pdfio-content.h&gt;</span> <span class="directive">#include &lt;pdfio-content.h&gt;</span>
@ -1988,7 +2034,7 @@ dd-&gt;y -= margin_top + lineheight;
dd-&gt;y -= lineheight; dd-&gt;y -= lineheight;
} }
</code></pre> </code></pre>
<p>We then loops through the fragments for the current line, drawing checkboxes, images, and text as needed. When a hyperlink is present, we add the link to the <code>links</code> array in the <code>docdata_t</code> structure, mapping &quot;@&quot; and &quot;@@&quot; to an internal link corresponding to the linked text:</p> <p>We then loop through the fragments for the current line, drawing checkboxes, images, and text as needed. When a hyperlink is present, we add the link to the <code>links</code> array in the <code>docdata_t</code> structure, mapping &quot;@&quot; and &quot;@@&quot; to an internal link corresponding to the linked text:</p>
<pre><code class="language-c"><span class="reserved">if</span> (frag-&gt;url &amp;&amp; dd-&gt;num_links &lt; DOCLINK_MAX) <pre><code class="language-c"><span class="reserved">if</span> (frag-&gt;url &amp;&amp; dd-&gt;num_links &lt; DOCLINK_MAX)
{ {
doclink_t *l = dd-&gt;links + dd-&gt;num_links; doclink_t *l = dd-&gt;links + dd-&gt;num_links;
@ -2053,7 +2099,7 @@ dd-&gt;y -= margin_top + lineheight;
<span class="reserved">for</span> (col = <span class="number">0</span>; col &lt; num_cols; col ++) <span class="reserved">for</span> (col = <span class="number">0</span>; col &lt; num_cols; col ++)
{ {
dd<EFBFBD>&gt;y = row_y; dd-&gt;y = row_y;
format_block(dd, row-&gt;cells[col], deffont, SIZE_TABLE, cols[col].left, format_block(dd, row-&gt;cells[col], deffont, SIZE_TABLE, cols[col].left,
cols[col].right, <span class="comment">/*leader*/</span>NULL); cols[col].right, <span class="comment">/*leader*/</span>NULL);

View File

@ -864,6 +864,10 @@ escaping, as needed:
Examples Examples
======== ========
PDFio includes several example programs that are typically installed to the
`/usr/share/doc/pdfio/examples` or `/usr/local/share/doc/pdfio/examples`
directories. A makefile is included to build them.
Read PDF Metadata Read PDF Metadata
----------------- -----------------
@ -882,9 +886,11 @@ main(int argc, // I - Number of command-line arguments
{ {
const char *filename; // PDF filename const char *filename; // PDF filename
pdfio_file_t *pdf; // PDF file pdfio_file_t *pdf; // PDF file
const char *author; // Author name
time_t creation_date; // Creation date time_t creation_date; // Creation date
struct tm *creation_tm; // Creation date/time information struct tm *creation_tm; // Creation date/time information
char creation_text[256]; // Creation date/time as a string char creation_text[256]; // Creation date/time as a string
const char *title; // Title
// Get the filename from the command-line... // Get the filename from the command-line...
@ -902,15 +908,25 @@ main(int argc, // I - Number of command-line arguments
if (pdf == NULL) if (pdf == NULL)
return (1); return (1);
// Get the title and author...
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
// Get the creation date and convert to a string... // Get the creation date and convert to a string...
creation_date = pdfioFileGetCreationDate(pdf); if ((creation_date = pdfioFileGetCreationDate(pdf)) > 0)
{
creation_tm = localtime(&creation_date); creation_tm = localtime(&creation_date);
strftime(creation_text, sizeof(creation_text), "%c", creation_tm); strftime(creation_text, sizeof(creation_text), "%c", creation_tm);
}
else
{
snprintf(creation_text, sizeof(creation_text), "-- not set --");
}
// Print file information to stdout... // Print file information to stdout...
printf("%s:\n", filename); printf("%s:\n", filename);
printf(" Title: %s\n", pdfioFileGetTitle(pdf)); printf(" Title: %s\n", title ? title : "-- not set --");
printf(" Author: %s\n", pdfioFileGetAuthor(pdf)); printf(" Author: %s\n", author ? author : "-- not set --");
printf(" Created On: %s\n", creation_text); printf(" Created On: %s\n", creation_text);
printf(" Number Pages: %u\n", (unsigned)pdfioFileGetNumPages(pdf)); printf(" Number Pages: %u\n", (unsigned)pdfioFileGetNumPages(pdf));
@ -922,8 +938,56 @@ main(int argc, // I - Number of command-line arguments
``` ```
Create PDF File With Text and Image Extract Text from PDF File
----------------------------------- --------------------------
The `pdf2text.c` example code extracts non-Unicode text from a PDF file by
scanning each page for strings and text drawing commands. Since it doesn't
look at the font encoding or support Unicode text, it is really only useful to
extract plain ASCII text from a PDF file. And since it writes text in the order
it appears in the page stream, it may not come out in the same order as it
appears on the page.
The [`pdfioStreamGetToken`](@@) function is used to read individual tokens from
the page streams. Tokens starting with the open parenthesis are text strings,
while PDF operators are left as-is. We use some simple logic to make sure that
we include spaces between text strings and add newlines for the text operators
that start a new line in a text block:
```c
pdfio_stream_t *st; // Page stream
bool first = true; // First string on line?
char buffer[1024]; // Token buffer
// Read PDF tokens from the page stream...
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
{
if (buffer[0] == '(')
{
// Text string using an 8-bit encoding
if (first)
first = false;
else if (buffer[1] != ' ')
putchar(' ');
fputs(buffer + 1, stdout);
}
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") ||
!strcmp(buffer, "\'") || !strcmp(buffer, "\""))
{
// Text operators that advance to the next line in the block
putchar('\n');
first = true;
}
}
if (!first)
putchar('\n');
```
Create a PDF File With Text and an Image
----------------------------------------
The `image2pdf.c` example code creates a PDF file containing a JPEG or PNG The `image2pdf.c` example code creates a PDF file containing a JPEG or PNG
image file and optional caption on a single page. The `create_pdf_image_file` image file and optional caption on a single page. The `create_pdf_image_file`
@ -1941,7 +2005,7 @@ if ((dd->y - need_bottom) < dd->art_box.y1)
} }
``` ```
We then loops through the fragments for the current line, drawing checkboxes, We then loop through the fragments for the current line, drawing checkboxes,
images, and text as needed. When a hyperlink is present, we add the link to the images, and text as needed. When a hyperlink is present, we add the link to the
`links` array in the `docdata_t` structure, mapping "@" and "@@" to an internal `links` array in the `docdata_t` structure, mapping "@" and "@@" to an internal
link corresponding to the linked text: link corresponding to the linked text:

View File

@ -1,7 +1,7 @@
# #
# Makefile for PDFio examples. # Makefile for PDFio examples.
# #
# Copyright © 2024 by Michael R Sweet. # Copyright © 2024-2025 by Michael R Sweet.
# #
# Licensed under Apache License v2.0. See the file "LICENSE" for more # Licensed under Apache License v2.0. See the file "LICENSE" for more
# information. # information.
@ -12,10 +12,10 @@
# Common options # Common options
#CFLAGS = -g $(CPPFLAGS) CFLAGS = -g $(CPPFLAGS)
CFLAGS = -g -fsanitize=address $(CPPFLAGS) #CFLAGS = -g -fsanitize=address $(CPPFLAGS)
CPPFLAGS = -I.. CPPFLAGS = -I.. -I/usr/local/include
LIBS = -L.. -lpdfio -lz LIBS = -L.. -L/usr/local/lib -lpdfio -lz -lm
# Targets # Targets
@ -23,6 +23,7 @@ TARGETS = \
code128 \ code128 \
image2pdf \ image2pdf \
md2pdf \ md2pdf \
pdf2text \
pdfioinfo pdfioinfo
@ -50,6 +51,11 @@ md2pdf: md2pdf.c mmd.c mmd.h
$(CC) $(CFLAGS) -o $@ md2pdf.c mmd.c $(LIBS) $(CC) $(CFLAGS) -o $@ md2pdf.c mmd.c $(LIBS)
# pdfio text extraction (demo, doesn't handle a lot of things yet)
pdf2text: pdf2text.c
$(CC) $(CFLAGS) -o $@ pdf2text.c $(LIBS)
# pdfioinfo # pdfioinfo
pdfioinfo: pdfioinfo.c pdfioinfo: pdfioinfo.c
$(CC) $(CFLAGS) -o $@ pdfioinfo.c $(LIBS) $(CC) $(CFLAGS) -o $@ pdfioinfo.c $(LIBS)

View File

@ -0,0 +1,93 @@
Copyright 2011 The Roboto Project Authors (https://github.com/googlefonts/roboto-classic)
This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
https://openfontlicense.org
-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------
PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.
The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded,
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.
DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.
"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).
"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).
"Modified Version" refers to any derivative made by adding to, deleting,
or substituting -- in part or in whole -- any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.
"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.
PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:
1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.
2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.
3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.
5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.
TERMINATION
This license becomes null and void if any of the above conditions are
not met.
DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.

View File

@ -0,0 +1,343 @@
Copyright 2003 Grandzebu, All Rights Reserved
http://grandzebu.net/informatique/codbar-en/code128.htm
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

Binary file not shown.

View File

@ -1,17 +1,17 @@
// //
// PDF to text program for PDFio. // PDF to text program for PDFio.
// //
// Copyright © 2022 by Michael R Sweet. // Copyright © 2022-2024 by Michael R Sweet.
// //
// Licensed under Apache License v2.0. See the file "LICENSE" for more // Licensed under Apache License v2.0. See the file "LICENSE" for more
// information. // information.
// //
// Usage: // Usage:
// //
// ./pdfiototext FILENAME.pdf > FILENAME.txt // ./pdf2text FILENAME.pdf > FILENAME.txt
// //
#include "pdfio.h" #include <pdfio.h>
#include <string.h> #include <string.h>
@ -36,16 +36,14 @@ main(int argc, // I - Number of command-line arguments
// Verify command-line arguments... // Verify command-line arguments...
if (argc != 2) if (argc != 2)
{ {
puts("Usage: pdfiototext FILENAME.pdf > FILENAME.txt"); puts("Usage: pdf2text FILENAME.pdf > FILENAME.txt");
return (1); return (1);
} }
// Open the PDF file... // Open the PDF file...
if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL) if ((file = pdfioFileOpen(argv[1], /*password_cb*/NULL, /*password_data*/NULL, /*error_cb*/NULL, /*error_data*/NULL)) == NULL)
return (1); return (1);
// printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
// Try grabbing content from all of the pages... // Try grabbing content from all of the pages...
for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++) for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
{ {
@ -54,29 +52,28 @@ main(int argc, // I - Number of command-line arguments
num_streams = pdfioPageGetNumStreams(obj); num_streams = pdfioPageGetNumStreams(obj);
// printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
for (j = 0; j < num_streams; j ++) for (j = 0; j < num_streams; j ++)
{ {
if ((st = pdfioPageOpenStream(obj, j, true)) == NULL) if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
continue; continue;
// printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st); // Read PDF tokens from the page stream...
first = true; first = true;
while (pdfioStreamGetToken(st, buffer, sizeof(buffer))) while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
{ {
if (buffer[0] == '(') if (buffer[0] == '(')
{ {
// Text string using an 8-bit encoding
if (first) if (first)
first = false; first = false;
else else if (buffer[1] != ' ')
putchar(' '); putchar(' ');
fputs(buffer + 1, stdout); fputs(buffer + 1, stdout);
} }
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\"")) else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
{ {
// Text operators that advance to the next line in the block
putchar('\n'); putchar('\n');
first = true; first = true;
} }

View File

@ -1,7 +1,7 @@
// //
// PDF metadata example for PDFio. // PDF metadata example for PDFio.
// //
// Copyright © 2023-2024 by Michael R Sweet. // Copyright © 2023-2025 by Michael R Sweet.
// //
// Licensed under Apache License v2.0. See the file "LICENSE" for more // Licensed under Apache License v2.0. See the file "LICENSE" for more
// information. // information.
@ -25,9 +25,11 @@ main(int argc, // I - Number of command-line arguments
{ {
const char *filename; // PDF filename const char *filename; // PDF filename
pdfio_file_t *pdf; // PDF file pdfio_file_t *pdf; // PDF file
const char *author; // Author name
time_t creation_date; // Creation date time_t creation_date; // Creation date
struct tm *creation_tm; // Creation date/time information struct tm *creation_tm; // Creation date/time information
char creation_text[256]; // Creation date/time as a string char creation_text[256]; // Creation date/time as a string
const char *title; // Title
// Get the filename from the command-line... // Get the filename from the command-line...
@ -46,15 +48,25 @@ main(int argc, // I - Number of command-line arguments
if (pdf == NULL) if (pdf == NULL)
return (1); return (1);
// Get the title and author...
author = pdfioFileGetAuthor(pdf);
title = pdfioFileGetTitle(pdf);
// Get the creation date and convert to a string... // Get the creation date and convert to a string...
creation_date = pdfioFileGetCreationDate(pdf); if ((creation_date = pdfioFileGetCreationDate(pdf)) > 0)
{
creation_tm = localtime(&creation_date); creation_tm = localtime(&creation_date);
strftime(creation_text, sizeof(creation_text), "%c", creation_tm); strftime(creation_text, sizeof(creation_text), "%c", creation_tm);
}
else
{
snprintf(creation_text, sizeof(creation_text), "-- not set --");
}
// Print file information to stdout... // Print file information to stdout...
printf("%s:\n", filename); printf("%s:\n", filename);
printf(" Title: %s\n", pdfioFileGetTitle(pdf)); printf(" Title: %s\n", title ? title : "-- not set --");
printf(" Author: %s\n", pdfioFileGetAuthor(pdf)); printf(" Author: %s\n", author ? author : "-- not set --");
printf(" Created On: %s\n", creation_text); printf(" Created On: %s\n", creation_text);
printf(" Number Pages: %u\n", (unsigned)pdfioFileGetNumPages(pdf)); printf(" Number Pages: %u\n", (unsigned)pdfioFileGetNumPages(pdf));

View File

@ -29,6 +29,15 @@ if test $(grep AC_INIT configure.ac | awk '{print $2}') != "[$version],"; then
exit 1 exit 1
fi fi
# The newest release entry is expected on line 4 of CHANGES.md.  Its first
# field must be "v" followed by the version being released, and its third
# field must no longer be the "YYYY-MM-DD" placeholder.
#
# The command substitutions are quoted so that an empty or missing line is
# compared as an empty string.  Unquoted, an empty result leaves "test" with
# a malformed expression; it then fails with a usage error, the "if" is
# false, and the check would be skipped silently.
if test "$(head -4 CHANGES.md | tail -1 | awk '{print $1}')" != "v$version"; then
	echo "Still need to update CHANGES.md version number."
	exit 1
fi

if test "$(head -4 CHANGES.md | tail -1 | awk '{print $3}')" = "YYYY-MM-DD"; then
	echo "Still need to update CHANGES.md release date."
	exit 1
fi
if test $(grep PDFIO_VERSION= configure | awk -F \" '{print $2}') != "$version"; then if test $(grep PDFIO_VERSION= configure | awk -F \" '{print $2}') != "$version"; then
echo "Still need to run 'autoconf -f'." echo "Still need to run 'autoconf -f'."
exit 1 exit 1

View File

@ -1,7 +1,7 @@
// //
// Common support functions for pdfio. // Common support functions for pdfio.
// //
// Copyright © 2021-2024 by Michael R Sweet. // Copyright © 2021-2025 by Michael R Sweet.
// //
// Licensed under Apache License v2.0. See the file "LICENSE" for more // Licensed under Apache License v2.0. See the file "LICENSE" for more
// information. // information.
@ -368,7 +368,7 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file
if (pdf->mode == _PDFIO_MODE_READ) if (pdf->mode == _PDFIO_MODE_READ)
{ {
// Reading, see if we already have the data we need... // Reading, see if we already have the data we need...
if (whence != SEEK_END && offset >= pdf->bufpos && offset < (pdf->bufpos + pdf->bufend - pdf->buffer)) if (whence != SEEK_END && offset >= pdf->bufpos && pdf->bufend && offset < (pdf->bufpos + pdf->bufend - pdf->buffer))
{ {
// Yes, seek within existing buffer... // Yes, seek within existing buffer...
pdf->bufptr = pdf->buffer + (offset - pdf->bufpos); pdf->bufptr = pdf->buffer + (offset - pdf->bufpos);
@ -398,7 +398,10 @@ _pdfioFileSeek(pdfio_file_t *pdf, // I - PDF file
} }
// Seek within the file... // Seek within the file...
if ((offset = lseek(pdf->fd, offset, whence)) < 0) if ((offset = lseek(pdf->fd, offset, whence)) < 0 && whence == SEEK_END && errno == EINVAL)
offset = lseek(pdf->fd, 0, SEEK_SET);
if (offset < 0)
{ {
_pdfioFileError(pdf, "Unable to seek within file - %s", strerror(errno)); _pdfioFileError(pdf, "Unable to seek within file - %s", strerror(errno));
return (-1); return (-1);

View File

@ -1132,7 +1132,7 @@ pdfioContentTextMeasure(
} }
if (i < (sizeof(_pdfio_cp1252) / sizeof(_pdfio_cp1252[0]))) if (i < (sizeof(_pdfio_cp1252) / sizeof(_pdfio_cp1252[0])))
ch = i + 0x80; // Extra characters from 0x80 to 0x9f ch = (int)(i + 0x80); // Extra characters from 0x80 to 0x9f
else else
ch = '?'; // Unsupported chars map to ? ch = '?'; // Unsupported chars map to ?
} }

View File

@ -465,10 +465,134 @@ pdfioDictGetString(pdfio_dict_t *dict, // I - Dictionary
else if (value && value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096) else if (value && value->type == PDFIO_VALTYPE_BINARY && value->value.binary.datalen < 4096)
{ {
// Convert binary string to regular string... // Convert binary string to regular string...
char temp[4096]; // Temporary string char temp[4096], // Temporary string
*tempptr; // Pointer into temporary string
unsigned char *dataptr; // Pointer into the data string
if (value->value.binary.datalen >= 2 && !(value->value.binary.datalen & 1) && (!memcmp(value->value.binary.data, "\376\377", 2) || !memcmp(value->value.binary.data, "\377\376", 2)))
{
  // Convert a UTF-16 string that starts with a byte order mark (BOM) into
  // UTF-8 in "temp".  The BOM bytes FE FF select big-endian words and FF FE
  // select little-endian words.  Both byte orders share one loop; only the
  // positions of the high and low bytes within each 16-bit word differ.
  //
  // The length test comes first so the 2-byte BOM comparison never reads
  // past a shorter (or empty) binary string.
  int		ch;			// Unicode character
  size_t	remaining,		// Remaining bytes
		hi,			// Index of the high byte in each word
		lo;			// Index of the low byte in each word

  dataptr = value->value.binary.data;
  hi      = dataptr[0] == 0xfe ? 0 : 1;	// FE FF = big-endian, FF FE = little-endian
  lo      = 1 - hi;

  // Stop early enough that a 4-byte sequence plus the nul terminator always
  // fits in the temporary buffer...
  for (dataptr += 2, remaining = value->value.binary.datalen - 2, tempptr = temp; remaining > 1 && tempptr < (temp + sizeof(temp) - 5); dataptr += 2, remaining -= 2)
  {
    ch = (dataptr[hi] << 8) | dataptr[lo];

    if (ch >= 0xd800 && ch <= 0xdbff && remaining > 3)
    {
      // Multi-word UTF-16 char: a high surrogate must be followed by a low
      // surrogate in the range 0xDC00 to 0xDFFF (inclusive)...
      int	lch;			// Lower bits

      lch = (dataptr[2 + hi] << 8) | dataptr[2 + lo];

      if (lch < 0xdc00 || lch > 0xdfff)
        break;

      ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;

      // Consume the second word of the pair as well...
      dataptr += 2;
      remaining -= 2;
    }
    else if (ch >= 0xfffe)
    {
      // Skip 0xFFFE and 0xFFFF...
      continue;
    }

    if (ch < 0x80)
    {
      // ASCII
      *tempptr++ = (char)ch;
    }
    else if (ch < 0x800)
    {
      // 2-byte UTF-8 (11 bits of payload)
      *tempptr++ = (char)(0xc0 | (ch >> 6));
      *tempptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else if (ch < 0x10000)
    {
      // 3-byte UTF-8 (16 bits of payload)
      *tempptr++ = (char)(0xe0 | (ch >> 12));
      *tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *tempptr++ = (char)(0x80 | (ch & 0x3f));
    }
    else
    {
      // 4-byte UTF-8 (21 bits of payload, lead byte 0xF0)
      *tempptr++ = (char)(0xf0 | (ch >> 18));
      *tempptr++ = (char)(0x80 | ((ch >> 12) & 0x3f));
      *tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
      *tempptr++ = (char)(0x80 | (ch & 0x3f));
    }
  }

  *tempptr = '\0';
}
else
{
// Copy as-is...
memcpy(temp, value->value.binary.data, value->value.binary.datalen); memcpy(temp, value->value.binary.data, value->value.binary.datalen);
temp[value->value.binary.datalen] = '\0'; temp[value->value.binary.datalen] = '\0';
}
free(value->value.binary.data); free(value->value.binary.data);
value->type = PDFIO_VALTYPE_STRING; value->type = PDFIO_VALTYPE_STRING;

View File

@ -1,7 +1,7 @@
// //
// PDF file functions for PDFio. // PDF file functions for PDFio.
// //
// Copyright © 2021-2024 by Michael R Sweet. // Copyright © 2021-2025 by Michael R Sweet.
// //
// Licensed under Apache License v2.0. See the file "LICENSE" for more // Licensed under Apache License v2.0. See the file "LICENSE" for more
// information. // information.
@ -1517,6 +1517,7 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
cur_obj, // Current object cur_obj, // Current object
num_objs = 0; // Number of objects num_objs = 0; // Number of objects
pdfio_obj_t *objs[16384]; // Objects pdfio_obj_t *objs[16384]; // Objects
int count; // Count of objects
PDFIO_DEBUG("load_obj_stream(obj=%p(%d))\n", obj, (int)obj->number); PDFIO_DEBUG("load_obj_stream(obj=%p(%d))\n", obj, (int)obj->number);
@ -1528,12 +1529,17 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
return (false); return (false);
} }
count = (int)pdfioDictGetNumber(pdfioObjGetDict(obj), "N");
PDFIO_DEBUG("load_obj_stream: N=%d\n", count);
_pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st); _pdfioTokenInit(&tb, obj->pdf, (_pdfio_tconsume_cb_t)pdfioStreamConsume, (_pdfio_tpeek_cb_t)pdfioStreamPeek, st);
// Read the object numbers from the beginning of the stream... // Read the object numbers from the beginning of the stream...
while (_pdfioTokenGet(&tb, buffer, sizeof(buffer))) while (count > 0 && _pdfioTokenGet(&tb, buffer, sizeof(buffer)))
{ {
// Stop if this isn't an object number... // Stop if this isn't an object number...
PDFIO_DEBUG("load_obj_stream: %s\n", buffer);
if (!isdigit(buffer[0] & 255)) if (!isdigit(buffer[0] & 255))
break; break;
@ -1556,21 +1562,19 @@ load_obj_stream(pdfio_obj_t *obj) // I - Object to load
// Skip offset // Skip offset
_pdfioTokenGet(&tb, buffer, sizeof(buffer)); _pdfioTokenGet(&tb, buffer, sizeof(buffer));
PDFIO_DEBUG("load_obj_stream: %ld at offset %s\n", (long)number, buffer); PDFIO_DEBUG("load_obj_stream: %ld at offset %s\n", (long)number, buffer);
// One less compressed object...
count --;
} }
if (!buffer[0]) PDFIO_DEBUG("load_obj_stream: num_objs=%lu\n", (unsigned long)num_objs);
{
pdfioStreamClose(st);
return (false);
}
_pdfioTokenPush(&tb, buffer);
// Read the objects themselves... // Read the objects themselves...
for (cur_obj = 0; cur_obj < num_objs; cur_obj ++) for (cur_obj = 0; cur_obj < num_objs; cur_obj ++)
{ {
if (!_pdfioValueRead(obj->pdf, obj, &tb, &(objs[cur_obj]->value), 0)) if (!_pdfioValueRead(obj->pdf, obj, &tb, &(objs[cur_obj]->value), 0))
{ {
_pdfioFileError(obj->pdf, "Unable to read compressed object.");
pdfioStreamClose(st); pdfioStreamClose(st);
return (false); return (false);
} }
@ -1720,7 +1724,7 @@ load_xref(
pdfio_stream_t *st; // Stream pdfio_stream_t *st; // Stream
unsigned char buffer[32]; // Read buffer unsigned char buffer[32]; // Read buffer
size_t num_sobjs = 0, // Number of object streams size_t num_sobjs = 0, // Number of object streams
sobjs[8192]; // Object streams to load sobjs[16384]; // Object streams to load
pdfio_obj_t *current; // Current object pdfio_obj_t *current; // Current object
if ((number = strtoimax(line, &ptr, 10)) < 1) if ((number = strtoimax(line, &ptr, 10)) < 1)
@ -1938,7 +1942,6 @@ load_xref(
// Save the trailer dictionary and grab the root (catalog) and info // Save the trailer dictionary and grab the root (catalog) and info
// objects... // objects...
pdf->trailer_dict = trailer.value.dict; pdf->trailer_dict = trailer.value.dict;
pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info");
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt"); pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID"); pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
@ -2086,7 +2089,6 @@ load_xref(
// Save the trailer dictionary and grab the root (catalog) and info // Save the trailer dictionary and grab the root (catalog) and info
// objects... // objects...
pdf->trailer_dict = trailer.value.dict; pdf->trailer_dict = trailer.value.dict;
pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info");
pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt"); pdf->encrypt_obj = pdfioDictGetObj(pdf->trailer_dict, "Encrypt");
pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID"); pdf->id_array = pdfioDictGetArray(pdf->trailer_dict, "ID");
@ -2123,6 +2125,8 @@ load_xref(
// Once we have all of the xref tables loaded, get the important objects and // Once we have all of the xref tables loaded, get the important objects and
// build the pages array... // build the pages array...
pdf->info_obj = pdfioDictGetObj(pdf->trailer_dict, "Info");
if ((pdf->root_obj = pdfioDictGetObj(pdf->trailer_dict, "Root")) == NULL) if ((pdf->root_obj = pdfioDictGetObj(pdf->trailer_dict, "Root")) == NULL)
{ {
_pdfioFileError(pdf, "Missing Root object."); _pdfioFileError(pdf, "Missing Root object.");

View File

@ -1,7 +1,7 @@
// //
// Public header file for PDFio. // Public header file for PDFio.
// //
// Copyright © 2021-2024 by Michael R Sweet. // Copyright © 2021-2025 by Michael R Sweet.
// //
// Licensed under Apache License v2.0. See the file "LICENSE" for more // Licensed under Apache License v2.0. See the file "LICENSE" for more
// information. // information.
@ -23,7 +23,7 @@ extern "C" {
// Version number... // Version number...
// //
# define PDFIO_VERSION "1.4.0" # define PDFIO_VERSION "1.4.1"
// //

View File

@ -3,7 +3,7 @@
archiveVersion = 1; archiveVersion = 1;
classes = { classes = {
}; };
objectVersion = 50; objectVersion = 54;
objects = { objects = {
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
@ -31,6 +31,7 @@
27F2F0612710BE92008ECD36 /* pdfio-rc4.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */; }; 27F2F0612710BE92008ECD36 /* pdfio-rc4.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */; };
27F2F0622710BE92008ECD36 /* pdfio-crypto.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */; }; 27F2F0622710BE92008ECD36 /* pdfio-crypto.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */; };
27F2F0642711243D008ECD36 /* pdfio-sha256.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F0632711243D008ECD36 /* pdfio-sha256.c */; }; 27F2F0642711243D008ECD36 /* pdfio-sha256.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F0632711243D008ECD36 /* pdfio-sha256.c */; };
27FCBDE42D19F9B300485EEE /* pdfio-base-font-widths.h in Headers */ = {isa = PBXBuildFile; fileRef = 27FCBDE32D19F9B300485EEE /* pdfio-base-font-widths.h */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */ /* Begin PBXContainerItemProxy section */
@ -89,6 +90,7 @@
27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-rc4.c"; sourceTree = "<group>"; }; 27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-rc4.c"; sourceTree = "<group>"; };
27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-crypto.c"; sourceTree = "<group>"; }; 27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-crypto.c"; sourceTree = "<group>"; };
27F2F0632711243D008ECD36 /* pdfio-sha256.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-sha256.c"; sourceTree = "<group>"; }; 27F2F0632711243D008ECD36 /* pdfio-sha256.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-sha256.c"; sourceTree = "<group>"; };
27FCBDE32D19F9B300485EEE /* pdfio-base-font-widths.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "pdfio-base-font-widths.h"; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
@ -162,6 +164,7 @@
children = ( children = (
27CF90432711DFFE00E50FE4 /* pdfio-aes.c */, 27CF90432711DFFE00E50FE4 /* pdfio-aes.c */,
273440BA263D727800FBFD63 /* pdfio-array.c */, 273440BA263D727800FBFD63 /* pdfio-array.c */,
27FCBDE32D19F9B300485EEE /* pdfio-base-font-widths.h */,
273440BB263D727800FBFD63 /* pdfio-common.c */, 273440BB263D727800FBFD63 /* pdfio-common.c */,
271EA703265B2B1000ACDD39 /* pdfio-content.c */, 271EA703265B2B1000ACDD39 /* pdfio-content.c */,
27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */, 27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */,
@ -205,6 +208,7 @@
isa = PBXHeadersBuildPhase; isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
27FCBDE42D19F9B300485EEE /* pdfio-base-font-widths.h in Headers */,
273440CC263D727800FBFD63 /* pdfio.h in Headers */, 273440CC263D727800FBFD63 /* pdfio.h in Headers */,
271EA706265B2B1000ACDD39 /* pdfio-content.h in Headers */, 271EA706265B2B1000ACDD39 /* pdfio-content.h in Headers */,
273440C3263D727800FBFD63 /* pdfio-private.h in Headers */, 273440C3263D727800FBFD63 /* pdfio-private.h in Headers */,
@ -256,7 +260,8 @@
273440A8263D6FE200FBFD63 /* Project object */ = { 273440A8263D6FE200FBFD63 /* Project object */ = {
isa = PBXProject; isa = PBXProject;
attributes = { attributes = {
LastUpgradeCheck = 1300; BuildIndependentTargetsInParallel = YES;
LastUpgradeCheck = 1600;
TargetAttributes = { TargetAttributes = {
273440AF263D6FE200FBFD63 = { 273440AF263D6FE200FBFD63 = {
CreatedOnToolsVersion = 12.5; CreatedOnToolsVersion = 12.5;
@ -373,9 +378,11 @@
CODE_SIGN_IDENTITY = "Apple Development"; CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO; COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.1.2; CURRENT_PROJECT_VERSION = 1.1.2;
DEAD_CODE_STRIPPING = YES;
DEBUG_INFORMATION_FORMAT = dwarf; DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES; ENABLE_TESTABILITY = YES;
ENABLE_USER_SCRIPT_SANDBOXING = YES;
GCC_C_LANGUAGE_STANDARD = gnu11; GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES; GCC_NO_COMMON_BLOCKS = YES;
GCC_PREPROCESSOR_DEFINITIONS = ( GCC_PREPROCESSOR_DEFINITIONS = (
@ -450,15 +457,15 @@
CODE_SIGN_IDENTITY = "Apple Development"; CODE_SIGN_IDENTITY = "Apple Development";
COPY_PHASE_STRIP = NO; COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1.1.2; CURRENT_PROJECT_VERSION = 1.1.2;
DEAD_CODE_STRIPPING = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_HARDENED_RUNTIME = YES; ENABLE_HARDENED_RUNTIME = YES;
ENABLE_NS_ASSERTIONS = NO; ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_USER_SCRIPT_SANDBOXING = YES;
GCC_C_LANGUAGE_STANDARD = gnu11; GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES; GCC_NO_COMMON_BLOCKS = YES;
GCC_PREPROCESSOR_DEFINITIONS = ( GCC_PREPROCESSOR_DEFINITIONS = "$(inherited)";
"$(inherited)",
);
GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES; GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS = YES; GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS = YES;
@ -486,6 +493,7 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEAD_CODE_STRIPPING = YES;
DEVELOPMENT_TEAM = RU58A2256H; DEVELOPMENT_TEAM = RU58A2256H;
EXECUTABLE_PREFIX = lib; EXECUTABLE_PREFIX = lib;
PRODUCT_NAME = "$(TARGET_NAME)"; PRODUCT_NAME = "$(TARGET_NAME)";
@ -497,6 +505,7 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEAD_CODE_STRIPPING = YES;
DEVELOPMENT_TEAM = RU58A2256H; DEVELOPMENT_TEAM = RU58A2256H;
EXECUTABLE_PREFIX = lib; EXECUTABLE_PREFIX = lib;
PRODUCT_NAME = "$(TARGET_NAME)"; PRODUCT_NAME = "$(TARGET_NAME)";
@ -509,6 +518,7 @@
buildSettings = { buildSettings = {
CODE_SIGN_IDENTITY = "-"; CODE_SIGN_IDENTITY = "-";
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEAD_CODE_STRIPPING = YES;
DEVELOPMENT_TEAM = ""; DEVELOPMENT_TEAM = "";
ENABLE_HARDENED_RUNTIME = YES; ENABLE_HARDENED_RUNTIME = YES;
GCC_DYNAMIC_NO_PIC = NO; GCC_DYNAMIC_NO_PIC = NO;
@ -525,6 +535,7 @@
buildSettings = { buildSettings = {
CODE_SIGN_IDENTITY = "-"; CODE_SIGN_IDENTITY = "-";
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEAD_CODE_STRIPPING = YES;
DEVELOPMENT_TEAM = ""; DEVELOPMENT_TEAM = "";
ENABLE_HARDENED_RUNTIME = YES; ENABLE_HARDENED_RUNTIME = YES;
MACOSX_DEPLOYMENT_TARGET = 11.0; MACOSX_DEPLOYMENT_TARGET = 11.0;

View File

@ -3,7 +3,7 @@
<metadata> <metadata>
<id>pdfio_native</id> <id>pdfio_native</id>
<title>PDFio Library for VS2019+</title> <title>PDFio Library for VS2019+</title>
<version>1.3.2</version> <version>1.4.1</version>
<authors>Michael R Sweet</authors> <authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners> <owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl> <projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
@ -13,10 +13,10 @@
<requireLicenseAcceptance>false</requireLicenseAcceptance> <requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description> <description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary> <summary>PDFio is a simple C library for reading and writing PDF files. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2024 by Michael R Sweet</copyright> <copyright>Copyright © 2019-2025 by Michael R Sweet</copyright>
<tags>pdf file native</tags> <tags>pdf file native</tags>
<dependencies> <dependencies>
<dependency id="pdfio_native.redist" version="1.3.2" /> <dependency id="pdfio_native.redist" version="1.4.1" />
<dependency id="zlib_native.redist" version="1.2.11" /> <dependency id="zlib_native.redist" version="1.2.11" />
</dependencies> </dependencies>
</metadata> </metadata>

View File

@ -3,7 +3,7 @@
<metadata> <metadata>
<id>pdfio_native.redist</id> <id>pdfio_native.redist</id>
<title>PDFio Library for VS2019+</title> <title>PDFio Library for VS2019+</title>
<version>1.3.2</version> <version>1.4.1</version>
<authors>Michael R Sweet</authors> <authors>Michael R Sweet</authors>
<owners>michaelrsweet</owners> <owners>michaelrsweet</owners>
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl> <projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
@ -13,7 +13,7 @@
<requireLicenseAcceptance>false</requireLicenseAcceptance> <requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>PDFio Library for VS2019+</description> <description>PDFio Library for VS2019+</description>
<summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary> <summary>PDFio is a simple C library for reading and writing PDF files. This package provides the redistributable content for the PDFio library. PDFio is licensed under the Apache License Version 2.0 with an (optional) exception to allow linking against GNU GPL2-only software.</summary>
<copyright>Copyright © 2019-2024 by Michael R Sweet</copyright> <copyright>Copyright © 2019-2025 by Michael R Sweet</copyright>
<tags>pdf file native</tags> <tags>pdf file native</tags>
<dependencies> <dependencies>
<dependency id="zlib_native.redist" version="1.2.11" /> <dependency id="zlib_native.redist" version="1.2.11" />

View File

@ -1,3 +1,7 @@
https://www.color.org/chardata/rgb/rommrgb.xalter
Copyright © 2006 Hewlett-Packard
Terms of use Terms of use
This profile is made available by ICC, and may be copied, distributed, embedded, made, used, and sold without restriction. Altered versions of this profile shall have the original identification and copyright information removed and shall not be misrepresented as the original profile. This profile is made available by ICC, and may be copied, distributed, embedded, made, used, and sold without restriction. Altered versions of this profile shall have the original identification and copyright information removed and shall not be misrepresented as the original profile.