mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2025-07-21 00:09:47 +02:00
Compare commits
28 Commits
d3d6683041
...
v1.4.0
Author | SHA1 | Date | |
---|---|---|---|
6d65a609e5 | |||
e96f9bfa6b | |||
10c15fc281 | |||
fd8427d68a | |||
ed1421287f | |||
aa91b141a8 | |||
5dc68f3285 | |||
52b508bdd2 | |||
41ebe39f3b | |||
62df5f5c78 | |||
a1237db52c | |||
a24fdee335 | |||
e4081f2ba3 | |||
5bc7ebee2c | |||
b872df5a1e | |||
53967552df | |||
f8639fbd64 | |||
9020e92928 | |||
48e6597337 | |||
d4e3bbcf16 | |||
2c8a996875 | |||
3d6d9e3e3e | |||
62fdf48ff9 | |||
294f5e07c5 | |||
4baafde74b | |||
2d175fdf70 | |||
56a0f290aa | |||
2e5319a623 |
4
.gitignore
vendored
4
.gitignore
vendored
@ -12,14 +12,16 @@
|
||||
/configure~
|
||||
/doc/pdfio.epub
|
||||
/examples/code128
|
||||
/examples/image2pdf
|
||||
/examples/md2pdf
|
||||
/examples/pdf2text
|
||||
/examples/pdfioinfo
|
||||
/Makefile
|
||||
/packages
|
||||
/pdfio.pc
|
||||
/pdfio.xcodeproj/xcshareddata
|
||||
/pdfio-*.tar.gz*
|
||||
/pdfio-*.zip*
|
||||
/pdfiototext
|
||||
/testpdfio
|
||||
/testpdfio-*.pdf
|
||||
/testttf
|
||||
|
@ -14,6 +14,7 @@ v1.4.0 - YYYY-MM-DD
|
||||
with `pdfioFileCreateFontObjFromBase` (Issue #84)
|
||||
- Fixed reading of PDF files whose trailer is missing a newline (Issue #80)
|
||||
- Fixed builds with some versions of VC++ (Issue #81)
|
||||
- Fixed validation of date/time values (Issue #83)
|
||||
|
||||
|
||||
v1.3.2 - 2024-08-15
|
||||
|
37
Makefile.in
37
Makefile.in
@ -103,15 +103,33 @@ LIBOBJS = \
|
||||
ttf.o
|
||||
OBJS = \
|
||||
$(LIBOBJS) \
|
||||
pdfiototext.o \
|
||||
testpdfio.o \
|
||||
testttf.o
|
||||
TARGETS = \
|
||||
$(LIBPDFIO) \
|
||||
$(LIBPDFIO_STATIC) \
|
||||
pdfiototext \
|
||||
testpdfio \
|
||||
testttf
|
||||
DOCFILES = \
|
||||
doc/pdfio.html \
|
||||
doc/pdfio-512.png \
|
||||
LICENSE \
|
||||
NOTICE
|
||||
EXAMPLES = \
|
||||
examples/Makefile \
|
||||
examples/Roboto-Bold.ttf \
|
||||
examples/Roboto-Italic.ttf \
|
||||
examples/Roboto-Regular.ttf \
|
||||
examples/RobotoMono-Regular.ttf \
|
||||
examples/code128.c \
|
||||
examples/code128.ttf \
|
||||
examples/image2pdf.c \
|
||||
examples/md2pdf.c \
|
||||
examples/md2pdf.md \
|
||||
examples/mmd.c \
|
||||
examples/mmd.h \
|
||||
examples/pdf2text.c \
|
||||
examples/pdfioinfo.c
|
||||
|
||||
|
||||
# Make everything
|
||||
@ -152,8 +170,13 @@ install: $(TARGETS)
|
||||
$(INSTALL) -c -m 644 pdfio.pc $(BUILDROOT)$(libdir)/pkgconfig
|
||||
echo Installing documentation to $(BUILDROOT)$(datadir)/doc/pdfio...
|
||||
$(INSTALL) -d -m 755 $(BUILDROOT)$(datadir)/doc/pdfio
|
||||
for file in doc/pdfio.html doc/pdfio-512.png LICENSE NOTICE; do \
|
||||
$(INSTALL) -c -m 644 $$file $(BUILDROOT)$(datadir)/doc/pdfio; \
|
||||
for file in $(DOCFILES); do \
|
||||
$(INSTALL) -c -m 644 $$file $(BUILDROOT)$(datadir)/doc/pdfio; \
|
||||
done
|
||||
echo Installing examples to $(BUILDROOT)$(datadir)/doc/pdfio/examples...
|
||||
$(INSTALL) -d -m 755 $(BUILDROOT)$(datadir)/doc/pdfio/examples
|
||||
for file in $(EXAMPLES); do \
|
||||
$(INSTALL) -c -m 644 $$file $(BUILDROOT)$(datadir)/doc/pdfio/examples; \
|
||||
done
|
||||
echo Installing man page to $(BUILDROOT)$(mandir)/man3...
|
||||
$(INSTALL) -d -m 755 $(BUILDROOT)$(mandir)/man3
|
||||
@ -201,12 +224,6 @@ pdfio1.def: $(LIBOBJS) Makefile
|
||||
grep -v '^_ttf' | sed -e '1,$$s/^_//' | sort >>$@
|
||||
|
||||
|
||||
# pdfio text extraction (demo, doesn't handle a lot of things yet)
|
||||
pdfiototext: pdfiototext.o libpdfio.a
|
||||
echo Linking $@...
|
||||
$(CC) $(LDFLAGS) -o $@ pdfiototext.o libpdfio.a $(LIBS)
|
||||
|
||||
|
||||
# pdfio test program
|
||||
testpdfio: testpdfio.o libpdfio.a
|
||||
echo Linking $@...
|
||||
|
1214
doc/pdfio.3
1214
doc/pdfio.3
File diff suppressed because it is too large
Load Diff
1066
doc/pdfio.html
1066
doc/pdfio.html
File diff suppressed because it is too large
Load Diff
1218
doc/pdfio.md
1218
doc/pdfio.md
File diff suppressed because it is too large
Load Diff
@ -13,14 +13,18 @@
|
||||
|
||||
# Common options
|
||||
CFLAGS = -g $(CPPFLAGS)
|
||||
CPPFLAGS = -I..
|
||||
LIBS = -L.. -lpdfio -lz
|
||||
#CFLAGS = -g -fsanitize=address $(CPPFLAGS)
|
||||
CPPFLAGS = -I.. -I/usr/local/include
|
||||
LIBS = -L.. -L/usr/local/lib -lpdfio -lz
|
||||
|
||||
|
||||
# Targets
|
||||
TARGETS = \
|
||||
code128 \
|
||||
md2pdf
|
||||
image2pdf \
|
||||
md2pdf \
|
||||
pdf2text \
|
||||
pdfioinfo
|
||||
|
||||
|
||||
# Make everything
|
||||
@ -37,10 +41,25 @@ code128: code128.c
|
||||
$(CC) $(CFLAGS) -o $@ code128.c $(LIBS)
|
||||
|
||||
|
||||
# image2pdf
|
||||
image2pdf: image2pdf.c
|
||||
$(CC) $(CFLAGS) -o $@ image2pdf.c $(LIBS)
|
||||
|
||||
|
||||
# md2pdf
|
||||
md2pdf: md2pdf.c mmd.c mmd.h
|
||||
$(CC) $(CFLAGS) -o $@ md2pdf.c mmd.c $(LIBS)
|
||||
|
||||
|
||||
# pdfio text extraction (demo, doesn't handle a lot of things yet)
|
||||
pdf2text: pdf2text.c
|
||||
$(CC) $(CFLAGS) -o $@ pdf2text.c $(LIBS)
|
||||
|
||||
|
||||
# pdfioinfo
|
||||
pdfioinfo: pdfioinfo.c
|
||||
$(CC) $(CFLAGS) -o $@ pdfioinfo.c $(LIBS)
|
||||
|
||||
|
||||
# Common dependencies...
|
||||
$(TARGETS): Makefile ../pdfio.h ../pdfio-content.h
|
||||
|
@ -23,7 +23,6 @@
|
||||
// extended characters are ignored in the source string.
|
||||
//
|
||||
|
||||
|
||||
static char * // O - Output string
|
||||
make_code128(char *dst, // I - Destination buffer
|
||||
const char *src, // I - Source string
|
||||
@ -54,9 +53,9 @@ make_code128(char *dst, // I - Destination buffer
|
||||
static const char code128_start_code_a = '\313';
|
||||
// Start code A
|
||||
static const char code128_start_code_b = '\314';
|
||||
// Start code A
|
||||
// Start code B
|
||||
static const char code128_start_code_c = '\315';
|
||||
// Start code A
|
||||
// Start code C
|
||||
static const char code128_stop = '\316';
|
||||
// Stop pattern
|
||||
|
||||
@ -149,7 +148,7 @@ main(int argc, // I - Number of command-line arguments
|
||||
// Load fonts...
|
||||
barcode_font = pdfioFileCreateFontObjFromFile(pdf, "code128.ttf", /*unicode*/false);
|
||||
if (text)
|
||||
text_font = pdfioFileCreateFontObjFromFile(pdf, "../testfiles/OpenSans-Regular.ttf", /*unicode*/true);
|
||||
text_font = pdfioFileCreateFontObjFromBase(pdf, "Helvetica");
|
||||
|
||||
// Generate Code128 characters for the desired barcode...
|
||||
if (!(barcode[0] & 0x80))
|
||||
@ -182,7 +181,7 @@ main(int argc, // I - Number of command-line arguments
|
||||
page_st = pdfioFileCreatePage(pdf, page_dict);
|
||||
|
||||
// Draw the page...
|
||||
pdfioContentSetStrokeColorGray(page_st, 0.0);
|
||||
pdfioContentSetFillColorGray(page_st, 0.0);
|
||||
|
||||
pdfioContentSetTextFont(page_st, "B128", barcode_height);
|
||||
pdfioContentTextBegin(page_st);
|
||||
@ -195,7 +194,7 @@ main(int argc, // I - Number of command-line arguments
|
||||
pdfioContentSetTextFont(page_st, "TEXT", text_height);
|
||||
pdfioContentTextBegin(page_st);
|
||||
pdfioContentTextMoveTo(page_st, 0.5 * (media_box.x2 - text_width), 9.0);
|
||||
pdfioContentTextShow(page_st, /*unicode*/true, text);
|
||||
pdfioContentTextShow(page_st, /*unicode*/false, text);
|
||||
pdfioContentTextEnd(page_st);
|
||||
}
|
||||
|
||||
|
139
examples/image2pdf.c
Normal file
139
examples/image2pdf.c
Normal file
@ -0,0 +1,139 @@
|
||||
//
|
||||
// Image example for PDFio.
|
||||
//
|
||||
// Copyright © 2023-2024 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// ./image2pdf FILENAME.{jpg,png} FILENAME.pdf ["TEXT"]
|
||||
//
|
||||
|
||||
#include <pdfio.h>
|
||||
#include <pdfio-content.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
//
|
||||
// 'create_pdf_image_file()' - Create a PDF file of an image with optional caption.
|
||||
//
|
||||
|
||||
bool // O - True on success, false on failure
|
||||
create_pdf_image_file(
|
||||
const char *pdfname, // I - PDF filename
|
||||
const char *imagename, // I - Image filename
|
||||
const char *caption) // I - Caption filename
|
||||
{
|
||||
pdfio_file_t *pdf; // PDF file
|
||||
pdfio_obj_t *font; // Caption font
|
||||
pdfio_obj_t *image; // Image
|
||||
pdfio_dict_t *dict; // Page dictionary
|
||||
pdfio_stream_t *page; // Page stream
|
||||
double width, height; // Width and height of image
|
||||
double swidth, sheight; // Scaled width and height on page
|
||||
double tx, ty; // Position on page
|
||||
|
||||
|
||||
// Create the PDF file...
|
||||
pdf = pdfioFileCreate(pdfname, /*version*/NULL, /*media_box*/NULL,
|
||||
/*crop_box*/NULL, /*error_cb*/NULL,
|
||||
/*error_cbdata*/NULL);
|
||||
if (!pdf)
|
||||
return (false);
|
||||
|
||||
// Create a Courier base font for the caption
|
||||
font = pdfioFileCreateFontObjFromBase(pdf, "Courier");
|
||||
|
||||
if (!font)
|
||||
{
|
||||
pdfioFileClose(pdf);
|
||||
return (false);
|
||||
}
|
||||
|
||||
// Create an image object from the JPEG/PNG image file...
|
||||
image = pdfioFileCreateImageObjFromFile(pdf, imagename, true);
|
||||
|
||||
if (!image)
|
||||
{
|
||||
pdfioFileClose(pdf);
|
||||
return (false);
|
||||
}
|
||||
|
||||
// Create a page dictionary with the font and image...
|
||||
dict = pdfioDictCreate(pdf);
|
||||
pdfioPageDictAddFont(dict, "F1", font);
|
||||
pdfioPageDictAddImage(dict, "IM1", image);
|
||||
|
||||
// Create the page and its content stream...
|
||||
page = pdfioFileCreatePage(pdf, dict);
|
||||
|
||||
// Position and scale the image on the page...
|
||||
width = pdfioImageGetWidth(image);
|
||||
height = pdfioImageGetHeight(image);
|
||||
|
||||
// Default media_box is "universal" 595.28x792 points (8.27x11in or
|
||||
// 210x279mm). Use margins of 36 points (0.5in or 12.7mm) with another
|
||||
// 36 points for the caption underneath...
|
||||
swidth = 595.28 - 72.0;
|
||||
sheight = swidth * height / width;
|
||||
if (sheight > (792.0 - 36.0 - 72.0))
|
||||
{
|
||||
sheight = 792.0 - 36.0 - 72.0;
|
||||
swidth = sheight * width / height;
|
||||
}
|
||||
|
||||
tx = 0.5 * (595.28 - swidth);
|
||||
ty = 0.5 * (792 - 36 - sheight);
|
||||
|
||||
pdfioContentDrawImage(page, "IM1", tx, ty + 36.0, swidth, sheight);
|
||||
|
||||
// Draw the caption in black...
|
||||
pdfioContentSetFillColorDeviceGray(page, 0.0);
|
||||
|
||||
// Compute the starting point for the text - Courier is monospaced
|
||||
// with a nominal width of 0.6 times the text height...
|
||||
tx = 0.5 * (595.28 - 18.0 * 0.6 * strlen(caption));
|
||||
|
||||
// Position and draw the caption underneath...
|
||||
pdfioContentTextBegin(page);
|
||||
pdfioContentSetTextFont(page, "F1", 18.0);
|
||||
pdfioContentTextMoveTo(page, tx, ty);
|
||||
pdfioContentTextShow(page, /*unicode*/false, caption);
|
||||
pdfioContentTextEnd(page);
|
||||
|
||||
// Close the page stream and the PDF file...
|
||||
pdfioStreamClose(page);
|
||||
pdfioFileClose(pdf);
|
||||
|
||||
return (true);
|
||||
}
|
||||
|
||||
|
||||
//
// 'main()' - Produce a single-page file from an image.
//
// Usage:
//
//   ./image2pdf FILENAME.{jpg,png} FILENAME.pdf ["TEXT"]
//
// The first argument is the source image, the second is the PDF file to
// create, and the optional third argument is the caption text.
//

int                                     // O - Exit status
main(int  argc,                         // I - Number of command-line arguments
     char *argv[])                      // I - Command-line arguments
{
  const char *imagefile,                // Image filename
             *pdffile,                  // PDF filename
             *caption;                  // Caption text


  // Get the image file, PDF file, and optional caption text from the command-line...
  if (argc < 3 || argc > 4)
  {
    fputs("Usage: image2pdf FILENAME.{jpg,png} FILENAME.pdf [\"TEXT\"]\n", stderr);
    return (1);
  }

  imagefile = argv[1];
  pdffile   = argv[2];

  // The caption is optional; use an empty string when it is omitted so the
  // callee is never handed a NULL pointer...
  caption = argc > 3 ? argv[3] : "";

  // create_pdf_image_file() takes the PDF filename first and the image
  // filename second, the reverse of the command-line order...
  return (create_pdf_image_file(pdffile, imagefile, caption) ? 0 : 1);
}
|
1737
examples/md2pdf.c
1737
examples/md2pdf.c
File diff suppressed because it is too large
Load Diff
@ -1,194 +1,55 @@
|
||||
---
|
||||
title: Mini-Markdown Test Document
|
||||
title: Markdown to PDF Converter Test File
|
||||
...
|
||||
|
||||
All heading levels are supported from 1 to 6, using both the ATX and Setext
|
||||
forms. As an indented code block:
|
||||
|
||||
# Heading 1
|
||||
## Heading 2
|
||||
### Heading 3
|
||||
#### Heading 4
|
||||
##### Heading 5
|
||||
###### Heading 6
|
||||
Markdown to PDF Converter Test File
|
||||
===================================
|
||||
|
||||
Setext Heading 1
|
||||
================
|
||||
The `md2pdf` program is organized into three source files: `md2pdf.c` which
|
||||
contains the code to format the markdown content and `mmd.h` and `mmd.c` (from
|
||||
the [Miniature Markdown Library][MMD] project) which load the markdown content.
|
||||
|
||||
Setext Heading 2
|
||||
----------------
|
||||
[MMD]: https://www.msweet.org/mmd/
|
||||
|
||||
As block headings:
|
||||
This is a test file for `md2pdf`. Here is a bullet list:
|
||||
|
||||
# Heading 1
|
||||
## Heading 2
|
||||
### Heading 3
|
||||
#### Heading 4
|
||||
##### Heading 5
|
||||
###### Heading 6
|
||||
- Embed base and TrueType fonts,
|
||||
- Format text with embedded JPEG and PNG images and check boxes, with support
|
||||
for wrapping, alignment in table cells, leader text (as used for lists), and
|
||||
variable line height,
|
||||
- Add headers and footers, and
|
||||
- Add hyperlinks and document platform.
|
||||
|
||||
Setext Heading 1
|
||||
================
|
||||
And here is an ordered list:
|
||||
|
||||
Setext Heading 2
|
||||
----------------
|
||||
1. Embed base and TrueType fonts,
|
||||
2. Format text with embedded JPEG and PNG images and check boxes, with support
|
||||
for wrapping, alignment in table cells, leader text (as used for lists), and
|
||||
variable line height,
|
||||
3. Add headers and footers, and
|
||||
4. Add hyperlinks and document platform.
|
||||
|
||||
And block quotes:
|
||||
|
||||
> # BQ Heading 1
|
||||
> ## BQ Heading 2
|
||||
> ### BQ Heading 3
|
||||
> #### BQ Heading 4
|
||||
> ##### BQ Heading 5
|
||||
> ###### BQ Heading 6
|
||||
>
|
||||
> Setext Heading 1
|
||||
> ================
|
||||
>
|
||||
> Setext Heading 2
|
||||
> ----------------
|
||||
|
||||
And ordered lists:
|
||||
|
||||
1. First item.
|
||||
|
||||
2. Second item.
|
||||
|
||||
3. Third item with very long text that wraps
|
||||
across multiple lines.
|
||||
|
||||
With a secondary paragraph associated with
|
||||
the third item.
|
||||
|
||||
And unordered lists:
|
||||
|
||||
- First item.
|
||||
|
||||
+ Second item.
|
||||
|
||||
* Third item.
|
||||
|
||||
* [ ] Fourth item (unchecked)
|
||||
|
||||
- [x] Fifth item (checked)
|
||||
|
||||
Code block with `\``:
|
||||
Code Blocks
|
||||
-----------
|
||||
|
||||
```
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
puts("Hello, World!");
|
||||
return (0);
|
||||
}
|
||||
~~~
|
||||
0 1 2 3 4 5 6 7 8
|
||||
12345678901234567890123456789012345678901234567890123456789012345678901234567890
|
||||
```
|
||||
|
||||
Code block with `~`:
|
||||
|
||||
~~~
|
||||
#include <stdio.h>
|
||||
Images
|
||||
------
|
||||
|
||||
int main(void)
|
||||
{
|
||||
puts("Hello, World!");
|
||||
return (0);
|
||||
}
|
||||
```
|
||||
~~~
|
||||
PDFio book cover image:
|
||||
|
||||
Link to [mmd web site](https://michaelrsweet.github.io/mmd).
|
||||

|
||||
|
||||
Normal link to [Heading 1](@).
|
||||
|
||||
Code link to [`Heading 2`](@).
|
||||
|
||||
Inner emphasized link to [*Heading 3*](@).
|
||||
|
||||
Outer emphasized link to *[Heading 3](@)*.
|
||||
|
||||
Inner strong link to [**Heading 4**](@).
|
||||
|
||||
Outer strong link to **[Heading 4](@)**.
|
||||
|
||||
Implicit link to [reference1][].
|
||||
|
||||
Shortcut link to [reference1] without a link title.
|
||||
|
||||
[reference1]: https://michaelrsweet.github.io/mmd 'MMD Home Page'
|
||||
|
||||
[reference2]: https://michaelrsweet.github.io/mmd/mmd.html 'MMD Documentation'
|
||||
|
||||
[reference3]: https://michaelrsweet.github.io/mmd/mmd-160.png "MMD Logo"
|
||||
|
||||
Link to [mmd web site][reference1] works.
|
||||
Link to [mmd documentation][reference2] works.
|
||||
Link to ![mmd logo][reference3] image.
|
||||
Link to [bad reference][reference4] doesn't work.
|
||||
|
||||
Autolink to <https://michaelrsweet.github.io/mmd>.
|
||||
|
||||
Autolink in parenthesis (<https://michaelrsweet.github.io/mmd>).
|
||||
|
||||
[Link broken
|
||||
across two lines](https://michaelrsweet.github.io/mmd)
|
||||
|
||||
Color JPEG Image: 
|
||||
Grayscale JPEG Image: 
|
||||
Color PNG Image: 
|
||||
Grayscale PNG Image: 
|
||||
Indexed PNG Image: 
|
||||
|
||||
This sentence contains *Emphasized Text*, **Bold Text**, and `Code Text` for
|
||||
testing the MMD parser. The `<mmd.h>` header file.
|
||||
|
||||
This sentence contains _Emphasized Text_, __Bold Text__, and
|
||||
~~Strikethrough Text~~ for testing the MMD parser.
|
||||
|
||||
*Emphasized Text Split
|
||||
Across Two Lines*
|
||||
|
||||
**Bold Text Split
|
||||
Across Two Lines**
|
||||
|
||||
`Code Text Split
|
||||
Across Two lines`
|
||||
|
||||
_Emphasized Text Split
|
||||
Across Two Lines_
|
||||
|
||||
__Bold Text Split
|
||||
Across Two Lines__
|
||||
|
||||
~~Strikethrough Text Split
|
||||
Across Two Lines~~
|
||||
|
||||
All work and no play makes Johnny a dull boy.
|
||||
All work and no play makes Johnny a dull boy.
|
||||
All work and no play makes Johnny a dull boy.
|
||||
|
||||
All work and no play makes Johnny a dull boy.
|
||||
All work and no play makes Johnny a dull boy.
|
||||
All work and no play makes Johnny a dull boy.
|
||||
|
||||
\(Escaped Parenthesis)
|
||||
|
||||
\(*Emphasized Parenthesis*)
|
||||
|
||||
\(**Boldface Parenthesis**)
|
||||
|
||||
\(`Code Parenthesis`)
|
||||
|
||||
Escaped backtick (`\``)
|
||||
|
||||
Table as code:
|
||||
|
||||
| Heading 1 | Heading 2 | Heading 3 |
|
||||
| --------- | --------- | --------- |
|
||||
| Cell 1,1 | Cell 1,2 | Cell 1,3 |
|
||||
| Cell 2,1 | Cell 2,2 | Cell 2,3 |
|
||||
| Cell 3,1 | Cell 3,2 | Cell 3,3 |
|
||||
Tables
|
||||
------
|
||||
|
||||
Table with leading/trailing pipes:
|
||||
|
||||
@ -221,15 +82,3 @@ Table in block quote:
|
||||
> Cell 1,1 | Cell 1,2 | Cell 1,3
|
||||
> Cell 2,1 | Cell 2,2 | Cell 2,3
|
||||
> Cell 3,1 | Cell 3,2 | Cell 3,3
|
||||
|
||||
# Tests for Bugs/Edge Cases
|
||||
|
||||
Paragraph with "|" that should not
|
||||
be interpreted as a table.
|
||||
|
||||
code before a bulleted list
|
||||
|
||||
- First item
|
||||
- Second item
|
||||
- Some pathological nested link and inline style features supported by
|
||||
CommonMark like "`******Really Strong Text******`".
|
||||
|
@ -166,7 +166,7 @@ mmdCopyAllText(mmd_t *node) // I - Parent node
|
||||
char *all = NULL, // String buffer
|
||||
*allptr = NULL, // Pointer into string buffer
|
||||
*temp; // Temporary pointer
|
||||
size_t allsize = 0, // Size of "all" buffer
|
||||
size_t allsize = 1, // Size of "all" buffer
|
||||
textlen; // Length of "text" string
|
||||
mmd_t *current, // Current node
|
||||
*next; // Next node
|
||||
@ -544,7 +544,7 @@ mmdLoadIO(mmd_t *root, // I - Root node for document or `NULL` for a new d
|
||||
block = NULL;
|
||||
continue;
|
||||
}
|
||||
else if (*lineptr == '>' && (lineptr - linestart) < 4)
|
||||
else if (stackptr->parent->type != MMD_TYPE_CODE_BLOCK && *lineptr == '>' && (lineptr - linestart) < 4)
|
||||
{
|
||||
// Block quote. See if there is an existing blockquote...
|
||||
DEBUG_printf(" BLOCKQUOTE (stackptr=%ld)\n", stackptr - stack);
|
||||
|
@ -1,17 +1,17 @@
|
||||
//
|
||||
// PDF to text program for PDFio.
|
||||
//
|
||||
// Copyright © 2022 by Michael R Sweet.
|
||||
// Copyright © 2022-2024 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// ./pdfiototext FILENAME.pdf > FILENAME.txt
|
||||
// ./pdf2text FILENAME.pdf > FILENAME.txt
|
||||
//
|
||||
|
||||
#include "pdfio.h"
|
||||
#include <pdfio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
@ -36,16 +36,14 @@ main(int argc, // I - Number of command-line arguments
|
||||
// Verify command-line arguments...
|
||||
if (argc != 2)
|
||||
{
|
||||
puts("Usage: pdfiototext FILENAME.pdf > FILENAME.txt");
|
||||
puts("Usage: pdf2text FILENAME.pdf > FILENAME.txt");
|
||||
return (1);
|
||||
}
|
||||
|
||||
// Open the PDF file...
|
||||
if ((file = pdfioFileOpen(argv[1], NULL, NULL, NULL, NULL)) == NULL)
|
||||
if ((file = pdfioFileOpen(argv[1], /*password_cb*/NULL, /*password_data*/NULL, /*error_cb*/NULL, /*error_data*/NULL)) == NULL)
|
||||
return (1);
|
||||
|
||||
// printf("%s: %u pages\n", argv[1], (unsigned)pdfioFileGetNumPages(file));
|
||||
|
||||
// Try grabbing content from all of the pages...
|
||||
for (i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i ++)
|
||||
{
|
||||
@ -54,29 +52,28 @@ main(int argc, // I - Number of command-line arguments
|
||||
|
||||
num_streams = pdfioPageGetNumStreams(obj);
|
||||
|
||||
// printf("%s: page%u=%p, num_streams=%u\n", argv[1], (unsigned)i, obj, (unsigned)num_streams);
|
||||
|
||||
for (j = 0; j < num_streams; j ++)
|
||||
{
|
||||
if ((st = pdfioPageOpenStream(obj, j, true)) == NULL)
|
||||
continue;
|
||||
|
||||
// printf("%s: page%u st%u=%p\n", argv[1], (unsigned)i, (unsigned)j, st);
|
||||
|
||||
// Read PDF tokens from the page stream...
|
||||
first = true;
|
||||
while (pdfioStreamGetToken(st, buffer, sizeof(buffer)))
|
||||
{
|
||||
if (buffer[0] == '(')
|
||||
{
|
||||
// Text string using an 8-bit encoding
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
else if (buffer[1] != ' ')
|
||||
putchar(' ');
|
||||
|
||||
fputs(buffer + 1, stdout);
|
||||
}
|
||||
else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\""))
|
||||
{
|
||||
// Text operators that advance to the next line in the block
|
||||
putchar('\n');
|
||||
first = true;
|
||||
}
|
65
examples/pdfioinfo.c
Normal file
65
examples/pdfioinfo.c
Normal file
@ -0,0 +1,65 @@
|
||||
//
|
||||
// PDF metadata example for PDFio.
|
||||
//
|
||||
// Copyright © 2023-2024 by Michael R Sweet.
|
||||
//
|
||||
// Licensed under Apache License v2.0. See the file "LICENSE" for more
|
||||
// information.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// ./pdfioinfo FILENAME.pdf
|
||||
//
|
||||
|
||||
#include <pdfio.h>
|
||||
#include <time.h>
|
||||
|
||||
|
||||
//
|
||||
// 'main()' - Open a PDF file and show its metadata.
|
||||
//
|
||||
|
||||
int // O - Exit status
|
||||
main(int argc, // I - Number of command-line arguments
|
||||
char *argv[]) // Command-line arguments
|
||||
{
|
||||
const char *filename; // PDF filename
|
||||
pdfio_file_t *pdf; // PDF file
|
||||
time_t creation_date; // Creation date
|
||||
struct tm *creation_tm; // Creation date/time information
|
||||
char creation_text[256]; // Creation date/time as a string
|
||||
|
||||
|
||||
// Get the filename from the command-line...
|
||||
if (argc != 2)
|
||||
{
|
||||
fputs("Usage: ./pdfioinfo FILENAME.pdf\n", stderr);
|
||||
return (1);
|
||||
}
|
||||
|
||||
filename = argv[1];
|
||||
|
||||
// Open the PDF file with the default callbacks...
|
||||
pdf = pdfioFileOpen(filename, /*password_cb*/NULL,
|
||||
/*password_cbdata*/NULL, /*error_cb*/NULL,
|
||||
/*error_cbdata*/NULL);
|
||||
if (pdf == NULL)
|
||||
return (1);
|
||||
|
||||
// Get the creation date and convert to a string...
|
||||
creation_date = pdfioFileGetCreationDate(pdf);
|
||||
creation_tm = localtime(&creation_date);
|
||||
strftime(creation_text, sizeof(creation_text), "%c", creation_tm);
|
||||
|
||||
// Print file information to stdout...
|
||||
printf("%s:\n", filename);
|
||||
printf(" Title: %s\n", pdfioFileGetTitle(pdf));
|
||||
printf(" Author: %s\n", pdfioFileGetAuthor(pdf));
|
||||
printf(" Created On: %s\n", creation_text);
|
||||
printf(" Number Pages: %u\n", (unsigned)pdfioFileGetNumPages(pdf));
|
||||
|
||||
// Close the PDF file...
|
||||
pdfioFileClose(pdf);
|
||||
|
||||
return (0);
|
||||
}
|
@ -284,7 +284,7 @@ static short times_roman_widths[256] =
|
||||
};
|
||||
|
||||
|
||||
static short zapf_dingbats_widths[256] =
|
||||
static short zapfdingbats_widths[256] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -1131,28 +1131,13 @@ pdfioContentTextMeasure(
|
||||
break;
|
||||
}
|
||||
|
||||
if (i >= (sizeof(_pdfio_cp1252) / sizeof(_pdfio_cp1252[0])))
|
||||
if (i < (sizeof(_pdfio_cp1252) / sizeof(_pdfio_cp1252[0])))
|
||||
ch = (int)(i + 0x80); // Extra characters from 0x80 to 0x9f
|
||||
else
|
||||
ch = '?'; // Unsupported chars map to ?
|
||||
}
|
||||
|
||||
if (ch < 128)
|
||||
{
|
||||
// ASCII
|
||||
*tempptr++ = (char)ch;
|
||||
}
|
||||
else if (ch < 2048)
|
||||
{
|
||||
// 2-byte UTF-8
|
||||
*tempptr++ = (char)(0xc0 | ((ch >> 6) & 0x1f));
|
||||
*tempptr++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
else
|
||||
{
|
||||
// 3-byte UTF-8
|
||||
*tempptr++ = (char)(0xe0 | ((ch >> 12) & 0x0f));
|
||||
*tempptr++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
||||
*tempptr++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
*tempptr++ = (char)ch;
|
||||
}
|
||||
|
||||
*tempptr = '\0';
|
||||
@ -1186,8 +1171,8 @@ pdfioContentTextMeasure(
|
||||
widths = times_italic_widths;
|
||||
else if (!strcmp(basefont, "Times-Roman"))
|
||||
widths = times_roman_widths;
|
||||
else if (!strcmp(basefont, "Zapf-Dingbats"))
|
||||
widths = zapf_dingbats_widths;
|
||||
else if (!strcmp(basefont, "ZapfDingbats"))
|
||||
widths = zapfdingbats_widths;
|
||||
else
|
||||
return (0.0);
|
||||
|
||||
|
@ -755,6 +755,8 @@ get_date_time(const char *s) // I - PDF date/time value
|
||||
int offset; // Date offset
|
||||
|
||||
|
||||
PDFIO_DEBUG("get_date_time(s=\"%s\")\n", s);
|
||||
|
||||
// Possible date value of the form:
|
||||
//
|
||||
// (D:YYYYMMDDhhmmssZ)
|
||||
@ -772,10 +774,12 @@ get_date_time(const char *s) // I - PDF date/time value
|
||||
{
|
||||
if (s[i] == 'Z')
|
||||
{
|
||||
// UTC...
|
||||
i ++;
|
||||
}
|
||||
else if (s[i] == '-' || s[i] == '+')
|
||||
{
|
||||
// Timezone offset from UTC...
|
||||
if (isdigit(s[i + 1] & 255) && isdigit(s[i + 2] & 255) && s[i + 3] == '\'' && isdigit(s[i + 4] & 255) && isdigit(s[i + 5] & 255))
|
||||
{
|
||||
i += 6;
|
||||
@ -783,6 +787,11 @@ get_date_time(const char *s) // I - PDF date/time value
|
||||
i ++;
|
||||
}
|
||||
}
|
||||
else if (!s[i])
|
||||
{
|
||||
// Missing zone info, invalid date string...
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
if (s[i])
|
||||
|
@ -3,7 +3,7 @@
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 50;
|
||||
objectVersion = 54;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
@ -31,6 +31,7 @@
|
||||
27F2F0612710BE92008ECD36 /* pdfio-rc4.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */; };
|
||||
27F2F0622710BE92008ECD36 /* pdfio-crypto.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */; };
|
||||
27F2F0642711243D008ECD36 /* pdfio-sha256.c in Sources */ = {isa = PBXBuildFile; fileRef = 27F2F0632711243D008ECD36 /* pdfio-sha256.c */; };
|
||||
27FCBDE42D19F9B300485EEE /* pdfio-base-font-widths.h in Headers */ = {isa = PBXBuildFile; fileRef = 27FCBDE32D19F9B300485EEE /* pdfio-base-font-widths.h */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
@ -89,6 +90,7 @@
|
||||
27F2F05E2710BE92008ECD36 /* pdfio-rc4.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-rc4.c"; sourceTree = "<group>"; };
|
||||
27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-crypto.c"; sourceTree = "<group>"; };
|
||||
27F2F0632711243D008ECD36 /* pdfio-sha256.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pdfio-sha256.c"; sourceTree = "<group>"; };
|
||||
27FCBDE32D19F9B300485EEE /* pdfio-base-font-widths.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "pdfio-base-font-widths.h"; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
@ -162,6 +164,7 @@
|
||||
children = (
|
||||
27CF90432711DFFE00E50FE4 /* pdfio-aes.c */,
|
||||
273440BA263D727800FBFD63 /* pdfio-array.c */,
|
||||
27FCBDE32D19F9B300485EEE /* pdfio-base-font-widths.h */,
|
||||
273440BB263D727800FBFD63 /* pdfio-common.c */,
|
||||
271EA703265B2B1000ACDD39 /* pdfio-content.c */,
|
||||
27F2F05F2710BE92008ECD36 /* pdfio-crypto.c */,
|
||||
@ -205,6 +208,7 @@
|
||||
isa = PBXHeadersBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
27FCBDE42D19F9B300485EEE /* pdfio-base-font-widths.h in Headers */,
|
||||
273440CC263D727800FBFD63 /* pdfio.h in Headers */,
|
||||
271EA706265B2B1000ACDD39 /* pdfio-content.h in Headers */,
|
||||
273440C3263D727800FBFD63 /* pdfio-private.h in Headers */,
|
||||
@ -256,7 +260,8 @@
|
||||
273440A8263D6FE200FBFD63 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
attributes = {
|
||||
LastUpgradeCheck = 1300;
|
||||
BuildIndependentTargetsInParallel = YES;
|
||||
LastUpgradeCheck = 1600;
|
||||
TargetAttributes = {
|
||||
273440AF263D6FE200FBFD63 = {
|
||||
CreatedOnToolsVersion = 12.5;
|
||||
@ -373,9 +378,11 @@
|
||||
CODE_SIGN_IDENTITY = "Apple Development";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
CURRENT_PROJECT_VERSION = 1.1.2;
|
||||
DEAD_CODE_STRIPPING = YES;
|
||||
DEBUG_INFORMATION_FORMAT = dwarf;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_TESTABILITY = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu11;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
@ -450,15 +457,15 @@
|
||||
CODE_SIGN_IDENTITY = "Apple Development";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
CURRENT_PROJECT_VERSION = 1.1.2;
|
||||
DEAD_CODE_STRIPPING = YES;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
ENABLE_NS_ASSERTIONS = NO;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu11;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
"$(inherited)",
|
||||
);
|
||||
GCC_PREPROCESSOR_DEFINITIONS = "$(inherited)";
|
||||
GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS = YES;
|
||||
@ -486,6 +493,7 @@
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
DEAD_CODE_STRIPPING = YES;
|
||||
DEVELOPMENT_TEAM = RU58A2256H;
|
||||
EXECUTABLE_PREFIX = lib;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
@ -497,6 +505,7 @@
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
DEAD_CODE_STRIPPING = YES;
|
||||
DEVELOPMENT_TEAM = RU58A2256H;
|
||||
EXECUTABLE_PREFIX = lib;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
@ -509,6 +518,7 @@
|
||||
buildSettings = {
|
||||
CODE_SIGN_IDENTITY = "-";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
DEAD_CODE_STRIPPING = YES;
|
||||
DEVELOPMENT_TEAM = "";
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
@ -525,6 +535,7 @@
|
||||
buildSettings = {
|
||||
CODE_SIGN_IDENTITY = "-";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
DEAD_CODE_STRIPPING = YES;
|
||||
DEVELOPMENT_TEAM = "";
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
MACOSX_DEPLOYMENT_TARGET = 11.0;
|
||||
|
@ -3,7 +3,7 @@
|
||||
<metadata>
|
||||
<id>pdfio_native</id>
|
||||
<title>PDFio Library for VS2019+</title>
|
||||
<version>1.3.2</version>
|
||||
<version>1.4.0</version>
|
||||
<authors>Michael R Sweet</authors>
|
||||
<owners>michaelrsweet</owners>
|
||||
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
|
||||
@ -16,7 +16,7 @@
|
||||
<copyright>Copyright © 2019-2024 by Michael R Sweet</copyright>
|
||||
<tags>pdf file native</tags>
|
||||
<dependencies>
|
||||
<dependency id="pdfio_native.redist" version="1.3.2" />
|
||||
<dependency id="pdfio_native.redist" version="1.4.0" />
|
||||
<dependency id="zlib_native.redist" version="1.2.11" />
|
||||
</dependencies>
|
||||
</metadata>
|
||||
|
@ -3,7 +3,7 @@
|
||||
<metadata>
|
||||
<id>pdfio_native.redist</id>
|
||||
<title>PDFio Library for VS2019+</title>
|
||||
<version>1.3.2</version>
|
||||
<version>1.4.0</version>
|
||||
<authors>Michael R Sweet</authors>
|
||||
<owners>michaelrsweet</owners>
|
||||
<projectUrl>https://github.com/michaelrsweet/pappl</projectUrl>
|
||||
|
Reference in New Issue
Block a user