mirror of
https://github.com/michaelrsweet/pdfio.git
synced 2024-12-26 05:18:21 +01:00
Compare commits
2 Commits
e4081f2ba3
...
a1237db52c
Author | SHA1 | Date | |
---|---|---|---|
|
a1237db52c | ||
|
a24fdee335 |
227
doc/pdfio.md
227
doc/pdfio.md
@ -2,7 +2,7 @@ Introduction
|
||||
============
|
||||
|
||||
PDFio is a simple C library for reading and writing PDF files. The primary
|
||||
goals of pdfio are:
|
||||
goals of PDFio are:
|
||||
|
||||
- Read and write any version of PDF file
|
||||
- Provide access to pages, objects, and streams within a PDF file
|
||||
@ -1203,5 +1203,228 @@ a PDF file that can be distributed.
|
||||
|
||||
> Note: The md2pdf example is by far the most complex example code included with
|
||||
> PDFio and shows how to lay out text, add headers and footers, add links, embed
|
||||
> images, and format tables.
|
||||
> images, format tables, and add an outline (table of contents) for navigation.
|
||||
|
||||
### Managing Document State
|
||||
|
||||
The `md2pdf` program needs to maintain three sets of state - one for the
|
||||
markdown document which is represented by nodes of type `mmd_t` and the others
|
||||
for the PDF document and current PDF page which are contained in the `docdata_t`
|
||||
structure:
|
||||
|
||||
```c
|
||||
typedef struct docdata_s // Document formatting data
|
||||
{
|
||||
// State for the whole document
|
||||
pdfio_file_t *pdf; // PDF file
|
||||
pdfio_rect_t media_box; // Media (page) box
|
||||
pdfio_rect_t crop_box; // Crop box (for margins)
|
||||
pdfio_rect_t art_box; // Art box (for markdown content)
|
||||
pdfio_obj_t *fonts[DOCFONT_MAX]; // Embedded fonts
|
||||
size_t num_images; // Number of embedded images
|
||||
docimage_t images[DOCIMAGE_MAX]; // Embedded images
|
||||
const char *title; // Document title
|
||||
char *heading; // Current document heading
|
||||
size_t num_actions; // Number of actions for this document
|
||||
docaction_t actions[DOCACTION_MAX]; // Actions for this document
|
||||
size_t num_targets; // Number of targets for this document
|
||||
doctarget_t targets[DOCTARGET_MAX]; // Targets for this document
|
||||
size_t num_toc; // Number of table-of-contents entries
|
||||
doctoc_t toc[DOCTOC_MAX]; // Table-of-contents entries
|
||||
|
||||
// State for the current page
|
||||
pdfio_stream_t *st; // Current page stream
|
||||
double y; // Current position on page
|
||||
docfont_t font; // Current font
|
||||
double fsize; // Current font size
|
||||
doccolor_t color; // Current color
|
||||
pdfio_array_t *annots_array; // Annotations array (for links)
|
||||
pdfio_obj_t *annots_obj; // Annotations object (for links)
|
||||
size_t num_links; // Number of links for this page
|
||||
doclink_t links[DOCLINK_MAX]; // Links for this page
|
||||
} docdata_t;
|
||||
```
|
||||
|
||||
|
||||
#### Document State
|
||||
|
||||
The output is fixed to the "universal" media size (the intersection of US Letter
|
||||
and ISO A4) with 1/2 inch margins - the `PAGE_` constants can be changed to
|
||||
select a different size or margins. The `media_box` member contains the
|
||||
"MediaBox" rectangle for the PDF pages, while the `crop_box` and `art_box`
|
||||
members contain the "CropBox" and "ArtBox" values, respectively.
|
||||
|
||||
Four embedded fonts are used:
|
||||
|
||||
- `DOCFONT_REGULAR`: the default font used for text,
|
||||
- `DOCFONT_BOLD`: a boldface font used for headings and strong text,
|
||||
- `DOCFONT_ITALIC`: an italic/oblique font used for emphasized text, and
|
||||
- `DOCFONT_MONOSPACE`: a fixed-width font used for code.
|
||||
|
||||
By default the code uses the base PostScript fonts Helvetica, Helvetica-Bold,
|
||||
Helvetica-Oblique, and Courier. The `USE_TRUETYPE` define can be used to
|
||||
replace these with the Roboto TrueType fonts.
|
||||
|
||||
Embedded JPEG and PNG images are copied into the PDF document, with the `images`
|
||||
array containing the list of the images and their objects.
|
||||
|
||||
The `title` member contains the document title, while the `heading` member
|
||||
contains the current heading text.
|
||||
|
||||
The `actions` array contains a list of action dictionaries for interior document
|
||||
links that need to be resolved, while the `targets` array keeps track of the
|
||||
location of the headings in the PDF document.
|
||||
|
||||
The `toc` array contains a list of headings and is used to construct the PDF
|
||||
outline dictionaries/objects, which provide a table of contents for navigation
|
||||
in most PDF readers.
|
||||
|
||||
|
||||
#### Page State
|
||||
|
||||
The `st` member provides the stream for the current page content. The `color`,
|
||||
`font`, `fsize`, and `y` members provide the current graphics state on the page.
|
||||
|
||||
The `annots_array`, `annots_obj`, `num_links`, and `links` members contain a
|
||||
list of hyperlinks on the current page.
|
||||
|
||||
|
||||
### Creating Pages
|
||||
|
||||
The `new_page` function is used to start a new page. Aside from creating the
|
||||
new page object and stream, it adds a standard header and footer to the page.
|
||||
It starts by closing the current page if it is open:
|
||||
|
||||
```c
|
||||
// Close the current page...
|
||||
if (dd->st)
|
||||
{
|
||||
pdfioStreamClose(dd->st);
|
||||
add_links(dd);
|
||||
}
|
||||
```
|
||||
|
||||
The new page needs a dictionary containing any link annotations, the media and
|
||||
art boxes, the four fonts, and any images:
|
||||
|
||||
```c
|
||||
// Prep the new page...
|
||||
page_dict = pdfioDictCreate(dd->pdf);
|
||||
|
||||
dd->annots_array = pdfioArrayCreate(dd->pdf);
|
||||
dd->annots_obj = pdfioFileCreateArrayObj(dd->pdf, dd->annots_array);
|
||||
pdfioDictSetObj(page_dict, "Annots", dd->annots_obj);
|
||||
|
||||
pdfioDictSetRect(page_dict, "MediaBox", &dd->media_box);
|
||||
pdfioDictSetRect(page_dict, "ArtBox", &dd->art_box);
|
||||
|
||||
for (fontface = DOCFONT_REGULAR; fontface < DOCFONT_MAX; fontface ++)
|
||||
pdfioPageDictAddFont(page_dict, docfont_names[fontface],
|
||||
dd->fonts[fontface]);
|
||||
|
||||
for (i = 0; i < dd->num_images; i ++)
|
||||
pdfioPageDictAddImage(page_dict,
|
||||
pdfioStringCreatef(dd->pdf, "I%u",
|
||||
(unsigned)i),
|
||||
dd->images[i].obj);
|
||||
```
|
||||
|
||||
Once the page dictionary is initialized, we create a new page and initialize
|
||||
the current graphics state:
|
||||
|
||||
```c
|
||||
dd->st = pdfioFileCreatePage(dd->pdf, page_dict);
|
||||
dd->color = DOCCOLOR_BLACK;
|
||||
dd->font = DOCFONT_MAX;
|
||||
dd->fsize = 0.0;
|
||||
dd->y = dd->art_box.y2;
|
||||
```
|
||||
|
||||
The header consists of a dark gray separating line and the document title. We
|
||||
don't show the header on the first page:
|
||||
|
||||
```c
|
||||
// Add header/footer text
|
||||
set_color(dd, DOCCOLOR_GRAY);
|
||||
set_font(dd, DOCFONT_REGULAR, SIZE_HEADFOOT);
|
||||
|
||||
if (pdfioFileGetNumPages(dd->pdf) > 1 && dd->title)
|
||||
{
|
||||
// Show title in header...
|
||||
width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR],
|
||||
dd->title, SIZE_HEADFOOT);
|
||||
|
||||
pdfioContentTextBegin(dd->st);
|
||||
pdfioContentTextMoveTo(dd->st,
|
||||
dd->crop_box.x1 + 0.5 * (dd->crop_box.x2 -
|
||||
dd->crop_box.x1 - width),
|
||||
dd->crop_box.y2 - SIZE_HEADFOOT);
|
||||
pdfioContentTextShow(dd->st, UNICODE_VALUE, dd->title);
|
||||
pdfioContentTextEnd(dd->st);
|
||||
|
||||
pdfioContentPathMoveTo(dd->st, dd->crop_box.x1,
|
||||
dd->crop_box.y2 -
|
||||
2 * SIZE_HEADFOOT * LINE_HEIGHT +
|
||||
SIZE_HEADFOOT);
|
||||
pdfioContentPathLineTo(dd->st, dd->crop_box.x2,
|
||||
dd->crop_box.y2 -
|
||||
2 * SIZE_HEADFOOT * LINE_HEIGHT +
|
||||
SIZE_HEADFOOT);
|
||||
pdfioContentStroke(dd->st);
|
||||
}
|
||||
```
|
||||
|
||||
The footer contains the same dark gray separating line with the current heading
|
||||
and page number on opposite sides. The page number is always positioned on the
|
||||
outer edge for a two-sided print - right justified on odd numbered pages and
|
||||
left justified on even numbered pages:
|
||||
|
||||
```c
|
||||
// Show page number and current heading...
|
||||
pdfioContentPathMoveTo(dd->st, dd->crop_box.x1,
|
||||
dd->crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT);
|
||||
pdfioContentPathLineTo(dd->st, dd->crop_box.x2,
|
||||
dd->crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT);
|
||||
pdfioContentStroke(dd->st);
|
||||
|
||||
pdfioContentTextBegin(dd->st);
|
||||
snprintf(temp, sizeof(temp), "%u",
|
||||
(unsigned)pdfioFileGetNumPages(dd->pdf));
|
||||
if (pdfioFileGetNumPages(dd->pdf) & 1)
|
||||
{
|
||||
// Page number on right...
|
||||
width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], temp,
|
||||
SIZE_HEADFOOT);
|
||||
pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width,
|
||||
dd->crop_box.y1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Page number on left...
|
||||
pdfioContentTextMoveTo(dd->st, dd->crop_box.x1, dd->crop_box.y1);
|
||||
}
|
||||
|
||||
pdfioContentTextShow(dd->st, UNICODE_VALUE, temp);
|
||||
pdfioContentTextEnd(dd->st);
|
||||
|
||||
if (dd->heading)
|
||||
{
|
||||
pdfioContentTextBegin(dd->st);
|
||||
|
||||
if (pdfioFileGetNumPages(dd->pdf) & 1)
|
||||
{
|
||||
// Current heading on left...
|
||||
pdfioContentTextMoveTo(dd->st, dd->crop_box.x1, dd->crop_box.y1);
|
||||
}
|
||||
else
|
||||
{
|
||||
width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR],
|
||||
dd->heading, SIZE_HEADFOOT);
|
||||
pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width,
|
||||
dd->crop_box.y1);
|
||||
}
|
||||
|
||||
pdfioContentTextShow(dd->st, UNICODE_VALUE, dd->heading);
|
||||
pdfioContentTextEnd(dd->st);
|
||||
}
|
||||
```
|
||||
|
@ -12,8 +12,8 @@
|
||||
|
||||
|
||||
# Common options
|
||||
CFLAGS = -g $(CPPFLAGS)
|
||||
#CFLAGS = -g -fsanitize=address $(CPPFLAGS)
|
||||
#CFLAGS = -g $(CPPFLAGS)
|
||||
CFLAGS = -g -fsanitize=address $(CPPFLAGS)
|
||||
CPPFLAGS = -I..
|
||||
LIBS = -L.. -lpdfio -lz
|
||||
|
||||
|
@ -101,15 +101,25 @@ typedef struct doctoc_s // Document table-of-contents entry
|
||||
|
||||
typedef struct docdata_s // Document formatting data
|
||||
{
|
||||
// State for the whole document
|
||||
pdfio_file_t *pdf; // PDF file
|
||||
pdfio_rect_t media_box; // Media (page) box
|
||||
pdfio_rect_t crop_box; // Crop box (for margins)
|
||||
pdfio_rect_t art_box; // Art box (for markdown content)
|
||||
pdfio_obj_t *fonts[DOCFONT_MAX]; // Embedded fonts
|
||||
double font_space; // Unit width of a space
|
||||
size_t num_images; // Number of embedded images
|
||||
docimage_t images[DOCIMAGE_MAX]; // Embedded images
|
||||
const char *title; // Document title
|
||||
char *heading; // Current document heading
|
||||
size_t num_actions; // Number of actions for this document
|
||||
docaction_t actions[DOCACTION_MAX]; // Actions for this document
|
||||
size_t num_targets; // Number of targets for this document
|
||||
doctarget_t targets[DOCTARGET_MAX]; // Targets for this document
|
||||
size_t num_toc; // Number of table-of-contents entries
|
||||
doctoc_t toc[DOCTOC_MAX]; // Table-of-contents entries
|
||||
|
||||
// State for the current page
|
||||
pdfio_stream_t *st; // Current page stream
|
||||
double y; // Current position on page
|
||||
docfont_t font; // Current font
|
||||
@ -119,12 +129,6 @@ typedef struct docdata_s // Document formatting data
|
||||
pdfio_obj_t *annots_obj; // Annotations object (for links)
|
||||
size_t num_links; // Number of links for this page
|
||||
doclink_t links[DOCLINK_MAX]; // Links for this page
|
||||
size_t num_actions; // Number of actions for this document
|
||||
docaction_t actions[DOCACTION_MAX]; // Actions for this document
|
||||
size_t num_targets; // Number of targets for this document
|
||||
doctarget_t targets[DOCTARGET_MAX]; // Targets for this document
|
||||
size_t num_toc; // Number of table-of-contents entries
|
||||
doctoc_t toc[DOCTOC_MAX]; // Table-of-contents entries
|
||||
} docdata_t;
|
||||
|
||||
typedef struct linefrag_s // Line fragment
|
||||
@ -302,7 +306,8 @@ main(int argc, // I - Number of command-line arguments
|
||||
dd.art_box.x2 = PAGE_RIGHT;
|
||||
dd.art_box.y2 = PAGE_TOP;
|
||||
|
||||
dd.title = mmdGetMetadata(doc, "title");
|
||||
if ((dd.title = mmdGetMetadata(doc, "title")) == NULL)
|
||||
dd.art_box.y2 = PAGE_HEADER; // No header if there is no title
|
||||
|
||||
if (argc == 2)
|
||||
{
|
||||
@ -345,6 +350,8 @@ main(int argc, // I - Number of command-line arguments
|
||||
#endif // USE_TRUETYPE
|
||||
}
|
||||
|
||||
dd.font_space = pdfioContentTextMeasure(dd.fonts[DOCFONT_REGULAR], " ", 1.0);
|
||||
|
||||
// Add images...
|
||||
add_images(&dd, doc);
|
||||
|
||||
@ -587,10 +594,14 @@ format_block(docdata_t *dd, // I - Document data
|
||||
if (leader)
|
||||
{
|
||||
// Add leader text on first line...
|
||||
frags[0].type = MMD_TYPE_NORMAL_TEXT;
|
||||
frags[0].width = pdfioContentTextMeasure(dd->fonts[deffont], leader, fsize);
|
||||
frags[0].height = fsize;
|
||||
frags[0].x = left - frags[0].width;
|
||||
frags[0].imagenum = 0;
|
||||
frags[0].text = leader;
|
||||
frags[0].url = NULL;
|
||||
frags[0].ws = false;
|
||||
frags[0].font = deffont;
|
||||
frags[0].color = DOCCOLOR_BLACK;
|
||||
|
||||
@ -616,7 +627,7 @@ format_block(docdata_t *dd, // I - Document data
|
||||
imagenum = 0;
|
||||
url = mmdGetURL(current);
|
||||
ws = mmdGetWhitespace(current);
|
||||
wswidth = 0.0;
|
||||
wswidth = ws ? dd->font_space * fsize : 0.0;
|
||||
next = mmd_walk_next(block, current);
|
||||
|
||||
// Process the node...
|
||||
@ -706,9 +717,6 @@ format_block(docdata_t *dd, // I - Document data
|
||||
|
||||
width = pdfioContentTextMeasure(dd->fonts[font], text, fsize);
|
||||
height = fsize * LINE_HEIGHT;
|
||||
|
||||
if (ws)
|
||||
wswidth = pdfioContentTextMeasure(dd->fonts[font], " ", fsize);
|
||||
}
|
||||
|
||||
// See if this node will fit on the current line...
|
||||
@ -806,20 +814,29 @@ format_code(docdata_t *dd, // I - Document data
|
||||
double right) // I - Right margin
|
||||
{
|
||||
mmd_t *code; // Current code block
|
||||
double lineheight; // Line height
|
||||
double lineheight, // Line height
|
||||
margin_top; // Top margin
|
||||
|
||||
|
||||
// Compute line height and initial top margin...
|
||||
lineheight = SIZE_CODEBLOCK * LINE_HEIGHT;
|
||||
margin_top = lineheight;
|
||||
|
||||
// Start a new page as needed...
|
||||
if (!dd->st)
|
||||
{
|
||||
new_page(dd);
|
||||
|
||||
lineheight = SIZE_CODEBLOCK * LINE_HEIGHT;
|
||||
dd->y -= 2.0 * lineheight;
|
||||
margin_top = (1.0 - LINE_HEIGHT) * lineheight;
|
||||
}
|
||||
|
||||
dd->y -= lineheight + margin_top;
|
||||
|
||||
if ((dd->y - lineheight) < dd->art_box.y1)
|
||||
{
|
||||
new_page(dd);
|
||||
|
||||
dd->y -= lineheight;
|
||||
dd->y -= lineheight / LINE_HEIGHT;
|
||||
}
|
||||
|
||||
// Start a code text block...
|
||||
@ -1338,15 +1355,15 @@ new_page(docdata_t *dd) // I - Document data
|
||||
|
||||
// Prep the new page...
|
||||
page_dict = pdfioDictCreate(dd->pdf);
|
||||
|
||||
dd->annots_array = pdfioArrayCreate(dd->pdf);
|
||||
dd->annots_obj = pdfioFileCreateArrayObj(dd->pdf, dd->annots_array);
|
||||
pdfioDictSetObj(page_dict, "Annots", dd->annots_obj);
|
||||
|
||||
pdfioDictSetRect(page_dict, "MediaBox", &dd->media_box);
|
||||
// pdfioDictSetRect(page_dict, "CropBox", &dd->crop_box);
|
||||
pdfioDictSetRect(page_dict, "ArtBox", &dd->art_box);
|
||||
|
||||
pdfioDictSetObj(page_dict, "Annots", dd->annots_obj);
|
||||
|
||||
for (fontface = DOCFONT_REGULAR; fontface < DOCFONT_MAX; fontface ++)
|
||||
pdfioPageDictAddFont(page_dict, docfont_names[fontface], dd->fonts[fontface]);
|
||||
|
||||
@ -1457,7 +1474,7 @@ render_line(docdata_t *dd, // I - Document data
|
||||
if (!dd->st)
|
||||
{
|
||||
new_page(dd);
|
||||
margin_top = 0.0;
|
||||
margin_top = (1.0 - LINE_HEIGHT) * lineheight;
|
||||
}
|
||||
|
||||
dd->y -= margin_top + lineheight;
|
||||
@ -1465,7 +1482,7 @@ render_line(docdata_t *dd, // I - Document data
|
||||
{
|
||||
new_page(dd);
|
||||
|
||||
dd->y -= lineheight;
|
||||
dd->y -= lineheight / LINE_HEIGHT;
|
||||
}
|
||||
|
||||
for (i = 0, frag = frags; i < num_frags; i ++, frag ++)
|
||||
@ -1499,9 +1516,6 @@ render_line(docdata_t *dd, // I - Document data
|
||||
else if (frag->text)
|
||||
{
|
||||
// Draw text
|
||||
set_color(dd, frag->color);
|
||||
set_font(dd, frag->font, frag->height);
|
||||
|
||||
if (!in_text)
|
||||
{
|
||||
pdfioContentTextBegin(dd->st);
|
||||
@ -1510,10 +1524,19 @@ render_line(docdata_t *dd, // I - Document data
|
||||
in_text = true;
|
||||
}
|
||||
|
||||
if (frag->ws)
|
||||
pdfioContentTextShowf(dd->st, UNICODE_VALUE, " %s", frag->text);
|
||||
else
|
||||
if (frag->ws && frag->font == DOCFONT_MONOSPACE)
|
||||
{
|
||||
set_font(dd, DOCFONT_REGULAR, frag->height);
|
||||
pdfioContentTextShow(dd->st, UNICODE_VALUE, " ");
|
||||
}
|
||||
|
||||
set_color(dd, frag->color);
|
||||
set_font(dd, frag->font, frag->height);
|
||||
|
||||
if (frag->font == DOCFONT_MONOSPACE)
|
||||
pdfioContentTextShow(dd->st, UNICODE_VALUE, frag->text);
|
||||
else
|
||||
pdfioContentTextShowf(dd->st, UNICODE_VALUE, "%s%s", frag->ws ? " " : "", frag->text);
|
||||
|
||||
if (frag->url && dd->num_links < DOCLINK_MAX)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user