diff --git a/doc/pdfio.3 b/doc/pdfio.3 index 6403a33..064ff35 100644 --- a/doc/pdfio.3 +++ b/doc/pdfio.3 @@ -1,9 +1,9 @@ -.TH pdfio 3 "pdf read/write library" "2024-12-19" "pdf read/write library" +.TH pdfio 3 "pdf read/write library" "2024-12-22" "pdf read/write library" .SH NAME pdfio \- pdf read/write library .SH Introduction .PP -PDFio is a simple C library for reading and writing PDF files. The primary goals of pdfio are: +PDFio is a simple C library for reading and writing PDF files. The primary goals of PDFio are: .IP \(bu 5 .PP Read and write any version of PDF file @@ -305,8 +305,8 @@ You open an existing PDF file using the pdfioFileOpen function: .nf pdfio_file_t *pdf = - pdfioFileOpen("myinputfile.pdf", password_cb, password_data, - error_cb, error_data); + pdfioFileOpen("myinputfile.pdf", password_cb, password_data, error_cb, + error_data); .fi .PP where the five arguments to the function are the filename ("myinputfile.pdf"), an optional password callback function (password_cb) and data pointer value (password_data), and an optional error callback function (error_cb) and data pointer value (error_data). 
The password callback is called for encrypted PDF files that are not using the default password, for example: @@ -454,8 +454,7 @@ You create a new PDF file using the pdfioFileCreate function: pdfio_rect_t media_box = { 0.0, 0.0, 612.0, 792.0 }; // US Letter pdfio_rect_t crop_box = { 36.0, 36.0, 576.0, 756.0 }; // w/0.5" margins - pdfio_file_t *pdf = pdfioFileCreate("myoutputfile.pdf", "2.0", - &media_box, &crop_box, + pdfio_file_t *pdf = pdfioFileCreate("myoutputfile.pdf", "2.0", &media_box, &crop_box, error_cb, error_data); .fi .PP @@ -467,9 +466,8 @@ Alternately you can stream a PDF file using the pdfioFileCreateOutput function: pdfio_rect_t media_box = { 0.0, 0.0, 612.0, 792.0 }; // US Letter pdfio_rect_t crop_box = { 36.0, 36.0, 576.0, 756.0 }; // w/0.5" margins - pdfio_file_t *pdf = pdfioFileCreateOutput(output_cb, output_ctx, "2.0", - &media_box, &crop_box, - error_cb, error_data); + pdfio_file_t *pdf = pdfioFileCreateOutput(output_cb, output_ctx, "2.0", &media_box, + &crop_box, error_cb, error_data); .fi .PP Once the file is created, use the pdfioFileCreateObj, pdfioFileCreatePage, and pdfioPageCopy functions to create objects and pages in the file. @@ -756,10 +754,9 @@ PDF supports images with many different color spaces and bit depths with optiona pdfio_file_t *pdf = pdfioFileCreate(...); unsigned char data[1024 * 1024 * 4]; // 1024x1024 RGBA image data pdfio_obj_t *img = - pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024, - /*height*/1024, /*num_colors*/3, - /*color_data*/NULL, /*alpha*/true, - /*interpolate*/false); + pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024, /*height*/1024, + /*num_colors*/3, /*color_data*/NULL, + /*alpha*/true, /*interpolate*/false); .fi .PP will create an object for a 1024x1024 RGBA image in memory, using the default color space for 3 colors ("DeviceRGB"). 
We can use one of the color space functions to use a specific color space for this image, for example: @@ -770,17 +767,14 @@ will create an object for a 1024x1024 RGBA image in memory, using the default co // Create an AdobeRGB color array pdfio_array_t *adobe_rgb = pdfioArrayCreateColorFromMatrix(pdf, 3, pdfioAdobeRGBGamma, - pdfioAdobeRGBMatrix, - pdfioAdobeRGBWhitePoint); + pdfioAdobeRGBMatrix, pdfioAdobeRGBWhitePoint); // Create a 1024x1024 RGBA image using AdobeRGB unsigned char data[1024 * 1024 * 4]; // 1024x1024 RGBA image data pdfio_obj_t *img = - pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024, - /*height*/1024, /*num_colors*/3, - /*color_data*/adobe_rgb, - /*alpha*/true, - /*interpolate*/false); + pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024, /*height*/1024, + /*num_colors*/3, /*color_data*/adobe_rgb, + /*alpha*/true, /*interpolate*/false); .fi .PP The "interpolate" argument specifies whether the colors in the image should be smoothed/interpolated when scaling. This is most useful for photographs but should be false for screenshot and barcode images. @@ -790,8 +784,7 @@ If you have a JPEG or PNG file, use the pdfioFileCreateImageObjFromFile function pdfio_file_t *pdf = pdfioFileCreate(...); pdfio_obj_t *img = - pdfioFileCreateImageObjFromFile(pdf, "myphoto.jpg", - /*interpolate*/true); + pdfioFileCreateImageObjFromFile(pdf, "myphoto.jpg", /*interpolate*/true); .fi .PP Note: Currently pdfioFileCreateImageObjFromFile does not support 12 bit JPEG files or PNG files with an alpha channel. @@ -1065,9 +1058,8 @@ The pdfioinfo.c example program opens a PDF file and prints the title, author, c filename = argv[1]; // Open the PDF file with the default callbacks... 
- pdf = pdfioFileOpen(filename, /*password_cb*/NULL, - /*password_cbdata*/NULL, /*error_cb*/NULL, - /*error_cbdata*/NULL); + pdf = pdfioFileOpen(filename, /*password_cb*/NULL, /*password_cbdata*/NULL, + /*error_cb*/NULL, /*error_cbdata*/NULL); if (pdf == NULL) return (1); @@ -1116,9 +1108,8 @@ The image2pdf.c example code creates a PDF file containing a JPEG or PNG image f // Create the PDF file... - pdf = pdfioFileCreate(pdfname, /*version*/NULL, /*media_box*/NULL, - /*crop_box*/NULL, /*error_cb*/NULL, - /*error_cbdata*/NULL); + pdf = pdfioFileCreate(pdfname, /*version*/NULL, /*media_box*/NULL, /*crop_box*/NULL, + /*error_cb*/NULL, /*error_cbdata*/NULL); if (!pdf) return (false); @@ -1152,9 +1143,9 @@ The image2pdf.c example code creates a PDF file containing a JPEG or PNG image f width = pdfioImageGetWidth(image); height = pdfioImageGetHeight(image); - // Default media_box is "universal" 595.28x792 points (8.27x11in or - // 210x279mm). Use margins of 36 points (0.5in or 12.7mm) with another - // 36 points for the caption underneath... + // Default media_box is "universal" 595.28x792 points (8.27x11in or 210x279mm). + // Use margins of 36 points (0.5in or 12.7mm) with another 36 points for the + // caption underneath... swidth = 595.28 \- 72.0; sheight = swidth * height / width; if (sheight > (792.0 \- 36.0 \- 72.0)) @@ -1171,8 +1162,8 @@ The image2pdf.c example code creates a PDF file containing a JPEG or PNG image f // Draw the caption in black... pdfioContentSetFillColorDeviceGray(page, 0.0); - // Compute the starting point for the text \- Courier is monospaced - // with a nominal width of 0.6 times the text height... + // Compute the starting point for the text \- Courier is monospaced with a + // nominal width of 0.6 times the text height... tx = 0.5 * (595.28 \- 18.0 * 0.6 * strlen(caption)); // Position and draw the caption underneath... 
@@ -1247,8 +1238,7 @@ The first thing you need to do is prepare the barcode string to use with the fon The main function does the rest of the work. The barcode font is imported using the pdfioFileCreateFontObjFromFile function. We pass false for the "unicode" argument since we just want the (default) ASCII encoding: .nf - barcode_font = pdfioFileCreateFontObjFromFile(pdf, "code128.ttf", - /*unicode*/false); + barcode_font = pdfioFileCreateFontObjFromFile(pdf, "code128.ttf", /*unicode*/false); .fi .PP Since barcodes usually have the number or text represented by the barcode printed underneath it, we also need a regular text font, for which we can choose one of the standard 14 PostScript base fonts using the pdfioFIleCreateFontObjFromBase function: @@ -1278,8 +1268,7 @@ Once we have these fonts we can measure the barcode and regular text labels usin if (text && text_font) { text_height = 9.0; - text_width = pdfioContentTextMeasure(text_font, text, - text_height); + text_width = pdfioContentTextMeasure(text_font, text, text_height); } // Compute the size of the PDF page... @@ -1287,8 +1276,7 @@ Once we have these fonts we can measure the barcode and regular text labels usin media_box.x1 = 0.0; media_box.y1 = 0.0; - media_box.x2 = (barcode_width > text_width ? - barcode_width : text_width) + 18.0; + media_box.x2 = (barcode_width > text_width ? barcode_width : text_width) + 18.0; media_box.y2 = barcode_height + text_height + 18.0; .fi .PP @@ -1336,8 +1324,789 @@ With the barcode font called "B128" and the text font called "TEXT", we can use .PP Markdown is a simple plain text format that supports things like headings, links, character styles, tables, and embedded images. The md2pdf.c example code uses the mmd library to convert markdown to a PDF file that can be distributed. .PP -Note: The md2pdf example is by far the most complex example code included with PDFio and shows how to layout text, add headers and footers, add links, embed images, and format tables. 
+Note: The md2pdf example is by far the most complex example code included with PDFio and shows how to layout text, add headers and footers, add links, embed images, format tables, and add an outline (table of contents) for navigation. +.PP +Managing Document State +.PP +The md2pdf program needs to maintain three sets of state \- one for the markdown document which is represented by nodes of type mmd_t and the others for the PDF document and current PDF page which are contained in the docdata_t structure: +.nf + + typedef struct docdata_s // Document formatting data + { + // State for the whole document + pdfio_file_t *pdf; // PDF file + pdfio_rect_t media_box; // Media (page) box + pdfio_rect_t crop_box; // Crop box (for margins) + pdfio_rect_t art_box; // Art box (for markdown content) + pdfio_obj_t *fonts[DOCFONT_MAX]; // Embedded fonts + double font_space; // Unit width of a space + size_t num_images; // Number of embedded images + docimage_t images[DOCIMAGE_MAX]; // Embedded images + const char *title; // Document title + char *heading; // Current document heading + size_t num_actions; // Number of actions for this document + docaction_t actions[DOCACTION_MAX]; // Actions for this document + size_t num_targets; // Number of targets for this document + doctarget_t targets[DOCTARGET_MAX]; // Targets for this document + size_t num_toc; // Number of table\-of\-contents entries + doctoc_t toc[DOCTOC_MAX]; // Table\-of\-contents entries + + // State for the current page + pdfio_stream_t *st; // Current page stream + double y; // Current position on page + docfont_t font; // Current font + double fsize; // Current font size + doccolor_t color; // Current color + pdfio_array_t *annots_array; // Annotations array (for links) + pdfio_obj_t *annots_obj; // Annotations object (for links) + size_t num_links; // Number of links for this page + doclink_t links[DOCLINK_MAX]; // Links for this page + } docdata_t; +.fi +.PP +Document State +.PP +The output is fixed to the 
"universal" media size (the intersection of US Letter and ISO A4) with 1/2 inch margins \- the PAGE_ constants can be changed to select a different size or margins. The media_box member contains the "MediaBox" rectangle for the PDF pages, while the crop_box and art_box members contain the "CropBox" and "ArtBox" values, respectively. +.PP +Four embedded fonts are used: +.IP \(bu 5 +.PP +DOCFONT_REGULAR: the default font used for text, + +.IP \(bu 5 +.PP +DOCFONT_BOLD: a boldface font used for heading and strong text, + +.IP \(bu 5 +.PP +DOCFONT_ITALIC: an italic/oblique font used for emphasized text, and + +.IP \(bu 5 +.PP +DOCFONT_MONOSPACE: a fixed\-width font used for code. + + +.PP +By default the code uses the base PostScript fonts Helvetica, Helvetica\-Bold, Helvetica\-Oblique, and Courier. The USE_TRUETYPE define can be used to replace these with the Roboto TrueType fonts. +.PP +Embedded JPEG and PNG images are copied into the PDF document, with the images array containing the list of the images and their objects. +.PP +The title member contains the document title, while the heading member contains the current heading text. +.PP +The actions array contains a list of action dictionaries for interior document links that need to be resolved, while the targets array keeps track of the location of the headings in the PDF document. +.PP +The toc array contains a list of headings and is used to construct the PDF outlines dictionaries/objects, which provides a table of contents for navigation in most PDF readers. +.PP +Page State +.PP +The st member provides the stream for the current page content. The color, font, fsize, and y members provide the current graphics state on the page. +.PP +The annots_array, annots_obj, num_links, and links members contain a list of hyperlinks on the current page. +.PP +Creating Pages +.PP +The new_page function is used to start a new page. 
Aside from creating the new page object and stream, it adds a standard header and footer to the page. It starts by closing the current page if it is open: +.nf + + // Close the current page... + if (dd\->st) + { + pdfioStreamClose(dd\->st); + add_links(dd); + } +.fi +.PP +The new page needs a dictionary containing any link annotations, the media and art boxes, the four fonts, and any images: +.nf + + // Prep the new page... + page_dict = pdfioDictCreate(dd\->pdf); + + dd\->annots_array = pdfioArrayCreate(dd\->pdf); + dd\->annots_obj = pdfioFileCreateArrayObj(dd\->pdf, dd\->annots_array); + pdfioDictSetObj(page_dict, "Annots", dd\->annots_obj); + + pdfioDictSetRect(page_dict, "MediaBox", &dd\->media_box); + pdfioDictSetRect(page_dict, "ArtBox", &dd\->art_box); + + for (fontface = DOCFONT_REGULAR; fontface < DOCFONT_MAX; fontface ++) + pdfioPageDictAddFont(page_dict, docfont_names[fontface], dd\->fonts[fontface]); + + for (i = 0; i < dd\->num_images; i ++) + pdfioPageDictAddImage(page_dict, pdfioStringCreatef(dd\->pdf, "I%u", (unsigned)i), + dd\->images[i].obj); +.fi +.PP +Once the page dictionary is initialized, we create a new page and initialize the current graphics state: +.nf + + dd\->st = pdfioFileCreatePage(dd\->pdf, page_dict); + dd\->color = DOCCOLOR_BLACK; + dd\->font = DOCFONT_MAX; + dd\->fsize = 0.0; + dd\->y = dd\->art_box.y2; +.fi +.PP +The header consists of a dark gray separating line and the document title. We don't show the header on the first page: +.nf + + // Add header/footer text + set_color(dd, DOCCOLOR_GRAY); + set_font(dd, DOCFONT_REGULAR, SIZE_HEADFOOT); + + if (pdfioFileGetNumPages(dd\->pdf) > 1 && dd\->title) + { + // Show title in header... 
+ width = pdfioContentTextMeasure(dd\->fonts[DOCFONT_REGULAR], dd\->title, + SIZE_HEADFOOT); + + pdfioContentTextBegin(dd\->st); + pdfioContentTextMoveTo(dd\->st, + dd\->crop_box.x1 + 0.5 * (dd\->crop_box.x2 \- + dd\->crop_box.x1 \- width), + dd\->crop_box.y2 \- SIZE_HEADFOOT); + pdfioContentTextShow(dd\->st, UNICODE_VALUE, dd\->title); + pdfioContentTextEnd(dd\->st); + + pdfioContentPathMoveTo(dd\->st, dd\->crop_box.x1, + dd\->crop_box.y2 \- 2 * SIZE_HEADFOOT * LINE_HEIGHT + + SIZE_HEADFOOT); + pdfioContentPathLineTo(dd\->st, dd\->crop_box.x2, + dd\->crop_box.y2 \- 2 * SIZE_HEADFOOT * LINE_HEIGHT + + SIZE_HEADFOOT); + pdfioContentStroke(dd\->st); + } +.fi +.PP +The footer contains the same dark gray separating line with the current heading and page number on opposite sides. The page number is always positioned on the outer edge for a two\-sided print \- right justified on odd numbered pages and left justified on even numbered pages: +.nf + + // Show page number and current heading... + pdfioContentPathMoveTo(dd\->st, dd\->crop_box.x1, + dd\->crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT); + pdfioContentPathLineTo(dd\->st, dd\->crop_box.x2, + dd\->crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT); + pdfioContentStroke(dd\->st); + + pdfioContentTextBegin(dd\->st); + snprintf(temp, sizeof(temp), "%u", (unsigned)pdfioFileGetNumPages(dd\->pdf)); + if (pdfioFileGetNumPages(dd\->pdf) & 1) + { + // Page number on right... + width = pdfioContentTextMeasure(dd\->fonts[DOCFONT_REGULAR], temp, SIZE_HEADFOOT); + pdfioContentTextMoveTo(dd\->st, dd\->crop_box.x2 \- width, dd\->crop_box.y1); + } + else + { + // Page number on left... + pdfioContentTextMoveTo(dd\->st, dd\->crop_box.x1, dd\->crop_box.y1); + } + + pdfioContentTextShow(dd\->st, UNICODE_VALUE, temp); + pdfioContentTextEnd(dd\->st); + + if (dd\->heading) + { + pdfioContentTextBegin(dd\->st); + + if (pdfioFileGetNumPages(dd\->pdf) & 1) + { + // Current heading on left... 
+ pdfioContentTextMoveTo(dd\->st, dd\->crop_box.x1, dd\->crop_box.y1); + } + else + { + width = pdfioContentTextMeasure(dd\->fonts[DOCFONT_REGULAR], dd\->heading, + SIZE_HEADFOOT); + pdfioContentTextMoveTo(dd\->st, dd\->crop_box.x2 \- width, dd\->crop_box.y1); + } + + pdfioContentTextShow(dd\->st, UNICODE_VALUE, dd\->heading); + pdfioContentTextEnd(dd\->st); + } +.fi +.PP +Formatting the Markdown Document +.PP +Four functions handle the formatting of the markdown document: +.IP \(bu 5 +.PP +format_block formats a single paragraph, heading, or table cell, + +.IP \(bu 5 +.PP +format_code: formats a block of code, + +.IP \(bu 5 +.PP +format_doc: formats the document as a whole, and + +.IP \(bu 5 +.PP +format_table: formats a table. + + +.PP +Formatted content is organized into arrays of linefrag_t and tablerow_t structures for a line of content or row of table cells, respectively. +.PP +High\-Level Formatting +.PP +The format_doc function iterates over the block nodes in the markdown document. We map a "thematic break" (horizontal rule) to a page break, which is implemented by moving the current vertical position to the bottom of the page: +.nf + + case MMD_TYPE_THEMATIC_BREAK : + // Force a page break + dd\->y = dd\->art_box.y1; + break; +.fi +.PP +A block quote is indented and uses the italic font by default: +.nf + + case MMD_TYPE_BLOCK_QUOTE : + format_doc(dd, current, DOCFONT_ITALIC, left + BQ_PADDING, right \- BQ_PADDING); + break; +.fi +.PP +Lists have a leading blank line and are indented: +.nf + + case MMD_TYPE_ORDERED_LIST : + case MMD_TYPE_UNORDERED_LIST : + if (dd\->st) + dd\->y \-= SIZE_BODY * LINE_HEIGHT; + + format_doc(dd, current, deffont, left + LIST_PADDING, right); + break; +.fi +.PP +List items do not have a leading blank line and make use of leader text that is shown in front of the list text. 
The leader text is either the current item number or a bullet, which then is directly formatted using the format_block function: +.nf + + case MMD_TYPE_LIST_ITEM : + if (doctype == MMD_TYPE_ORDERED_LIST) + { + snprintf(leader, sizeof(leader), "%d. ", i); + format_block(dd, current, deffont, SIZE_BODY, left, right, leader); + } + else + { + format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/"• "); + } + break; +.fi +.PP +Paragraphs have a leading blank line and are likewise directly formatted: +.nf + + case MMD_TYPE_PARAGRAPH : + // Add a blank line before the paragraph... + dd\->y \-= SIZE_BODY * LINE_HEIGHT; + + // Format the paragraph... + format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/NULL); + break; +.fi +.PP +Tables have a leading blank line and are formatted using the format_table function: +.nf + + case MMD_TYPE_TABLE : + // Add a blank line before the paragraph... + dd\->y \-= SIZE_BODY * LINE_HEIGHT; + + // Format the table... + format_table(dd, current, left, right); + break; +.fi +.PP +Code blocks have a leading blank line, are indented slightly (to account for the padded background), and are formatted using the format_code function: +.nf + + case MMD_TYPE_CODE_BLOCK : + // Add a blank line before the code block... + dd\->y \-= SIZE_BODY * LINE_HEIGHT; + + // Format the code block... + format_code(dd, current, left + CODE_PADDING, right \- CODE_PADDING); + break; +.fi +.PP +Headings get some extra processing. 
First, the current heading is remembered in the docdata_t structure so it can be used in the page footer: +.nf + + case MMD_TYPE_HEADING_1 : + case MMD_TYPE_HEADING_2 : + case MMD_TYPE_HEADING_3 : + case MMD_TYPE_HEADING_4 : + case MMD_TYPE_HEADING_5 : + case MMD_TYPE_HEADING_6 : + // Update the current heading + free(dd\->heading); + dd\->heading = mmdCopyAllText(current); +.fi +.PP +Then we add a blank line and format the heading with the boldface font at a larger size using the format_block function: +.nf + + // Add a blank line before the heading... + dd\->y \-= heading_sizes[curtype \- MMD_TYPE_HEADING_1] * LINE_HEIGHT; + + // Format the heading... + format_block(dd, current, DOCFONT_BOLD, + heading_sizes[curtype \- MMD_TYPE_HEADING_1], left, right, + /*leader*/NULL); +.fi +.PP +Once the heading is formatted, we record it in the toc array as a PDF outline item object/dictionary: +.nf + + // Add the heading to the table\-of\-contents... + if (dd\->num_toc < DOCTOC_MAX) + { + doctoc_t *t = dd\->toc + dd\->num_toc; + // New TOC + pdfio_array_t *dest; // Destination array + + t\->level = curtype \- MMD_TYPE_HEADING_1; + t\->dict = pdfioDictCreate(dd\->pdf); + t\->obj = pdfioFileCreateObj(dd\->pdf, t\->dict); + dest = pdfioArrayCreate(dd\->pdf); + + pdfioArrayAppendObj(dest, + pdfioFileGetPage(dd\->pdf, pdfioFileGetNumPages(dd\->pdf) \- 1)); + pdfioArrayAppendName(dest, "XYZ"); + pdfioArrayAppendNumber(dest, PAGE_LEFT); + pdfioArrayAppendNumber(dest, + dd\->y + heading_sizes[curtype \- MMD_TYPE_HEADING_1] * LINE_HEIGHT); + pdfioArrayAppendNumber(dest, 0.0); + + pdfioDictSetArray(t\->dict, "Dest", dest); + pdfioDictSetString(t\->dict, "Title", pdfioStringCreate(dd\->pdf, dd\->heading)); + + dd\->num_toc ++; + } +.fi +.PP +Finally, we also save the heading's target name and its location in the targets array to allow interior links to work: +.nf + + // Add the heading to the list of link targets... 
+ if (dd\->num_targets < DOCTARGET_MAX) + { + doctarget_t *t = dd\->targets + dd\->num_targets; + // New target + + make_target_name(t\->name, dd\->heading, sizeof(t\->name)); + t\->page = pdfioFileGetNumPages(dd\->pdf) \- 1; + t\->y = dd\->y + heading_sizes[curtype \- MMD_TYPE_HEADING_1] * LINE_HEIGHT; + + dd\->num_targets ++; + } + break; +.fi +.PP +Formatting Paragraphs, Headings, List Items, and Table Cells +.PP +Paragraphs, headings, list items, and table cells all use the same basic formatting algorithm. Text, checkboxes, and images are collected until the nodes in the current block are used up or the content reaches the right margin. +.PP +In order to keep adjacent blocks of text together, the formatting algorithm makes sure that at least 3 lines of text can fit before the bottom edge of the page: +.nf + + if (mmdGetNextSibling(block)) + need_bottom = 3.0 * SIZE_BODY * LINE_HEIGHT; + else + need_bottom = 0.0; +.fi +.PP +Leader text (used for list items) is right justified to the left margin and becomes the first fragment on the line when present. +.nf + + if (leader) + { + // Add leader text on first line... + frags[0].type = MMD_TYPE_NORMAL_TEXT; + frags[0].width = pdfioContentTextMeasure(dd\->fonts[deffont], leader, fsize); + frags[0].height = fsize; + frags[0].x = left \- frags[0].width; + frags[0].imagenum = 0; + frags[0].text = leader; + frags[0].url = NULL; + frags[0].ws = false; + frags[0].font = deffont; + frags[0].color = DOCCOLOR_BLACK; + + num_frags = 1; + lineheight = fsize * LINE_HEIGHT; + } + else + { + // No leader text... + num_frags = 0; + lineheight = 0.0; + } + + frag = frags + num_frags; +.fi +.PP +If the current content fragment won't fit, we call render_line to draw what we have, adjusting the left margin as needed for table cells: +.nf + + // See if this node will fit on the current line... + if ((num_frags > 0 && (x + width + wswidth) >= right) || num_frags == LINEFRAG_MAX) + { + // No, render this line and start over... 
+ if (blocktype == MMD_TYPE_TABLE_HEADER_CELL || + blocktype == MMD_TYPE_TABLE_BODY_CELL_CENTER) + margin_left = 0.5 * (right \- x); + else if (blocktype == MMD_TYPE_TABLE_BODY_CELL_RIGHT) + margin_left = right \- x; + else + margin_left = 0.0; + + render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags); + + num_frags = 0; + frag = frags; + x = left; + lineheight = 0.0; + need_bottom = 0.0; +.fi +.PP +Block quotes (blocks use a default font of italic) have an orange bar to the left of the block: +.nf + + if (deffont == DOCFONT_ITALIC) + { + // Add an orange bar to the left of block quotes... + set_color(dd, DOCCOLOR_ORANGE); + pdfioContentSave(dd\->st); + pdfioContentSetLineWidth(dd\->st, 3.0); + pdfioContentPathMoveTo(dd\->st, left \- 6.0, dd\->y \- (LINE_HEIGHT \- 1.0) * fsize); + pdfioContentPathLineTo(dd\->st, left \- 6.0, dd\->y + fsize); + pdfioContentStroke(dd\->st); + pdfioContentRestore(dd\->st); + } +.fi +.PP +Finally, we add the current content fragment to the array: +.nf + + // Add the current node to the fragment list + if (num_frags == 0) + { + // No leading whitespace at the start of the line + ws = false; + wswidth = 0.0; + } + + frag\->type = type; + frag\->x = x; + frag\->width = width + wswidth; + frag\->height = text ? fsize : height; + frag\->imagenum = imagenum; + frag\->text = text; + frag\->url = url; + frag\->ws = ws; + frag\->font = font; + frag\->color = color; + + num_frags ++; + frag ++; + x += width + wswidth; + if (height > lineheight) + lineheight = height; +.fi +.PP +Formatting Code Blocks +.PP +Code blocks consist of one or more lines of plain monospaced text. We draw a light gray background behind each line with a small bit of padding at the top and bottom: +.nf + + // Draw the top padding... 
+ set_color(dd, DOCCOLOR_LTGRAY); + pdfioContentPathRect(dd\->st, left \- CODE_PADDING, dd\->y + SIZE_CODEBLOCK, + right \- left + 2.0 * CODE_PADDING, CODE_PADDING); + pdfioContentFillAndStroke(dd\->st, false); + + // Start a code text block... + set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK); + pdfioContentTextBegin(dd\->st); + pdfioContentTextMoveTo(dd\->st, left, dd\->y); + + for (code = mmdGetFirstChild(block); code; code = mmdGetNextSibling(code)) + { + set_color(dd, DOCCOLOR_LTGRAY); + pdfioContentPathRect(dd\->st, left \- CODE_PADDING, + dd\->y \- (LINE_HEIGHT \- 1.0) * SIZE_CODEBLOCK, + right \- left + 2.0 * CODE_PADDING, lineheight); + pdfioContentFillAndStroke(dd\->st, false); + + set_color(dd, DOCCOLOR_RED); + pdfioContentTextShow(dd\->st, UNICODE_VALUE, mmdGetText(code)); + dd\->y \-= lineheight; + + if (dd\->y < dd\->art_box.y1) + { + // End the current text block... + pdfioContentTextEnd(dd\->st); + + // Start a new page... + new_page(dd); + set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK); + + dd\->y \-= lineheight; + + pdfioContentTextBegin(dd\->st); + pdfioContentTextMoveTo(dd\->st, left, dd\->y); + } + } + + // End the current text block... + pdfioContentTextEnd(dd\->st); + dd\->y += lineheight; + + // Draw the bottom padding... + set_color(dd, DOCCOLOR_LTGRAY); + pdfioContentPathRect(dd\->st, left \- CODE_PADDING, + dd\->y \- CODE_PADDING \- (LINE_HEIGHT \- 1.0) * SIZE_CODEBLOCK, + right \- left + 2.0 * CODE_PADDING, CODE_PADDING); + pdfioContentFillAndStroke(dd\->st, false); +.fi +.PP +Formatting Tables +.PP +Tables are the most difficult to format. We start by scanning the entire table and measuring every cell with the measure_cell function: +.nf + + for (num_cols = 0, num_rows = 0, rowptr = rows, current = mmdGetFirstChild(table); + current && num_rows < TABLEROW_MAX; + current = next) + { + next = mmd_walk_next(table, current); + type = mmdGetType(current); + + if (type == MMD_TYPE_TABLE_ROW) + { + // Parse row... 
+ for (col = 0, current = mmdGetFirstChild(current); + current && num_cols < TABLECOL_MAX; + current = mmdGetNextSibling(current), col ++) + { + rowptr\->cells[col] = current; + + measure_cell(dd, current, cols + col); + + if (col >= num_cols) + num_cols = col + 1; + } + + rowptr ++; + num_rows ++; + } + } +.fi +.PP +The measure_cell function also updates the minimum and maximum width needed for each column. To this we add the cell padding to compute the total table width: +.nf + + // Figure out the width of each column... + for (col = 0, table_width = 0.0; col < num_cols; col ++) + { + cols[col].max_width += 2.0 * TABLE_PADDING; + + table_width += cols[col].max_width; + cols[col].width = cols[col].max_width; + } +.fi +.PP +If the calculated width is more than the available width, we need to adjust the width of the columns. The algorithm used here breaks the available width into N equal\-width columns \- any columns wider than this will be scaled proportionately. This works out as two steps \- one to calculate the base width of "narrow" columns and a second to distribute the remaining width amongst the wider columns: +.nf + + format_width = right \- left \- 2.0 * TABLE_PADDING * num_cols; + + if (table_width > format_width) + { + // Content too wide, try scaling the widths... + double avg_width, // Average column width + base_width, // Base width + remaining_width, // Remaining width + scale_width; // Width for scaling + size_t num_remaining_cols = 0; // Number of remaining columns + + // First mark any columns that are narrower than the average width... + avg_width = format_width / num_cols; + + for (col = 0, base_width = 0.0, remaining_width = 0.0; col < num_cols; col ++) + { + if (cols[col].width > avg_width) + { + remaining_width += cols[col].width; + num_remaining_cols ++; + } + else + { + base_width += cols[col].width; + } + } + + // Then proportionately distribute the remaining width to the other columns...
+ format_width \-= base_width; + + for (col = 0, table_width = 0.0; col < num_cols; col ++) + { + if (cols[col].width > avg_width) + cols[col].width = cols[col].width * format_width / remaining_width; + + table_width += cols[col].width; + } + } +.fi +.PP +Now that we have the widths of the columns, we can calculate the left and right margins of each column for formatting the cell text: +.nf + + // Calculate the margins of each column in preparation for formatting + for (col = 0, x = left + TABLE_PADDING; col < num_cols; col ++) + { + cols[col].left = x; + cols[col].right = x + cols[col].width; + + x += cols[col].width + 2.0 * TABLE_PADDING; + } +.fi +.PP +Then we re\-measure the cells using the final column widths to determine the height of each cell and row: +.nf + + // Calculate the height of each row and cell in preparation for formatting + for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++) + { + for (col = 0; col < num_cols; col ++) + { + height = measure_cell(dd, rowptr\->cells[col], cols + col) + 2.0 * TABLE_PADDING; + if (height > rowptr\->height) + rowptr\->height = height; + } + } +.fi +.PP +Finally, we render each row in the table: +.nf + + // Render each table row... + for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++) + render_row(dd, num_cols, cols, rowptr); +.fi +.PP +Rendering the Markdown Document +.PP +The formatted content in arrays of linefrag_t and tablerow_t structures are passed to the render_line and render_row functions respectively to produce content in the PDF document. +.PP +Rendering a Line in a Paragraph, Heading, or Table Cell +.PP +The render_line function adds content from the linefrag_t array to a PDF page. 
It starts by determining whether a new page is needed: +.nf + + if (!dd\->st) + { + new_page(dd); + margin_top = 0.0; + } + + dd\->y \-= margin_top + lineheight; + if ((dd\->y \- need_bottom) < dd\->art_box.y1) + { + new_page(dd); + + dd\->y \-= lineheight; + } +.fi +.PP +We then loop through the fragments for the current line, drawing checkboxes, images, and text as needed. When a hyperlink is present, we add the link to the links array in the docdata_t structure, mapping "@" and "@@" to an internal link corresponding to the linked text: +.nf + + if (frag\->url && dd\->num_links < DOCLINK_MAX) + { + doclink_t *l = dd\->links + dd\->num_links; + // Pointer to this link record + + if (!strcmp(frag\->url, "@")) + { + // Use mapped text as link target... + char targetlink[129]; // Targeted link + + targetlink[0] = '#'; + make_target_name(targetlink + 1, frag\->text, sizeof(targetlink) \- 1); + + l\->url = pdfioStringCreate(dd\->pdf, targetlink); + } + else if (!strcmp(frag\->url, "@@")) + { + // Use literal text as anchor... + l\->url = pdfioStringCreatef(dd\->pdf, "#%s", frag\->text); + } + else + { + // Use URL as\-is... + l\->url = frag\->url; + } + + l\->box.x1 = frag\->x; + l\->box.y1 = dd\->y; + l\->box.x2 = frag\->x + frag\->width; + l\->box.y2 = dd\->y + frag\->height; + + dd\->num_links ++; + } +.fi +.PP +These are later written as annotations in the add_links function. +.PP +Rendering a Table Row +.PP +The render_row function takes a row of cells and the corresponding column definitions. It starts by drawing the border boxes around body cells: +.nf + + if (mmdGetType(row\->cells[0]) == MMD_TYPE_TABLE_HEADER_CELL) + { + // Header row, no border... + deffont = DOCFONT_BOLD; + } + else + { + // Regular body row, add borders...
+ deffont = DOCFONT_REGULAR; + + set_color(dd, DOCCOLOR_GRAY); + pdfioContentPathRect(dd\->st, cols[0].left \- TABLE_PADDING, dd\->y \- row\->height, + cols[num_cols \- 1].right \- cols[0].left + + 2.0 * TABLE_PADDING, row\->height); + for (col = 1; col < num_cols; col ++) + { + pdfioContentPathMoveTo(dd\->st, cols[col].left \- TABLE_PADDING, dd\->y); + pdfioContentPathLineTo(dd\->st, cols[col].left \- TABLE_PADDING, dd\->y \- row\->height); + } + pdfioContentStroke(dd\->st); + } +.fi +.PP +Then it formats each cell using the format_block function described previously. The page y value is reset before formatting each cell: +.nf + + row_y = dd\->y; + + for (col = 0; col < num_cols; col ++) + { + dd\->y = row_y; + + format_block(dd, row\->cells[col], deffont, SIZE_TABLE, cols[col].left, + cols[col].right, /*leader*/NULL); + } + + dd\->y = row_y \- row\->height; +.fi .SH ENUMERATIONS .SS pdfio_cs_e diff --git a/doc/pdfio.html b/doc/pdfio.html index b8bf7a4..8a1bf10 100644 --- a/doc/pdfio.html +++ b/doc/pdfio.html @@ -505,7 +505,7 @@ span.string {
PDFio is a simple C library for reading and writing PDF files. The primary goals of pdfio are:
+PDFio is a simple C library for reading and writing PDF files. The primary goals of PDFio are:
Read and write any version of PDF file
You open an existing PDF file using the pdfioFileOpen
function:
pdfio_file_t *pdf =
- pdfioFileOpen("myinputfile.pdf", password_cb, password_data,
- error_cb, error_data);
+ pdfioFileOpen("myinputfile.pdf", password_cb, password_data, error_cb,
+ error_data);
where the five arguments to the function are the filename ("myinputfile.pdf"), an optional password callback function (password_cb
) and data pointer value (password_data
), and an optional error callback function (error_cb
) and data pointer value (error_data
). The password callback is called for encrypted PDF files that are not using the default password, for example:
const char *
@@ -817,8 +817,7 @@ pdfio_array_t *crop_box; // CropBox array
pdfio_rect_t media_box = { 0.0, 0.0, 612.0, 792.0 }; // US Letter
pdfio_rect_t crop_box = { 36.0, 36.0, 576.0, 756.0 }; // w/0.5" margins
-pdfio_file_t *pdf = pdfioFileCreate("myoutputfile.pdf", "2.0",
- &media_box, &crop_box,
+pdfio_file_t *pdf = pdfioFileCreate("myoutputfile.pdf", "2.0", &media_box, &crop_box,
error_cb, error_data);
where the six arguments to the function are the filename ("myoutputfile.pdf"), PDF version ("2.0"), media box (media_box
), crop box (crop_box
), an optional error callback function (error_cb
), and an optional pointer value for the error callback function (error_data
). The units for the media and crop boxes are points (1/72nd of an inch).
@@ -826,9 +825,8 @@ pdfio_file_t *pdf = pdfioFileCreate("myoutputfile.pdf&
pdfio_rect_t media_box = { 0.0, 0.0, 612.0, 792.0 }; // US Letter
pdfio_rect_t crop_box = { 36.0, 36.0, 576.0, 756.0 }; // w/0.5" margins
-pdfio_file_t *pdf = pdfioFileCreateOutput(output_cb, output_ctx, "2.0",
- &media_box, &crop_box,
- error_cb, error_data);
+pdfio_file_t *pdf = pdfioFileCreateOutput(output_cb, output_ctx, "2.0", &media_box,
+ &crop_box, error_cb, error_data);
Once the file is created, use the pdfioFileCreateObj
, pdfioFileCreatePage
, and pdfioPageCopy
functions to create objects and pages in the file.
Finally, the pdfioFileClose
function writes the PDF cross-reference and "trailer" information, closes the file, and frees all memory that was used for it.
@@ -998,10 +996,9 @@ pdfio_obj_t *arial =
pdfio_file_t *pdf = pdfioFileCreate(...);
unsigned char data[1024 * 1024 * 4]; // 1024x1024 RGBA image data
pdfio_obj_t *img =
- pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024,
- /*height*/1024, /*num_colors*/3,
- /*color_data*/NULL, /*alpha*/true,
- /*interpolate*/false);
+ pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024, /*height*/1024,
+ /*num_colors*/3, /*color_data*/NULL,
+ /*alpha*/true, /*interpolate*/false);
will create an object for a 1024x1024 RGBA image in memory, using the default color space for 3 colors ("DeviceRGB"). We can use one of the color space functions to use a specific color space for this image, for example:
pdfio_file_t *pdf = pdfioFileCreate(...);
@@ -1009,24 +1006,20 @@ pdfio_obj_t *img =
// Create an AdobeRGB color array
pdfio_array_t *adobe_rgb =
pdfioArrayCreateColorFromMatrix(pdf, 3, pdfioAdobeRGBGamma,
- pdfioAdobeRGBMatrix,
- pdfioAdobeRGBWhitePoint);
+ pdfioAdobeRGBMatrix, pdfioAdobeRGBWhitePoint);
// Create a 1024x1024 RGBA image using AdobeRGB
unsigned char data[1024 * 1024 * 4]; // 1024x1024 RGBA image data
pdfio_obj_t *img =
- pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024,
- /*height*/1024, /*num_colors*/3,
- /*color_data*/adobe_rgb,
- /*alpha*/true,
- /*interpolate*/false);
+ pdfioFileCreateImageObjFromData(pdf, data, /*width*/1024, /*height*/1024,
+ /*num_colors*/3, /*color_data*/adobe_rgb,
+ /*alpha*/true, /*interpolate*/false);
The "interpolate" argument specifies whether the colors in the image should be smoothed/interpolated when scaling. This is most useful for photographs but should be false
for screenshot and barcode images.
If you have a JPEG or PNG file, use the pdfioFileCreateImageObjFromFile
function to copy the image into a PDF image object, for example:
pdfio_file_t *pdf = pdfioFileCreate(...);
pdfio_obj_t *img =
- pdfioFileCreateImageObjFromFile(pdf, "myphoto.jpg",
- /*interpolate*/true);
+ pdfioFileCreateImageObjFromFile(pdf, "myphoto.jpg", /*interpolate*/true);
Note: Currently pdfioFileCreateImageObjFromFile
does not support 12 bit JPEG files or PNG files with an alpha channel.
@@ -1181,9 +1174,8 @@ main(int argc, 1];
// Open the PDF file with the default callbacks...
- pdf = pdfioFileOpen(filename, /*password_cb*/NULL,
- /*password_cbdata*/NULL, /*error_cb*/NULL,
- /*error_cbdata*/NULL);
+ pdf = pdfioFileOpen(filename, /*password_cb*/NULL, /*password_cbdata*/NULL,
+ /*error_cb*/NULL, /*error_cbdata*/NULL);
if (pdf == NULL)
return (1);
@@ -1229,9 +1221,8 @@ create_pdf_image_file(
// Create the PDF file...
- pdf = pdfioFileCreate(pdfname, /*version*/NULL, /*media_box*/NULL,
- /*crop_box*/NULL, /*error_cb*/NULL,
- /*error_cbdata*/NULL);
+ pdf = pdfioFileCreate(pdfname, /*version*/NULL, /*media_box*/NULL, /*crop_box*/NULL,
+ /*error_cb*/NULL, /*error_cbdata*/NULL);
if (!pdf)
return (false);
@@ -1265,9 +1256,9 @@ create_pdf_image_file(
width = pdfioImageGetWidth(image);
height = pdfioImageGetHeight(image);
- // Default media_box is "universal" 595.28x792 points (8.27x11in or
- // 210x279mm). Use margins of 36 points (0.5in or 12.7mm) with another
- // 36 points for the caption underneath...
+ // Default media_box is "universal" 595.28x792 points (8.27x11in or 210x279mm).
+ // Use margins of 36 points (0.5in or 12.7mm) with another 36 points for the
+ // caption underneath...
swidth = 595.28 - 72.0;
sheight = swidth * height / width;
if (sheight > (792.0 - 36.0 - 72.0))
@@ -1284,8 +1275,8 @@ create_pdf_image_file(
// Draw the caption in black...
pdfioContentSetFillColorDeviceGray(page, 0.0);
- // Compute the starting point for the text - Courier is monospaced
- // with a nominal width of 0.6 times the text height...
+ // Compute the starting point for the text - Courier is monospaced with a
+ // nominal width of 0.6 times the text height...
tx = 0.5 * (595.28 - 18.0 * 0.6 * strlen(caption));
// Position and draw the caption underneath...
@@ -1353,8 +1344,7 @@ make_code128(char *dst,
The main
function does the rest of the work. The barcode font is imported using the pdfioFileCreateFontObjFromFile
function. We pass false
for the "unicode" argument since we just want the (default) ASCII encoding:
-barcode_font = pdfioFileCreateFontObjFromFile(pdf, "code128.ttf",
- /*unicode*/false);
+barcode_font = pdfioFileCreateFontObjFromFile(pdf, "code128.ttf", /*unicode*/false);
Since barcodes usually have the number or text represented by the barcode printed underneath it, we also need a regular text font, for which we can choose one of the standard 14 PostScript base fonts using the pdfioFIleCreateFontObjFromBase
function:
text_font = pdfioFileCreateFontObjFromBase(pdf, "Helvetica");
@@ -1378,8 +1368,7 @@ make_code128(char *dst, if (text && text_font)
{
text_height = 9.0;
- text_width = pdfioContentTextMeasure(text_font, text,
- text_height);
+ text_width = pdfioContentTextMeasure(text_font, text, text_height);
}
// Compute the size of the PDF page...
@@ -1387,8 +1376,7 @@ pdfio_rect_t media_box;
media_box.x1 = 0.0;
media_box.y1 = 0.0;
-media_box.x2 = (barcode_width > text_width ?
- barcode_width : text_width) + 18.0;
+media_box.x2 = (barcode_width > text_width ? barcode_width : text_width) + 18.0;
media_box.y2 = barcode_height + text_height + 18.0;
Finally, we just need to create a page of the specified size that references the two fonts:
@@ -1429,8 +1417,650 @@ pdfioStreamClose(page_st);
Convert Markdown to PDF
Markdown is a simple plain text format that supports things like headings, links, character styles, tables, and embedded images. The md2pdf.c
example code uses the mmd library to convert markdown to a PDF file that can be distributed.
-Note: The md2pdf example is by far the most complex example code included with PDFio and shows how to layout text, add headers and footers, add links, embed images, and format tables.
+Note: The md2pdf example is by far the most complex example code included with PDFio and shows how to lay out text, add headers and footers, add links, embed images, format tables, and add an outline (table of contents) for navigation.
+Managing Document State
+The md2pdf
program needs to maintain three sets of state - one for the markdown document which is represented by nodes of type mmd_t
and the others for the PDF document and current PDF page which are contained in the docdata_t
structure:
+typedef struct docdata_s // Document formatting data
+{
+ // State for the whole document
+ pdfio_file_t *pdf; // PDF file
+ pdfio_rect_t media_box; // Media (page) box
+ pdfio_rect_t crop_box; // Crop box (for margins)
+ pdfio_rect_t art_box; // Art box (for markdown content)
+ pdfio_obj_t *fonts[DOCFONT_MAX]; // Embedded fonts
+ double font_space; // Unit width of a space
+ size_t num_images; // Number of embedded images
+ docimage_t images[DOCIMAGE_MAX]; // Embedded images
+ const char *title; // Document title
+ char *heading; // Current document heading
+ size_t num_actions; // Number of actions for this document
+ docaction_t actions[DOCACTION_MAX]; // Actions for this document
+ size_t num_targets; // Number of targets for this document
+ doctarget_t targets[DOCTARGET_MAX]; // Targets for this document
+ size_t num_toc; // Number of table-of-contents entries
+ doctoc_t toc[DOCTOC_MAX]; // Table-of-contents entries
+
+ // State for the current page
+ pdfio_stream_t *st; // Current page stream
+ double y; // Current position on page
+ docfont_t font; // Current font
+ double fsize; // Current font size
+ doccolor_t color; // Current color
+ pdfio_array_t *annots_array; // Annotations array (for links)
+ pdfio_obj_t *annots_obj; // Annotations object (for links)
+ size_t num_links; // Number of links for this page
+ doclink_t links[DOCLINK_MAX]; // Links for this page
+} docdata_t;
+
+Document State
+The output is fixed to the "universal" media size (the intersection of US Letter and ISO A4) with 1/2 inch margins - the PAGE_
constants can be changed to select a different size or margins. The media_box
member contains the "MediaBox" rectangle for the PDF pages, while the crop_box
and art_box
members contain the "CropBox" and "ArtBox" values, respectively.
+Four embedded fonts are used:
+
+DOCFONT_REGULAR
: the default font used for text,
+
+DOCFONT_BOLD
: a boldface font used for heading and strong text,
+
+DOCFONT_ITALIC
: an italic/oblique font used for emphasized text, and
+
+DOCFONT_MONOSPACE
: a fixed-width font used for code.
+
+
+By default the code uses the base PostScript fonts Helvetica, Helvetica-Bold, Helvetica-Oblique, and Courier. The USE_TRUETYPE
define can be used to replace these with the Roboto TrueType fonts.
+Embedded JPEG and PNG images are copied into the PDF document, with the images
array containing the list of the images and their objects.
+The title
member contains the document title, while the heading
member contains the current heading text.
+The actions
array contains a list of action dictionaries for interior document links that need to be resolved, while the targets
array keeps track of the location of the headings in the PDF document.
+The toc
array contains a list of headings and is used to construct the PDF outlines dictionaries/objects, which provides a table of contents for navigation in most PDF readers.
+Page State
+The st
member provides the stream for the current page content. The color
, font
, fsize
, and y
members provide the current graphics state on the page.
+The annots_array
, annots_obj
, num_links
, and links
members contain a list of hyperlinks on the current page.
+Creating Pages
+The new_page
function is used to start a new page. Aside from creating the new page object and stream, it adds a standard header and footer to the page. It starts by closing the current page if it is open:
+// Close the current page...
+if (dd->st)
+{
+ pdfioStreamClose(dd->st);
+ add_links(dd);
+}
+
+The new page needs a dictionary containing any link annotations, the media and art boxes, the four fonts, and any images:
+// Prep the new page...
+page_dict = pdfioDictCreate(dd->pdf);
+
+dd->annots_array = pdfioArrayCreate(dd->pdf);
+dd->annots_obj = pdfioFileCreateArrayObj(dd->pdf, dd->annots_array);
+pdfioDictSetObj(page_dict, "Annots", dd->annots_obj);
+
+pdfioDictSetRect(page_dict, "MediaBox", &dd->media_box);
+pdfioDictSetRect(page_dict, "ArtBox", &dd->art_box);
+
+for (fontface = DOCFONT_REGULAR; fontface < DOCFONT_MAX; fontface ++)
+ pdfioPageDictAddFont(page_dict, docfont_names[fontface], dd->fonts[fontface]);
+
+for (i = 0; i < dd->num_images; i ++)
+ pdfioPageDictAddImage(page_dict, pdfioStringCreatef(dd->pdf, "I%u", (unsigned)i),
+ dd->images[i].obj);
+
+Once the page dictionary is initialized, we create a new page and initialize the current graphics state:
+dd->st = pdfioFileCreatePage(dd->pdf, page_dict);
+dd->color = DOCCOLOR_BLACK;
+dd->font = DOCFONT_MAX;
+dd->fsize = 0.0;
+dd->y = dd->art_box.y2;
+
+The header consists of a dark gray separating line and the document title. We don't show the header on the first page:
+// Add header/footer text
+set_color(dd, DOCCOLOR_GRAY);
+set_font(dd, DOCFONT_REGULAR, SIZE_HEADFOOT);
+
+if (pdfioFileGetNumPages(dd->pdf) > 1 && dd->title)
+{
+ // Show title in header...
+ width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], dd->title,
+ SIZE_HEADFOOT);
+
+ pdfioContentTextBegin(dd->st);
+ pdfioContentTextMoveTo(dd->st,
+ dd->crop_box.x1 + 0.5 * (dd->crop_box.x2 -
+ dd->crop_box.x1 - width),
+ dd->crop_box.y2 - SIZE_HEADFOOT);
+ pdfioContentTextShow(dd->st, UNICODE_VALUE, dd->title);
+ pdfioContentTextEnd(dd->st);
+
+ pdfioContentPathMoveTo(dd->st, dd->crop_box.x1,
+ dd->crop_box.y2 - 2 * SIZE_HEADFOOT * LINE_HEIGHT +
+ SIZE_HEADFOOT);
+ pdfioContentPathLineTo(dd->st, dd->crop_box.x2,
+ dd->crop_box.y2 - 2 * SIZE_HEADFOOT * LINE_HEIGHT +
+ SIZE_HEADFOOT);
+ pdfioContentStroke(dd->st);
+}
+
+The footer contains the same dark gray separating line with the current heading and page number on opposite sides. The page number is always positioned on the outer edge for a two-sided print - right justified on odd numbered pages and left justified on even numbered pages:
+// Show page number and current heading...
+pdfioContentPathMoveTo(dd->st, dd->crop_box.x1,
+ dd->crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT);
+pdfioContentPathLineTo(dd->st, dd->crop_box.x2,
+ dd->crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT);
+pdfioContentStroke(dd->st);
+
+pdfioContentTextBegin(dd->st);
+snprintf(temp, sizeof(temp), "%u", (unsigned)pdfioFileGetNumPages(dd->pdf));
+if (pdfioFileGetNumPages(dd->pdf) & 1)
+{
+ // Page number on right...
+ width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], temp, SIZE_HEADFOOT);
+ pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width, dd->crop_box.y1);
+}
+else
+{
+ // Page number on left...
+ pdfioContentTextMoveTo(dd->st, dd->crop_box.x1, dd->crop_box.y1);
+}
+
+pdfioContentTextShow(dd->st, UNICODE_VALUE, temp);
+pdfioContentTextEnd(dd->st);
+
+if (dd->heading)
+{
+ pdfioContentTextBegin(dd->st);
+
+ if (pdfioFileGetNumPages(dd->pdf) & 1)
+ {
+ // Current heading on left...
+ pdfioContentTextMoveTo(dd->st, dd->crop_box.x1, dd->crop_box.y1);
+ }
+ else
+ {
+ width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], dd->heading,
+ SIZE_HEADFOOT);
+ pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width, dd->crop_box.y1);
+ }
+
+ pdfioContentTextShow(dd->st, UNICODE_VALUE, dd->heading);
+ pdfioContentTextEnd(dd->st);
+}
+
+Formatting the Markdown Document
+Four functions handle the formatting of the markdown document:
+
+format_block
: formats a single paragraph, heading, or table cell,
+
+format_code
: formats a block of code,
+
+format_doc
: formats the document as a whole, and
+
+format_table
: formats a table.
+
+
+Formatted content is organized into arrays of linefrag_t
and tablerow_t
structures for a line of content or row of table cells, respectively.
+High-Level Formatting
+The format_doc
function iterates over the block nodes in the markdown document. We map a "thematic break" (horizontal rule) to a page break, which is implemented by moving the current vertical position to the bottom of the page:
+case MMD_TYPE_THEMATIC_BREAK :
+ // Force a page break
+ dd->y = dd->art_box.y1;
+ break;
+
+A block quote is indented and uses the italic font by default:
+case MMD_TYPE_BLOCK_QUOTE :
+ format_doc(dd, current, DOCFONT_ITALIC, left + BQ_PADDING, right - BQ_PADDING);
+ break;
+
+Lists have a leading blank line and are indented:
+case MMD_TYPE_ORDERED_LIST :
+case MMD_TYPE_UNORDERED_LIST :
+ if (dd->st)
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ format_doc(dd, current, deffont, left + LIST_PADDING, right);
+ break;
+
+List items do not have a leading blank line and make use of leader text that is shown in front of the list text. The leader text is either the current item number or a bullet, which then is directly formatted using the format_block
function:
+case MMD_TYPE_LIST_ITEM :
+ if (doctype == MMD_TYPE_ORDERED_LIST)
+ {
+ snprintf(leader, sizeof(leader), "%d. ", i);
+ format_block(dd, current, deffont, SIZE_BODY, left, right, leader);
+ }
+ else
+ {
+ format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/"• ");
+ }
+ break;
+
+Paragraphs have a leading blank line and are likewise directly formatted:
+case MMD_TYPE_PARAGRAPH :
+ // Add a blank line before the paragraph...
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ // Format the paragraph...
+ format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/NULL);
+ break;
+
+Tables have a leading blank line and are formatted using the format_table
function:
+case MMD_TYPE_TABLE :
+ // Add a blank line before the table...
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ // Format the table...
+ format_table(dd, current, left, right);
+ break;
+
+Code blocks have a leading blank line, are indented slightly (to account for the padded background), and are formatted using the format_code
function:
+case MMD_TYPE_CODE_BLOCK :
+ // Add a blank line before the code block...
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ // Format the code block...
+ format_code(dd, current, left + CODE_PADDING, right - CODE_PADDING);
+ break;
+
+Headings get some extra processing. First, the current heading is remembered in the docdata_t
structure so it can be used in the page footer:
+case MMD_TYPE_HEADING_1 :
+case MMD_TYPE_HEADING_2 :
+case MMD_TYPE_HEADING_3 :
+case MMD_TYPE_HEADING_4 :
+case MMD_TYPE_HEADING_5 :
+case MMD_TYPE_HEADING_6 :
+ // Update the current heading
+ free(dd->heading);
+ dd->heading = mmdCopyAllText(current);
+
+Then we add a blank line and format the heading with the boldface font at a larger size using the format_block
function:
+ // Add a blank line before the heading...
+ dd->y -= heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
+
+ // Format the heading...
+ format_block(dd, current, DOCFONT_BOLD,
+ heading_sizes[curtype - MMD_TYPE_HEADING_1], left, right,
+ /*leader*/NULL);
+
+Once the heading is formatted, we record it in the toc
array as a PDF outline item object/dictionary:
+ // Add the heading to the table-of-contents...
+ if (dd->num_toc < DOCTOC_MAX)
+ {
+ doctoc_t *t = dd->toc + dd->num_toc;
+ // New TOC
+ pdfio_array_t *dest; // Destination array
+
+ t->level = curtype - MMD_TYPE_HEADING_1;
+ t->dict = pdfioDictCreate(dd->pdf);
+ t->obj = pdfioFileCreateObj(dd->pdf, t->dict);
+ dest = pdfioArrayCreate(dd->pdf);
+
+ pdfioArrayAppendObj(dest,
+ pdfioFileGetPage(dd->pdf, pdfioFileGetNumPages(dd->pdf) - 1));
+ pdfioArrayAppendName(dest, "XYZ");
+ pdfioArrayAppendNumber(dest, PAGE_LEFT);
+ pdfioArrayAppendNumber(dest,
+ dd->y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT);
+ pdfioArrayAppendNumber(dest, 0.0);
+
+ pdfioDictSetArray(t->dict, "Dest", dest);
+ pdfioDictSetString(t->dict, "Title", pdfioStringCreate(dd->pdf, dd->heading));
+
+ dd->num_toc ++;
+ }
+
+Finally, we also save the heading's target name and its location in the targets
array to allow interior links to work:
+ // Add the heading to the list of link targets...
+ if (dd->num_targets < DOCTARGET_MAX)
+ {
+ doctarget_t *t = dd->targets + dd->num_targets;
+ // New target
+
+ make_target_name(t->name, dd->heading, sizeof(t->name));
+ t->page = pdfioFileGetNumPages(dd->pdf) - 1;
+ t->y = dd->y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
+
+ dd->num_targets ++;
+ }
+ break;
+
+Formatting Paragraphs, Headings, List Items, and Table Cells
+Paragraphs, headings, list items, and table cells all use the same basic formatting algorithm. Text, checkboxes, and images are collected until the nodes in the current block are used up or the content reaches the right margin.
+In order to keep adjacent blocks of text together, the formatting algorithm makes sure that at least 3 lines of text can fit before the bottom edge of the page:
+if (mmdGetNextSibling(block))
+ need_bottom = 3.0 * SIZE_BODY * LINE_HEIGHT;
+else
+ need_bottom = 0.0;
+
+Leader text (used for list items) is right justified to the left margin and becomes the first fragment on the line when present.
+if (leader)
+{
+ // Add leader text on first line...
+ frags[0].type = MMD_TYPE_NORMAL_TEXT;
+ frags[0].width = pdfioContentTextMeasure(dd->fonts[deffont], leader, fsize);
+ frags[0].height = fsize;
+ frags[0].x = left - frags[0].width;
+ frags[0].imagenum = 0;
+ frags[0].text = leader;
+ frags[0].url = NULL;
+ frags[0].ws = false;
+ frags[0].font = deffont;
+ frags[0].color = DOCCOLOR_BLACK;
+
+ num_frags = 1;
+ lineheight = fsize * LINE_HEIGHT;
+}
+else
+{
+ // No leader text...
+ num_frags = 0;
+ lineheight = 0.0;
+}
+
+frag = frags + num_frags;
+
+If the current content fragment won't fit, we call render_line
to draw what we have, adjusting the left margin as needed for table cells:
+ // See if this node will fit on the current line...
+ if ((num_frags > 0 && (x + width + wswidth) >= right) || num_frags == LINEFRAG_MAX)
+ {
+ // No, render this line and start over...
+ if (blocktype == MMD_TYPE_TABLE_HEADER_CELL ||
+ blocktype == MMD_TYPE_TABLE_BODY_CELL_CENTER)
+ margin_left = 0.5 * (right - x);
+ else if (blocktype == MMD_TYPE_TABLE_BODY_CELL_RIGHT)
+ margin_left = right - x;
+ else
+ margin_left = 0.0;
+
+ render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
+
+ num_frags = 0;
+ frag = frags;
+ x = left;
+ lineheight = 0.0;
+ need_bottom = 0.0;
+
+Block quotes (blocks that use the italic default font) have an orange bar to the left of the block:
+ if (deffont == DOCFONT_ITALIC)
+ {
+ // Add an orange bar to the left of block quotes...
+ set_color(dd, DOCCOLOR_ORANGE);
+ pdfioContentSave(dd->st);
+ pdfioContentSetLineWidth(dd->st, 3.0);
+ pdfioContentPathMoveTo(dd->st, left - 6.0, dd->y - (LINE_HEIGHT - 1.0) * fsize);
+ pdfioContentPathLineTo(dd->st, left - 6.0, dd->y + fsize);
+ pdfioContentStroke(dd->st);
+ pdfioContentRestore(dd->st);
+ }
+
+Finally, we add the current content fragment to the array:
+ // Add the current node to the fragment list
+ if (num_frags == 0)
+ {
+ // No leading whitespace at the start of the line
+ ws = false;
+ wswidth = 0.0;
+ }
+
+ frag->type = type;
+ frag->x = x;
+ frag->width = width + wswidth;
+ frag->height = text ? fsize : height;
+ frag->imagenum = imagenum;
+ frag->text = text;
+ frag->url = url;
+ frag->ws = ws;
+ frag->font = font;
+ frag->color = color;
+
+ num_frags ++;
+ frag ++;
+ x += width + wswidth;
+ if (height > lineheight)
+ lineheight = height;
+
+Formatting Code Blocks
+Code blocks consist of one or more lines of plain monospaced text. We draw a light gray background behind each line with a small bit of padding at the top and bottom:
+// Draw the top padding...
+set_color(dd, DOCCOLOR_LTGRAY);
+pdfioContentPathRect(dd->st, left - CODE_PADDING, dd->y + SIZE_CODEBLOCK,
+ right - left + 2.0 * CODE_PADDING, CODE_PADDING);
+pdfioContentFillAndStroke(dd->st, false);
+
+// Start a code text block...
+set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
+pdfioContentTextBegin(dd->st);
+pdfioContentTextMoveTo(dd->st, left, dd->y);
+
+for (code = mmdGetFirstChild(block); code; code = mmdGetNextSibling(code))
+{
+ set_color(dd, DOCCOLOR_LTGRAY);
+ pdfioContentPathRect(dd->st, left - CODE_PADDING,
+ dd->y - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK,
+ right - left + 2.0 * CODE_PADDING, lineheight);
+ pdfioContentFillAndStroke(dd->st, false);
+
+ set_color(dd, DOCCOLOR_RED);
+ pdfioContentTextShow(dd->st, UNICODE_VALUE, mmdGetText(code));
+ dd->y -= lineheight;
+
+ if (dd->y < dd->art_box.y1)
+ {
+ // End the current text block...
+ pdfioContentTextEnd(dd->st);
+
+ // Start a new page...
+ new_page(dd);
+ set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
+
+ dd->y -= lineheight;
+
+ pdfioContentTextBegin(dd->st);
+ pdfioContentTextMoveTo(dd->st, left, dd->y);
+ }
+}
+
+// End the current text block...
+pdfioContentTextEnd(dd->st);
+dd->y += lineheight;
+
+// Draw the bottom padding...
+set_color(dd, DOCCOLOR_LTGRAY);
+pdfioContentPathRect(dd->st, left - CODE_PADDING,
+ dd->y - CODE_PADDING - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK,
+ right - left + 2.0 * CODE_PADDING, CODE_PADDING);
+pdfioContentFillAndStroke(dd->st, false);
+
+Formatting Tables
+Tables are the most difficult to format. We start by scanning the entire table and measuring every cell with the measure_cell
function:
+for (num_cols = 0, num_rows = 0, rowptr = rows, current = mmdGetFirstChild(table);
+ current && num_rows < TABLEROW_MAX;
+ current = next)
+{
+ next = mmd_walk_next(table, current);
+ type = mmdGetType(current);
+
+ if (type == MMD_TYPE_TABLE_ROW)
+ {
+ // Parse row...
+ for (col = 0, current = mmdGetFirstChild(current);
+ current && num_cols < TABLECOL_MAX;
+ current = mmdGetNextSibling(current), col ++)
+ {
+ rowptr->cells[col] = current;
+
+ measure_cell(dd, current, cols + col);
+
+ if (col >= num_cols)
+ num_cols = col + 1;
+ }
+
+ rowptr ++;
+ num_rows ++;
+ }
+}
+
+The measure_cell
function also updates the minimum and maximum width needed for each column. To this we add the cell padding to compute the total table width:
+// Figure out the width of each column...
+for (col = 0, table_width = 0.0; col < num_cols; col ++)
+{
+ cols[col].max_width += 2.0 * TABLE_PADDING;
+
+ table_width += cols[col].max_width;
+ cols[col].width = cols[col].max_width;
+}
+
+If the calculated width is more than the available width, we need to adjust the width of the columns. The algorithm used here breaks the available width into N equal-width columns - any columns wider than this will be scaled proportionately. This works out as two steps - one to calculate the base width of "narrow" columns and a second to distribute the remaining width amongst the wider columns:
+format_width = right - left - 2.0 * TABLE_PADDING * num_cols;
+
+if (table_width > format_width)
+{
+ // Content too wide, try scaling the widths...
+ double avg_width, // Average column width
+ base_width, // Base width
+ remaining_width, // Remaining width
+ scale_width; // Width for scaling
+ size_t num_remaining_cols = 0; // Number of remaining columns
+
+ // First mark any columns that are narrower than the average width...
+ avg_width = format_width / num_cols;
+
+ for (col = 0, base_width = 0.0, remaining_width = 0.0; col < num_cols; col ++)
+ {
+ if (cols[col].width > avg_width)
+ {
+ remaining_width += cols[col].width;
+ num_remaining_cols ++;
+ }
+ else
+ {
+ base_width += cols[col].width;
+ }
+ }
+
+ // Then proportionately distribute the remaining width to the other columns...
+ format_width -= base_width;
+
+ for (col = 0, table_width = 0.0; col < num_cols; col ++)
+ {
+ if (cols[col].width > avg_width)
+ cols[col].width = cols[col].width * format_width / remaining_width;
+
+ table_width += cols[col].width;
+ }
+}
+
+Now that we have the widths of the columns, we can calculate the left and right margins of each column for formatting the cell text:
+// Calculate the margins of each column in preparation for formatting
+for (col = 0, x = left + TABLE_PADDING; col < num_cols; col ++)
+{
+ cols[col].left = x;
+ cols[col].right = x + cols[col].width;
+
+ x += cols[col].width + 2.0 * TABLE_PADDING;
+}
+
+Then we re-measure the cells using the final column widths to determine the height of each cell and row:
+// Calculate the height of each row and cell in preparation for formatting
+for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
+{
+ for (col = 0; col < num_cols; col ++)
+ {
+ height = measure_cell(dd, rowptr->cells[col], cols + col) + 2.0 * TABLE_PADDING;
+ if (height > rowptr->height)
+ rowptr->height = height;
+ }
+}
+
+Finally, we render each row in the table:
+// Render each table row...
+for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
+ render_row(dd, num_cols, cols, rowptr);
+
+Rendering the Markdown Document
+The formatted content in arrays of linefrag_t
and tablerow_t
structures is passed to the render_line
and render_row
functions respectively to produce content in the PDF document.
+Rendering a Line in a Paragraph, Heading, or Table Cell
+The render_line
function adds content from the linefrag_t
array to a PDF page. It starts by determining whether a new page is needed:
+if (!dd->st)
+{
+ new_page(dd);
+ margin_top = 0.0;
+}
+
+dd->y -= margin_top + lineheight;
+if ((dd->y - need_bottom) < dd->art_box.y1)
+{
+ new_page(dd);
+
+ dd->y -= lineheight;
+}
+
+We then loop through the fragments for the current line, drawing checkboxes, images, and text as needed. When a hyperlink is present, we add the link to the links
array in the docdata_t
structure, mapping "@" and "@@" to an internal link corresponding to the linked text:
+if (frag->url && dd->num_links < DOCLINK_MAX)
+{
+ doclink_t *l = dd->links + dd->num_links;
+ // Pointer to this link record
+
+ if (!strcmp(frag->url, "@"))
+ {
+ // Use mapped text as link target...
+ char targetlink[129]; // Targeted link
+
+ targetlink[0] = '#';
+ make_target_name(targetlink + 1, frag->text, sizeof(targetlink) - 1);
+
+ l->url = pdfioStringCreate(dd->pdf, targetlink);
+ }
+ else if (!strcmp(frag->url, "@@"))
+ {
+ // Use literal text as anchor...
+ l->url = pdfioStringCreatef(dd->pdf, "#%s", frag->text);
+ }
+ else
+ {
+ // Use URL as-is...
+ l->url = frag->url;
+ }
+
+ l->box.x1 = frag->x;
+ l->box.y1 = dd->y;
+ l->box.x2 = frag->x + frag->width;
+ l->box.y2 = dd->y + frag->height;
+
+ dd->num_links ++;
+}
+
+These are later written as annotations in the add_links
function.
+Rendering a Table Row
+The render_row
function takes a row of cells and the corresponding column definitions. It starts by drawing the border boxes around body cells:
+if (mmdGetType(row->cells[0]) == MMD_TYPE_TABLE_HEADER_CELL)
+{
+ // Header row, no border...
+ deffont = DOCFONT_BOLD;
+}
+else
+{
+ // Regular body row, add borders...
+ deffont = DOCFONT_REGULAR;
+
+ set_color(dd, DOCCOLOR_GRAY);
+ pdfioContentPathRect(dd->st, cols[0].left - TABLE_PADDING, dd->y - row->height,
+ cols[num_cols - 1].right - cols[0].left +
+ 2.0 * TABLE_PADDING, row->height);
+ for (col = 1; col < num_cols; col ++)
+ {
+ pdfioContentPathMoveTo(dd->st, cols[col].left - TABLE_PADDING, dd->y);
+ pdfioContentPathLineTo(dd->st, cols[col].left - TABLE_PADDING, dd->y - row->height);
+ }
+ pdfioContentStroke(dd->st);
+}
+
+Then it formats each cell using the format_block
function described previously. The page y
value is reset before formatting each cell:
+row_y = dd->y;
+
+for (col = 0; col < num_cols; col ++)
+{
+ dd->y = row_y;
+
+ format_block(dd, row->cells[col], deffont, SIZE_TABLE, cols[col].left,
+ cols[col].right, /*leader*/NULL);
+}
+
+dd->y = row_y - row->height;
+
Functions
pdfioArrayAppendArray
Add an array value to an array.
diff --git a/doc/pdfio.md b/doc/pdfio.md
index dd71910..aad6da6 100644
--- a/doc/pdfio.md
+++ b/doc/pdfio.md
@@ -1430,378 +1430,251 @@ structures for a line of content or row of table cells, respectively.
#### High-Level Formatting
+The `format_doc` function iterates over the block nodes in the markdown
+document. We map a "thematic break" (horizontal rule) to a page break, which
+is implemented by moving the current vertical position to the bottom of the
+page:
+
```c
-static void
-format_doc(docdata_t *dd, // I - Document data
- mmd_t *doc, // I - Document node to format
- docfont_t deffont, // I - Default font
- double left, // I - Left margin
- double right) // I - Right margin
-{
- int i; // Child number
- mmd_type_t doctype; // Document node type
- mmd_t *current; // Current node
- mmd_type_t curtype; // Current node type
- char leader[32]; // Leader
- static const double heading_sizes[] = // Heading font sizes
- {
- SIZE_HEADING_1,
- SIZE_HEADING_2,
- SIZE_HEADING_3,
- SIZE_HEADING_4,
- SIZE_HEADING_5,
- SIZE_HEADING_6
- };
-
-
- doctype = mmdGetType(doc);
-
- for (i = 1, current = mmdGetFirstChild(doc); current; i ++, current = mmdGetNextSibling(current))
- {
- switch (curtype = mmdGetType(current))
- {
- default :
- break;
-
- case MMD_TYPE_THEMATIC_BREAK :
- // Force a page break
- dd->y = dd->art_box.y1;
- break;
-
- case MMD_TYPE_BLOCK_QUOTE :
- format_doc(dd, current, DOCFONT_ITALIC, left + 36.0, right - 36.0);
- break;
-
- case MMD_TYPE_ORDERED_LIST :
- case MMD_TYPE_UNORDERED_LIST :
- if (dd->st)
- dd->y -= SIZE_BODY * LINE_HEIGHT;
-
- format_doc(dd, current, deffont, left + 36.0, right);
- break;
-
- case MMD_TYPE_LIST_ITEM :
- if (doctype == MMD_TYPE_ORDERED_LIST)
- {
- snprintf(leader, sizeof(leader), "%d. ", i);
- format_block(dd, current, deffont, SIZE_BODY, left, right, leader);
- }
- else
- {
- format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/"• ");
- }
- break;
-
- case MMD_TYPE_HEADING_1 :
- case MMD_TYPE_HEADING_2 :
- case MMD_TYPE_HEADING_3 :
- case MMD_TYPE_HEADING_4 :
- case MMD_TYPE_HEADING_5 :
- case MMD_TYPE_HEADING_6 :
- free(dd->heading);
-
- dd->heading = mmdCopyAllText(current);
-
- format_block(dd, current, DOCFONT_BOLD, heading_sizes[curtype - MMD_TYPE_HEADING_1], left, right, /*leader*/NULL);
-
- if (dd->num_toc < DOCTOC_MAX)
- {
- doctoc_t *t = dd->toc + dd->num_toc;
- // New TOC
- pdfio_array_t *dest; // Destination array
-
- t->level = curtype - MMD_TYPE_HEADING_1;
- t->dict = pdfioDictCreate(dd->pdf);
- t->obj = pdfioFileCreateObj(dd->pdf, t->dict);
- dest = pdfioArrayCreate(dd->pdf);
-
- pdfioArrayAppendObj(dest, pdfioFileGetPage(dd->pdf, pdfioFileGetNumPages(dd->pdf) - 1));
- pdfioArrayAppendName(dest, "XYZ");
- pdfioArrayAppendNumber(dest, PAGE_LEFT);
- pdfioArrayAppendNumber(dest, dd->y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT);
- pdfioArrayAppendNumber(dest, 0.0);
-
- pdfioDictSetArray(t->dict, "Dest", dest);
- pdfioDictSetString(t->dict, "Title", pdfioStringCreate(dd->pdf, dd->heading));
-
- dd->num_toc ++;
- }
-
- if (dd->num_targets < DOCTARGET_MAX)
- {
- doctarget_t *t = dd->targets + dd->num_targets;
- // New target
-
- make_target_name(t->name, dd->heading, sizeof(t->name));
- t->page = pdfioFileGetNumPages(dd->pdf) - 1;
- t->y = dd->y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
-
- dd->num_targets ++;
- }
- break;
-
- case MMD_TYPE_PARAGRAPH :
- format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/NULL);
- break;
-
- case MMD_TYPE_TABLE :
- format_table(dd, current, left, right);
- break;
-
- case MMD_TYPE_CODE_BLOCK :
- format_code(dd, current, left + CODE_PADDING, right - CODE_PADDING);
- break;
- }
- }
-}
+case MMD_TYPE_THEMATIC_BREAK :
+ // Force a page break
+ dd->y = dd->art_box.y1;
+ break;
```
-
-#### Formatting Paragraphs, Headings, and Table Cells
+A block quote is indented and uses the italic font by default:
```c
-static void
-format_block(docdata_t *dd, // I - Document data
- mmd_t *block, // I - Block to format
- docfont_t deffont, // I - Default font
- double fsize, // I - Size of font
- double left, // I - Left margin
- double right, // I - Right margin
- const char *leader) // I - Leader text on the first line
-{
- mmd_type_t blocktype; // Block type
- mmd_t *current, // Current node
- *next; // Next node
- size_t num_frags; // Number of line fragments
- linefrag_t frags[LINEFRAG_MAX], // Line fragments
- *frag; // Current fragment
- mmd_type_t type; // Current node type
- const char *text, // Current text
- *url; // Current URL, if any
- bool ws; // Current whitespace
- pdfio_obj_t *image; // Current image, if any
- size_t imagenum; // Current image number
- doccolor_t color = DOCCOLOR_BLACK; // Current text color
- docfont_t font = deffont; // Current text font
- double x, // Current position
- width, // Width of current fragment
- wswidth, // Width of whitespace
- margin_left, // Left margin
- margin_top, // Top margin
- need_bottom, // Space needed after this block
- height, // Height of current fragment
- lineheight; // Height of current line
+case MMD_TYPE_BLOCK_QUOTE :
+ format_doc(dd, current, DOCFONT_ITALIC, left + BQ_PADDING, right - BQ_PADDING);
+ break;
+```
+Lists have a leading blank line and are indented:
- blocktype = mmdGetType(block);
+```c
+case MMD_TYPE_ORDERED_LIST :
+case MMD_TYPE_UNORDERED_LIST :
+ if (dd->st)
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
- if ((blocktype >= MMD_TYPE_TABLE_HEADER_CELL && blocktype <= MMD_TYPE_TABLE_BODY_CELL_RIGHT) || blocktype == MMD_TYPE_LIST_ITEM)
- margin_top = 0.0;
- else
- margin_top = fsize * LINE_HEIGHT;
+ format_doc(dd, current, deffont, left + LIST_PADDING, right);
+ break;
+```
- if (mmdGetNextSibling(block))
- need_bottom = 3.0 * SIZE_BODY * LINE_HEIGHT;
- else
- need_bottom = 0.0;
+List items do not have a leading blank line and make use of leader text that is
+shown in front of the list text. The leader text is either the current item
+number or a bullet, and the item is then formatted directly using the
+`format_block` function:
- if (leader)
- {
- // Add leader text on first line...
- frags[0].type = MMD_TYPE_NORMAL_TEXT;
- frags[0].width = pdfioContentTextMeasure(dd->fonts[deffont], leader, fsize);
- frags[0].height = fsize;
- frags[0].x = left - frags[0].width;
- frags[0].imagenum = 0;
- frags[0].text = leader;
- frags[0].url = NULL;
- frags[0].ws = false;
- frags[0].font = deffont;
- frags[0].color = DOCCOLOR_BLACK;
-
- num_frags = 1;
- lineheight = fsize * LINE_HEIGHT;
- }
- else
- {
- // No leader text...
- num_frags = 0;
- lineheight = 0.0;
- }
-
- frag = frags + num_frags;
-
- // Loop through the block and render lines...
- for (current = mmdGetFirstChild(block), x = left; current; current = next)
- {
- // Get information about the current node...
- type = mmdGetType(current);
- text = mmdGetText(current);
- image = NULL;
- imagenum = 0;
- url = mmdGetURL(current);
- ws = mmdGetWhitespace(current);
- wswidth = ws ? dd->font_space * fsize : 0.0;
- next = mmd_walk_next(block, current);
-
- // Process the node...
- if (type == MMD_TYPE_IMAGE && url)
+```c
+case MMD_TYPE_LIST_ITEM :
+ if (doctype == MMD_TYPE_ORDERED_LIST)
{
- // Embed an image
- if ((image = find_image(dd, url, &imagenum)) == NULL)
- continue;
-
- // Image - treat as 100dpi
- width = 72.0 * pdfioImageGetWidth(image) / IMAGE_PPI;
- height = 72.0 * pdfioImageGetHeight(image) / IMAGE_PPI;
- text = NULL;
-
- if (width > (right - left))
- {
- // Too wide, scale to width...
- width = right - left;
- height = width * pdfioImageGetHeight(image) / pdfioImageGetWidth(image);
- }
- else if (height > (dd->art_box.y2 - dd->art_box.y1))
- {
- // Too tall, scale to height...
- height = dd->art_box.y2 - dd->art_box.y1;
- width = height * pdfioImageGetWidth(image) / pdfioImageGetHeight(image);
- }
- }
- else if (type == MMD_TYPE_HARD_BREAK && num_frags > 0)
- {
- if (blocktype == MMD_TYPE_TABLE_HEADER_CELL || blocktype == MMD_TYPE_TABLE_BODY_CELL_CENTER)
- margin_left = 0.5 * (right - x);
- else if (blocktype == MMD_TYPE_TABLE_BODY_CELL_RIGHT)
- margin_left = right - x;
- else
- margin_left = 0.0;
-
- render_line(dd, margin_left, margin_top, need_bottom, lineheight, num_frags, frags);
-
- if (deffont == DOCFONT_ITALIC)
- {
- // Add an orange bar to the left of block quotes...
- set_color(dd, DOCCOLOR_ORANGE);
- pdfioContentSave(dd->st);
- pdfioContentSetLineWidth(dd->st, 3.0);
- pdfioContentPathMoveTo(dd->st, left - 6.0, dd->y - (LINE_HEIGHT - 1.0) * fsize);
- pdfioContentPathLineTo(dd->st, left - 6.0, dd->y + fsize);
- pdfioContentStroke(dd->st);
- pdfioContentRestore(dd->st);
- }
-
- num_frags = 0;
- frag = frags;
- x = left;
- lineheight = 0.0;
- margin_top = 0.0;
- need_bottom = 0.0;
-
- continue;
- }
- else if (type == MMD_TYPE_CHECKBOX)
- {
- // Checkbox
- width = height = fsize;
- }
- else if (!text)
- {
- continue;
+ snprintf(leader, sizeof(leader), "%d. ", i);
+ format_block(dd, current, deffont, SIZE_BODY, left, right, leader);
}
else
{
- // Text fragment...
- if (type == MMD_TYPE_EMPHASIZED_TEXT)
- font = DOCFONT_ITALIC;
- else if (type == MMD_TYPE_STRONG_TEXT)
- font = DOCFONT_BOLD;
- else if (type == MMD_TYPE_CODE_TEXT)
- font = DOCFONT_MONOSPACE;
- else
- font = deffont;
-
- if (type == MMD_TYPE_CODE_TEXT)
- color = DOCCOLOR_RED;
- else if (type == MMD_TYPE_LINKED_TEXT)
- color = DOCCOLOR_BLUE;
- else
- color = DOCCOLOR_BLACK;
-
- width = pdfioContentTextMeasure(dd->fonts[font], text, fsize);
- height = fsize * LINE_HEIGHT;
+ format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/"• ");
}
+ break;
+```
- // See if this node will fit on the current line...
- if ((num_frags > 0 && (x + width + wswidth) >= right) || num_frags == LINEFRAG_MAX)
+Paragraphs have a leading blank line and are likewise directly formatted:
+
+```c
+case MMD_TYPE_PARAGRAPH :
+ // Add a blank line before the paragraph...
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ // Format the paragraph...
+ format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/NULL);
+ break;
+```
+
+Tables have a leading blank line and are formatted using the `format_table`
+function:
+
+```c
+case MMD_TYPE_TABLE :
+ // Add a blank line before the table...
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ // Format the table...
+ format_table(dd, current, left, right);
+ break;
+```
+
+Code blocks have a leading blank line, are indented slightly (to account for the
+padded background), and are formatted using the `format_code` function:
+
+```c
+case MMD_TYPE_CODE_BLOCK :
+ // Add a blank line before the code block...
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ // Format the code block...
+ format_code(dd, current, left + CODE_PADDING, right - CODE_PADDING);
+ break;
+```
+
+Headings get some extra processing. First, the current heading is remembered in
+the `docdata_t` structure so it can be used in the page footer:
+
+```c
+case MMD_TYPE_HEADING_1 :
+case MMD_TYPE_HEADING_2 :
+case MMD_TYPE_HEADING_3 :
+case MMD_TYPE_HEADING_4 :
+case MMD_TYPE_HEADING_5 :
+case MMD_TYPE_HEADING_6 :
+ // Update the current heading
+ free(dd->heading);
+ dd->heading = mmdCopyAllText(current);
+```
+
+Then we add a blank line and format the heading with the boldface font at a
+larger size using the `format_block` function:
+
+```c
+ // Add a blank line before the heading...
+ dd->y -= heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
+
+ // Format the heading...
+ format_block(dd, current, DOCFONT_BOLD,
+ heading_sizes[curtype - MMD_TYPE_HEADING_1], left, right,
+ /*leader*/NULL);
+```
+
+Once the heading is formatted, we record it in the `toc` array as a PDF outline
+item object/dictionary:
+
+```c
+ // Add the heading to the table-of-contents...
+ if (dd->num_toc < DOCTOC_MAX)
{
- // No, render this line and start over...
- if (blocktype == MMD_TYPE_TABLE_HEADER_CELL || blocktype == MMD_TYPE_TABLE_BODY_CELL_CENTER)
- margin_left = 0.5 * (right - x);
- else if (blocktype == MMD_TYPE_TABLE_BODY_CELL_RIGHT)
- margin_left = right - x;
- else
- margin_left = 0.0;
+ doctoc_t *t = dd->toc + dd->num_toc;
+ // New TOC
+ pdfio_array_t *dest; // Destination array
- render_line(dd, margin_left, margin_top, need_bottom, lineheight, num_frags, frags);
+ t->level = curtype - MMD_TYPE_HEADING_1;
+ t->dict = pdfioDictCreate(dd->pdf);
+ t->obj = pdfioFileCreateObj(dd->pdf, t->dict);
+ dest = pdfioArrayCreate(dd->pdf);
- if (deffont == DOCFONT_ITALIC)
- {
- // Add an orange bar to the left of block quotes...
- set_color(dd, DOCCOLOR_ORANGE);
- pdfioContentSave(dd->st);
- pdfioContentSetLineWidth(dd->st, 3.0);
- pdfioContentPathMoveTo(dd->st, left - 6.0, dd->y - (LINE_HEIGHT - 1.0) * fsize);
- pdfioContentPathLineTo(dd->st, left - 6.0, dd->y + fsize);
- pdfioContentStroke(dd->st);
- pdfioContentRestore(dd->st);
- }
+ pdfioArrayAppendObj(dest,
+ pdfioFileGetPage(dd->pdf, pdfioFileGetNumPages(dd->pdf) - 1));
+ pdfioArrayAppendName(dest, "XYZ");
+ pdfioArrayAppendNumber(dest, PAGE_LEFT);
+ pdfioArrayAppendNumber(dest,
+ dd->y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT);
+ pdfioArrayAppendNumber(dest, 0.0);
- num_frags = 0;
- frag = frags;
- x = left;
- lineheight = 0.0;
- margin_top = 0.0;
- need_bottom = 0.0;
+ pdfioDictSetArray(t->dict, "Dest", dest);
+ pdfioDictSetString(t->dict, "Title", pdfioStringCreate(dd->pdf, dd->heading));
+
+ dd->num_toc ++;
}
+```
- // Add the current node to the fragment list
- if (num_frags == 0)
+Finally, we also save the heading's target name and its location in the
+`targets` array to allow interior links to work:
+
+```c
+ // Add the heading to the list of link targets...
+ if (dd->num_targets < DOCTARGET_MAX)
{
- ws = false;
- wswidth = 0.0;
+ doctarget_t *t = dd->targets + dd->num_targets;
+ // New target
+
+ make_target_name(t->name, dd->heading, sizeof(t->name));
+ t->page = pdfioFileGetNumPages(dd->pdf) - 1;
+ t->y = dd->y + heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
+
+ dd->num_targets ++;
}
+ break;
+```
- frag->type = type;
- frag->x = x;
- frag->width = width + wswidth;
- frag->height = text ? fsize : height;
- frag->imagenum = imagenum;
- frag->text = text;
- frag->url = url;
- frag->ws = ws;
- frag->font = font;
- frag->color = color;
- num_frags ++;
- frag ++;
- x += width + wswidth;
- if (height > lineheight)
- lineheight = height;
- }
+#### Formatting Paragraphs, Headings, List Items, and Table Cells
- if (num_frags > 0)
+Paragraphs, headings, list items, and table cells all use the same basic
+formatting algorithm. Text, checkboxes, and images are collected until the
+nodes in the current block are used up or the content reaches the right margin.
+
+In order to keep adjacent blocks of text together, the formatting algorithm
+makes sure that at least 3 lines of text can fit before the bottom edge of the
+page:
+
+```c
+if (mmdGetNextSibling(block))
+ need_bottom = 3.0 * SIZE_BODY * LINE_HEIGHT;
+else
+ need_bottom = 0.0;
+```
+
+Leader text (used for list items) is right-justified to the left margin and
+becomes the first fragment on the line when present:
+
+```c
+if (leader)
+{
+ // Add leader text on first line...
+ frags[0].type = MMD_TYPE_NORMAL_TEXT;
+ frags[0].width = pdfioContentTextMeasure(dd->fonts[deffont], leader, fsize);
+ frags[0].height = fsize;
+ frags[0].x = left - frags[0].width;
+ frags[0].imagenum = 0;
+ frags[0].text = leader;
+ frags[0].url = NULL;
+ frags[0].ws = false;
+ frags[0].font = deffont;
+ frags[0].color = DOCCOLOR_BLACK;
+
+ num_frags = 1;
+ lineheight = fsize * LINE_HEIGHT;
+}
+else
+{
+ // No leader text...
+ num_frags = 0;
+ lineheight = 0.0;
+}
+
+frag = frags + num_frags;
+```
+
+If the current content fragment won't fit, we call `render_line` to draw what we
+have, adjusting the left margin as needed for table cells:
+
+```c
+ // See if this node will fit on the current line...
+ if ((num_frags > 0 && (x + width + wswidth) >= right) || num_frags == LINEFRAG_MAX)
{
- if (blocktype == MMD_TYPE_TABLE_HEADER_CELL || blocktype == MMD_TYPE_TABLE_BODY_CELL_CENTER)
+ // No, render this line and start over...
+ if (blocktype == MMD_TYPE_TABLE_HEADER_CELL ||
+ blocktype == MMD_TYPE_TABLE_BODY_CELL_CENTER)
margin_left = 0.5 * (right - x);
else if (blocktype == MMD_TYPE_TABLE_BODY_CELL_RIGHT)
margin_left = right - x;
else
margin_left = 0.0;
- render_line(dd, margin_left, margin_top, need_bottom, lineheight, num_frags, frags);
+ render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
+ num_frags = 0;
+ frag = frags;
+ x = left;
+ lineheight = 0.0;
+ need_bottom = 0.0;
+```
+
+Block quotes (blocks whose default font is italic) have an orange bar to the
+left of the block:
+
+```c
if (deffont == DOCFONT_ITALIC)
{
// Add an orange bar to the left of block quotes...
@@ -1813,227 +1686,233 @@ format_block(docdata_t *dd, // I - Document data
pdfioContentStroke(dd->st);
pdfioContentRestore(dd->st);
}
+```
+
+Finally, we add the current content fragment to the array:
+
+```c
+ // Add the current node to the fragment list
+ if (num_frags == 0)
+ {
+ // No leading whitespace at the start of the line
+ ws = false;
+ wswidth = 0.0;
}
-}
+
+ frag->type = type;
+ frag->x = x;
+ frag->width = width + wswidth;
+ frag->height = text ? fsize : height;
+ frag->imagenum = imagenum;
+ frag->text = text;
+ frag->url = url;
+ frag->ws = ws;
+ frag->font = font;
+ frag->color = color;
+
+ num_frags ++;
+ frag ++;
+ x += width + wswidth;
+ if (height > lineheight)
+ lineheight = height;
```
#### Formatting Code Blocks
+Code blocks consist of one or more lines of plain monospaced text. We draw a
+light gray background behind each line with a small bit of padding at the top
+and bottom:
+
```c
-static void
-format_code(docdata_t *dd, // I - Document data
- mmd_t *block, // I - Code block
- double left, // I - Left margin
- double right) // I - Right margin
+// Draw the top padding...
+set_color(dd, DOCCOLOR_LTGRAY);
+pdfioContentPathRect(dd->st, left - CODE_PADDING, dd->y + SIZE_CODEBLOCK,
+ right - left + 2.0 * CODE_PADDING, CODE_PADDING);
+pdfioContentFillAndStroke(dd->st, false);
+
+// Start a code text block...
+set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
+pdfioContentTextBegin(dd->st);
+pdfioContentTextMoveTo(dd->st, left, dd->y);
+
+for (code = mmdGetFirstChild(block); code; code = mmdGetNextSibling(code))
{
- mmd_t *code; // Current code block
- double lineheight, // Line height
- margin_top; // Top margin
-
-
- // Compute line height and initial top margin...
- lineheight = SIZE_CODEBLOCK * LINE_HEIGHT;
- margin_top = lineheight;
-
- // Start a new page as needed...
- if (!dd->st)
- {
- new_page(dd);
-
- margin_top = 0.0;
- }
-
- dd->y -= lineheight + margin_top + CODE_PADDING;
-
- if ((dd->y - lineheight) < dd->art_box.y1)
- {
- new_page(dd);
-
- dd->y -= lineheight + CODE_PADDING;
- }
-
- // Draw the top padding...
set_color(dd, DOCCOLOR_LTGRAY);
- pdfioContentPathRect(dd->st, left - CODE_PADDING, dd->y + SIZE_CODEBLOCK, right - left + 2.0 * CODE_PADDING, CODE_PADDING);
+ pdfioContentPathRect(dd->st, left - CODE_PADDING,
+ dd->y - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK,
+ right - left + 2.0 * CODE_PADDING, lineheight);
pdfioContentFillAndStroke(dd->st, false);
- // Start a code text block...
- set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
- pdfioContentTextBegin(dd->st);
- pdfioContentTextMoveTo(dd->st, left, dd->y);
+ set_color(dd, DOCCOLOR_RED);
+ pdfioContentTextShow(dd->st, UNICODE_VALUE, mmdGetText(code));
+ dd->y -= lineheight;
- for (code = mmdGetFirstChild(block); code; code = mmdGetNextSibling(code))
+ if (dd->y < dd->art_box.y1)
{
- set_color(dd, DOCCOLOR_LTGRAY);
- pdfioContentPathRect(dd->st, left - CODE_PADDING, dd->y - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK, right - left + 2.0 * CODE_PADDING, lineheight);
- pdfioContentFillAndStroke(dd->st, false);
+ // End the current text block...
+ pdfioContentTextEnd(dd->st);
+
+ // Start a new page...
+ new_page(dd);
+ set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
- set_color(dd, DOCCOLOR_RED);
- pdfioContentTextShow(dd->st, UNICODE_VALUE, mmdGetText(code));
dd->y -= lineheight;
- if (dd->y < dd->art_box.y1)
- {
- // End the current text block...
- pdfioContentTextEnd(dd->st);
-
- // Start a new page...
- new_page(dd);
- set_font(dd, DOCFONT_MONOSPACE, SIZE_CODEBLOCK);
-
- dd->y -= lineheight;
-
- pdfioContentTextBegin(dd->st);
- pdfioContentTextMoveTo(dd->st, left, dd->y);
- }
+ pdfioContentTextBegin(dd->st);
+ pdfioContentTextMoveTo(dd->st, left, dd->y);
}
-
- // End the current text block...
- pdfioContentTextEnd(dd->st);
- dd->y += lineheight;
-
- // Draw the bottom padding...
- set_color(dd, DOCCOLOR_LTGRAY);
- pdfioContentPathRect(dd->st, left - CODE_PADDING, dd->y - CODE_PADDING - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK, right - left + 2.0 * CODE_PADDING, CODE_PADDING);
- pdfioContentFillAndStroke(dd->st, false);
}
+
+// End the current text block...
+pdfioContentTextEnd(dd->st);
+dd->y += lineheight;
+
+// Draw the bottom padding...
+set_color(dd, DOCCOLOR_LTGRAY);
+pdfioContentPathRect(dd->st, left - CODE_PADDING,
+ dd->y - CODE_PADDING - (LINE_HEIGHT - 1.0) * SIZE_CODEBLOCK,
+ right - left + 2.0 * CODE_PADDING, CODE_PADDING);
+pdfioContentFillAndStroke(dd->st, false);
```
#### Formatting Tables
+Tables are the most difficult to format. We start by scanning the entire table
+and measuring every cell with the `measure_cell` function:
+
```c
-static void
-format_table(docdata_t *dd, // I - Document data
- mmd_t *table, // I - Table node
- double left, // I - Left margin
- double right) // I - Right margin
+for (num_cols = 0, num_rows = 0, rowptr = rows, current = mmdGetFirstChild(table);
+ current && num_rows < TABLEROW_MAX;
+ current = next)
{
- mmd_t *current, // Current node
- *next; // Next node
- mmd_type_t type; // Node type
- size_t col, // Current column
- num_cols; // Number of columns
- tablecol_t cols[TABLECOL_MAX]; // Columns
- size_t row, // Current row
- num_rows; // Number of rows
- tablerow_t rows[TABLEROW_MAX], // Rows
- *rowptr; // Pointer to current row
- double x, // Current X position
- height, // Height of cell
- format_width, // Maximum format width of table
- table_width; // Total width of table
+ next = mmd_walk_next(table, current);
+ type = mmdGetType(current);
-
- // Find all of the rows and columns in the table...
- num_cols = num_rows = 0;
-
- memset(cols, 0, sizeof(cols));
- memset(rows, 0, sizeof(rows));
-
- rowptr = rows;
-
- for (current = mmdGetFirstChild(table); current && num_rows < TABLEROW_MAX; current = next)
+ if (type == MMD_TYPE_TABLE_ROW)
{
- next = mmd_walk_next(table, current);
- type = mmdGetType(current);
-
- if (type == MMD_TYPE_TABLE_ROW)
+ // Parse row...
+ for (col = 0, current = mmdGetFirstChild(current);
+ current && num_cols < TABLECOL_MAX;
+ current = mmdGetNextSibling(current), col ++)
{
- // Parse row...
- for (col = 0, current = mmdGetFirstChild(current); current && num_cols < TABLECOL_MAX; current = mmdGetNextSibling(current), col ++)
- {
- rowptr->cells[col] = current;
+ rowptr->cells[col] = current;
- measure_cell(dd, current, cols + col);
+ measure_cell(dd, current, cols + col);
- if (col >= num_cols)
- num_cols = col + 1;
- }
-
- rowptr ++;
- num_rows ++;
- }
- }
-
- // Figure out the width of each column...
- for (col = 0, table_width = 0.0; col < num_cols; col ++)
- {
- cols[col].max_width += 2.0 * TABLE_PADDING;
-
- table_width += cols[col].max_width;
- cols[col].width = cols[col].max_width;
- }
-
- format_width = right - left - 2.0 * TABLE_PADDING * num_cols;
-
- if (table_width > format_width)
- {
- // Content too wide, try scaling the widths...
- double avg_width, // Average column width
- base_width, // Base width
- remaining_width, // Remaining width
- scale_width; // Width for scaling
- size_t num_remaining_cols = 0; // Number of remaining columns
-
- // First mark any columns that are narrower than the average width...
- avg_width = format_width / num_cols;
-
- for (col = 0, base_width = 0.0, remaining_width = 0.0; col < num_cols; col ++)
- {
- if (cols[col].width > avg_width)
- {
- remaining_width += cols[col].width;
- num_remaining_cols ++;
- }
- else
- {
- base_width += cols[col].width;
- }
+ if (col >= num_cols)
+ num_cols = col + 1;
}
- // Then proportionately distribute the remaining width to the other columns...
- format_width -= base_width;
-
- for (col = 0, table_width = 0.0; col < num_cols; col ++)
- {
- if (cols[col].width > avg_width)
- cols[col].width = cols[col].width * format_width / remaining_width;
-
- table_width += cols[col].width;
- }
+ rowptr ++;
+ num_rows ++;
}
-
- // Calculate the margins of each column in preparation for formatting
- for (col = 0, x = left + TABLE_PADDING; col < num_cols; col ++)
- {
- cols[col].left = x;
- cols[col].right = x + cols[col].width;
-
- x += cols[col].width + 2.0 * TABLE_PADDING;
- }
-
- // Calculate the height of each row and cell in preparation for formatting
- for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
- {
- for (col = 0; col < num_cols; col ++)
- {
- height = measure_cell(dd, rowptr->cells[col], cols + col) + 2.0 * TABLE_PADDING;
- if (height > rowptr->height)
- rowptr->height = height;
- }
- }
-
- // Render each table row...
- if (dd->st)
- dd->y -= SIZE_TABLE * LINE_HEIGHT;
-
- for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
- render_row(dd, num_cols, cols, rowptr);
}
```
+The `measure_cell` function also updates the minimum and maximum width needed
+for each column. To this we add the cell padding to compute the total table
+width:
+
+```c
+// Figure out the width of each column...
+for (col = 0, table_width = 0.0; col < num_cols; col ++)
+{
+ cols[col].max_width += 2.0 * TABLE_PADDING;
+
+ table_width += cols[col].max_width;
+ cols[col].width = cols[col].max_width;
+}
+```
+
+If the calculated width is more than the available width, we need to adjust the
+width of the columns. The algorithm used here breaks the available width into
+N equal-width columns - any columns wider than this will be scaled
+proportionately. This works out as two steps - one to calculate the base
+width of "narrow" columns and a second to distribute the remaining width amongst
+the wider columns:
+
+```c
+format_width = right - left - 2.0 * TABLE_PADDING * num_cols;
+
+if (table_width > format_width)
+{
+ // Content too wide, try scaling the widths...
+ double avg_width, // Average column width
+ base_width, // Base width
+ remaining_width, // Remaining width
+ scale_width; // Width for scaling
+ size_t num_remaining_cols = 0; // Number of remaining columns
+
+ // First mark any columns that are narrower than the average width...
+ avg_width = format_width / num_cols;
+
+ for (col = 0, base_width = 0.0, remaining_width = 0.0; col < num_cols; col ++)
+ {
+ if (cols[col].width > avg_width)
+ {
+ remaining_width += cols[col].width;
+ num_remaining_cols ++;
+ }
+ else
+ {
+ base_width += cols[col].width;
+ }
+ }
+
+ // Then proportionately distribute the remaining width to the other columns...
+ format_width -= base_width;
+
+ for (col = 0, table_width = 0.0; col < num_cols; col ++)
+ {
+ if (cols[col].width > avg_width)
+ cols[col].width = cols[col].width * format_width / remaining_width;
+
+ table_width += cols[col].width;
+ }
+}
+```
+
+Now that we have the widths of the columns, we can calculate the left and right
+margins of each column for formatting the cell text:
+
+```c
+// Calculate the margins of each column in preparation for formatting
+for (col = 0, x = left + TABLE_PADDING; col < num_cols; col ++)
+{
+ cols[col].left = x;
+ cols[col].right = x + cols[col].width;
+
+ x += cols[col].width + 2.0 * TABLE_PADDING;
+}
+```
+
+Then we re-measure the cells using the final column widths to determine the
+height of each cell and row:
+
+```c
+// Calculate the height of each row and cell in preparation for formatting
+for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
+{
+ for (col = 0; col < num_cols; col ++)
+ {
+ height = measure_cell(dd, rowptr->cells[col], cols + col) + 2.0 * TABLE_PADDING;
+ if (height > rowptr->height)
+ rowptr->height = height;
+ }
+}
+```
+
+Finally, we render each row in the table:
+
+```c
+// Render each table row...
+for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
+ render_row(dd, num_cols, cols, rowptr);
+```
+
+
### Rendering the Markdown Document
The formatted content in arrays of `linefrag_t` and `tablerow_t` structures
@@ -2109,9 +1988,34 @@ These are later written as annotations in the `add_links` function.
#### Rendering a Table Row
The `render_row` function takes a row of cells and the corresponding column
-definitions, draws the border boxes around body cells, and then formats each
-cell using the `format_block` function described previously. The key is to
-reset the page `y` value before formatting each cell:
+definitions. It starts by drawing the border boxes around body cells:
+
+```c
+if (mmdGetType(row->cells[0]) == MMD_TYPE_TABLE_HEADER_CELL)
+{
+ // Header row, no border...
+ deffont = DOCFONT_BOLD;
+}
+else
+{
+ // Regular body row, add borders...
+ deffont = DOCFONT_REGULAR;
+
+ set_color(dd, DOCCOLOR_GRAY);
+ pdfioContentPathRect(dd->st, cols[0].left - TABLE_PADDING, dd->y - row->height,
+ cols[num_cols - 1].right - cols[0].left +
+ 2.0 * TABLE_PADDING, row->height);
+ for (col = 1; col < num_cols; col ++)
+ {
+ pdfioContentPathMoveTo(dd->st, cols[col].left - TABLE_PADDING, dd->y);
+ pdfioContentPathLineTo(dd->st, cols[col].left - TABLE_PADDING, dd->y - row->height);
+ }
+ pdfioContentStroke(dd->st);
+}
+```
+
+Then it formats each cell using the `format_block` function described
+previously. The page `y` value is reset before formatting each cell:
```c
row_y = dd->y;
@@ -2120,7 +2024,8 @@ for (col = 0; col < num_cols; col ++)
{
dd->y = row_y;
- format_block(dd, row->cells[col], deffont, SIZE_TABLE, cols[col].left, cols[col].right, /*leader*/NULL);
+ format_block(dd, row->cells[col], deffont, SIZE_TABLE, cols[col].left,
+ cols[col].right, /*leader*/NULL);
}
dd->y = row_y - row->height;
diff --git a/examples/md2pdf.c b/examples/md2pdf.c
index 18e8399..17580aa 100644
--- a/examples/md2pdf.c
+++ b/examples/md2pdf.c
@@ -218,6 +218,8 @@ static const char * const docfont_names[] =
#define LINE_HEIGHT 1.4 // Multiplier for line height
+#define LIST_PADDING 36.0 // Padding/indentation for lists
+
#define SIZE_BODY 11.0 // Size of body text (points)
#define SIZE_CODEBLOCK 10.0 // Size of code block text (points)
#define SIZE_HEADFOOT 9.0 // Size of header/footer text (points)
@@ -260,7 +262,7 @@ static double measure_cell(docdata_t *dd, mmd_t *cell, tablecol_t *col);
static mmd_t *mmd_walk_next(mmd_t *top, mmd_t *node);
static void new_page(docdata_t *dd);
static ssize_t output_cb(void *output_cbdata, const void *buffer, size_t bytes);
-static void render_line(docdata_t *dd, double margin_left, double margin_top, double need_bottom, double lineheight, size_t num_frags, linefrag_t *frags);
+static void render_line(docdata_t *dd, double margin_left, double need_bottom, double lineheight, size_t num_frags, linefrag_t *frags);
static void render_row(docdata_t *dd, size_t num_cols, tablecol_t *cols, tablerow_t *row);
static void set_color(docdata_t *dd, doccolor_t color);
static void set_font(docdata_t *dd, docfont_t font, double fsize);
@@ -578,7 +580,6 @@ format_block(docdata_t *dd, // I - Document data
width, // Width of current fragment
wswidth, // Width of whitespace
margin_left, // Left margin
- margin_top, // Top margin
need_bottom, // Space needed after this block
height, // Height of current fragment
lineheight; // Height of current line
@@ -586,11 +587,6 @@ format_block(docdata_t *dd, // I - Document data
blocktype = mmdGetType(block);
- if ((blocktype >= MMD_TYPE_TABLE_HEADER_CELL && blocktype <= MMD_TYPE_TABLE_BODY_CELL_RIGHT) || blocktype == MMD_TYPE_LIST_ITEM)
- margin_top = 0.0;
- else
- margin_top = fsize * LINE_HEIGHT;
-
if (mmdGetNextSibling(block))
need_bottom = 3.0 * SIZE_BODY * LINE_HEIGHT;
else
@@ -669,7 +665,7 @@ format_block(docdata_t *dd, // I - Document data
else
margin_left = 0.0;
- render_line(dd, margin_left, margin_top, need_bottom, lineheight, num_frags, frags);
+ render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
if (deffont == DOCFONT_ITALIC)
{
@@ -683,7 +679,6 @@ format_block(docdata_t *dd, // I - Document data
frag = frags;
x = left;
lineheight = 0.0;
- margin_top = 0.0;
need_bottom = 0.0;
continue;
@@ -731,7 +726,13 @@ format_block(docdata_t *dd, // I - Document data
else
margin_left = 0.0;
- render_line(dd, margin_left, margin_top, need_bottom, lineheight, num_frags, frags);
+ render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
+
+ num_frags = 0;
+ frag = frags;
+ x = left;
+ lineheight = 0.0;
+ need_bottom = 0.0;
if (deffont == DOCFONT_ITALIC)
{
@@ -744,18 +745,12 @@ format_block(docdata_t *dd, // I - Document data
pdfioContentStroke(dd->st);
pdfioContentRestore(dd->st);
}
-
- num_frags = 0;
- frag = frags;
- x = left;
- lineheight = 0.0;
- margin_top = 0.0;
- need_bottom = 0.0;
}
// Add the current node to the fragment list
if (num_frags == 0)
{
+ // No leading whitespace at the start of the line
ws = false;
wswidth = 0.0;
}
@@ -787,7 +782,7 @@ format_block(docdata_t *dd, // I - Document data
else
margin_left = 0.0;
- render_line(dd, margin_left, margin_top, need_bottom, lineheight, num_frags, frags);
+ render_line(dd, margin_left, need_bottom, lineheight, num_frags, frags);
if (deffont == DOCFONT_ITALIC)
{
@@ -815,23 +810,17 @@ format_code(docdata_t *dd, // I - Document data
double right) // I - Right margin
{
mmd_t *code; // Current code block
- double lineheight, // Line height
- margin_top; // Top margin
+ double lineheight; // Line height
- // Compute line height and initial top margin...
+ // Compute line height...
lineheight = SIZE_CODEBLOCK * LINE_HEIGHT;
- margin_top = lineheight;
// Start a new page as needed...
if (!dd->st)
- {
new_page(dd);
- margin_top = 0.0;
- }
-
- dd->y -= lineheight + margin_top + CODE_PADDING;
+ dd->y -= lineheight + CODE_PADDING;
if ((dd->y - lineheight) < dd->art_box.y1)
{
@@ -934,10 +923,9 @@ format_doc(docdata_t *dd, // I - Document data
case MMD_TYPE_ORDERED_LIST :
case MMD_TYPE_UNORDERED_LIST :
- if (dd->st)
- dd->y -= SIZE_BODY * LINE_HEIGHT;
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
- format_doc(dd, current, deffont, left + 36.0, right);
+ format_doc(dd, current, deffont, left + LIST_PADDING, right);
break;
case MMD_TYPE_LIST_ITEM :
@@ -958,12 +946,17 @@ format_doc(docdata_t *dd, // I - Document data
case MMD_TYPE_HEADING_4 :
case MMD_TYPE_HEADING_5 :
case MMD_TYPE_HEADING_6 :
+ // Update the current heading
free(dd->heading);
-
dd->heading = mmdCopyAllText(current);
+ // Add a blank line before the heading...
+ dd->y -= heading_sizes[curtype - MMD_TYPE_HEADING_1] * LINE_HEIGHT;
+
+ // Format the heading...
format_block(dd, current, DOCFONT_BOLD, heading_sizes[curtype - MMD_TYPE_HEADING_1], left, right, /*leader*/NULL);
+ // Add the heading to the table-of-contents...
if (dd->num_toc < DOCTOC_MAX)
{
doctoc_t *t = dd->toc + dd->num_toc;
@@ -987,6 +980,7 @@ format_doc(docdata_t *dd, // I - Document data
dd->num_toc ++;
}
+ // Add the heading to the list of link targets...
if (dd->num_targets < DOCTARGET_MAX)
{
doctarget_t *t = dd->targets + dd->num_targets;
@@ -1001,14 +995,26 @@ format_doc(docdata_t *dd, // I - Document data
break;
case MMD_TYPE_PARAGRAPH :
+ // Add a blank line before the paragraph...
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ // Format the paragraph...
format_block(dd, current, deffont, SIZE_BODY, left, right, /*leader*/NULL);
break;
case MMD_TYPE_TABLE :
+ // Add a blank line before the table...
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ // Format the table...
format_table(dd, current, left, right);
break;
case MMD_TYPE_CODE_BLOCK :
+ // Add a blank line before the code block...
+ dd->y -= SIZE_BODY * LINE_HEIGHT;
+
+ // Format the code block...
format_code(dd, current, left + CODE_PADDING, right - CODE_PADDING);
break;
}
@@ -1043,14 +1049,10 @@ format_table(docdata_t *dd, // I - Document data
// Find all of the rows and columns in the table...
- num_cols = num_rows = 0;
-
memset(cols, 0, sizeof(cols));
memset(rows, 0, sizeof(rows));
- rowptr = rows;
-
- for (current = mmdGetFirstChild(table); current && num_rows < TABLEROW_MAX; current = next)
+ for (num_cols = 0, num_rows = 0, rowptr = rows, current = mmdGetFirstChild(table); current && num_rows < TABLEROW_MAX; current = next)
{
next = mmd_walk_next(table, current);
type = mmdGetType(current);
@@ -1142,9 +1144,6 @@ format_table(docdata_t *dd, // I - Document data
}
// Render each table row...
- if (dd->st)
- dd->y -= SIZE_TABLE * LINE_HEIGHT;
-
for (row = 0, rowptr = rows; row < num_rows; row ++, rowptr ++)
render_row(dd, num_cols, cols, rowptr);
}
@@ -1471,7 +1470,6 @@ output_cb(void *output_cbdata, // I - Callback data (not used)
static void
render_line(docdata_t *dd, // I - Document data
double margin_left, // I - Left margin
- double margin_top, // I - Top margin
double need_bottom, // I - How much space is needed after
double lineheight, // I - Height of line
size_t num_frags, // I - Number of line fragments
@@ -1483,12 +1481,9 @@ render_line(docdata_t *dd, // I - Document data
if (!dd->st)
- {
new_page(dd);
- margin_top = 0.0;
- }
- dd->y -= margin_top + lineheight;
+ dd->y -= lineheight;
if ((dd->y - need_bottom) < dd->art_box.y1)
{
new_page(dd);