From dc65eb8d2f88b65e4671d4bb9872fa87e8e450ac Mon Sep 17 00:00:00 2001 From: Michael R Sweet Date: Wed, 11 Dec 2024 15:37:03 -0500 Subject: [PATCH] Save work on image support. --- examples/md2pdf.c | 259 +++++++++++++++++++++++++++++++++++++-------- examples/md2pdf.md | 235 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 450 insertions(+), 44 deletions(-) create mode 100644 examples/md2pdf.md diff --git a/examples/md2pdf.c b/examples/md2pdf.c index 79db03d..bf17763 100644 --- a/examples/md2pdf.c +++ b/examples/md2pdf.c @@ -20,6 +20,11 @@ #include #include #include +#ifdef _WIN32 +# include +#else +# include +#endif // _WIN32 #include "mmd.h" #include #include @@ -55,6 +60,14 @@ typedef struct docimage_s // Document image info #define DOCIMAGE_MAX 1000 // Maximum number of images +typedef struct doclink_s // Document link info +{ + const char *url; // Reference URL + pdfio_rect_t box; // Link box +} doclink_t; + +#define DOCLINK_MAX 1000 // Maximum number of links/page + typedef struct docdata_s // Document formatting data { pdfio_file_t *pdf; // PDF file @@ -69,6 +82,9 @@ typedef struct docdata_s // Document formatting data pdfio_stream_t *st; // Current page stream double y; // Current position on page doccolor_t color; // Current color + pdfio_obj_t *annots; // Annotations object (for links) + size_t num_links; // Number of links for this page + doclink_t links[DOCLINK_MAX]; // Links for this page } docdata_t; @@ -128,6 +144,90 @@ static const char * const docfont_names[] = #define UNICODE_VALUE true // `true` for Unicode text, `false` for ISO-8859-1 +// +// 'mmd_walk_next()' - Find the next markdown node. +// + +static mmd_t * // O - Next node or `NULL` at end +mmd_walk_next(mmd_t *top, // I - Top node + mmd_t *node) // I - Current node +{ + mmd_t *next, // Next node + *parent; // Parent node + + + // Figure out the next node under "top"... 
+ if ((next = mmdGetFirstChild(node)) == NULL) + { + if ((next = mmdGetNextSibling(node)) == NULL) + { + if ((parent = mmdGetParent(node)) != top) + { + while ((next = mmdGetNextSibling(parent)) == NULL) + { + if ((parent = mmdGetParent(parent)) == top) + break; + } + } + } + } + + return (next); +} + + +// +// 'add_images()' - Scan the markdown document and register local JPEG/PNG images. +// + +static void +add_images(docdata_t *dd, // I - Document data + mmd_t *doc) // I - Markdown document +{ + mmd_t *current, // Current node + *next; // Next node + + + // Scan the entire document for images... + for (current = mmdGetFirstChild(doc); current; current = next) + { + // Get next node + next = mmd_walk_next(doc, current); + + // Look for image nodes... + if (mmdGetType(current) == MMD_TYPE_IMAGE) + { + const char *url, // URL for image, may be NULL + *ext; // Extension, NULL if none + + url = mmdGetURL(current); + ext = url ? strrchr(url, '.') : NULL; + + fprintf(stderr, "IMAGE(%s), ext=\"%s\"\n", url ? url : "(null)", ext ? ext : "(null)"); + + if (url && ext && !access(url, 0) && (!strcmp(ext, ".png") || !strcmp(ext, ".jpg") || !strcmp(ext, ".jpeg"))) + { + // Local JPEG or PNG file, so add it if we haven't already... + size_t i; // Looping var + + for (i = 0; i < dd->num_images; i ++) + { + if (!strcmp(dd->images[i].url, url)) + break; + } + + if (i >= dd->num_images && dd->num_images < DOCIMAGE_MAX) + { + dd->images[i].url = url; // Slot only becomes live if the image object is created + if ((dd->images[i].obj = pdfioFileCreateImageObjFromFile(dd->pdf, url, false)) != NULL) + dd->num_images ++; + } + } + } + } +} + + // // 'set_color()' - Set the stroke and fill color.
// @@ -286,10 +386,13 @@ format_block(docdata_t *dd, // I - Document data const char *curtext, // Current text *cururl; // Current URL, if any bool curws; // Current whitespace + pdfio_obj_t *curimage; // Current image, if any + char curimagename[32]; // Current image name docfont_t curface, // Current font face prevface; // Previous font face double x, y; // Current position double width, // Width of current fragment + height, // Height of current fragment lwidth, // Leader width wswidth; // Width of whitespace doccolor_t color; // Color of text @@ -327,34 +430,37 @@ format_block(docdata_t *dd, // I - Document data for (current = mmdGetFirstChild(block), x = left; current; current = next) { // Get information about the current node... - curtype = mmdGetType(current); - curtext = mmdGetText(current); - cururl = mmdGetURL(current); - curws = mmdGetWhitespace(current); - -// fprintf(stderr, "current=%p, curtype=%d, curtext=\"%s\", cururl=\"%s\", curws=%s\n", (void *)current, curtype, curtext, cururl, curws ? "true" : "false"); - - // Figure out the next node under this block... - if ((next = mmdGetFirstChild(current)) == NULL) - { - if ((next = mmdGetNextSibling(current)) == NULL) - { - mmd_t *parent; // Parent node - - if ((parent = mmdGetParent(current)) != block) - { - while ((next = mmdGetNextSibling(parent)) == NULL) - { - if ((parent = mmdGetParent(parent)) == block) - break; - } - } - } - } + curtype = mmdGetType(current); + curtext = mmdGetText(current); + curimage = NULL; + curimagename[0] = '\0'; + cururl = mmdGetURL(current); + curws = mmdGetWhitespace(current); + next = mmd_walk_next(block, current); // Process the node... 
- if (!curtext) + if (curtype == MMD_TYPE_IMAGE && cururl) + { + // Embed an image + size_t i; // Looping var + + for (i = 0; i < dd->num_images; i ++) + { + if (!strcmp(dd->images[i].url, cururl)) + { + curimage = dd->images[i].obj; + snprintf(curimagename, sizeof(curimagename), "I%u", (unsigned)i); + break; + } + } + + if (!curimage) + continue; + } + else if (!curtext) + { continue; + } if (curtype == MMD_TYPE_EMPHASIZED_TEXT) curface = DOCFONT_ITALIC; @@ -372,7 +478,52 @@ format_block(docdata_t *dd, // I - Document data else color = DOCCOLOR_BLACK; - width = pdfioContentTextMeasure(dd->fonts[curface], curtext, fontsize); + if (curimage) + { + // Image - treat as 100dpi + width = 72.0 * pdfioImageGetWidth(curimage) / 100.0; + height = 72.0 * pdfioImageGetHeight(curimage) / 100.0; + + if (width > (right - left)) + { + // Too wide, scale to width... + width = right - left; + height = width * pdfioImageGetHeight(curimage) / pdfioImageGetWidth(curimage); + } + else if (height > (dd->art_box.y2 - dd->art_box.y1)) + { + // Too tall, scale to height... + height = dd->art_box.y2 - dd->art_box.y1; + width = height * pdfioImageGetWidth(curimage) / pdfioImageGetHeight(curimage); + } + + if (x <= left) + { + y -= height - fontsize * LINE_HEIGHT; + + if (prevface != DOCFONT_MAX) + { + pdfioContentTextEnd(dd->st); + prevface = DOCFONT_MAX; + } + + if (y < dd->art_box.y1) + { + // New page... + new_page(dd); + + x = left; + y = dd->y - height; + } + } + } + else + { + // Text fragment... + width = pdfioContentTextMeasure(dd->fonts[curface], curtext, fontsize); + height = fontsize * LINE_HEIGHT; + } + if (curws) wswidth = pdfioContentTextMeasure(dd->fonts[curface], " ", fontsize); else @@ -382,7 +533,7 @@ format_block(docdata_t *dd, // I - Document data { // New line... 
x = left; - y -= fontsize * LINE_HEIGHT; + y -= height; if (y < dd->art_box.y1) { @@ -395,7 +546,7 @@ format_block(docdata_t *dd, // I - Document data new_page(dd); - y = dd->y - fontsize * LINE_HEIGHT; + y = dd->y - height; } else { @@ -404,29 +555,46 @@ format_block(docdata_t *dd, // I - Document data } } - if (curface != prevface) + fprintf(stderr, "curtext=\"%s\", curimage=\"%s\", x=%g, y=%g, width=%g, height=%g\n", curtext, curimagename, x, y, width, height); + + if (curimage) { - if (prevface == DOCFONT_MAX) + // Image + if (prevface != DOCFONT_MAX) { - pdfioContentTextBegin(dd->st); - pdfioContentTextMoveTo(dd->st, x, y); + pdfioContentTextEnd(dd->st); + prevface = DOCFONT_MAX; } - pdfioContentSetTextFont(dd->st, docfont_names[prevface = curface], fontsize); - } - - if (color != dd->color) - set_color(dd, color); - - if (x > left && curws) - { - pdfioContentTextShowf(dd->st, UNICODE_VALUE, " %s", curtext); - x += width + wswidth; + pdfioContentDrawImage(dd->st, curimagename, x, y, width, height); } else { - pdfioContentTextShow(dd->st, UNICODE_VALUE, curtext); - x += width; + // Text + if (curface != prevface) + { + if (prevface == DOCFONT_MAX) + { + pdfioContentTextBegin(dd->st); + pdfioContentTextMoveTo(dd->st, x, y); + } + + pdfioContentSetTextFont(dd->st, docfont_names[prevface = curface], fontsize); + } + + if (color != dd->color) + set_color(dd, color); + + if (x > left && curws) + { + pdfioContentTextShowf(dd->st, UNICODE_VALUE, " %s", curtext); + x += width + wswidth; + } + else + { + pdfioContentTextShow(dd->st, UNICODE_VALUE, curtext); + x += width; + } } if (blocktype == MMD_TYPE_CODE_BLOCK) @@ -632,6 +800,9 @@ main(int argc, // I - Number of command-line arguments return (1); } + // Add images... + add_images(&dd, doc); + // Parse the markdown document... 
format_doc(&dd, doc, dd.art_box.x1, dd.art_box.x2); diff --git a/examples/md2pdf.md b/examples/md2pdf.md new file mode 100644 index 0000000..7815f32 --- /dev/null +++ b/examples/md2pdf.md @@ -0,0 +1,235 @@ +--- +title: Mini-Markdown Test Document +... + +All heading levels are supported from 1 to 6, using both the ATX and Setext +forms. As an indented code block: + + # Heading 1 + ## Heading 2 + ### Heading 3 + #### Heading 4 + ##### Heading 5 + ###### Heading 6 + + Setext Heading 1 + ================ + + Setext Heading 2 + ---------------- + +As block headings: + +# Heading 1 +## Heading 2 +### Heading 3 +#### Heading 4 +##### Heading 5 +###### Heading 6 + +Setext Heading 1 +================ + +Setext Heading 2 +---------------- + +And block quotes: + +> # BQ Heading 1 +> ## BQ Heading 2 +> ### BQ Heading 3 +> #### BQ Heading 4 +> ##### BQ Heading 5 +> ###### BQ Heading 6 +> +> Setext Heading 1 +> ================ +> +> Setext Heading 2 +> ---------------- + +And ordered lists: + +1. First item. + +2. Second item. + +3. Third item with very long text that wraps + across multiple lines. + + With a secondary paragraph associated with + the third item. + +And unordered lists: + +- First item. + ++ Second item. + +* Third item. + +* [ ] Fourth item (unchecked) + +- [x] Fifth item (checked) + +Code block with `\``: + +``` +#include + +int main(void) +{ + puts("Hello, World!"); + return (0); +} +~~~ +``` + +Code block with `~`: + +~~~ +#include + +int main(void) +{ + puts("Hello, World!"); + return (0); +} +``` +~~~ + +Link to [mmd web site](https://michaelrsweet.github.io/mmd). + +Normal link to [Heading 1](@). + +Code link to [`Heading 2`](@). + +Inner emphasized link to [*Heading 3*](@). + +Outer emphasized link to *[Heading 3](@)*. + +Inner strong link to [**Heading 4**](@). + +Outer strong link to **[Heading 4](@)**. + +Implicit link to [reference1][]. + +Shortcut link to [reference1] without a link title. 
+ +[reference1]: https://michaelrsweet.github.io/mmd 'MMD Home Page' + +[reference2]: https://michaelrsweet.github.io/mmd/mmd.html 'MMD Documentation' + +[reference3]: https://michaelrsweet.github.io/mmd/mmd-160.png "MMD Logo" + +Link to [mmd web site][reference1] works. +Link to [mmd documentation][reference2] works. +Link to ![mmd logo][reference3] image. +Link to [bad reference][reference4] doesn't work. + +Autolink to <https://michaelrsweet.github.io/mmd>. + +Autolink in parenthesis (<https://michaelrsweet.github.io/mmd>). + +[Link broken +across two lines](https://michaelrsweet.github.io/mmd) + +Color JPEG Image: ![Color JPEG Image](../testfiles/color.jpg) +Grayscale JPEG Image: ![Grayscale JPEG Image](../testfiles/gray.jpg) +Color PNG Image: ![Color PNG Image](../testfiles/pdfio-color.png) +Grayscale PNG Image: ![Grayscale PNG Image](../testfiles/pdfio-gray.png) +Indexed PNG Image: ![Indexed PNG Image](../testfiles/pdfio-indexed.png) + +This sentence contains *Emphasized Text*, **Bold Text**, and `Code Text` for +testing the MMD parser. The `<mmd.h>` header file. + +This sentence contains _Emphasized Text_, __Bold Text__, and +~~Strikethrough Text~~ for testing the MMD parser. + +*Emphasized Text Split +Across Two Lines* + +**Bold Text Split +Across Two Lines** + +`Code Text Split +Across Two lines` + +_Emphasized Text Split +Across Two Lines_ + +__Bold Text Split +Across Two Lines__ + +~~Strikethrough Text Split +Across Two Lines~~ + +All work and no play makes Johnny a dull boy. +All work and no play makes Johnny a dull boy. +All work and no play makes Johnny a dull boy. + +All work and no play makes Johnny a dull boy. +All work and no play makes Johnny a dull boy. +All work and no play makes Johnny a dull boy.
+ +\(Escaped Parenthesis) + +\(*Emphasized Parenthesis*) + +\(**Boldface Parenthesis**) + +\(`Code Parenthesis`) + +Escaped backtick (`\``) + +Table as code: + + | Heading 1 | Heading 2 | Heading 3 | + | --------- | --------- | --------- | + | Cell 1,1 | Cell 1,2 | Cell 1,3 | + | Cell 2,1 | Cell 2,2 | Cell 2,3 | + | Cell 3,1 | Cell 3,2 | Cell 3,3 | + +Table with leading/trailing pipes: + +| Heading 1 | Heading 2 | Heading 3 | +| --------- | --------- | --------- | +| Cell 1,1 | Cell 1,2 | Cell 1,3 | +| Cell 2,1 | Cell 2,2 | Cell 2,3 | +| Cell 3,1 | Cell 3,2 | Cell 3,3 | + +Table without leading/trailing pipes: + +Heading 1 | Heading 2 | Heading 3 +--------- | --------- | --------- +Cell 1,1 | Cell 1,2 | Cell 1,3 +Cell 2,1 | Cell 2,2 | Cell 2,3 +Cell 3,1 | Cell 3,2 | Cell 3,3 + +Table with alignment: + +Left Alignment | Center Alignment | Right Alignment +:-------- | :-------: | --------: +Cell 1,1 | Cell 1,2 | 1 +Cell 2,1 | Cell 2,2 | 12 +Cell 3,1 | Cell 3,2 | 123 + +Table in block quote: + +> Heading 1 | Heading 2 | Heading 3 +> --------- | --------- | --------- +> Cell 1,1 | Cell 1,2 | Cell 1,3 +> Cell 2,1 | Cell 2,2 | Cell 2,3 +> Cell 3,1 | Cell 3,2 | Cell 3,3 + +# Tests for Bugs/Edge Cases + +Paragraph with "|" that should not +be interpreted as a table. + + code before a bulleted list + +- First item +- Second item +- Some pathological nested link and inline style features supported by + CommonMark like "`******Really Strong Text******`".