diff --git a/.gitignore b/.gitignore index 439f92e..1bda2f9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.1.dylib *.a +*.dSYM *.log *.o *.so.1 @@ -11,6 +12,7 @@ /configure~ /doc/pdfio.epub /examples/code128 +/examples/md2pdf /Makefile /packages /pdfio.pc diff --git a/EXAMPLES.md b/EXAMPLES.md index a4d349f..3607a12 100644 --- a/EXAMPLES.md +++ b/EXAMPLES.md @@ -9,3 +9,10 @@ code128.c --------- This example shows how to embed and use a barcode font. + + +md2pdf.c +-------- + +This example shows how to generate pages with multiple fonts, embedded images, +and headers and footers. diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 0000000..5f84214 --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,46 @@ +# +# Makefile for PDFio examples. +# +# Copyright © 2024 by Michael R Sweet. +# +# Licensed under Apache License v2.0. See the file "LICENSE" for more +# information. +# + +# POSIX makefile +.POSIX: + + +# Common options +CFLAGS = -g $(CPPFLAGS) +CPPFLAGS = -I.. +LIBS = -L.. -lpdfio -lz + + +# Targets +TARGETS = \ + code128 \ + md2pdf + + +# Make everything +all: $(TARGETS) + + +# Clean everything +clean: + rm -f $(TARGETS) + + +# code128 +code128: code128.c + $(CC) $(CFLAGS) -o $@ code128.c $(LIBS) + + +# md2pdf +md2pdf: md2pdf.c mmd.c mmd.h + $(CC) $(CFLAGS) -o $@ md2pdf.c mmd.c $(LIBS) + + +# Common dependencies... +$(TARGETS): Makefile ../pdfio.h ../pdfio-content.h diff --git a/examples/Roboto-Bold.ttf b/examples/Roboto-Bold.ttf new file mode 100644 index 0000000..043bc1c Binary files /dev/null and b/examples/Roboto-Bold.ttf differ diff --git a/examples/Roboto-Italic.ttf b/examples/Roboto-Italic.ttf new file mode 100644 index 0000000..0ac06c3 Binary files /dev/null and b/examples/Roboto-Italic.ttf differ diff --git a/examples/Roboto-Regular.ttf b/examples/Roboto-Regular.ttf new file mode 100644 index 0000000..a8c97ba Binary files /dev/null and b/examples/Roboto-Regular.ttf differ diff --git a/examples/RobotoMono-Regular.ttf b/examples/RobotoMono-Regular.ttf new file mode 100644 index 0000000..b158a33 Binary files /dev/null and b/examples/RobotoMono-Regular.ttf differ diff --git a/examples/code128.c b/examples/code128.c index c267c04..8013039 100644 --- a/examples/code128.c +++ b/examples/code128.c @@ -31,7 +31,7 @@ make_code128(char *dst, // I - Destination buffer { char *dstptr, // Pointer into destination buffer *dstend; // End of destination buffer - int sum = 0; // Weighted sum + int sum; // Weighted sum static const char *code128_chars = // Code 128 characters " !\"#$%&'()*+,-./0123456789:;<=>?" "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_" diff --git a/examples/md2pdf.c b/examples/md2pdf.c new file mode 100644 index 0000000..20ab05f --- /dev/null +++ b/examples/md2pdf.c @@ -0,0 +1,556 @@ +// +// Simple markdown to PDF converter example for PDFio. +// +// Copyright © 2024 by Michael R Sweet. +// +// Licensed under Apache License v2.0. See the file "LICENSE" for more +// information. +// +// Usage: +// +// ./md2pdf FILENAME.md >FILENAME.pdf +// +// The generated PDF file is formatted for a "universal" paper size (8.27x11", +// the intersection of US Letter and ISO A4) with 1" top and bottom margins and +// 0.5" side margins. The document title (if present) is centered at the top +// of the second and subsequent pages while the current heading and page number +// are provided at the bottom of each page. +// + +#include +#include +#include +#include "mmd.h" +#include +#include + + +// +// Types... +// + +typedef enum docfont_e // Document font enumeration +{ + DOCFONT_REGULAR, // Roboto-Regular + DOCFONT_BOLD, // Roboto-Bold + DOCFONT_ITALIC, // Roboto-Italic + DOCFONT_MONOSPACE, // RobotoMono-Regular + DOCFONT_MAX // Number of fonts +} docfont_t; + +typedef struct docimage_s // Document image info +{ + const char *url; // Reference URL + pdfio_obj_t *obj; // Image object +} docimage_t; + +#define DOCIMAGE_MAX 1000 // Maximum number of images + +typedef struct docdata_s // Document formatting data +{ + pdfio_file_t *pdf; // PDF file + pdfio_rect_t media_box; // Media (page) box + pdfio_rect_t crop_box; // Crop box (for margins) + pdfio_rect_t art_box; // Art box (for markdown content) + pdfio_obj_t *fonts[DOCFONT_MAX]; // Embedded fonts + size_t num_images; // Number of embedded images + docimage_t images[DOCIMAGE_MAX]; // Embedded images + const char *title; // Document title + char *heading; // Current document heading + pdfio_stream_t *st; // Current page stream + double y; // Current position on page +} docdata_t; + + +// +// Macros... +// + +#define in2pt(in) (in * 72.0) +#define mm2pt(mm) (mm * 72.0 / 25.4) + + +// +// Constants... +// + +static const char * const docfont_filenames[] = +{ + "Roboto-Regular.ttf", + "Roboto-Bold.ttf", + "Roboto-Italic.ttf", + "RobotoMono-Regular.ttf" +}; + +static const char * const docfont_names[] = +{ + "FR", + "FB", + "FI", + "FM" +}; + +#define LINE_HEIGHT 1.2 // Multiplier for line height + +#define SIZE_BODY 11.0 // Size of body text (points) +#define SIZE_HEADFOOT 9.0 // Size of header/footer text +#define SIZE_HEADING_1 18.0 // Size of first level heading +#define SIZE_HEADING_2 16.0 // Size of top level heading +#define SIZE_HEADING_3 15.0 // Size of top level heading +#define SIZE_HEADING_4 14.0 // Size of top level heading +#define SIZE_HEADING_5 13.0 // Size of top level heading +#define SIZE_HEADING_6 12.0 // Size of top level heading +#define SIZE_TABLE 10.0 // Size of table text + +#define PAGE_WIDTH mm2pt(210) // Page width in points +#define PAGE_LENGTH in2pt(11) // Page length in points +#define PAGE_LEFT in2pt(0.5) // Left margin in points +#define PAGE_RIGHT (PAGE_WIDTH - in2pt(0.5)) + // Right margin in points +#define PAGE_BOTTOM in2pt(1.0) // Bottom margin in points +#define PAGE_TOP (PAGE_LENGTH - in2pt(1.0)) + // Top margin in points +#define PAGE_HEADER (PAGE_LENGTH - in2pt(0.5)) + // Vertical position of header +#define PAGE_FOOTER in2pt(0.5) // Vertical position of footer + + +// +// 'new_page()' - Start a new page. +// + +static void +new_page(docdata_t *dd) // I - Document data +{ + pdfio_dict_t *page_dict; // Page dictionary + docfont_t fontface; // Current font face + size_t i; // Looping var + char temp[32]; // Temporary string + double width; // Width of fragment + + + // Close the current page... + pdfioStreamClose(dd->st); + + // Prep the new page... + page_dict = pdfioDictCreate(dd->pdf); + + pdfioDictSetRect(page_dict, "MediaBox", &dd->media_box); +// pdfioDictSetRect(page_dict, "CropBox", &dd->crop_box); + pdfioDictSetRect(page_dict, "ArtBox", &dd->art_box); + + for (fontface = DOCFONT_REGULAR; fontface < DOCFONT_MAX; fontface ++) + pdfioPageDictAddFont(page_dict, docfont_names[fontface], dd->fonts[fontface]); + + for (i = 0; i < dd->num_images; i ++) + { + snprintf(temp, sizeof(temp), "I%u", (unsigned)i); + pdfioPageDictAddImage(page_dict, temp, dd->images[i].obj); + } + + dd->st = pdfioFileCreatePage(dd->pdf, page_dict); + + // Add header/footer text + pdfioContentSetFillColorGray(dd->st, 0.333); + pdfioContentSetStrokeColorGray(dd->st, 0.333); + pdfioContentSetTextFont(dd->st, docfont_names[DOCFONT_REGULAR], SIZE_HEADFOOT); + + if (pdfioFileGetNumPages(dd->pdf) > 1 && dd->title) + { + // Show title in header... + width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], dd->title, SIZE_HEADFOOT); + + pdfioContentTextBegin(dd->st); + pdfioContentTextMoveTo(dd->st, dd->crop_box.x1 + 0.5 * (dd->crop_box.x2 - dd->crop_box.x1 - width), dd->crop_box.y2 - SIZE_HEADFOOT); + pdfioContentTextShow(dd->st, /*unicode*/true, dd->title); + pdfioContentTextEnd(dd->st); + + pdfioContentPathMoveTo(dd->st, dd->crop_box.x1, dd->crop_box.y2 - 2 * SIZE_HEADFOOT * LINE_HEIGHT + SIZE_HEADFOOT); + pdfioContentPathLineTo(dd->st, dd->crop_box.x2, dd->crop_box.y2 - 2 * SIZE_HEADFOOT * LINE_HEIGHT + SIZE_HEADFOOT); + pdfioContentStroke(dd->st); + } + + // Show page number and current heading... + pdfioContentPathMoveTo(dd->st, dd->crop_box.x1, dd->crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT); + pdfioContentPathLineTo(dd->st, dd->crop_box.x2, dd->crop_box.y1 + SIZE_HEADFOOT * LINE_HEIGHT); + pdfioContentStroke(dd->st); + + pdfioContentTextBegin(dd->st); + snprintf(temp, sizeof(temp), "%u", (unsigned)pdfioFileGetNumPages(dd->pdf)); + if (pdfioFileGetNumPages(dd->pdf) & 1) + { + // Page number on right... + width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], temp, SIZE_HEADFOOT); + pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width, dd->crop_box.y1); + } + else + { + // Page number on left... + pdfioContentTextMoveTo(dd->st, dd->crop_box.x1, dd->crop_box.y1); + } + + pdfioContentTextShow(dd->st, /*unicode*/true, temp); + pdfioContentTextEnd(dd->st); + + if (dd->heading) + { + pdfioContentTextBegin(dd->st); + if (pdfioFileGetNumPages(dd->pdf) & 1) + { + // Current heading on left... + pdfioContentTextMoveTo(dd->st, dd->crop_box.x1, dd->crop_box.y1); + } + else + { + width = pdfioContentTextMeasure(dd->fonts[DOCFONT_REGULAR], dd->heading, SIZE_HEADFOOT); + pdfioContentTextMoveTo(dd->st, dd->crop_box.x2 - width, dd->crop_box.y1); + } + + pdfioContentTextShow(dd->st, /*unicode*/true, dd->heading); + pdfioContentTextEnd(dd->st); + } + + // The rest of the text will be full black... + pdfioContentSetFillColorGray(dd->st, 0.0); + pdfioContentSetStrokeColorGray(dd->st, 0.0); + + dd->y = dd->art_box.y2; +} + + +// +// 'format_block()' - Format a block of text +// + +static void +format_block(docdata_t *dd, // I - Document data + mmd_t *block, // I - Block to format + docfont_t fontface, // I - Default font + double fontsize, // I - Size of font + double left, // I - Left margin + double right, // I - Right margin + const char *leader) // I - Leader text on the first line +{ + mmd_type_t blocktype; // Block type + mmd_t *current, // Current node + *next; // Next node + mmd_type_t curtype; // Current node type + const char *curtext, // Current text + *cururl; // Current URL, if any + bool curws; // Current whitespace + docfont_t curface, // Current font face + prevface; // Previous font face + double x, y; // Current position + double width, // Width of current fragment + wswidth; // Width of whitespace + + + blocktype = mmdGetType(block); + + if (!dd->st) + new_page(dd); + + if ((y = dd->y - 2.0 * fontsize * LINE_HEIGHT) < dd->art_box.y1) + { + new_page(dd); + y = dd->y - fontsize; + } + + if (leader) + { + // Add leader text on first line... + pdfioContentSetTextFont(dd->st, docfont_names[prevface = fontface], fontsize); + + width = pdfioContentTextMeasure(dd->fonts[fontface], leader, fontsize); + + pdfioContentTextBegin(dd->st); + pdfioContentTextMoveTo(dd->st, left - width, y); + pdfioContentTextShow(dd->st, /*unicode*/true, leader); + } + else + { + // No leader text... + prevface = DOCFONT_MAX; + } + + for (current = mmdGetFirstChild(block), x = left; current; current = next) + { + // Get information about the current node... + curtype = mmdGetType(current); + curtext = mmdGetText(current); + cururl = mmdGetURL(current); + curws = mmdGetWhitespace(current); + +// fprintf(stderr, "current=%p, curtype=%d, curtext=\"%s\", cururl=\"%s\", curws=%s\n", (void *)current, curtype, curtext, cururl, curws ? "true" : "false"); + + // Figure out the next node under this block... + if ((next = mmdGetFirstChild(current)) == NULL) + { + if ((next = mmdGetNextSibling(current)) == NULL) + { + mmd_t *parent; // Parent node + + if ((parent = mmdGetParent(current)) != block) + { + while ((next = mmdGetNextSibling(parent)) == NULL) + { + if ((parent = mmdGetParent(parent)) == block) + break; + } + } + } + } + + // Process the node... + if (!curtext) + continue; + + if (curtype == MMD_TYPE_EMPHASIZED_TEXT) + curface = DOCFONT_ITALIC; + else if (curtype == MMD_TYPE_STRONG_TEXT) + curface = DOCFONT_BOLD; + else if (curtype == MMD_TYPE_CODE_TEXT) + curface = DOCFONT_MONOSPACE; + else + curface = fontface; + + width = pdfioContentTextMeasure(dd->fonts[curface], curtext, fontsize); + if (curws) + wswidth = pdfioContentTextMeasure(dd->fonts[curface], " ", fontsize); + else + wswidth = 0.0; + + if (x > left && (x + width + wswidth) >= right) + { + // New line... + x = left; + y -= fontsize * LINE_HEIGHT; + + if (prevface != DOCFONT_MAX) + pdfioContentTextEnd(dd->st); + + prevface = DOCFONT_MAX; + + if (y < dd->art_box.y1) + { + // New page... + new_page(dd); + + y = dd->y - fontsize * LINE_HEIGHT; + } + } + + if (curface != prevface) + { + if (prevface != DOCFONT_MAX) + pdfioContentTextEnd(dd->st); + + pdfioContentSetTextFont(dd->st, docfont_names[prevface = curface], fontsize); + pdfioContentTextBegin(dd->st); + pdfioContentTextMoveTo(dd->st, x, y); + } + + if (x > left && curws) + { + pdfioContentTextShowf(dd->st, /*unicode*/true, " %s", curtext); + x += width + wswidth; + } + else + { + pdfioContentTextShow(dd->st, /*unicode*/true, curtext); + x += width; + } + + if (blocktype == MMD_TYPE_CODE_BLOCK) + { + // Force a new line... + x = left; + y -= fontsize * LINE_HEIGHT; + + if (prevface != DOCFONT_MAX) + pdfioContentTextEnd(dd->st); + + prevface = DOCFONT_MAX; + + if (y < dd->art_box.y1) + { + // New page... + new_page(dd); + + y = dd->y - fontsize * LINE_HEIGHT; + } + } + } + + // End the current text block and save out position on the page... + if (prevface != DOCFONT_MAX) + pdfioContentTextEnd(dd->st); + + dd->y = y; +} + + +// +// 'format_doc()' - Format a document. +// + +static void +format_doc(docdata_t *dd, // I - Document data + mmd_t *doc, // I - Document node to format + double left, // I - Left margin + double right) // I - Right margin +{ + int i; // Child number + mmd_type_t doctype; // Document node type + mmd_t *current; // Current node + mmd_type_t curtype; // Current node type + char leader[32]; // Leader + static const double heading_sizes[] = // Heading font sizes + { + SIZE_HEADING_1, + SIZE_HEADING_2, + SIZE_HEADING_3, + SIZE_HEADING_4, + SIZE_HEADING_5, + SIZE_HEADING_6 + }; + + + doctype = mmdGetType(doc); + + for (i = 1, current = mmdGetFirstChild(doc); current; i ++, current = mmdGetNextSibling(current)) + { + switch (curtype = mmdGetType(current)) + { + default : + break; + + case MMD_TYPE_BLOCK_QUOTE : + format_doc(dd, current, left + 36.0, right - 36.0); + break; + + case MMD_TYPE_ORDERED_LIST : + case MMD_TYPE_UNORDERED_LIST : + format_doc(dd, current, left + 36.0, right); + break; + + case MMD_TYPE_LIST_ITEM : + if (doctype == MMD_TYPE_ORDERED_LIST) + { + snprintf(leader, sizeof(leader), "%d. ", i); + format_block(dd, current, DOCFONT_REGULAR, SIZE_BODY, left, right, leader); + } + else + { + format_block(dd, current, DOCFONT_REGULAR, SIZE_BODY, left, right, /*leader*/"• "); + } + break; + + case MMD_TYPE_HEADING_1 : + case MMD_TYPE_HEADING_2 : + case MMD_TYPE_HEADING_3 : + case MMD_TYPE_HEADING_4 : + case MMD_TYPE_HEADING_5 : + case MMD_TYPE_HEADING_6 : + if (dd->heading) + free(dd->heading); + + dd->heading = mmdCopyAllText(current); + + format_block(dd, current, DOCFONT_BOLD, heading_sizes[curtype - MMD_TYPE_HEADING_1], left, right, /*leader*/NULL); + break; + + case MMD_TYPE_PARAGRAPH : + format_block(dd, current, DOCFONT_REGULAR, SIZE_BODY, left, right, /*leader*/NULL); + break; + + case MMD_TYPE_CODE_BLOCK : + format_block(dd, current, DOCFONT_MONOSPACE, SIZE_BODY, left + 36.0, right, /*leader*/NULL); + break; + } + } +} + + +// +// 'output_cb()' - Write PDF data to the standard output... +// + +static ssize_t // O - Number of bytes written +output_cb(void *output_cbdata, // I - Callback data (not used) + const void *buffer, // I - Buffer to write + size_t bytes) // I - Number of bytes to write +{ + (void)output_cbdata; + + return ((ssize_t)fwrite(buffer, 1, bytes, stdout)); +} + + +// +// 'main()' - Convert markdown to PDF. +// + +int // O - Exit status +main(int argc, // I - Number of command-line arguments + char *argv[]) // I - Command-line arguments +{ + docdata_t dd; // Document data + docfont_t fontface; // Current font + mmd_t *doc; // Markdown document + + + // Get the markdown file from the command-line... + if (argc != 2) + { + fputs("Usage: md2pdf FILENANE.md >FILENAME.pdf\n", stderr); + return (1); + } + + if ((doc = mmdLoad(/*root*/NULL, argv[1])) == NULL) + return (1); + + // Initialize the document data + memset(&dd, 0, sizeof(dd)); + + dd.media_box.x2 = PAGE_WIDTH; + dd.media_box.y2 = PAGE_LENGTH; + + dd.crop_box.x1 = PAGE_LEFT; + dd.crop_box.y1 = PAGE_FOOTER; + dd.crop_box.x2 = PAGE_RIGHT; + dd.crop_box.y2 = PAGE_HEADER; + + dd.art_box.x1 = PAGE_LEFT; + dd.art_box.y1 = PAGE_BOTTOM; + dd.art_box.x2 = PAGE_RIGHT; + dd.art_box.y2 = PAGE_TOP; + + dd.title = mmdGetMetadata(doc, "title"); + + // Output a PDF file to the standard output... +#ifdef _WIN32 + setmode(1, O_BINARY); // Force binary output on Windows +#endif // _WIN32 + + if ((dd.pdf = pdfioFileCreateOutput(output_cb, /*output_cbdata*/NULL, /*version*/NULL, /*media_box*/NULL, /*crop_box*/NULL, /*error_cb*/NULL, /*error_data*/NULL)) == NULL) + return (1); + + // Add fonts... + for (fontface = DOCFONT_REGULAR; fontface < DOCFONT_MAX; fontface ++) + { + if ((dd.fonts[fontface] = pdfioFileCreateFontObjFromFile(dd.pdf, docfont_filenames[fontface], /*unicode*/true)) == NULL) + return (1); + } + + // Parse the markdown document... + format_doc(&dd, doc, dd.art_box.x1, dd.art_box.x2); + + // Close the PDF and return... + pdfioStreamClose(dd.st); + pdfioFileClose(dd.pdf); + + mmdFree(doc); + + return (0); +} diff --git a/examples/mmd.c b/examples/mmd.c new file mode 100644 index 0000000..b8f8e4b --- /dev/null +++ b/examples/mmd.c @@ -0,0 +1,2381 @@ +// +// Implementation of miniature markdown library. +// +// https://www.msweet.org/mmd +// +// Copyright © 2017-2024 by Michael R Sweet. +// +// Licensed under Apache License v2.0. See the file "LICENSE" for more +// information. +// + +// +// Define DEBUG to get debug printf messages to stderr. +// + +#define DEBUG 0 +#if DEBUG > 0 +# define DEBUG_printf(...) fprintf(stderr, __VA_ARGS__) +# define DEBUG_puts(s) fputs(s, stderr); +#else +# define DEBUG_printf(...) +# define DEBUG_puts(s) +#endif // DEBUG > 0 +#if DEBUG > 1 +# define DEBUG2_printf(...) fprintf(stderr, __VA_ARGS__) +# define DEBUG2_puts(s) fputs(s, stderr); +#else +# define DEBUG2_printf(...) +# define DEBUG2_puts(s) +#endif // DEBUG > 1 + + +// +// Beginning with VC2005, Microsoft breaks ISO C and POSIX conformance +// by deprecating a number of functions in the name of security, even +// when many of the affected functions are otherwise completely secure. +// The _CRT_SECURE_NO_DEPRECATE definition ensures that we won't get +// warnings from their use... +// +// Then Microsoft decided that they should ignore this in VC2008 and use +// yet another define (_CRT_SECURE_NO_WARNINGS) instead... +// + +#define _CRT_SECURE_NO_DEPRECATE +#define _CRT_SECURE_NO_WARNINGS +#if _WIN32 +# define strcasecmp stricmp +#endif // _WIN32 + + +#include "mmd.h" +#include +#include +#include + + +// +// Microsoft renames the POSIX functions to _name, and introduces a broken +// compatibility layer using the original names. As a result, random crashes +// can occur when, for example, strdup() allocates memory from a different heap +// than used by malloc() and free(). +// +// To avoid moronic problems like this, we #define the POSIX function names to +// the corresponding non-standard Microsoft names. +// + +#ifdef _WIN32 +# define snprintf _snprintf +# define strdup _strdup +#endif // _WIN32 + + +// +// Private structures... +// + +struct _mmd_s +{ + mmd_type_t type; // Node type + bool whitespace; // Leading whitespace? + char *text, // Text + *url, // Reference URL (image/link/etc.) + *extra; // Title, language name, etc. + mmd_t *parent, // Parent node + *first_child, // First child node + *last_child, // Last child node + *prev_sibling, // Previous sibling node + *next_sibling; // Next sibling node +}; + +typedef struct _mmd_filebuf_s // Buffered file +{ + mmd_iocb_t cb; // Read callback function + void *cbdata; // Read callback data + char buffer[65536], // Buffer + *bufptr, // Pointer into buffer + *bufend; // End of buffer +} _mmd_filebuf_t; + +typedef struct _mmd_ref_s // Reference link +{ + char *name, // Name of reference + *url, // Reference URL + *title; // Title, if any + size_t num_pending; // Number of pending nodes + mmd_t **pending; // Pending nodes +} _mmd_ref_t; + +typedef struct _mmd_doc_s // Markdown document +{ + mmd_t *root; // Root node + size_t num_references; // Number of references + _mmd_ref_t *references; // References +} _mmd_doc_t; + +typedef struct _mmd_stack_s // Markdown block stack +{ + mmd_t *parent; // Parent node + int indent; // Indentation + char fence; // Code fence character + size_t fencelen; // Length of code fence +} _mmd_stack_t; + + +// +// Local globals... +// + +static mmd_option_t mmd_options = MMD_OPTION_ALL; + // Markdown extensions to support + + +// +// Local functions... +// + +static mmd_t *mmd_add(mmd_t *parent, mmd_type_t type, int whitespace, char *text, char *url); +static void mmd_free(mmd_t *node); +static int mmd_has_continuation(const char *line, _mmd_filebuf_t *file, int indent); +static size_t mmd_iocb_file(FILE *fp, char *buffer, size_t bytes); +static size_t mmd_iocb_string(const char **s, char *buffer, size_t bytes); +static size_t mmd_is_chars(const char *lineptr, const char *chars, size_t minchars); +static size_t mmd_is_codefence(char *lineptr, char fence, size_t fencelen, char **language); +static bool mmd_is_table(_mmd_filebuf_t *file, int indent); +static void mmd_parse_inline(_mmd_doc_t *doc, mmd_t *parent, char *lineptr); +static char *mmd_parse_link(_mmd_doc_t *doc, char *lineptr, char **text, char **url, char **title, char **refname); +static void mmd_read_buffer(_mmd_filebuf_t *file); +static char *mmd_read_line(_mmd_filebuf_t *file, char *line, size_t linesize); +static void mmd_ref_add(_mmd_doc_t *doc, mmd_t *node, const char *name, const char *url, const char *title); +static _mmd_ref_t *mmd_ref_find(_mmd_doc_t *doc, const char *name); +static void mmd_remove(mmd_t *node); +#if DEBUG +static const char *mmd_type_string(mmd_type_t type); +#endif // DEBUG + + +// +// 'mmdCopyAllText()' - Make a copy of all the text under a given node. +// +// The returned string must be freed using free(). +// + +char * // O - Copied string +mmdCopyAllText(mmd_t *node) // I - Parent node +{ + char *all = NULL, // String buffer + *allptr = NULL, // Pointer into string buffer + *temp; // Temporary pointer + size_t allsize = 0, // Size of "all" buffer + textlen; // Length of "text" string + mmd_t *current, // Current node + *next; // Next node + + + current = mmdGetFirstChild(node); + + while (current != node) + { + if (current->text) + { + // Append this node's text to the string... + textlen = strlen(current->text); + allsize += textlen + (size_t)current->whitespace; + temp = realloc(all, allsize); + + if (!temp) + { + free(all); + return (NULL); + } + + allptr = temp + (allptr - all); + all = temp; + + if (current->whitespace) + *allptr++ = ' '; + + memcpy(allptr, current->text, textlen); + allptr += textlen; + } + + // Find the next logical node... + if ((next = mmdGetNextSibling(current)) == NULL) + { + next = mmdGetParent(current); + + while (next && next != node && mmdGetNextSibling(next) == NULL) + next = mmdGetParent(next); + + if (next != node) + next = mmdGetNextSibling(next); + } + + current = next; + } + + if (allptr) + *allptr = '\0'; + + return (all); +} + + +// +// 'mmdFree()' - Free a markdown tree. +// + +void +mmdFree(mmd_t *node) // I - First node +{ + mmd_t *current, // Current node + *next; // Next node + + + mmd_remove(node); + + for (current = node->first_child; current; current = next) + { + // Get the next node... + if ((next = current->first_child) != NULL) + { + // Free parent nodes after child nodes have been freed... + current->first_child = NULL; + continue; + } + + if ((next = current->next_sibling) == NULL) + { + // Next node is the parent, which we'll free as needed... + if ((next = current->parent) == node) + next = NULL; + } + + // Free child... + mmd_free(current); + } + + // Then free the memory used by the parent node... + mmd_free(node); +} + + +// +// 'mmdGetExtra()' - Get extra text (title, language, etc.) associated with a +// node. +// + +const char * // O - Extra text or NULL if none +mmdGetExtra(mmd_t *node) // I - Node +{ + return (node ? node->extra : NULL); +} + + +// +// 'mmdGetFirstChild()' - Return the first child of a node, if any. +// + +mmd_t * // O - First child or @code NULL@ if none +mmdGetFirstChild(mmd_t *node) // I - Node +{ + return (node ? node->first_child : NULL); +} + + +// +// 'mmdGetLastChild()' - Return the last child of a node, if any. +// + +mmd_t * // O - Last child or @code NULL@ if none +mmdGetLastChild(mmd_t *node) // I - Node +{ + return (node ? node->last_child : NULL); +} + + +// +// 'mmdGetMetadata()' - Return the metadata for the given keyword. +// + +const char * // O - Value or @code NULL@ if none +mmdGetMetadata(mmd_t *doc, // I - Document + const char *keyword) // I - Keyword +{ + mmd_t *metadata, // Metadata node + *current; // Current node + char prefix[256]; // Prefix string + size_t prefix_len; // Length of prefix string + const char *value; // Pointer to value + + + if (!doc || (metadata = doc->first_child) == NULL || metadata->type != MMD_TYPE_METADATA) + return (NULL); + + snprintf(prefix, sizeof(prefix), "%s:", keyword); + prefix_len = strlen(prefix); + + for (current = metadata->first_child; current; current = current->next_sibling) + { + if (strncmp(current->text, prefix, prefix_len)) + continue; + + value = current->text + prefix_len; + while (isspace(*value & 255)) + value ++; + + return (value); + } + + return (NULL); +} + + +// +// 'mmdGetNextSibling()' - Return the next sibling of a node, if any. +// + +mmd_t * // O - Next sibling or @code NULL@ if none +mmdGetNextSibling(mmd_t *node) // I - Node +{ + return (node ? node->next_sibling : NULL); +} + + +// +// 'mmdGetOptions()' - Get the enabled markdown processing options/extensions. +// + +mmd_option_t // O - Enabled options +mmdGetOptions(void) +{ + return (mmd_options); +} + + +// +// 'mmdGetParent()' - Return the parent of a node, if any. +// + +mmd_t * // O - Parent node or @code NULL@ if none +mmdGetParent(mmd_t *node) // I - Node +{ + return (node ? node->parent : NULL); +} + + +// +// 'mmdGetPrevSibling()' - Return the previous sibling of a node, if any. +// + +mmd_t * // O - Previous sibling or @code NULL@ if none +mmdGetPrevSibling(mmd_t *node) // I - Node +{ + return (node ? node->prev_sibling : NULL); +} + + +// +// 'mmdGetText()' - Return the text associated with a node, if any. +// + +const char * // O - Text or @code NULL@ if none +mmdGetText(mmd_t *node) // I - Node +{ + return (node ? node->text : NULL); +} + + +// +// 'mmdGetType()' - Return the type of a node, if any. +// + +mmd_type_t // O - Type or @code MMD_TYPE_NONE@ if none +mmdGetType(mmd_t *node) // I - Node +{ + return (node ? node->type : MMD_TYPE_NONE); +} + + +// +// 'mmdGetURL()' - Return the URL associated with a node, if any. +// + +const char * // O - URL or @code NULL@ if none +mmdGetURL(mmd_t *node) // I - Node +{ + return (node ? node->url : NULL); +} + + +// +// 'mmdGetWhitespace()' - Return whether whitespace preceded a node. +// + +bool // O - `true` for leading whitespace, `false` for none +mmdGetWhitespace(mmd_t *node) // I - Node +{ + return (node ? node->whitespace : false); +} + + +// +// 'mmdIsBlock()' - Return whether the node is a block. +// + +bool // O - `true` for block nodes, `false` otherwise +mmdIsBlock(mmd_t *node) // I - Node +{ + return (node ? node->type < MMD_TYPE_NORMAL_TEXT : false); +} + + +// +// 'mmdLoad()' - Load a markdown file into nodes. +// + +mmd_t * // O - Root node in markdown +mmdLoad(mmd_t *root, // I - Root node for document or `NULL` for a new document + const char *filename) // I - File to load +{ + FILE *fp; // File + + + // Open the file and load the document... + if ((fp = fopen(filename, "r")) == NULL) + return (NULL); + + root = mmdLoadIO(root, (mmd_iocb_t)mmd_iocb_file, fp); + + // Close and return... + fclose(fp); + + return (root); +} + + +// +// 'mmdLoadFile()' - Load a markdown file into nodes from a stdio file. +// + +mmd_t * // O - First node in markdown +mmdLoadFile(mmd_t *root, // I - Root node for document or `NULL` for a new document + FILE *fp) // I - File to load +{ + return (mmdLoadIO(root, (mmd_iocb_t)mmd_iocb_file, fp)); +} + + +// +// 'mmdLoadIO()' - Load a markdown file into nodes using a callback. +// + +mmd_t * // O - First node in markdown +mmdLoadIO(mmd_t *root, // I - Root node for document or `NULL` for a new document + mmd_iocb_t cb, // I - Read callback function + void *cbdata) // I - Read callback data +{ + size_t i; // Looping var + _mmd_doc_t doc; // Document + _mmd_ref_t *reference; // Current reference + mmd_t *block = NULL; // Current block + mmd_type_t type; // Type for line + _mmd_filebuf_t file; // File buffer + char line[8192], // Read line + *linestart, // Start of line + *lineptr, // Pointer into line + *lineend, // End of line + *temp; // Temporary pointer + int newindent; // New indentation + int blank_code = 0; // Saved indented blank code line + mmd_type_t columns[256]; // Alignment of table columns + int num_columns = 0, // Number of columns in table + rows = 0; // Number of rows in table + _mmd_stack_t stack[32], // Block stack + *stackptr = stack; // Pointer to top of stack + + + // Create an empty document as needed... + DEBUG_printf("mmdLoadIO: mmd_options=%d%s%s\n", mmd_options, (mmd_options & MMD_OPTION_METADATA) ? " METADATA" : "", (mmd_options & MMD_OPTION_TABLES) ? " TABLES" : ""); + + memset(&doc, 0, sizeof(doc)); + + if (root) + doc.root = root; + else + doc.root = mmd_add(NULL, MMD_TYPE_DOCUMENT, 0, NULL, NULL); + + if (!doc.root) + return (NULL); + + // Initialize the block stack... + memset(stack, 0, sizeof(stack)); + stackptr->parent = doc.root; + + // Read lines until end-of-file... + memset(&file, 0, sizeof(file)); + file.cb = cb; + file.cbdata = cbdata; + +#ifdef __clang_analyzer__ + memset(line, 0, sizeof(line)); +#endif // __clang_analyzer__ + + while ((lineptr = mmd_read_line(&file, line, sizeof(line))) != NULL) + { + DEBUG_printf("%03d %-12s %s", stackptr->indent, mmd_type_string(stackptr->parent->type) + 9, lineptr); +#if DEBUG + if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK) + DEBUG2_printf(" blank_code=%d\n", blank_code); +#endif // DEBUG + + linestart = lineptr; + + while (isspace(*lineptr & 255)) + lineptr ++; + + DEBUG2_printf(" line indent=%d\n", (int)(lineptr - line)); + DEBUG2_printf(" stackptr=%d\n", (int)(stackptr - stack)); + + if (!*lineptr && stackptr->parent->type == MMD_TYPE_TABLE) + { + DEBUG2_puts("END TABLE\n"); + stackptr --; + block = NULL; + continue; + } + else if (*lineptr == '>' && (lineptr - linestart) < 4) + { + // Block quote. See if there is an existing blockquote... + DEBUG_printf(" BLOCKQUOTE (stackptr=%ld)\n", stackptr - stack); + + if (stackptr == stack || stack[1].parent->type != MMD_TYPE_BLOCK_QUOTE) + { + block = NULL; + stackptr = stack + 1; + stackptr->parent = mmd_add(doc.root, MMD_TYPE_BLOCK_QUOTE, 0, NULL, NULL); + stackptr->indent = 2; + stackptr->fence = '\0'; + } + + // Skip whitespace after the ">"... + lineptr ++; + if (isspace(*lineptr & 255)) + lineptr ++; + + linestart = lineptr; + + while (isspace(*lineptr & 255)) + lineptr ++; + } + else if (*lineptr != '>' && stackptr > stack && stack[1].parent->type == MMD_TYPE_BLOCK_QUOTE && (!block || *lineptr == '\n' || mmd_is_chars(lineptr, "- \t", 3) || mmd_is_chars(lineptr, "_ \t", 3) || mmd_is_chars(lineptr, "* \t", 3))) + { + // Not a lazy continuation so terminate this block quote... + DEBUG_puts(" Terminating BLOCKQUOTE\n"); + block = NULL; + stackptr = stack; + } + + // Now handle all other markup not related to block quotes... + DEBUG2_printf(" stackptr=%d (%s), block=%p (%s)\n", (int)(stackptr - stack), mmd_type_string(stackptr->parent->type) + 9, block, block ? mmd_type_string(block->type) + 9 : ""); + DEBUG2_printf(" strchr(lineptr, '|')=%p, mmd_is_table(&file, stackptr->indent)=%d\n", strchr(lineptr, '|'), mmd_is_table(&file, stackptr->indent)); + DEBUG2_printf(" linestart=%d, lineptr=%d\n", (int)(linestart - line), (int)(lineptr - line)); + DEBUG2_printf(" mmd_is_chars(lineptr, \"-\", 1)=%d\n", (int)mmd_is_chars(lineptr, "-", 1)); + DEBUG2_printf(" mmd_is_chars(lineptr, \"=\", 1)=%d\n", (int)mmd_is_chars(lineptr, "=", 1)); + + if ((lineptr - line - stackptr->indent) < 4 && ((stackptr->parent->type != MMD_TYPE_CODE_BLOCK && !stackptr->fence && mmd_is_codefence(lineptr, '\0', 0, NULL)) || (stackptr->fence && mmd_is_codefence(lineptr, stackptr->fence, stackptr->fencelen, NULL)))) + { + // Code fence... + DEBUG2_printf("stackptr->indent=%d, fence='%c', fencelen=%d\n", stackptr->indent, stackptr->fence, (int)stackptr->fencelen); + + if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK) + { + DEBUG2_puts("Ending code block...\n"); + stackptr --; + } + else if (stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) + { + char *language; // Language name, if any + + DEBUG2_printf("Starting code block with fence '%c'.\n", *lineptr); + + block = NULL; + stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_CODE_BLOCK, 0, NULL, NULL); + stackptr[1].indent = lineptr - line; + stackptr[1].fence = *lineptr; + stackptr[1].fencelen = mmd_is_codefence(lineptr, '\0', 0, &language); + stackptr ++; + + DEBUG2_printf("Code language=\"%s\"\n", language); + + if (language) + stackptr->parent->extra = strdup(language); + + blank_code = 0; + } + continue; + } + else if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK && (lineptr - line) >= stackptr->indent) + { + if (line[stackptr->indent] == '\n') + { + blank_code ++; + } + else + { + while (blank_code > 0) + { + mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, "\n", NULL); + blank_code --; + } + + mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, line + stackptr->indent, NULL); + } + continue; + } + else if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK && stackptr->fence) + { + DEBUG2_printf(" fence='%c'\n", stackptr->fence); + + if (!*lineptr) + { + blank_code ++; + } + else + { + while (blank_code > 0) + { + mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, "\n", NULL); + blank_code --; + } + + mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, lineptr, NULL); + } + continue; + } + else if (!strncmp(lineptr, "---", 3) && doc.root->first_child == NULL && (mmd_options & MMD_OPTION_METADATA)) + { + // Document metadata... + block = mmd_add(doc.root, MMD_TYPE_METADATA, 0, NULL, NULL); + + while ((lineptr = mmd_read_line(&file, line, sizeof(line))) != NULL) + { + while (isspace(*lineptr & 255)) + lineptr ++; + + if (!strncmp(lineptr, "---", 3) || !strncmp(lineptr, "...", 3)) + break; + + lineend = lineptr + strlen(lineptr) - 1; + if (lineend > lineptr && *lineend == '\n') + *lineend = '\0'; + + mmd_add(block, MMD_TYPE_METADATA_TEXT, 0, lineptr, NULL); + } + continue; + } + else if (block && block->type == MMD_TYPE_PARAGRAPH && (lineptr - linestart) < 4 && (lineptr - line) >= stackptr->indent && (mmd_is_chars(lineptr, "-", 1) || mmd_is_chars(lineptr, "=", 1))) + { + int ch = *lineptr; + + DEBUG_puts(" SETEXT HEADING\n"); + + lineptr += 3; + while (*lineptr == ch) + lineptr ++; + while (isspace(*lineptr & 255)) + lineptr ++; + + if (!*lineptr) + { + if (ch == '=') + block->type = MMD_TYPE_HEADING_1; + else + block->type = MMD_TYPE_HEADING_2; + + block = NULL; + continue; + } + + type = MMD_TYPE_PARAGRAPH; + } + else if ((lineptr - linestart) < 4 && (mmd_is_chars(lineptr, "- \t", 3) || mmd_is_chars(lineptr, "_ \t", 3) || mmd_is_chars(lineptr, "* \t", 3))) + { + DEBUG_puts(" THEMATIC BREAK\n"); + + if (line[0] == '>') + stackptr = stack + 1; + else + stackptr = stack; + + mmd_add(stackptr->parent, MMD_TYPE_THEMATIC_BREAK, 0, NULL, NULL); +// type = MMD_TYPE_PARAGRAPH; + block = NULL; + continue; + } + else if ((*lineptr == '-' || *lineptr == '+' || *lineptr == '*') && (lineptr[1] == '\t' || lineptr[1] == ' ')) + { + // Bulleted list... + DEBUG_puts(" UNORDERED LIST\n"); + + lineptr += 2; + linestart = lineptr; + newindent = linestart - line; + + while (isspace(*lineptr & 255)) + lineptr ++; + + while (stackptr > stack && stackptr->indent > newindent) + stackptr --; + + if (stackptr > stack && stackptr->parent->type == MMD_TYPE_LIST_ITEM && stackptr->indent == newindent) + stackptr --; + + if (stackptr > stack && stackptr->parent->type == MMD_TYPE_ORDERED_LIST && stackptr->indent == newindent) + stackptr --; + + if (stackptr > stack && stackptr->parent->type == MMD_TYPE_BLOCK_QUOTE && line[0] != '>') + stackptr --; + + if (stackptr->parent->type != MMD_TYPE_UNORDERED_LIST && stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) + { + stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_UNORDERED_LIST, 0, NULL, NULL); + stackptr[1].indent = linestart - line; + stackptr[1].fence = '\0'; + stackptr ++; + } + + if (stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) + { + stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_LIST_ITEM, 0, NULL, NULL); + stackptr[1].indent = linestart - line; + stackptr[1].fence = '\0'; + stackptr ++; + } + + type = MMD_TYPE_PARAGRAPH; + block = NULL; + + if (mmd_is_chars(lineptr, "- \t", 3) || mmd_is_chars(lineptr, "_ \t", 3) || mmd_is_chars(lineptr, "* \t", 3)) + { + mmd_add(stackptr->parent, MMD_TYPE_THEMATIC_BREAK, 0, NULL, NULL); + continue; + } + } + else if (isdigit(*lineptr & 255)) + { + // Ordered list? + DEBUG_puts(" ORDERED LIST?\n"); + + temp = lineptr + 1; + + while (isdigit(*temp & 255)) + temp ++; + + if ((*temp == '.' || *temp == ')') && (temp[1] == '\t' || temp[1] == ' ')) + { + // Yes, ordered list. + lineptr = temp + 2; + linestart = lineptr; + newindent = linestart - line; + + while (isspace(*lineptr & 255)) + lineptr ++; + + while (stackptr > stack && stackptr->indent > newindent) + stackptr --; + + if (stackptr->parent->type == MMD_TYPE_LIST_ITEM && stackptr->indent == newindent) + stackptr --; + + if (stackptr->parent->type == MMD_TYPE_UNORDERED_LIST && stackptr->indent == newindent) + stackptr --; + + if (stackptr->parent->type == MMD_TYPE_BLOCK_QUOTE && line[0] != '>') + stackptr --; + + if (stackptr->parent->type != MMD_TYPE_ORDERED_LIST && stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) + { + stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_ORDERED_LIST, 0, NULL, NULL); + stackptr[1].indent = linestart - line; + stackptr[1].fence = '\0'; + stackptr ++; + } + + if (stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) + { + stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_LIST_ITEM, 0, NULL, NULL); + stackptr[1].indent = linestart - line; + stackptr[1].fence = '\0'; + stackptr ++; + } + + type = MMD_TYPE_PARAGRAPH; + block = NULL; + } + else + { + // No, just a regular paragraph... + type = block ? block->type : MMD_TYPE_PARAGRAPH; + } + } + else if (*lineptr == '#' && (lineptr - linestart) < 4) + { + // Heading, count the number of '#' for the heading level... + DEBUG_puts(" HEADING?\n"); + + newindent = lineptr - line; + temp = lineptr + 1; + + while (*temp == '#') + temp ++; + + if ((temp - lineptr) <= 6 && isspace(*temp & 255)) + { + // Heading 1-6... + type = MMD_TYPE_HEADING_1 + (temp - lineptr - 1); + block = NULL; + + // Skip whitespace after "#"... + lineptr = temp; + while (isspace(*lineptr & 255)) + lineptr ++; + + linestart = lineptr; + + // Strip trailing "#" characters and whitespace... + temp = lineptr + strlen(lineptr) - 1; + while (temp > lineptr && isspace(*temp & 255)) + *temp-- = '\0'; + while (temp > lineptr && *temp == '#') + temp --; + if (isspace(*temp & 255)) + { + while (temp > lineptr && isspace(*temp & 255)) + *temp-- = '\0'; + } + else if (temp == lineptr) + *temp = '\0'; + + while (stackptr > stack && stackptr->indent > newindent) + stackptr --; + + block = mmd_add(stackptr->parent, type, 0, NULL, NULL); + } + else + { + // More than 6 #'s, just treat as a paragraph... + type = MMD_TYPE_PARAGRAPH; + } + } + else if (block && block->type >= MMD_TYPE_HEADING_1 && block->type <= MMD_TYPE_HEADING_6) + { + DEBUG_puts(" PARAGRAPH\n"); + + type = MMD_TYPE_PARAGRAPH; + block = NULL; + } + else if (!block) + { + type = MMD_TYPE_PARAGRAPH; + + if (lineptr == line && stackptr->parent->type != MMD_TYPE_TABLE) + stackptr = stack; + } + else + type = block->type; + + if (!*lineptr) + { + if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK) + blank_code ++; + else if (stackptr->parent->type == MMD_TYPE_BLOCK_QUOTE && line[0] != '>') + stackptr --; + + block = NULL; + continue; + } + else if (!strcmp(lineptr, "+")) + { + if (block) + { + if (block->type == MMD_TYPE_LIST_ITEM) + block = mmd_add(block, MMD_TYPE_PARAGRAPH, 0, NULL, NULL); + else if (block->parent->type == MMD_TYPE_LIST_ITEM) + block = mmd_add(block->parent, MMD_TYPE_PARAGRAPH, 0, NULL, NULL); + else + block = NULL; + } + continue; + } + else if ((mmd_options & MMD_OPTION_TABLES) && strchr(lineptr, '|') && (stackptr->parent->type == MMD_TYPE_TABLE || mmd_is_table(&file, stackptr->indent))) + { + // Table... + int col; // Current column + char *start, // Start of column/cell + *end; // End of column/cell + mmd_t *row = NULL, // Current row + *cell; // Current cell + + DEBUG2_printf("TABLE stackptr->parent=%p (%d), rows=%d\n", stackptr->parent, stackptr->parent->type, rows); + + if (stackptr->parent->type != MMD_TYPE_TABLE && stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) + { + DEBUG2_printf("ADDING NEW TABLE to %p (%s)\n", stackptr->parent, mmd_type_string(stackptr->parent->type)); + + stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_TABLE, 0, NULL, NULL); + stackptr[1].indent = stackptr->indent; + stackptr[1].fence = '\0'; + stackptr ++; + + block = mmd_add(stackptr->parent, MMD_TYPE_TABLE_HEADER, 0, NULL, NULL); + + for (col = 0; col < (int)(sizeof(columns) / sizeof(columns[0])); col ++) + columns[col] = MMD_TYPE_TABLE_BODY_CELL_LEFT; + + num_columns = 0; + rows = -1; + } + else if (rows > 0) + { + if (rows == 1) + block = mmd_add(stackptr->parent, MMD_TYPE_TABLE_BODY, 0, NULL, NULL); + } + else + block = NULL; + + if (block) + row = mmd_add(block, MMD_TYPE_TABLE_ROW, 0, NULL, NULL); + + if (*lineptr == '|') + lineptr ++; // Skip leading pipe + + if ((end = lineptr + strlen(lineptr) - 1) > lineptr) + { + while ((*end == '\n' || *end == 'r') && end > lineptr) + end --; + + if (end > lineptr && *end == '|') + *end = '\0'; // Truncate trailing pipe + } + + for (col = 0; lineptr && *lineptr && col < (int)(sizeof(columns) / sizeof(columns[0])); col ++) + { + // Get the bounds of the stackptr->parent cell... + start = lineptr; + if ((lineptr = strchr(lineptr + 1, '|')) != NULL) + *lineptr++ = '\0'; + + if (block) + { + // Add a cell to this row... + if (block->type == MMD_TYPE_TABLE_HEADER) + cell = mmd_add(row, MMD_TYPE_TABLE_HEADER_CELL, 0, NULL, NULL); + else + cell = mmd_add(row, columns[col], 0, NULL, NULL); + + mmd_parse_inline(&doc, cell, start); + } + else + { + // Process separator row for alignment... + while (isspace(*start & 255)) + start ++; + + for (end = start + strlen(start) - 1; end > start && isspace(*end & 255); end --) + ; // Find the last non-space character + + if (*start == ':' && *end == ':') + columns[col] = MMD_TYPE_TABLE_BODY_CELL_CENTER; + else if (*end == ':') + columns[col] = MMD_TYPE_TABLE_BODY_CELL_RIGHT; + + DEBUG2_printf("COLUMN %d SEPARATOR=\"%s\", TYPE=%d\n", col, start, columns[col]); + } + } + + // Make sure the table is balanced... + if (col > num_columns) + { + num_columns = col; + } + else if (block && block->type != MMD_TYPE_TABLE_HEADER) + { + while (col < num_columns) + { + mmd_add(row, columns[col], 0, NULL, NULL); + col ++; + } + } + + rows ++; + continue; + } + else if (stackptr->parent->type == MMD_TYPE_TABLE) + { + DEBUG2_puts("END TABLE\n"); + stackptr --; + block = NULL; + } + + if (stackptr->parent->type != MMD_TYPE_CODE_BLOCK && (!block || block->type == MMD_TYPE_CODE_BLOCK) && (lineptr - linestart) >= (stackptr->indent + 4)) + { + // Indented code block. + if (stackptr->parent->type != MMD_TYPE_CODE_BLOCK && stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) + { + stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_CODE_BLOCK, 0, NULL, NULL); + stackptr[1].indent = stackptr->indent + 4; + stackptr[1].fence = '\0'; + stackptr ++; + + blank_code = 0; + } + + while (blank_code > 0) + { + mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, "\n", NULL); + blank_code --; + } + + mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, line + stackptr->indent, NULL); + + continue; + } + + if (!block || block->type != type) + { + if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK) + stackptr --; + + block = mmd_add(stackptr->parent, type, 0, NULL, NULL); + } + + // Read continuation lines before parsing this... + while (mmd_has_continuation(line, &file, stackptr->indent)) + { + char *ptr = line + strlen(line); + + if (!mmd_read_line(&file, ptr, sizeof(line) - (size_t)(ptr - line))) + break; + else if (line[0] == '>' && *ptr == '>') + memmove(ptr, ptr + 1, strlen(ptr)); + } + + mmd_parse_inline(&doc, block, lineptr); + + if (block->type == MMD_TYPE_PARAGRAPH && !block->first_child) + { + mmd_remove(block); + mmd_free(block); + block = NULL; + } + } + + // Free any references... + for (i = doc.num_references, reference = doc.references; i > 0; i --, reference ++) + { + if (reference->pending) + { + char text[8192]; // Reference text + size_t j; // Looping var + + DEBUG2_printf("Clearing links for '%s'.\n", reference->name); + snprintf(text, sizeof(text), "[%s]", reference->name); + + for (j = 0; j < reference->num_pending; j ++) + { + free(reference->pending[j]->text); + reference->pending[j]->text = strdup(text); + reference->pending[j]->type = MMD_TYPE_NORMAL_TEXT; + } + + free(reference->pending); + } + + free(reference->name); + free(reference->url); + } + + free(doc.references); + + // Return the root node... + return (doc.root); +} + + +// +// 'mmdLoadString()' - Load a markdown string into nodes. +// + +mmd_t * // O - Root node in markdown +mmdLoadString(mmd_t *root, // I - Root node for document or `NULL` for a new document + const char *s) // I - String to load +{ + return (mmdLoadIO(root, (mmd_iocb_t)mmd_iocb_string, &s)); +} + + +// +// 'mmdSetOptions()' - Set (enable/disable) support for various markdown options. +// + +void +mmdSetOptions(mmd_option_t options) // I - Options +{ + mmd_options = options; +} + + +// +// 'mmd_add()' - Add a new markdown node. +// + +static mmd_t * // O - New node +mmd_add(mmd_t *parent, // I - Parent node + mmd_type_t type, // I - Node type + int whitespace, // I - 1 if whitespace precedes this node + char *text, // I - Text, if any + char *url) // I - URL, if any +{ + mmd_t *temp; // New node + + + DEBUG2_printf("Adding %s to %p(%s), whitespace=%d, text=\"%s\", url=\"%s\"\n", mmd_type_string(type), parent, parent ? mmd_type_string(parent->type) : "", whitespace, text ? text : "(null)", url ? url : "(null)"); + + if (!parent && type != MMD_TYPE_DOCUMENT) + return (NULL); // Only document nodes can be at the root + + if ((temp = calloc(1, sizeof(mmd_t))) != NULL) + { + if (parent) + { + // Add node to the parent... + temp->parent = parent; + + if (parent->last_child) + { + parent->last_child->next_sibling = temp; + temp->prev_sibling = parent->last_child; + parent->last_child = temp; + } + else + { + parent->first_child = parent->last_child = temp; + } + } + + // Copy the node values... + temp->type = type; + temp->whitespace = whitespace; + + if (text) + temp->text = strdup(text); + + if (url) + temp->url = strdup(url); + } + + return (temp); +} + + +// +// 'mmd_free()' - Free memory used by a node. +// + +static void +mmd_free(mmd_t *node) // I - Node +{ + free(node->text); + free(node->url); + free(node->extra); + free(node); +} + + +// +// 'mmd_has_continuation()' - Determine whether the next line is a continuation +// of the current one. +// + +static int // O - 1 if the next line continues, 0 otherwise +mmd_has_continuation( + const char *line, // I - Current line + _mmd_filebuf_t *file, // I - File buffer + int indent) // I - Indentation for current block +{ + const char *lineptr = line; // Pointer into current line + const char *fileptr = file->bufptr;// Pointer into next line + + + if (*fileptr == '\n' || *fileptr == '\r') + return (0); + + do + { + while (isspace(*lineptr & 255)) + lineptr ++; + + if (*lineptr == '[' && (lineptr - line - indent) < 4 && (*fileptr == ' ' || *fileptr == '\t')) + return (1); + + while (isspace(*fileptr & 255)) + fileptr ++; + + if (*lineptr == '>' && *fileptr == '>') + { + lineptr ++; + fileptr ++; + } + else if (*fileptr == '>') + return (0); + + if (*fileptr == '\n' || *fileptr == '\r') + return (0); + } + while (isspace(*lineptr & 255) || isspace(*fileptr & 255)); + + if (*lineptr == '#') + return (0); + + if (strchr("-+*", *fileptr) && isspace(fileptr[1] & 255)) + { + // Bullet list item... + return (0); + } + + if (isdigit(*fileptr & 255)) + { + // Ordered list item... + while (*fileptr && isdigit(*fileptr & 255)) + fileptr ++; + + if (*fileptr == '.' || *fileptr == '(') + return (0); + } + + if (mmd_is_codefence((char *)fileptr, '\0', 0, NULL)) + return (0); + + if (mmd_is_chars(fileptr, "- \t", 3) || mmd_is_chars(fileptr, "_ \t", 3) || mmd_is_chars(fileptr, "* \t", 3)) + { + // Thematic break... + return (0); + } + + if (mmd_is_chars(fileptr, "-", 1) || mmd_is_chars(fileptr, "=", 1)) + { + // Heading... + return (0); + } + + if (*fileptr == '#') + { + // Possible heading... + int count = 0; + + while (*fileptr == '#') + { + fileptr ++; + count ++; + } + + if (count <= 6) + return (0); + } + + return ((fileptr - file->bufptr) <= indent); +} + + +// +// 'mmd_iocb_file()' - Read from a file. +// + +static size_t // O - Number of bytes read +mmd_iocb_file(FILE *fp, // I - File pointer + char *buffer, // I - Buffer + size_t bytes) // I - Number of bytes to read +{ + return (fread(buffer, 1, bytes, fp)); +} + + +// +// 'mmd_iocb_string()' - Read from a string. +// + +static size_t // O - Number of bytes read +mmd_iocb_string(const char **s, // I - Pointer into string + char *buffer, // I - Buffer + size_t bytes) // I - Number of bytes to read +{ + size_t ret; // Bytes read/returned + + + // See how many bytes remain in the string... + if ((ret = strlen(*s)) > bytes) + ret = bytes; + + if (ret > 0) + { + // Copy bytes from the string... + memcpy(buffer, *s, ret); + (*s) += ret; + } + + return (ret); +} + + +// +// 'mmd_is_chars()' - Determine whether a line consists solely of whitespace +// and the specified character. +// + +static size_t // O - 1 if as specified, 0 otherwise +mmd_is_chars(const char *lineptr, // I - Current line + const char *chars, // I - Non-space character + size_t minchars) // I - Minimum number of non-space characters +{ + size_t found_ch = 0; // Did we find the specified characters? + + while (*lineptr == *chars) + { + found_ch ++; + lineptr ++; + } + + if (minchars > 1) + { + while (*lineptr && strchr(chars, *lineptr)) + { + if (*lineptr == *chars) + found_ch ++; + + lineptr ++; + } + } + + while (*lineptr && isspace(*lineptr & 255) && *lineptr != '\n') + lineptr ++; + + if ((*lineptr && *lineptr != '\n') || found_ch < minchars) + return (0); + else + return (found_ch); +} + + +// +// 'mmd_is_codefence()' - Determine whether the line contains a code fence. +// + +static size_t // O - Length of fence or 0 otherwise +mmd_is_codefence(char *lineptr, // I - Line + char fence, // I - Current fence character, if any + size_t fencelen, // I - Current fence length + char **language) // O - Language name, if any +{ + char match = fence; // Character to match + size_t len = 0; // Length of fence chars + + + if (language) + *language = NULL; + + if (!match) + { + if (*lineptr == '~' || *lineptr == '`') + match = *lineptr; + else + return (0); + } + + while (*lineptr == match) + { + lineptr ++; + len ++; + } + + if (len < 3 || (fencelen && len < fencelen)) + return (0); + + if (*lineptr && *lineptr != '\n' && fence) + return (0); + else if (*lineptr && *lineptr != '\n' && !fence) + { + if (match == '`' && strchr(lineptr, match)) + return (0); + + while (isspace(*lineptr & 255)) + lineptr ++; + + if (*lineptr && language) + { + *language = lineptr; + + while (*lineptr && !isspace(*lineptr & 255)) + { + if (*lineptr == '\\' && lineptr[1]) + { + // Remove "\" + memmove(lineptr, lineptr + 1, strlen(lineptr)); + } + + lineptr ++; + } + *lineptr = '\0'; + } + } + + return (len); +} + + +// +// 'mmd_is_table()' - Look ahead to see if the next line contains a heading +// divider for a table. +// + +static bool // O - `true` if this is a table, `false` otherwise +mmd_is_table(_mmd_filebuf_t *file, // I - File to read from + int indent) // I - Indentation of table line +{ + const char *ptr; // Pointer into buffer + + + ptr = file->bufptr; + while (*ptr) + { + if (!strchr(" \t>", *ptr)) + break; + + ptr ++; + } + + if ((ptr - file->bufptr - indent) >= 4) + return (false); + + while (*ptr) + { + if (!strchr(" \t:-|", *ptr)) + break; + + ptr ++; + } + + return (*ptr == '\r' || *ptr == '\n'); +} + + +// +// 'mmd_parse_inline()' - Parse inline formatting. +// + +static void +mmd_parse_inline(_mmd_doc_t *doc, // I - Document + mmd_t *parent, // I - Parent node + char *lineptr) // I - Pointer into line +{ + mmd_t *node; // New node + mmd_type_t type; // Current node type + int whitespace; // Whitespace precedes? + char *text, // Text fragment in line + *title, // Link title + *url, // URL in link + *refname; // Reference name + const char *delim = NULL; // Delimiter + size_t delimlen = 0; // Length of delimiter + + + whitespace = parent->last_child != NULL; + + for (text = NULL, type = MMD_TYPE_NORMAL_TEXT; *lineptr; lineptr ++) + { + DEBUG2_printf("mmd_parse_inline: lineptr=%p(\"%32.32s...\"), type=%d, text=%p, whitespace=%d\n", lineptr, lineptr, type, text, whitespace); + + if (isspace(*lineptr & 255) && type != MMD_TYPE_CODE_TEXT) + { + if (text) + { + *lineptr = '\0'; + mmd_add(parent, type, whitespace, text, NULL); + + text = NULL; + } + + if (!strncmp(lineptr + 1, " \n", 2) && lineptr[3]) + { + DEBUG2_printf("mmd_parse_inline: Adding hard break to %p(%d)\n", parent, parent->type); + mmd_add(parent, MMD_TYPE_HARD_BREAK, 0, NULL, NULL); + lineptr += 2; + whitespace = 0; + } + else + { + whitespace = 1; + } + } + else if (*lineptr == '!' && lineptr[1] == '[' && type != MMD_TYPE_CODE_TEXT) + { + // Image... + if (text) + { + mmd_add(parent, type, whitespace, text, NULL); + + text = NULL; + whitespace = 0; + } + + lineptr = mmd_parse_link(doc, lineptr + 1, &text, &url, NULL, &refname); + + if (url || refname) + { + node = mmd_add(parent, MMD_TYPE_IMAGE, whitespace, text, url); + + if (refname) + mmd_ref_add(doc, node, refname, NULL, NULL); + } + + if (!*lineptr) + return; + + text = url = NULL; + whitespace = 0; + lineptr --; + } + else if (*lineptr == '[' && type != MMD_TYPE_CODE_TEXT) + { + // Link or checkbox... + if (text) + { + *lineptr = '\0'; + mmd_add(parent, type, whitespace, text, NULL); + *lineptr = '['; + + text = NULL; + whitespace = 0; + } + + if ((mmd_options & MMD_OPTION_TASKS) && (!strncmp(lineptr, "[ ]", 3) || !strncmp(lineptr, "[x]", 3) || !strncmp(lineptr, "[X]", 3))) + { + // Checkbox + mmd_add(parent, MMD_TYPE_CHECKBOX, 0, lineptr[1] == ' ' ? NULL : "x", NULL); + lineptr += 2; + } + else + { + // Link + lineptr = mmd_parse_link(doc, lineptr, &text, &url, &title, &refname); + + if (text) + { + char *end = text + strlen(text) - 1; + // End of text fragment + + if (*text == '`' && *end == '`' && end != text) + { + // Code text + text ++; + if (end > text && *end == '`') + *end = '\0'; + + node = mmd_add(parent, MMD_TYPE_CODE_TEXT, whitespace, text, url); + } + else if (*text == '*' && *end == '*' && end > text) + { + // Emphasized or strong text + text ++; + if (*text == '*' && (end - 1) > text && end[-1] == '*') + { + text ++; + end[-1] = '\0'; + node = mmd_add(parent, MMD_TYPE_STRONG_TEXT, whitespace, text, url); + } + else + { + *end = '\0'; + node = mmd_add(parent, MMD_TYPE_EMPHASIZED_TEXT, whitespace, text, url); + } + } + else if (type == MMD_TYPE_NORMAL_TEXT) + { + // Plain linked text... + node = mmd_add(parent, MMD_TYPE_LINKED_TEXT, whitespace, text, url); + } + else + { + // Preserve style of linked text... + node = mmd_add(parent, type, whitespace, text, url); + } + + if (title) + node->extra = strdup(title); + } + else + { + // No text, no node... + node = NULL; + } + + DEBUG2_printf("mmd_parse_inline: text=\"%s\", refname=\"%s\", node=%p\n", text, refname, node); + + if (refname && node) + mmd_ref_add(doc, node, refname, NULL, title); + + if (!*lineptr) + return; + + text = url = NULL; + whitespace = 0; + lineptr --; + } + } + else if (*lineptr == '<' && type != MMD_TYPE_CODE_TEXT && strchr(lineptr + 1, '>')) + { + // Autolink... + *lineptr++ = '\0'; + + if (text) + { + mmd_add(parent, type, whitespace, text, NULL); + + text = NULL; + whitespace = 0; + } + + url = lineptr; + lineptr = strchr(lineptr, '>'); + *lineptr = '\0'; + + mmd_add(parent, MMD_TYPE_LINKED_TEXT, whitespace, url, url); + + text = url = NULL; + whitespace = 0; + } + else if ((*lineptr == '*' || *lineptr == '_') && (!text || ispunct(lineptr[-1] & 255) || type != MMD_TYPE_NORMAL_TEXT) && type != MMD_TYPE_CODE_TEXT) + { + const char *end; // End delimiter + + if (type != MMD_TYPE_NORMAL_TEXT || !delim) + { + if (!strncmp(lineptr, "**", 2)) + delim = "**"; + else if (!strncmp(lineptr, "__", 2)) + delim = "__"; + else if (*lineptr == '*') + delim = "*"; + else + delim = "_"; + + delimlen = strlen(delim); + } + + if (type == MMD_TYPE_NORMAL_TEXT && delim && ((end = strstr(lineptr + delimlen, delim)) == NULL || end == (lineptr + delimlen) || isspace(end[-1] & 255))) + { + if (!text) + text = lineptr; + + delim = NULL; + delimlen = 0; + continue; + } + + if (text) + { + char save = *lineptr; + + *lineptr = '\0'; + + mmd_add(parent, type, whitespace, text, NULL); + + *lineptr = save; + text = NULL; + whitespace = 0; + } + + if (type == MMD_TYPE_NORMAL_TEXT) + { + if (!strncmp(lineptr, delim, delimlen) && !isspace(lineptr[delimlen] & 255)) + { + type = delimlen == 2 ? MMD_TYPE_STRONG_TEXT : MMD_TYPE_EMPHASIZED_TEXT; + text = lineptr + delimlen; + lineptr += delimlen - 1; + } + else + { + text = lineptr; + } + } + else if (!strncmp(lineptr, delim, delimlen)) + { + lineptr += delimlen - 1; + type = MMD_TYPE_NORMAL_TEXT; + + delim = NULL; + delimlen = 0; + } + } + else if (lineptr[0] == '~' && lineptr[1] == '~' && type != MMD_TYPE_CODE_TEXT) + { + if (text) + { + *lineptr = '\0'; + + mmd_add(parent, type, whitespace, text, NULL); + + *lineptr = '~'; + text = NULL; + whitespace = 0; + } + + if (!isspace(lineptr[2] & 255) && type == MMD_TYPE_NORMAL_TEXT) + { + type = MMD_TYPE_STRUCK_TEXT; + text = lineptr + 2; + } + else + { + lineptr ++; + type = MMD_TYPE_NORMAL_TEXT; + } + } + else if (*lineptr == '`' && (type != MMD_TYPE_CODE_TEXT || lineptr[-1] != '\\')) + { + if (type != MMD_TYPE_NORMAL_TEXT || !delim) + { + if (lineptr[1] == '`') + { + if (lineptr[2] == '`') + { + delim = "```"; + delimlen = 3; + } + else + { + delim = "``"; + delimlen = 2; + } + } + else + { + delim = "`"; + delimlen = 1; + } + } + + if (type != MMD_TYPE_CODE_TEXT && delim && !strstr(lineptr + delimlen, delim)) + { + if (!text) + text = lineptr; + + delim = NULL; + delimlen = 0; + continue; + } + + if (text) + { + DEBUG2_printf("mmd_parse_inline: text=\"%s\"\n", text); + + if (!strncmp(lineptr, delim, delimlen)) + { + char *textptr = lineptr; + + while (textptr > text && isspace(textptr[-1] & 255)) + textptr --; + + *textptr = '\0'; + lineptr += delimlen - 1; + } + + if (type == MMD_TYPE_CODE_TEXT) + { + if (whitespace && !*text) + { + mmd_add(parent, type, 0, " ", NULL); + whitespace = 0; + } + } + + mmd_add(parent, type, whitespace, text, NULL); + + text = NULL; + whitespace = 0; + } + + if (type == MMD_TYPE_CODE_TEXT) + { + DEBUG2_puts("mmd_parse_inline: Reverting to normal text.\n"); + + type = MMD_TYPE_NORMAL_TEXT; + delim = NULL; + delimlen = 0; + } + else + { + type = MMD_TYPE_CODE_TEXT; + lineptr += delimlen - 1; + + while (isspace(lineptr[1] & 255)) + lineptr ++; + + text = lineptr + 1; + } + } + else if (!strncmp(lineptr, "\\\n", 2)) + { + // Hard break + *lineptr++ = '\0'; + + if (text) + { + mmd_add(parent, type, whitespace, text, NULL); + + text = NULL; + whitespace = false; + } + + mmd_add(parent, MMD_TYPE_HARD_BREAK, false, NULL, NULL); + } + else if (!text) + { + if (*lineptr == '\\' && ispunct(lineptr[1] & 255) && type != MMD_TYPE_CODE_TEXT) + { + // Escaped character... + lineptr ++; + } + + text = lineptr; + } + else if (*lineptr == '\\' && ispunct(lineptr[1] & 255) && type != MMD_TYPE_CODE_TEXT) + { + // Escaped character... + memmove(lineptr, lineptr + 1, strlen(lineptr)); + } + } + + if (text) + { + DEBUG_puts("mms_parse_inline: Adding text at end.\n"); + mmd_add(parent, type, whitespace, text, NULL); + } +} + + +// +// 'mmd_parse_link()' - Parse a link. +// + +static char * // O - End of link text +mmd_parse_link(_mmd_doc_t *doc, // I - Document + char *lineptr, // I - Pointer into line + char **text, // O - Text + char **url, // O - URL + char **title, // O - Title, if any + char **refname) // O - Reference name +{ + lineptr ++; // skip "[" + + *text = lineptr; + *url = NULL; + *refname = NULL; + + if (title) + *title = NULL; + + while (*lineptr && *lineptr != ']') + { + if (*lineptr == '\"' || *lineptr == '\'') + { + char quote = *lineptr++; + + while (*lineptr && *lineptr != quote) + lineptr ++; + + if (!*lineptr) + return (lineptr); + } + + lineptr ++; + } + + if (!*lineptr) + return (lineptr); + + *lineptr++ = '\0'; + + if (*lineptr == '(') + { + // Get URL... + lineptr ++; + *url = lineptr; + + while (*lineptr && *lineptr != ')') + { + if (isspace(*lineptr & 255)) + { + *lineptr = '\0'; + } + else if (*lineptr == '\\' && lineptr[1]) + { + if (lineptr[1] == ')') + { + // Remove "\" + memmove(lineptr, lineptr + 1, strlen(lineptr)); + } + else + { + // Keep "\" + lineptr ++; + } + } + else if (*lineptr == '\"' || *lineptr == '\'') + { + char quote = *lineptr++; + + if (title) + *title = lineptr; + + while (*lineptr && *lineptr != quote) + { + if (*lineptr == '\\' && lineptr[1]) + { + // Remove "\" + memmove(lineptr, lineptr + 1, strlen(lineptr)); + } + + lineptr ++; + } + + if (!*lineptr) + return (lineptr); + else if (title) + *lineptr = '\0'; + } + + lineptr ++; + } + + *lineptr++ = '\0'; + } + else if (*lineptr == '[') + { + // Get reference... + lineptr ++; + *refname = lineptr; + + while (*lineptr && *lineptr != ']') + { + if (isspace(*lineptr & 255)) + { + *lineptr = '\0'; + } + else if (*lineptr == '\\' && lineptr[1]) + { + if (lineptr[1] == ']') + { + // Remove "\" + memmove(lineptr, lineptr + 1, strlen(lineptr)); + } + else + { + // Keep "\" + lineptr ++; + } + } + else if (*lineptr == '\"' || *lineptr == '\'') + { + char quote = *lineptr++; + + if (title) + *title = lineptr; + + while (*lineptr && *lineptr != quote) + lineptr ++; + + if (!*lineptr) + return (lineptr); + else + *lineptr = '\0'; + } + + lineptr ++; + } + + *lineptr++ = '\0'; + if (!**refname) + *refname = *text; + } + else if (*lineptr == ':') + { + // Get reference definition... + lineptr ++; + while (*lineptr && isspace(*lineptr & 255)) + lineptr ++; + + *url = lineptr; + + while (*lineptr && !isspace(*lineptr & 255)) + { + if (*lineptr == '\\' && lineptr[1]) + { + // Remove "\" + memmove(lineptr, lineptr + 1, strlen(lineptr)); + } + + lineptr ++; + } + + if (*lineptr) + { + *lineptr++ = '\0'; + while (*lineptr && isspace(*lineptr & 255)) + lineptr ++; + + if (*lineptr == '\"' || *lineptr == '\'') + { + char quote = *lineptr++; + + if (title) + *title = lineptr; + + while (*lineptr && *lineptr != quote) + { + if (*lineptr == '\\' && lineptr[1]) + { + // Remove "\" + memmove(lineptr, lineptr + 1, strlen(lineptr)); + } + + lineptr ++; + } + + if (!*lineptr) + return (lineptr); + else + *lineptr = '\0'; + } + } + + mmd_ref_add(doc, NULL, *text, *url, title ? *title : NULL); + + *text = NULL; + *url = NULL; + + if (title) + *title = NULL; + } + else + { + // Shortcut reference... + *refname = *text; + } + + return (lineptr); +} + + +// +// 'mmd_read_buffer()' - Fill the file buffer with more data from a file. +// + +static void +mmd_read_buffer(_mmd_filebuf_t *file) // I - File buffer +{ + size_t bytes; // Bytes read + + + if (file->bufptr && file->bufptr > file->buffer) + { + // Discard previous characters in the buffer. + memmove(file->buffer, file->bufptr, file->bufend - file->bufptr); + file->bufend -= (file->bufptr - file->buffer); + } + else + { + // Otherwise just clear the buffer... + file->bufend = file->buffer; + } + + if ((bytes = (file->cb)(file->cbdata, file->bufend, sizeof(file->buffer) - (size_t)(file->bufend - file->buffer - 1))) > 0) + file->bufend += bytes; + + *(file->bufend) = '\0'; + file->bufptr = file->buffer; +} + + +// +// 'mmd_read_line()' - Read a line from a file in a Markdown-aware way. +// + +static char * // O - Pointer to line or `NULL` on EOF +mmd_read_line(_mmd_filebuf_t *file, // I - File buffer + char *line, // I - Line buffer + size_t linesize) // I - Size of line buffer +{ + int ch, // Current character + column = 0; // Current column + char *lineptr = line, // Pointer into line + *lineend = line + linesize - 1; // Pointer to end of buffer + + + // Fill the buffer as needed... + if (!file->bufptr || (file->bufptr >= file->bufend) || !strchr(file->bufptr, '\n')) + mmd_read_buffer(file); + + // Copy a line out of the file buffer... + while (file->bufptr < file->bufend) + { + ch = *(file->bufptr); + file->bufptr ++; + + if (ch == '\t') + { + // Expand tabs since nobody uses the same tab width and Markdown says + // 4 columns per tab... + do + { + column ++; + if (lineptr < lineend) + *lineptr++ = ' '; + } + while (column & 3); + } + else if (ch != '\r' && lineptr < lineend) + { + column ++; + *lineptr++ = ch; + } + + if (ch == '\n') + break; + } + + *lineptr = '\0'; + + if (file->bufptr == file->bufend && lineptr == line) + return (NULL); + else if (!strchr(file->bufptr, '\n')) + mmd_read_buffer(file); + + return (line); +} + + +// +// 'mmd_ref_add()' - Add or update a reference... +// + +static void +mmd_ref_add(_mmd_doc_t *doc, // I - Document + mmd_t *node, // I - Link node, if any + const char *name, // I - Reference name + const char *url, // I - Reference URL + const char *title) // I - Title, if any +{ + size_t i; // Looping var + _mmd_ref_t *ref = mmd_ref_find(doc, name); + // Reference + + + DEBUG2_printf("mmd_ref_add(doc=%p, node=%p, name=\"%s\", url=\"%s\", title=\"%s\")\n", doc, node, name, url, title); + + if (ref) + { + DEBUG2_printf("mmd_ref_add: ref=%p, ref->url=\"%s\"\n", ref, ref->url); + + if (!ref->url && url) + { + if (node) + node->url = strdup(url); + + ref->url = strdup(url); + + if (title) + { + if (node) + node->extra = strdup(title); + + ref->title = strdup(title); + } + + for (i = 0; i < ref->num_pending; i ++) + { + ref->pending[i]->url = strdup(url); + + if (title) + ref->pending[i]->extra = strdup(title); + } + + free(ref->pending); + + ref->num_pending = 0; + ref->pending = NULL; + return; + } + } + else if ((ref = realloc(doc->references, (doc->num_references + 1) * sizeof(_mmd_ref_t))) != NULL) + { + doc->references = ref; + ref += doc->num_references; + doc->num_references ++; + + ref->name = strdup(name); + ref->url = url ? strdup(url) : NULL; + ref->title = title ? strdup(title) : NULL; + ref->num_pending = 0; + ref->pending = NULL; + } + else + return; + + if (node) + { + if (ref->url) + { + node->url = strdup(ref->url); + node->extra = ref->title ? strdup(ref->title) : NULL; + } + else if ((ref->pending = realloc(ref->pending, (ref->num_pending + 1) * sizeof(mmd_t *))) != NULL) + { + ref->pending[ref->num_pending ++] = node; + } + } +} + + +// +// 'mmd_ref_find()' - Find a reference... +// + +static _mmd_ref_t * // O - Reference or NULL +mmd_ref_find(_mmd_doc_t *doc, // I - Document + const char *name) // I - Reference name +{ + size_t i; // Looping var + + + for (i = 0; i < doc->num_references; i ++) + if (!strcasecmp(name, doc->references[i].name)) + return (doc->references + i); + + return (NULL); +} + + +// +// 'mmd_remove()' - Remove a node from its parent. +// + +static void +mmd_remove(mmd_t *node) // I - Node +{ + if (node && node->parent) + { + if (node->prev_sibling) + node->prev_sibling->next_sibling = node->next_sibling; + else + node->parent->first_child = node->next_sibling; + + if (node->next_sibling) + node->next_sibling->prev_sibling = node->prev_sibling; + else + node->parent->last_child = node->prev_sibling; + + node->parent = NULL; + node->prev_sibling = NULL; + node->next_sibling = NULL; + } +} + + +#if DEBUG +// +// 'mmd_type_string()' - Return a string for the specified type enumeration. +// + +static const char * // O - String representing the type +mmd_type_string(mmd_type_t type) // I - Type value +{ + static char unknown[64]; // Unknown type buffer + + + switch (type) + { + case MMD_TYPE_NONE : + return ("MMD_TYPE_NONE"); + case MMD_TYPE_DOCUMENT : + return "MMD_TYPE_DOCUMENT"; + case MMD_TYPE_METADATA : + return "MMD_TYPE_METADATA"; + case MMD_TYPE_BLOCK_QUOTE : + return "MMD_TYPE_BLOCK_QUOTE"; + case MMD_TYPE_ORDERED_LIST : + return "MMD_TYPE_ORDERED_LIST"; + case MMD_TYPE_UNORDERED_LIST : + return "MMD_TYPE_UNORDERED_LIST"; + case MMD_TYPE_LIST_ITEM : + return "MMD_TYPE_LIST_ITEM"; + case MMD_TYPE_TABLE : + return "MMD_TYPE_TABLE"; + case MMD_TYPE_TABLE_HEADER : + return "MMD_TYPE_TABLE_HEADER"; + case MMD_TYPE_TABLE_BODY : + return "MMD_TYPE_TABLE_BODY"; + case MMD_TYPE_TABLE_ROW : + return "MMD_TYPE_TABLE_ROW"; + case MMD_TYPE_HEADING_1 : + return "MMD_TYPE_HEADING_1"; + case MMD_TYPE_HEADING_2 : + return "MMD_TYPE_HEADING_2"; + case MMD_TYPE_HEADING_3 : + return "MMD_TYPE_HEADING_3"; + case MMD_TYPE_HEADING_4 : + return "MMD_TYPE_HEADING_4"; + case MMD_TYPE_HEADING_5 : + return "MMD_TYPE_HEADING_5"; + case MMD_TYPE_HEADING_6 : + return "MMD_TYPE_HEADING_6"; + case MMD_TYPE_PARAGRAPH : + return "MMD_TYPE_PARAGRAPH"; + case MMD_TYPE_CODE_BLOCK : + return "MMD_TYPE_CODE_BLOCK"; + case MMD_TYPE_THEMATIC_BREAK : + return "MMD_TYPE_THEMATIC_BREAK"; + case MMD_TYPE_TABLE_HEADER_CELL : + return "MMD_TYPE_TABLE_HEADER_CELL"; + case MMD_TYPE_TABLE_BODY_CELL_LEFT : + return "MMD_TYPE_TABLE_BODY_CELL_LEFT"; + case MMD_TYPE_TABLE_BODY_CELL_CENTER : + return "MMD_TYPE_TABLE_BODY_CELL_CENTER"; + case MMD_TYPE_TABLE_BODY_CELL_RIGHT : + return "MMD_TYPE_TABLE_BODY_CELL_RIGHT"; + case MMD_TYPE_NORMAL_TEXT : + return "MMD_TYPE_NORMAL_TEXT"; + case MMD_TYPE_EMPHASIZED_TEXT : + return "MMD_TYPE_EMPHASIZED_TEXT"; + case MMD_TYPE_STRONG_TEXT : + return "MMD_TYPE_STRONG_TEXT"; + case MMD_TYPE_STRUCK_TEXT : + return "MMD_TYPE_STRUCK_TEXT"; + case MMD_TYPE_LINKED_TEXT : + return "MMD_TYPE_LINKED_TEXT"; + case MMD_TYPE_CODE_TEXT : + return "MMD_TYPE_CODE_TEXT"; + case MMD_TYPE_IMAGE : + return "MMD_TYPE_IMAGE"; + case MMD_TYPE_HARD_BREAK : + return "MMD_TYPE_HARD_BREAK"; + case MMD_TYPE_SOFT_BREAK : + return "MMD_TYPE_SOFT_BREAK"; + case MMD_TYPE_METADATA_TEXT : + return "MMD_TYPE_METADATA_TEXT"; + default : + snprintf(unknown, sizeof(unknown), "?? %d ??", (int)type); + return (unknown); + } +} +#endif // DEBUG diff --git a/examples/mmd.h b/examples/mmd.h new file mode 100644 index 0000000..4cf030d --- /dev/null +++ b/examples/mmd.h @@ -0,0 +1,112 @@ +// +// Header file for miniature markdown library. +// +// https://www.msweet.org/mmd +// +// Copyright © 2017-2024 by Michael R Sweet. +// +// Licensed under Apache License v2.0. See the file "LICENSE" for more +// information. +// + +#ifndef MMD_H +# define MMD_H +# include +# include +# ifdef __cplusplus +extern "C" { +# endif // __cplusplus + + +// +// Constants... +// + +enum mmd_option_e +{ + MMD_OPTION_NONE = 0x00, // No markdown extensions + MMD_OPTION_METADATA = 0x01, // Jekyll metadata extension + MMD_OPTION_TABLES = 0x02, // Github table extension + MMD_OPTION_TASKS = 0x04, // Github task item extension (check boxes) + MMD_OPTION_ALL = 0x07 // All supported markdown extensions +}; +typedef unsigned mmd_option_t; + +typedef enum mmd_type_e +{ + MMD_TYPE_NONE = -1, + MMD_TYPE_DOCUMENT, // The document root + MMD_TYPE_METADATA, // Document metadata + MMD_TYPE_BLOCK_QUOTE, //
+ MMD_TYPE_ORDERED_LIST, //
    + MMD_TYPE_UNORDERED_LIST, //
      + MMD_TYPE_LIST_ITEM, //
    • + MMD_TYPE_TABLE, // + MMD_TYPE_TABLE_HEADER, // + MMD_TYPE_TABLE_BODY, // + MMD_TYPE_TABLE_ROW, // + MMD_TYPE_HEADING_1 = 10, //

      + MMD_TYPE_HEADING_2, //

      + MMD_TYPE_HEADING_3, //

      + MMD_TYPE_HEADING_4, //

      + MMD_TYPE_HEADING_5, //

      + MMD_TYPE_HEADING_6, //
      + MMD_TYPE_PARAGRAPH, //

      + MMD_TYPE_CODE_BLOCK, //

      
      +  MMD_TYPE_THEMATIC_BREAK,		// 
      + MMD_TYPE_TABLE_HEADER_CELL, //
      + MMD_TYPE_TABLE_BODY_CELL_LEFT, // + MMD_TYPE_TABLE_BODY_CELL_CENTER, // + MMD_TYPE_TABLE_BODY_CELL_RIGHT, // + MMD_TYPE_NORMAL_TEXT = 100, // Normal text + MMD_TYPE_EMPHASIZED_TEXT, // text + MMD_TYPE_STRONG_TEXT, // text + MMD_TYPE_STRUCK_TEXT, // text + MMD_TYPE_LINKED_TEXT, // text + MMD_TYPE_CODE_TEXT, // text + MMD_TYPE_IMAGE, // + MMD_TYPE_HARD_BREAK, //
      + MMD_TYPE_SOFT_BREAK, // + MMD_TYPE_METADATA_TEXT, // name: value + MMD_TYPE_CHECKBOX // [ ] or [x] +} mmd_type_t; + + +// +// Types... +// + +typedef struct _mmd_s mmd_t; // Markdown node +typedef size_t (*mmd_iocb_t)(void *cbdata, char *buffer, size_t bytes); + // mmdLoadIO callback function + +// +// Functions... +// + +extern char *mmdCopyAllText(mmd_t *node); +extern void mmdFree(mmd_t *node); +extern const char *mmdGetExtra(mmd_t *node); +extern mmd_t *mmdGetFirstChild(mmd_t *node); +extern mmd_t *mmdGetLastChild(mmd_t *node); +extern const char *mmdGetMetadata(mmd_t *doc, const char *keyword); +extern mmd_t *mmdGetNextSibling(mmd_t *node); +extern mmd_option_t mmdGetOptions(void); +extern mmd_t *mmdGetParent(mmd_t *node); +extern mmd_t *mmdGetPrevSibling(mmd_t *node); +extern const char *mmdGetText(mmd_t *node); +extern mmd_type_t mmdGetType(mmd_t *node); +extern const char *mmdGetURL(mmd_t *node); +extern bool mmdGetWhitespace(mmd_t *node); +extern bool mmdIsBlock(mmd_t *node); +extern mmd_t *mmdLoad(mmd_t *root, const char *filename); +extern mmd_t *mmdLoadFile(mmd_t *root, FILE *fp); +extern mmd_t *mmdLoadIO(mmd_t *root, mmd_iocb_t cb, void *cbdata); +extern mmd_t *mmdLoadString(mmd_t *root, const char *s); +extern void mmdSetOptions(mmd_option_t options); + + +# ifdef __cplusplus +} +# endif // __cplusplus +#endif // !MMD_H