// // Implementation of miniature markdown library. // // https://www.msweet.org/mmd // // Copyright © 2017-2024 by Michael R Sweet. // // Licensed under Apache License v2.0. See the file "LICENSE" for more // information. // // // Define DEBUG to get debug printf messages to stderr. // #define DEBUG 0 #if DEBUG > 0 # define DEBUG_printf(...) fprintf(stderr, __VA_ARGS__) # define DEBUG_puts(s) fputs(s, stderr); #else # define DEBUG_printf(...) # define DEBUG_puts(s) #endif // DEBUG > 0 #if DEBUG > 1 # define DEBUG2_printf(...) fprintf(stderr, __VA_ARGS__) # define DEBUG2_puts(s) fputs(s, stderr); #else # define DEBUG2_printf(...) # define DEBUG2_puts(s) #endif // DEBUG > 1 // // Beginning with VC2005, Microsoft breaks ISO C and POSIX conformance // by deprecating a number of functions in the name of security, even // when many of the affected functions are otherwise completely secure. // The _CRT_SECURE_NO_DEPRECATE definition ensures that we won't get // warnings from their use... // // Then Microsoft decided that they should ignore this in VC2008 and use // yet another define (_CRT_SECURE_NO_WARNINGS) instead... // #define _CRT_SECURE_NO_DEPRECATE #define _CRT_SECURE_NO_WARNINGS #if _WIN32 # define strcasecmp stricmp #endif // _WIN32 #include "mmd.h" #include #include #include // // Microsoft renames the POSIX functions to _name, and introduces a broken // compatibility layer using the original names. As a result, random crashes // can occur when, for example, strdup() allocates memory from a different heap // than used by malloc() and free(). // // To avoid moronic problems like this, we #define the POSIX function names to // the corresponding non-standard Microsoft names. // #ifdef _WIN32 # define snprintf _snprintf # define strdup _strdup #endif // _WIN32 // // Private structures... // struct _mmd_s { mmd_type_t type; // Node type bool whitespace; // Leading whitespace? char *text, // Text *url, // Reference URL (image/link/etc.) *extra; // Title, language name, etc. mmd_t *parent, // Parent node *first_child, // First child node *last_child, // Last child node *prev_sibling, // Previous sibling node *next_sibling; // Next sibling node }; typedef struct _mmd_filebuf_s // Buffered file { mmd_iocb_t cb; // Read callback function void *cbdata; // Read callback data char buffer[65536], // Buffer *bufptr, // Pointer into buffer *bufend; // End of buffer } _mmd_filebuf_t; typedef struct _mmd_ref_s // Reference link { char *name, // Name of reference *url, // Reference URL *title; // Title, if any size_t num_pending; // Number of pending nodes mmd_t **pending; // Pending nodes } _mmd_ref_t; typedef struct _mmd_doc_s // Markdown document { mmd_t *root; // Root node size_t num_references; // Number of references _mmd_ref_t *references; // References } _mmd_doc_t; typedef struct _mmd_stack_s // Markdown block stack { mmd_t *parent; // Parent node int indent; // Indentation char fence; // Code fence character size_t fencelen; // Length of code fence } _mmd_stack_t; // // Local globals... // static mmd_option_t mmd_options = MMD_OPTION_ALL; // Markdown extensions to support // // Local functions... // static mmd_t *mmd_add(mmd_t *parent, mmd_type_t type, int whitespace, char *text, char *url); static void mmd_free(mmd_t *node); static int mmd_has_continuation(const char *line, _mmd_filebuf_t *file, int indent); static size_t mmd_iocb_file(FILE *fp, char *buffer, size_t bytes); static size_t mmd_iocb_string(const char **s, char *buffer, size_t bytes); static size_t mmd_is_chars(const char *lineptr, const char *chars, size_t minchars); static size_t mmd_is_codefence(char *lineptr, char fence, size_t fencelen, char **language); static bool mmd_is_table(_mmd_filebuf_t *file, int indent); static void mmd_parse_inline(_mmd_doc_t *doc, mmd_t *parent, char *lineptr); static char *mmd_parse_link(_mmd_doc_t *doc, char *lineptr, char **text, char **url, char **title, char **refname); static void mmd_read_buffer(_mmd_filebuf_t *file); static char *mmd_read_line(_mmd_filebuf_t *file, char *line, size_t linesize); static void mmd_ref_add(_mmd_doc_t *doc, mmd_t *node, const char *name, const char *url, const char *title); static _mmd_ref_t *mmd_ref_find(_mmd_doc_t *doc, const char *name); static void mmd_remove(mmd_t *node); #if DEBUG static const char *mmd_type_string(mmd_type_t type); #endif // DEBUG // // 'mmdCopyAllText()' - Make a copy of all the text under a given node. // // The returned string must be freed using free(). // char * // O - Copied string mmdCopyAllText(mmd_t *node) // I - Parent node { char *all = NULL, // String buffer *allptr = NULL, // Pointer into string buffer *temp; // Temporary pointer size_t allsize = 0, // Size of "all" buffer textlen; // Length of "text" string mmd_t *current, // Current node *next; // Next node current = mmdGetFirstChild(node); while (current != node) { if (current->text) { // Append this node's text to the string... textlen = strlen(current->text); allsize += textlen + (size_t)current->whitespace; temp = realloc(all, allsize); if (!temp) { free(all); return (NULL); } allptr = temp + (allptr - all); all = temp; if (current->whitespace) *allptr++ = ' '; memcpy(allptr, current->text, textlen); allptr += textlen; } // Find the next logical node... if ((next = mmdGetNextSibling(current)) == NULL) { next = mmdGetParent(current); while (next && next != node && mmdGetNextSibling(next) == NULL) next = mmdGetParent(next); if (next != node) next = mmdGetNextSibling(next); } current = next; } if (allptr) *allptr = '\0'; return (all); } // // 'mmdFree()' - Free a markdown tree. // void mmdFree(mmd_t *node) // I - First node { mmd_t *current, // Current node *next; // Next node mmd_remove(node); for (current = node->first_child; current; current = next) { // Get the next node... if ((next = current->first_child) != NULL) { // Free parent nodes after child nodes have been freed... current->first_child = NULL; continue; } if ((next = current->next_sibling) == NULL) { // Next node is the parent, which we'll free as needed... if ((next = current->parent) == node) next = NULL; } // Free child... mmd_free(current); } // Then free the memory used by the parent node... mmd_free(node); } // // 'mmdGetExtra()' - Get extra text (title, language, etc.) associated with a // node. // const char * // O - Extra text or NULL if none mmdGetExtra(mmd_t *node) // I - Node { return (node ? node->extra : NULL); } // // 'mmdGetFirstChild()' - Return the first child of a node, if any. // mmd_t * // O - First child or @code NULL@ if none mmdGetFirstChild(mmd_t *node) // I - Node { return (node ? node->first_child : NULL); } // // 'mmdGetLastChild()' - Return the last child of a node, if any. // mmd_t * // O - Last child or @code NULL@ if none mmdGetLastChild(mmd_t *node) // I - Node { return (node ? node->last_child : NULL); } // // 'mmdGetMetadata()' - Return the metadata for the given keyword. // const char * // O - Value or @code NULL@ if none mmdGetMetadata(mmd_t *doc, // I - Document const char *keyword) // I - Keyword { mmd_t *metadata, // Metadata node *current; // Current node char prefix[256]; // Prefix string size_t prefix_len; // Length of prefix string const char *value; // Pointer to value if (!doc || (metadata = doc->first_child) == NULL || metadata->type != MMD_TYPE_METADATA) return (NULL); snprintf(prefix, sizeof(prefix), "%s:", keyword); prefix_len = strlen(prefix); for (current = metadata->first_child; current; current = current->next_sibling) { if (strncmp(current->text, prefix, prefix_len)) continue; value = current->text + prefix_len; while (isspace(*value & 255)) value ++; return (value); } return (NULL); } // // 'mmdGetNextSibling()' - Return the next sibling of a node, if any. // mmd_t * // O - Next sibling or @code NULL@ if none mmdGetNextSibling(mmd_t *node) // I - Node { return (node ? node->next_sibling : NULL); } // // 'mmdGetOptions()' - Get the enabled markdown processing options/extensions. // mmd_option_t // O - Enabled options mmdGetOptions(void) { return (mmd_options); } // // 'mmdGetParent()' - Return the parent of a node, if any. // mmd_t * // O - Parent node or @code NULL@ if none mmdGetParent(mmd_t *node) // I - Node { return (node ? node->parent : NULL); } // // 'mmdGetPrevSibling()' - Return the previous sibling of a node, if any. // mmd_t * // O - Previous sibling or @code NULL@ if none mmdGetPrevSibling(mmd_t *node) // I - Node { return (node ? node->prev_sibling : NULL); } // // 'mmdGetText()' - Return the text associated with a node, if any. // const char * // O - Text or @code NULL@ if none mmdGetText(mmd_t *node) // I - Node { return (node ? node->text : NULL); } // // 'mmdGetType()' - Return the type of a node, if any. // mmd_type_t // O - Type or @code MMD_TYPE_NONE@ if none mmdGetType(mmd_t *node) // I - Node { return (node ? node->type : MMD_TYPE_NONE); } // // 'mmdGetURL()' - Return the URL associated with a node, if any. // const char * // O - URL or @code NULL@ if none mmdGetURL(mmd_t *node) // I - Node { return (node ? node->url : NULL); } // // 'mmdGetWhitespace()' - Return whether whitespace preceded a node. // bool // O - `true` for leading whitespace, `false` for none mmdGetWhitespace(mmd_t *node) // I - Node { return (node ? node->whitespace : false); } // // 'mmdIsBlock()' - Return whether the node is a block. // bool // O - `true` for block nodes, `false` otherwise mmdIsBlock(mmd_t *node) // I - Node { return (node ? node->type < MMD_TYPE_NORMAL_TEXT : false); } // // 'mmdLoad()' - Load a markdown file into nodes. // mmd_t * // O - Root node in markdown mmdLoad(mmd_t *root, // I - Root node for document or `NULL` for a new document const char *filename) // I - File to load { FILE *fp; // File // Open the file and load the document... if ((fp = fopen(filename, "r")) == NULL) return (NULL); root = mmdLoadIO(root, (mmd_iocb_t)mmd_iocb_file, fp); // Close and return... fclose(fp); return (root); } // // 'mmdLoadFile()' - Load a markdown file into nodes from a stdio file. // mmd_t * // O - First node in markdown mmdLoadFile(mmd_t *root, // I - Root node for document or `NULL` for a new document FILE *fp) // I - File to load { return (mmdLoadIO(root, (mmd_iocb_t)mmd_iocb_file, fp)); } // // 'mmdLoadIO()' - Load a markdown file into nodes using a callback. // mmd_t * // O - First node in markdown mmdLoadIO(mmd_t *root, // I - Root node for document or `NULL` for a new document mmd_iocb_t cb, // I - Read callback function void *cbdata) // I - Read callback data { size_t i; // Looping var _mmd_doc_t doc; // Document _mmd_ref_t *reference; // Current reference mmd_t *block = NULL; // Current block mmd_type_t type; // Type for line _mmd_filebuf_t file; // File buffer char line[8192], // Read line *linestart, // Start of line *lineptr, // Pointer into line *lineend, // End of line *temp; // Temporary pointer int newindent; // New indentation int blank_code = 0; // Saved indented blank code line mmd_type_t columns[256]; // Alignment of table columns int num_columns = 0, // Number of columns in table rows = 0; // Number of rows in table _mmd_stack_t stack[32], // Block stack *stackptr = stack; // Pointer to top of stack // Create an empty document as needed... DEBUG_printf("mmdLoadIO: mmd_options=%d%s%s\n", mmd_options, (mmd_options & MMD_OPTION_METADATA) ? " METADATA" : "", (mmd_options & MMD_OPTION_TABLES) ? " TABLES" : ""); memset(&doc, 0, sizeof(doc)); if (root) doc.root = root; else doc.root = mmd_add(NULL, MMD_TYPE_DOCUMENT, 0, NULL, NULL); if (!doc.root) return (NULL); // Initialize the block stack... memset(stack, 0, sizeof(stack)); stackptr->parent = doc.root; // Read lines until end-of-file... memset(&file, 0, sizeof(file)); file.cb = cb; file.cbdata = cbdata; #ifdef __clang_analyzer__ memset(line, 0, sizeof(line)); #endif // __clang_analyzer__ while ((lineptr = mmd_read_line(&file, line, sizeof(line))) != NULL) { DEBUG_printf("%03d %-12s %s", stackptr->indent, mmd_type_string(stackptr->parent->type) + 9, lineptr); #if DEBUG if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK) DEBUG2_printf(" blank_code=%d\n", blank_code); #endif // DEBUG linestart = lineptr; while (isspace(*lineptr & 255)) lineptr ++; DEBUG2_printf(" line indent=%d\n", (int)(lineptr - line)); DEBUG2_printf(" stackptr=%d\n", (int)(stackptr - stack)); if (!*lineptr && stackptr->parent->type == MMD_TYPE_TABLE) { DEBUG2_puts("END TABLE\n"); stackptr --; block = NULL; continue; } else if (*lineptr == '>' && (lineptr - linestart) < 4) { // Block quote. See if there is an existing blockquote... DEBUG_printf(" BLOCKQUOTE (stackptr=%ld)\n", stackptr - stack); if (stackptr == stack || stack[1].parent->type != MMD_TYPE_BLOCK_QUOTE) { block = NULL; stackptr = stack + 1; stackptr->parent = mmd_add(doc.root, MMD_TYPE_BLOCK_QUOTE, 0, NULL, NULL); stackptr->indent = 2; stackptr->fence = '\0'; } // Skip whitespace after the ">"... lineptr ++; if (isspace(*lineptr & 255)) lineptr ++; linestart = lineptr; while (isspace(*lineptr & 255)) lineptr ++; } else if (*lineptr != '>' && stackptr > stack && stack[1].parent->type == MMD_TYPE_BLOCK_QUOTE && (!block || *lineptr == '\n' || mmd_is_chars(lineptr, "- \t", 3) || mmd_is_chars(lineptr, "_ \t", 3) || mmd_is_chars(lineptr, "* \t", 3))) { // Not a lazy continuation so terminate this block quote... DEBUG_puts(" Terminating BLOCKQUOTE\n"); block = NULL; stackptr = stack; } // Now handle all other markup not related to block quotes... DEBUG2_printf(" stackptr=%d (%s), block=%p (%s)\n", (int)(stackptr - stack), mmd_type_string(stackptr->parent->type) + 9, block, block ? mmd_type_string(block->type) + 9 : ""); DEBUG2_printf(" strchr(lineptr, '|')=%p, mmd_is_table(&file, stackptr->indent)=%d\n", strchr(lineptr, '|'), mmd_is_table(&file, stackptr->indent)); DEBUG2_printf(" linestart=%d, lineptr=%d\n", (int)(linestart - line), (int)(lineptr - line)); DEBUG2_printf(" mmd_is_chars(lineptr, \"-\", 1)=%d\n", (int)mmd_is_chars(lineptr, "-", 1)); DEBUG2_printf(" mmd_is_chars(lineptr, \"=\", 1)=%d\n", (int)mmd_is_chars(lineptr, "=", 1)); if ((lineptr - line - stackptr->indent) < 4 && ((stackptr->parent->type != MMD_TYPE_CODE_BLOCK && !stackptr->fence && mmd_is_codefence(lineptr, '\0', 0, NULL)) || (stackptr->fence && mmd_is_codefence(lineptr, stackptr->fence, stackptr->fencelen, NULL)))) { // Code fence... DEBUG2_printf("stackptr->indent=%d, fence='%c', fencelen=%d\n", stackptr->indent, stackptr->fence, (int)stackptr->fencelen); if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK) { DEBUG2_puts("Ending code block...\n"); stackptr --; } else if (stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) { char *language; // Language name, if any DEBUG2_printf("Starting code block with fence '%c'.\n", *lineptr); block = NULL; stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_CODE_BLOCK, 0, NULL, NULL); stackptr[1].indent = lineptr - line; stackptr[1].fence = *lineptr; stackptr[1].fencelen = mmd_is_codefence(lineptr, '\0', 0, &language); stackptr ++; DEBUG2_printf("Code language=\"%s\"\n", language); if (language) stackptr->parent->extra = strdup(language); blank_code = 0; } continue; } else if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK && (lineptr - line) >= stackptr->indent) { if (line[stackptr->indent] == '\n') { blank_code ++; } else { while (blank_code > 0) { mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, "\n", NULL); blank_code --; } mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, line + stackptr->indent, NULL); } continue; } else if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK && stackptr->fence) { DEBUG2_printf(" fence='%c'\n", stackptr->fence); if (!*lineptr) { blank_code ++; } else { while (blank_code > 0) { mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, "\n", NULL); blank_code --; } mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, lineptr, NULL); } continue; } else if (!strncmp(lineptr, "---", 3) && doc.root->first_child == NULL && (mmd_options & MMD_OPTION_METADATA)) { // Document metadata... block = mmd_add(doc.root, MMD_TYPE_METADATA, 0, NULL, NULL); while ((lineptr = mmd_read_line(&file, line, sizeof(line))) != NULL) { while (isspace(*lineptr & 255)) lineptr ++; if (!strncmp(lineptr, "---", 3) || !strncmp(lineptr, "...", 3)) break; lineend = lineptr + strlen(lineptr) - 1; if (lineend > lineptr && *lineend == '\n') *lineend = '\0'; mmd_add(block, MMD_TYPE_METADATA_TEXT, 0, lineptr, NULL); } continue; } else if (block && block->type == MMD_TYPE_PARAGRAPH && (lineptr - linestart) < 4 && (lineptr - line) >= stackptr->indent && (mmd_is_chars(lineptr, "-", 1) || mmd_is_chars(lineptr, "=", 1))) { int ch = *lineptr; DEBUG_puts(" SETEXT HEADING\n"); lineptr += 3; while (*lineptr == ch) lineptr ++; while (isspace(*lineptr & 255)) lineptr ++; if (!*lineptr) { if (ch == '=') block->type = MMD_TYPE_HEADING_1; else block->type = MMD_TYPE_HEADING_2; block = NULL; continue; } type = MMD_TYPE_PARAGRAPH; } else if ((lineptr - linestart) < 4 && (mmd_is_chars(lineptr, "- \t", 3) || mmd_is_chars(lineptr, "_ \t", 3) || mmd_is_chars(lineptr, "* \t", 3))) { DEBUG_puts(" THEMATIC BREAK\n"); if (line[0] == '>') stackptr = stack + 1; else stackptr = stack; mmd_add(stackptr->parent, MMD_TYPE_THEMATIC_BREAK, 0, NULL, NULL); // type = MMD_TYPE_PARAGRAPH; block = NULL; continue; } else if ((*lineptr == '-' || *lineptr == '+' || *lineptr == '*') && (lineptr[1] == '\t' || lineptr[1] == ' ')) { // Bulleted list... DEBUG_puts(" UNORDERED LIST\n"); lineptr += 2; linestart = lineptr; newindent = linestart - line; while (isspace(*lineptr & 255)) lineptr ++; while (stackptr > stack && stackptr->indent > newindent) stackptr --; if (stackptr > stack && stackptr->parent->type == MMD_TYPE_LIST_ITEM && stackptr->indent == newindent) stackptr --; if (stackptr > stack && stackptr->parent->type == MMD_TYPE_ORDERED_LIST && stackptr->indent == newindent) stackptr --; if (stackptr > stack && stackptr->parent->type == MMD_TYPE_BLOCK_QUOTE && line[0] != '>') stackptr --; if (stackptr->parent->type != MMD_TYPE_UNORDERED_LIST && stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) { stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_UNORDERED_LIST, 0, NULL, NULL); stackptr[1].indent = linestart - line; stackptr[1].fence = '\0'; stackptr ++; } if (stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) { stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_LIST_ITEM, 0, NULL, NULL); stackptr[1].indent = linestart - line; stackptr[1].fence = '\0'; stackptr ++; } type = MMD_TYPE_PARAGRAPH; block = NULL; if (mmd_is_chars(lineptr, "- \t", 3) || mmd_is_chars(lineptr, "_ \t", 3) || mmd_is_chars(lineptr, "* \t", 3)) { mmd_add(stackptr->parent, MMD_TYPE_THEMATIC_BREAK, 0, NULL, NULL); continue; } } else if (isdigit(*lineptr & 255)) { // Ordered list? DEBUG_puts(" ORDERED LIST?\n"); temp = lineptr + 1; while (isdigit(*temp & 255)) temp ++; if ((*temp == '.' || *temp == ')') && (temp[1] == '\t' || temp[1] == ' ')) { // Yes, ordered list. lineptr = temp + 2; linestart = lineptr; newindent = linestart - line; while (isspace(*lineptr & 255)) lineptr ++; while (stackptr > stack && stackptr->indent > newindent) stackptr --; if (stackptr->parent->type == MMD_TYPE_LIST_ITEM && stackptr->indent == newindent) stackptr --; if (stackptr->parent->type == MMD_TYPE_UNORDERED_LIST && stackptr->indent == newindent) stackptr --; if (stackptr->parent->type == MMD_TYPE_BLOCK_QUOTE && line[0] != '>') stackptr --; if (stackptr->parent->type != MMD_TYPE_ORDERED_LIST && stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) { stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_ORDERED_LIST, 0, NULL, NULL); stackptr[1].indent = linestart - line; stackptr[1].fence = '\0'; stackptr ++; } if (stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) { stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_LIST_ITEM, 0, NULL, NULL); stackptr[1].indent = linestart - line; stackptr[1].fence = '\0'; stackptr ++; } type = MMD_TYPE_PARAGRAPH; block = NULL; } else { // No, just a regular paragraph... type = block ? block->type : MMD_TYPE_PARAGRAPH; } } else if (*lineptr == '#' && (lineptr - linestart) < 4) { // Heading, count the number of '#' for the heading level... DEBUG_puts(" HEADING?\n"); newindent = lineptr - line; temp = lineptr + 1; while (*temp == '#') temp ++; if ((temp - lineptr) <= 6 && isspace(*temp & 255)) { // Heading 1-6... type = MMD_TYPE_HEADING_1 + (temp - lineptr - 1); block = NULL; // Skip whitespace after "#"... lineptr = temp; while (isspace(*lineptr & 255)) lineptr ++; linestart = lineptr; // Strip trailing "#" characters and whitespace... temp = lineptr + strlen(lineptr) - 1; while (temp > lineptr && isspace(*temp & 255)) *temp-- = '\0'; while (temp > lineptr && *temp == '#') temp --; if (isspace(*temp & 255)) { while (temp > lineptr && isspace(*temp & 255)) *temp-- = '\0'; } else if (temp == lineptr) *temp = '\0'; while (stackptr > stack && stackptr->indent > newindent) stackptr --; block = mmd_add(stackptr->parent, type, 0, NULL, NULL); } else { // More than 6 #'s, just treat as a paragraph... type = MMD_TYPE_PARAGRAPH; } } else if (block && block->type >= MMD_TYPE_HEADING_1 && block->type <= MMD_TYPE_HEADING_6) { DEBUG_puts(" PARAGRAPH\n"); type = MMD_TYPE_PARAGRAPH; block = NULL; } else if (!block) { type = MMD_TYPE_PARAGRAPH; if (lineptr == line && stackptr->parent->type != MMD_TYPE_TABLE) stackptr = stack; } else type = block->type; if (!*lineptr) { if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK) blank_code ++; else if (stackptr->parent->type == MMD_TYPE_BLOCK_QUOTE && line[0] != '>') stackptr --; block = NULL; continue; } else if (!strcmp(lineptr, "+")) { if (block) { if (block->type == MMD_TYPE_LIST_ITEM) block = mmd_add(block, MMD_TYPE_PARAGRAPH, 0, NULL, NULL); else if (block->parent->type == MMD_TYPE_LIST_ITEM) block = mmd_add(block->parent, MMD_TYPE_PARAGRAPH, 0, NULL, NULL); else block = NULL; } continue; } else if ((mmd_options & MMD_OPTION_TABLES) && strchr(lineptr, '|') && (stackptr->parent->type == MMD_TYPE_TABLE || mmd_is_table(&file, stackptr->indent))) { // Table... int col; // Current column char *start, // Start of column/cell *end; // End of column/cell mmd_t *row = NULL, // Current row *cell; // Current cell DEBUG2_printf("TABLE stackptr->parent=%p (%d), rows=%d\n", stackptr->parent, stackptr->parent->type, rows); if (stackptr->parent->type != MMD_TYPE_TABLE && stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) { DEBUG2_printf("ADDING NEW TABLE to %p (%s)\n", stackptr->parent, mmd_type_string(stackptr->parent->type)); stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_TABLE, 0, NULL, NULL); stackptr[1].indent = stackptr->indent; stackptr[1].fence = '\0'; stackptr ++; block = mmd_add(stackptr->parent, MMD_TYPE_TABLE_HEADER, 0, NULL, NULL); for (col = 0; col < (int)(sizeof(columns) / sizeof(columns[0])); col ++) columns[col] = MMD_TYPE_TABLE_BODY_CELL_LEFT; num_columns = 0; rows = -1; } else if (rows > 0) { if (rows == 1) block = mmd_add(stackptr->parent, MMD_TYPE_TABLE_BODY, 0, NULL, NULL); } else block = NULL; if (block) row = mmd_add(block, MMD_TYPE_TABLE_ROW, 0, NULL, NULL); if (*lineptr == '|') lineptr ++; // Skip leading pipe if ((end = lineptr + strlen(lineptr) - 1) > lineptr) { while ((*end == '\n' || *end == 'r') && end > lineptr) end --; if (end > lineptr && *end == '|') *end = '\0'; // Truncate trailing pipe } for (col = 0; lineptr && *lineptr && col < (int)(sizeof(columns) / sizeof(columns[0])); col ++) { // Get the bounds of the stackptr->parent cell... start = lineptr; if ((lineptr = strchr(lineptr + 1, '|')) != NULL) *lineptr++ = '\0'; if (block) { // Add a cell to this row... if (block->type == MMD_TYPE_TABLE_HEADER) cell = mmd_add(row, MMD_TYPE_TABLE_HEADER_CELL, 0, NULL, NULL); else cell = mmd_add(row, columns[col], 0, NULL, NULL); mmd_parse_inline(&doc, cell, start); } else { // Process separator row for alignment... while (isspace(*start & 255)) start ++; for (end = start + strlen(start) - 1; end > start && isspace(*end & 255); end --) ; // Find the last non-space character if (*start == ':' && *end == ':') columns[col] = MMD_TYPE_TABLE_BODY_CELL_CENTER; else if (*end == ':') columns[col] = MMD_TYPE_TABLE_BODY_CELL_RIGHT; DEBUG2_printf("COLUMN %d SEPARATOR=\"%s\", TYPE=%d\n", col, start, columns[col]); } } // Make sure the table is balanced... if (col > num_columns) { num_columns = col; } else if (block && block->type != MMD_TYPE_TABLE_HEADER) { while (col < num_columns) { mmd_add(row, columns[col], 0, NULL, NULL); col ++; } } rows ++; continue; } else if (stackptr->parent->type == MMD_TYPE_TABLE) { DEBUG2_puts("END TABLE\n"); stackptr --; block = NULL; } if (stackptr->parent->type != MMD_TYPE_CODE_BLOCK && (!block || block->type == MMD_TYPE_CODE_BLOCK) && (lineptr - linestart) >= (stackptr->indent + 4)) { // Indented code block. if (stackptr->parent->type != MMD_TYPE_CODE_BLOCK && stackptr < (stack + sizeof(stack) / sizeof(stack[0]) - 1)) { stackptr[1].parent = mmd_add(stackptr->parent, MMD_TYPE_CODE_BLOCK, 0, NULL, NULL); stackptr[1].indent = stackptr->indent + 4; stackptr[1].fence = '\0'; stackptr ++; blank_code = 0; } while (blank_code > 0) { mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, "\n", NULL); blank_code --; } mmd_add(stackptr->parent, MMD_TYPE_CODE_TEXT, 0, line + stackptr->indent, NULL); continue; } if (!block || block->type != type) { if (stackptr->parent->type == MMD_TYPE_CODE_BLOCK) stackptr --; block = mmd_add(stackptr->parent, type, 0, NULL, NULL); } // Read continuation lines before parsing this... while (mmd_has_continuation(line, &file, stackptr->indent)) { char *ptr = line + strlen(line); if (!mmd_read_line(&file, ptr, sizeof(line) - (size_t)(ptr - line))) break; else if (line[0] == '>' && *ptr == '>') memmove(ptr, ptr + 1, strlen(ptr)); } mmd_parse_inline(&doc, block, lineptr); if (block->type == MMD_TYPE_PARAGRAPH && !block->first_child) { mmd_remove(block); mmd_free(block); block = NULL; } } // Free any references... for (i = doc.num_references, reference = doc.references; i > 0; i --, reference ++) { if (reference->pending) { char text[8192]; // Reference text size_t j; // Looping var DEBUG2_printf("Clearing links for '%s'.\n", reference->name); snprintf(text, sizeof(text), "[%s]", reference->name); for (j = 0; j < reference->num_pending; j ++) { free(reference->pending[j]->text); reference->pending[j]->text = strdup(text); reference->pending[j]->type = MMD_TYPE_NORMAL_TEXT; } free(reference->pending); } free(reference->name); free(reference->url); } free(doc.references); // Return the root node... return (doc.root); } // // 'mmdLoadString()' - Load a markdown string into nodes. // mmd_t * // O - Root node in markdown mmdLoadString(mmd_t *root, // I - Root node for document or `NULL` for a new document const char *s) // I - String to load { return (mmdLoadIO(root, (mmd_iocb_t)mmd_iocb_string, &s)); } // // 'mmdSetOptions()' - Set (enable/disable) support for various markdown options. // void mmdSetOptions(mmd_option_t options) // I - Options { mmd_options = options; } // // 'mmd_add()' - Add a new markdown node. // static mmd_t * // O - New node mmd_add(mmd_t *parent, // I - Parent node mmd_type_t type, // I - Node type int whitespace, // I - 1 if whitespace precedes this node char *text, // I - Text, if any char *url) // I - URL, if any { mmd_t *temp; // New node DEBUG2_printf("Adding %s to %p(%s), whitespace=%d, text=\"%s\", url=\"%s\"\n", mmd_type_string(type), parent, parent ? mmd_type_string(parent->type) : "", whitespace, text ? text : "(null)", url ? url : "(null)"); if (!parent && type != MMD_TYPE_DOCUMENT) return (NULL); // Only document nodes can be at the root if ((temp = calloc(1, sizeof(mmd_t))) != NULL) { if (parent) { // Add node to the parent... temp->parent = parent; if (parent->last_child) { parent->last_child->next_sibling = temp; temp->prev_sibling = parent->last_child; parent->last_child = temp; } else { parent->first_child = parent->last_child = temp; } } // Copy the node values... temp->type = type; temp->whitespace = whitespace; if (text) temp->text = strdup(text); if (url) temp->url = strdup(url); } return (temp); } // // 'mmd_free()' - Free memory used by a node. // static void mmd_free(mmd_t *node) // I - Node { free(node->text); free(node->url); free(node->extra); free(node); } // // 'mmd_has_continuation()' - Determine whether the next line is a continuation // of the current one. // static int // O - 1 if the next line continues, 0 otherwise mmd_has_continuation( const char *line, // I - Current line _mmd_filebuf_t *file, // I - File buffer int indent) // I - Indentation for current block { const char *lineptr = line; // Pointer into current line const char *fileptr = file->bufptr;// Pointer into next line if (*fileptr == '\n' || *fileptr == '\r') return (0); do { while (isspace(*lineptr & 255)) lineptr ++; if (*lineptr == '[' && (lineptr - line - indent) < 4 && (*fileptr == ' ' || *fileptr == '\t')) return (1); while (isspace(*fileptr & 255)) fileptr ++; if (*lineptr == '>' && *fileptr == '>') { lineptr ++; fileptr ++; } else if (*fileptr == '>') return (0); if (*fileptr == '\n' || *fileptr == '\r') return (0); } while (isspace(*lineptr & 255) || isspace(*fileptr & 255)); if (*lineptr == '#') return (0); if (strchr("-+*", *fileptr) && isspace(fileptr[1] & 255)) { // Bullet list item... return (0); } if (isdigit(*fileptr & 255)) { // Ordered list item... while (*fileptr && isdigit(*fileptr & 255)) fileptr ++; if (*fileptr == '.' || *fileptr == '(') return (0); } if (mmd_is_codefence((char *)fileptr, '\0', 0, NULL)) return (0); if (mmd_is_chars(fileptr, "- \t", 3) || mmd_is_chars(fileptr, "_ \t", 3) || mmd_is_chars(fileptr, "* \t", 3)) { // Thematic break... return (0); } if (mmd_is_chars(fileptr, "-", 1) || mmd_is_chars(fileptr, "=", 1)) { // Heading... return (0); } if (*fileptr == '#') { // Possible heading... int count = 0; while (*fileptr == '#') { fileptr ++; count ++; } if (count <= 6) return (0); } return ((fileptr - file->bufptr) <= indent); } // // 'mmd_iocb_file()' - Read from a file. // static size_t // O - Number of bytes read mmd_iocb_file(FILE *fp, // I - File pointer char *buffer, // I - Buffer size_t bytes) // I - Number of bytes to read { return (fread(buffer, 1, bytes, fp)); } // // 'mmd_iocb_string()' - Read from a string. // static size_t // O - Number of bytes read mmd_iocb_string(const char **s, // I - Pointer into string char *buffer, // I - Buffer size_t bytes) // I - Number of bytes to read { size_t ret; // Bytes read/returned // See how many bytes remain in the string... if ((ret = strlen(*s)) > bytes) ret = bytes; if (ret > 0) { // Copy bytes from the string... memcpy(buffer, *s, ret); (*s) += ret; } return (ret); } // // 'mmd_is_chars()' - Determine whether a line consists solely of whitespace // and the specified character. // static size_t // O - 1 if as specified, 0 otherwise mmd_is_chars(const char *lineptr, // I - Current line const char *chars, // I - Non-space character size_t minchars) // I - Minimum number of non-space characters { size_t found_ch = 0; // Did we find the specified characters? while (*lineptr == *chars) { found_ch ++; lineptr ++; } if (minchars > 1) { while (*lineptr && strchr(chars, *lineptr)) { if (*lineptr == *chars) found_ch ++; lineptr ++; } } while (*lineptr && isspace(*lineptr & 255) && *lineptr != '\n') lineptr ++; if ((*lineptr && *lineptr != '\n') || found_ch < minchars) return (0); else return (found_ch); } // // 'mmd_is_codefence()' - Determine whether the line contains a code fence. // static size_t // O - Length of fence or 0 otherwise mmd_is_codefence(char *lineptr, // I - Line char fence, // I - Current fence character, if any size_t fencelen, // I - Current fence length char **language) // O - Language name, if any { char match = fence; // Character to match size_t len = 0; // Length of fence chars if (language) *language = NULL; if (!match) { if (*lineptr == '~' || *lineptr == '`') match = *lineptr; else return (0); } while (*lineptr == match) { lineptr ++; len ++; } if (len < 3 || (fencelen && len < fencelen)) return (0); if (*lineptr && *lineptr != '\n' && fence) return (0); else if (*lineptr && *lineptr != '\n' && !fence) { if (match == '`' && strchr(lineptr, match)) return (0); while (isspace(*lineptr & 255)) lineptr ++; if (*lineptr && language) { *language = lineptr; while (*lineptr && !isspace(*lineptr & 255)) { if (*lineptr == '\\' && lineptr[1]) { // Remove "\" memmove(lineptr, lineptr + 1, strlen(lineptr)); } lineptr ++; } *lineptr = '\0'; } } return (len); } // // 'mmd_is_table()' - Look ahead to see if the next line contains a heading // divider for a table. // static bool // O - `true` if this is a table, `false` otherwise mmd_is_table(_mmd_filebuf_t *file, // I - File to read from int indent) // I - Indentation of table line { const char *ptr; // Pointer into buffer ptr = file->bufptr; while (*ptr) { if (!strchr(" \t>", *ptr)) break; ptr ++; } if ((ptr - file->bufptr - indent) >= 4) return (false); while (*ptr) { if (!strchr(" \t:-|", *ptr)) break; ptr ++; } return (*ptr == '\r' || *ptr == '\n'); } // // 'mmd_parse_inline()' - Parse inline formatting. // static void mmd_parse_inline(_mmd_doc_t *doc, // I - Document mmd_t *parent, // I - Parent node char *lineptr) // I - Pointer into line { mmd_t *node; // New node mmd_type_t type; // Current node type int whitespace; // Whitespace precedes? char *text, // Text fragment in line *title, // Link title *url, // URL in link *refname; // Reference name const char *delim = NULL; // Delimiter size_t delimlen = 0; // Length of delimiter whitespace = parent->last_child != NULL; for (text = NULL, type = MMD_TYPE_NORMAL_TEXT; *lineptr; lineptr ++) { DEBUG2_printf("mmd_parse_inline: lineptr=%p(\"%32.32s...\"), type=%d, text=%p, whitespace=%d\n", lineptr, lineptr, type, text, whitespace); if (isspace(*lineptr & 255) && type != MMD_TYPE_CODE_TEXT) { if (text) { *lineptr = '\0'; mmd_add(parent, type, whitespace, text, NULL); text = NULL; } if (!strncmp(lineptr + 1, " \n", 2) && lineptr[3]) { DEBUG2_printf("mmd_parse_inline: Adding hard break to %p(%d)\n", parent, parent->type); mmd_add(parent, MMD_TYPE_HARD_BREAK, 0, NULL, NULL); lineptr += 2; whitespace = 0; } else { whitespace = 1; } } else if (*lineptr == '!' && lineptr[1] == '[' && type != MMD_TYPE_CODE_TEXT) { // Image... if (text) { mmd_add(parent, type, whitespace, text, NULL); text = NULL; whitespace = 0; } lineptr = mmd_parse_link(doc, lineptr + 1, &text, &url, NULL, &refname); if (url || refname) { node = mmd_add(parent, MMD_TYPE_IMAGE, whitespace, text, url); if (refname) mmd_ref_add(doc, node, refname, NULL, NULL); } if (!*lineptr) return; text = url = NULL; whitespace = 0; lineptr --; } else if (*lineptr == '[' && type != MMD_TYPE_CODE_TEXT) { // Link or checkbox... if (text) { *lineptr = '\0'; mmd_add(parent, type, whitespace, text, NULL); *lineptr = '['; text = NULL; whitespace = 0; } if ((mmd_options & MMD_OPTION_TASKS) && (!strncmp(lineptr, "[ ]", 3) || !strncmp(lineptr, "[x]", 3) || !strncmp(lineptr, "[X]", 3))) { // Checkbox mmd_add(parent, MMD_TYPE_CHECKBOX, 0, lineptr[1] == ' ' ? NULL : "x", NULL); lineptr += 2; } else { // Link lineptr = mmd_parse_link(doc, lineptr, &text, &url, &title, &refname); if (text) { char *end = text + strlen(text) - 1; // End of text fragment if (*text == '`' && *end == '`' && end != text) { // Code text text ++; if (end > text && *end == '`') *end = '\0'; node = mmd_add(parent, MMD_TYPE_CODE_TEXT, whitespace, text, url); } else if (*text == '*' && *end == '*' && end > text) { // Emphasized or strong text text ++; if (*text == '*' && (end - 1) > text && end[-1] == '*') { text ++; end[-1] = '\0'; node = mmd_add(parent, MMD_TYPE_STRONG_TEXT, whitespace, text, url); } else { *end = '\0'; node = mmd_add(parent, MMD_TYPE_EMPHASIZED_TEXT, whitespace, text, url); } } else if (type == MMD_TYPE_NORMAL_TEXT) { // Plain linked text... node = mmd_add(parent, MMD_TYPE_LINKED_TEXT, whitespace, text, url); } else { // Preserve style of linked text... node = mmd_add(parent, type, whitespace, text, url); } if (title) node->extra = strdup(title); } else { // No text, no node... node = NULL; } DEBUG2_printf("mmd_parse_inline: text=\"%s\", refname=\"%s\", node=%p\n", text, refname, node); if (refname && node) mmd_ref_add(doc, node, refname, NULL, title); if (!*lineptr) return; text = url = NULL; whitespace = 0; lineptr --; } } else if (*lineptr == '<' && type != MMD_TYPE_CODE_TEXT && strchr(lineptr + 1, '>')) { // Autolink... *lineptr++ = '\0'; if (text) { mmd_add(parent, type, whitespace, text, NULL); text = NULL; whitespace = 0; } url = lineptr; lineptr = strchr(lineptr, '>'); *lineptr = '\0'; mmd_add(parent, MMD_TYPE_LINKED_TEXT, whitespace, url, url); text = url = NULL; whitespace = 0; } else if ((*lineptr == '*' || *lineptr == '_') && (!text || ispunct(lineptr[-1] & 255) || type != MMD_TYPE_NORMAL_TEXT) && type != MMD_TYPE_CODE_TEXT) { const char *end; // End delimiter if (type != MMD_TYPE_NORMAL_TEXT || !delim) { if (!strncmp(lineptr, "**", 2)) delim = "**"; else if (!strncmp(lineptr, "__", 2)) delim = "__"; else if (*lineptr == '*') delim = "*"; else delim = "_"; delimlen = strlen(delim); } if (type == MMD_TYPE_NORMAL_TEXT && delim && ((end = strstr(lineptr + delimlen, delim)) == NULL || end == (lineptr + delimlen) || isspace(end[-1] & 255))) { if (!text) text = lineptr; delim = NULL; delimlen = 0; continue; } if (text) { char save = *lineptr; *lineptr = '\0'; mmd_add(parent, type, whitespace, text, NULL); *lineptr = save; text = NULL; whitespace = 0; } if (type == MMD_TYPE_NORMAL_TEXT) { if (!strncmp(lineptr, delim, delimlen) && !isspace(lineptr[delimlen] & 255)) { type = delimlen == 2 ? MMD_TYPE_STRONG_TEXT : MMD_TYPE_EMPHASIZED_TEXT; text = lineptr + delimlen; lineptr += delimlen - 1; } else { text = lineptr; } } else if (!strncmp(lineptr, delim, delimlen)) { lineptr += delimlen - 1; type = MMD_TYPE_NORMAL_TEXT; delim = NULL; delimlen = 0; } } else if (lineptr[0] == '~' && lineptr[1] == '~' && type != MMD_TYPE_CODE_TEXT) { if (text) { *lineptr = '\0'; mmd_add(parent, type, whitespace, text, NULL); *lineptr = '~'; text = NULL; whitespace = 0; } if (!isspace(lineptr[2] & 255) && type == MMD_TYPE_NORMAL_TEXT) { type = MMD_TYPE_STRUCK_TEXT; text = lineptr + 2; } else { lineptr ++; type = MMD_TYPE_NORMAL_TEXT; } } else if (*lineptr == '`' && (type != MMD_TYPE_CODE_TEXT || lineptr[-1] != '\\')) { if (type != MMD_TYPE_NORMAL_TEXT || !delim) { if (lineptr[1] == '`') { if (lineptr[2] == '`') { delim = "```"; delimlen = 3; } else { delim = "``"; delimlen = 2; } } else { delim = "`"; delimlen = 1; } } if (type != MMD_TYPE_CODE_TEXT && delim && !strstr(lineptr + delimlen, delim)) { if (!text) text = lineptr; delim = NULL; delimlen = 0; continue; } if (text) { DEBUG2_printf("mmd_parse_inline: text=\"%s\"\n", text); if (!strncmp(lineptr, delim, delimlen)) { char *textptr = lineptr; while (textptr > text && isspace(textptr[-1] & 255)) textptr --; *textptr = '\0'; lineptr += delimlen - 1; } if (type == MMD_TYPE_CODE_TEXT) { if (whitespace && !*text) { mmd_add(parent, type, 0, " ", NULL); whitespace = 0; } } mmd_add(parent, type, whitespace, text, NULL); text = NULL; whitespace = 0; } if (type == MMD_TYPE_CODE_TEXT) { DEBUG2_puts("mmd_parse_inline: Reverting to normal text.\n"); type = MMD_TYPE_NORMAL_TEXT; delim = NULL; delimlen = 0; } else { type = MMD_TYPE_CODE_TEXT; lineptr += delimlen - 1; while (isspace(lineptr[1] & 255)) lineptr ++; text = lineptr + 1; } } else if (!strncmp(lineptr, "\\\n", 2)) { // Hard break *lineptr++ = '\0'; if (text) { mmd_add(parent, type, whitespace, text, NULL); text = NULL; whitespace = false; } mmd_add(parent, MMD_TYPE_HARD_BREAK, false, NULL, NULL); } else if (!text) { if (*lineptr == '\\' && ispunct(lineptr[1] & 255) && type != MMD_TYPE_CODE_TEXT) { // Escaped character... lineptr ++; } text = lineptr; } else if (*lineptr == '\\' && ispunct(lineptr[1] & 255) && type != MMD_TYPE_CODE_TEXT) { // Escaped character... memmove(lineptr, lineptr + 1, strlen(lineptr)); } } if (text) { DEBUG_puts("mms_parse_inline: Adding text at end.\n"); mmd_add(parent, type, whitespace, text, NULL); } } // // 'mmd_parse_link()' - Parse a link. // static char * // O - End of link text mmd_parse_link(_mmd_doc_t *doc, // I - Document char *lineptr, // I - Pointer into line char **text, // O - Text char **url, // O - URL char **title, // O - Title, if any char **refname) // O - Reference name { lineptr ++; // skip "[" *text = lineptr; *url = NULL; *refname = NULL; if (title) *title = NULL; while (*lineptr && *lineptr != ']') { if (*lineptr == '\"' || *lineptr == '\'') { char quote = *lineptr++; while (*lineptr && *lineptr != quote) lineptr ++; if (!*lineptr) return (lineptr); } lineptr ++; } if (!*lineptr) return (lineptr); *lineptr++ = '\0'; if (*lineptr == '(') { // Get URL... lineptr ++; *url = lineptr; while (*lineptr && *lineptr != ')') { if (isspace(*lineptr & 255)) { *lineptr = '\0'; } else if (*lineptr == '\\' && lineptr[1]) { if (lineptr[1] == ')') { // Remove "\" memmove(lineptr, lineptr + 1, strlen(lineptr)); } else { // Keep "\" lineptr ++; } } else if (*lineptr == '\"' || *lineptr == '\'') { char quote = *lineptr++; if (title) *title = lineptr; while (*lineptr && *lineptr != quote) { if (*lineptr == '\\' && lineptr[1]) { // Remove "\" memmove(lineptr, lineptr + 1, strlen(lineptr)); } lineptr ++; } if (!*lineptr) return (lineptr); else if (title) *lineptr = '\0'; } lineptr ++; } *lineptr++ = '\0'; } else if (*lineptr == '[') { // Get reference... lineptr ++; *refname = lineptr; while (*lineptr && *lineptr != ']') { if (isspace(*lineptr & 255)) { *lineptr = '\0'; } else if (*lineptr == '\\' && lineptr[1]) { if (lineptr[1] == ']') { // Remove "\" memmove(lineptr, lineptr + 1, strlen(lineptr)); } else { // Keep "\" lineptr ++; } } else if (*lineptr == '\"' || *lineptr == '\'') { char quote = *lineptr++; if (title) *title = lineptr; while (*lineptr && *lineptr != quote) lineptr ++; if (!*lineptr) return (lineptr); else *lineptr = '\0'; } lineptr ++; } *lineptr++ = '\0'; if (!**refname) *refname = *text; } else if (*lineptr == ':') { // Get reference definition... lineptr ++; while (*lineptr && isspace(*lineptr & 255)) lineptr ++; *url = lineptr; while (*lineptr && !isspace(*lineptr & 255)) { if (*lineptr == '\\' && lineptr[1]) { // Remove "\" memmove(lineptr, lineptr + 1, strlen(lineptr)); } lineptr ++; } if (*lineptr) { *lineptr++ = '\0'; while (*lineptr && isspace(*lineptr & 255)) lineptr ++; if (*lineptr == '\"' || *lineptr == '\'') { char quote = *lineptr++; if (title) *title = lineptr; while (*lineptr && *lineptr != quote) { if (*lineptr == '\\' && lineptr[1]) { // Remove "\" memmove(lineptr, lineptr + 1, strlen(lineptr)); } lineptr ++; } if (!*lineptr) return (lineptr); else *lineptr = '\0'; } } mmd_ref_add(doc, NULL, *text, *url, title ? *title : NULL); *text = NULL; *url = NULL; if (title) *title = NULL; } else { // Shortcut reference... *refname = *text; } return (lineptr); } // // 'mmd_read_buffer()' - Fill the file buffer with more data from a file. // static void mmd_read_buffer(_mmd_filebuf_t *file) // I - File buffer { size_t bytes; // Bytes read if (file->bufptr && file->bufptr > file->buffer) { // Discard previous characters in the buffer. memmove(file->buffer, file->bufptr, file->bufend - file->bufptr); file->bufend -= (file->bufptr - file->buffer); } else { // Otherwise just clear the buffer... file->bufend = file->buffer; } if ((bytes = (file->cb)(file->cbdata, file->bufend, sizeof(file->buffer) - (size_t)(file->bufend - file->buffer - 1))) > 0) file->bufend += bytes; *(file->bufend) = '\0'; file->bufptr = file->buffer; } // // 'mmd_read_line()' - Read a line from a file in a Markdown-aware way. // static char * // O - Pointer to line or `NULL` on EOF mmd_read_line(_mmd_filebuf_t *file, // I - File buffer char *line, // I - Line buffer size_t linesize) // I - Size of line buffer { int ch, // Current character column = 0; // Current column char *lineptr = line, // Pointer into line *lineend = line + linesize - 1; // Pointer to end of buffer // Fill the buffer as needed... if (!file->bufptr || (file->bufptr >= file->bufend) || !strchr(file->bufptr, '\n')) mmd_read_buffer(file); // Copy a line out of the file buffer... while (file->bufptr < file->bufend) { ch = *(file->bufptr); file->bufptr ++; if (ch == '\t') { // Expand tabs since nobody uses the same tab width and Markdown says // 4 columns per tab... do { column ++; if (lineptr < lineend) *lineptr++ = ' '; } while (column & 3); } else if (ch != '\r' && lineptr < lineend) { column ++; *lineptr++ = ch; } if (ch == '\n') break; } *lineptr = '\0'; if (file->bufptr == file->bufend && lineptr == line) return (NULL); else if (!strchr(file->bufptr, '\n')) mmd_read_buffer(file); return (line); } // // 'mmd_ref_add()' - Add or update a reference... // static void mmd_ref_add(_mmd_doc_t *doc, // I - Document mmd_t *node, // I - Link node, if any const char *name, // I - Reference name const char *url, // I - Reference URL const char *title) // I - Title, if any { size_t i; // Looping var _mmd_ref_t *ref = mmd_ref_find(doc, name); // Reference DEBUG2_printf("mmd_ref_add(doc=%p, node=%p, name=\"%s\", url=\"%s\", title=\"%s\")\n", doc, node, name, url, title); if (ref) { DEBUG2_printf("mmd_ref_add: ref=%p, ref->url=\"%s\"\n", ref, ref->url); if (!ref->url && url) { if (node) node->url = strdup(url); ref->url = strdup(url); if (title) { if (node) node->extra = strdup(title); ref->title = strdup(title); } for (i = 0; i < ref->num_pending; i ++) { ref->pending[i]->url = strdup(url); if (title) ref->pending[i]->extra = strdup(title); } free(ref->pending); ref->num_pending = 0; ref->pending = NULL; return; } } else if ((ref = realloc(doc->references, (doc->num_references + 1) * sizeof(_mmd_ref_t))) != NULL) { doc->references = ref; ref += doc->num_references; doc->num_references ++; ref->name = strdup(name); ref->url = url ? strdup(url) : NULL; ref->title = title ? strdup(title) : NULL; ref->num_pending = 0; ref->pending = NULL; } else return; if (node) { if (ref->url) { node->url = strdup(ref->url); node->extra = ref->title ? strdup(ref->title) : NULL; } else if ((ref->pending = realloc(ref->pending, (ref->num_pending + 1) * sizeof(mmd_t *))) != NULL) { ref->pending[ref->num_pending ++] = node; } } } // // 'mmd_ref_find()' - Find a reference... // static _mmd_ref_t * // O - Reference or NULL mmd_ref_find(_mmd_doc_t *doc, // I - Document const char *name) // I - Reference name { size_t i; // Looping var for (i = 0; i < doc->num_references; i ++) if (!strcasecmp(name, doc->references[i].name)) return (doc->references + i); return (NULL); } // // 'mmd_remove()' - Remove a node from its parent. // static void mmd_remove(mmd_t *node) // I - Node { if (node && node->parent) { if (node->prev_sibling) node->prev_sibling->next_sibling = node->next_sibling; else node->parent->first_child = node->next_sibling; if (node->next_sibling) node->next_sibling->prev_sibling = node->prev_sibling; else node->parent->last_child = node->prev_sibling; node->parent = NULL; node->prev_sibling = NULL; node->next_sibling = NULL; } } #if DEBUG // // 'mmd_type_string()' - Return a string for the specified type enumeration. // static const char * // O - String representing the type mmd_type_string(mmd_type_t type) // I - Type value { static char unknown[64]; // Unknown type buffer switch (type) { case MMD_TYPE_NONE : return ("MMD_TYPE_NONE"); case MMD_TYPE_DOCUMENT : return "MMD_TYPE_DOCUMENT"; case MMD_TYPE_METADATA : return "MMD_TYPE_METADATA"; case MMD_TYPE_BLOCK_QUOTE : return "MMD_TYPE_BLOCK_QUOTE"; case MMD_TYPE_ORDERED_LIST : return "MMD_TYPE_ORDERED_LIST"; case MMD_TYPE_UNORDERED_LIST : return "MMD_TYPE_UNORDERED_LIST"; case MMD_TYPE_LIST_ITEM : return "MMD_TYPE_LIST_ITEM"; case MMD_TYPE_TABLE : return "MMD_TYPE_TABLE"; case MMD_TYPE_TABLE_HEADER : return "MMD_TYPE_TABLE_HEADER"; case MMD_TYPE_TABLE_BODY : return "MMD_TYPE_TABLE_BODY"; case MMD_TYPE_TABLE_ROW : return "MMD_TYPE_TABLE_ROW"; case MMD_TYPE_HEADING_1 : return "MMD_TYPE_HEADING_1"; case MMD_TYPE_HEADING_2 : return "MMD_TYPE_HEADING_2"; case MMD_TYPE_HEADING_3 : return "MMD_TYPE_HEADING_3"; case MMD_TYPE_HEADING_4 : return "MMD_TYPE_HEADING_4"; case MMD_TYPE_HEADING_5 : return "MMD_TYPE_HEADING_5"; case MMD_TYPE_HEADING_6 : return "MMD_TYPE_HEADING_6"; case MMD_TYPE_PARAGRAPH : return "MMD_TYPE_PARAGRAPH"; case MMD_TYPE_CODE_BLOCK : return "MMD_TYPE_CODE_BLOCK"; case MMD_TYPE_THEMATIC_BREAK : return "MMD_TYPE_THEMATIC_BREAK"; case MMD_TYPE_TABLE_HEADER_CELL : return "MMD_TYPE_TABLE_HEADER_CELL"; case MMD_TYPE_TABLE_BODY_CELL_LEFT : return "MMD_TYPE_TABLE_BODY_CELL_LEFT"; case MMD_TYPE_TABLE_BODY_CELL_CENTER : return "MMD_TYPE_TABLE_BODY_CELL_CENTER"; case MMD_TYPE_TABLE_BODY_CELL_RIGHT : return "MMD_TYPE_TABLE_BODY_CELL_RIGHT"; case MMD_TYPE_NORMAL_TEXT : return "MMD_TYPE_NORMAL_TEXT"; case MMD_TYPE_EMPHASIZED_TEXT : return "MMD_TYPE_EMPHASIZED_TEXT"; case MMD_TYPE_STRONG_TEXT : return "MMD_TYPE_STRONG_TEXT"; case MMD_TYPE_STRUCK_TEXT : return "MMD_TYPE_STRUCK_TEXT"; case MMD_TYPE_LINKED_TEXT : return "MMD_TYPE_LINKED_TEXT"; case MMD_TYPE_CODE_TEXT : return "MMD_TYPE_CODE_TEXT"; case MMD_TYPE_IMAGE : return "MMD_TYPE_IMAGE"; case MMD_TYPE_HARD_BREAK : return "MMD_TYPE_HARD_BREAK"; case MMD_TYPE_SOFT_BREAK : return "MMD_TYPE_SOFT_BREAK"; case MMD_TYPE_METADATA_TEXT : return "MMD_TYPE_METADATA_TEXT"; default : snprintf(unknown, sizeof(unknown), "?? %d ??", (int)type); return (unknown); } } #endif // DEBUG