diff --git a/.gitignore b/.gitignore index 3523288..5de591d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,49 +1,76 @@ -# http://www.gnu.org/software/automake +# Prerequisites +plugins +build +*._* +*.d +*.deb +.vscode +# Object files +*.o +*.ko +*.obj +*.elf -Makefile.in -/ar-lib -/mdate-sh -/py-compile -/test-driver -/ylwrap +# Linker output +*.ilk +*.map +*.exp -# http://www.gnu.org/software/autoconf +# Precompiled Headers +*.gch +*.pch -autom4te.cache -/autoscan.log -/autoscan-*.log -/aclocal.m4 -/compile -/config.guess -/config.h.in -/config.log -/config.status -/config.sub -/configure -/configure.scan -/depcomp -/install-sh -/missing -/stamp-h1 +# Libraries +*.lib +*.a +*.la +*.lo -# https://www.gnu.org/software/libtool/ +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib -/ltmain.sh +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex -# http://www.gnu.org/software/texinfo +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb -/texinfo.tex - -# http://www.gnu.org/software/m4/ - -m4/libtool.m4 -m4/ltoptions.m4 -m4/ltsugar.m4 -m4/ltversion.m4 -m4/lt~obsolete.m4 - -# Generated Makefile -# (meta build system like autotools, -# can automatically generate from config.status script -# (which is called by configure script)) +# Kernel Module Compile Results +*.mod* +*.cmd +modules.order +Module.symvers +Makefile.old +dkms.con +.DS_Store +.* +*.cache Makefile +antd +compile +config.guess +depcomp +install-sh +missing +libtool +config.log +config.status +config.sub +configure +aclocal.m4 +ltmain.sh +Makefile.in +# others +3rd/lua-5.3.4/lua +3rd/lua-5.3.4/luac diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..bd58518 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,40 @@ +AUTOMAKE_OPTIONS = foreign + + + +# check for system +if LINUX + AM_CPPFLAGS = -Wl,E,--no-as-needed + FL_LUA=linux +else + AM_CPPFLAGS = -Wl,-undefined,dynamic_lookup + FL_LUA=macosx +endif + + +AM_CPPFLAGS += -W -Wall -g -std=c99 -fPIC + +silkdir = $(libdir)/lua/silk +silk_DATA = silkmvc/router.lua.tpl \ + silkmvc/BaseController.lua \ + silkmvc/Router.lua \ + silkmvc/BaseModel.lua \ + silkmvc/Template.lua \ + silkmvc/Logger.lua \ + silkmvc/BaseObject.lua \ + silkmvc/DBHelper.lua \ + silkmvc/api.lua + +coredir = $(libdir)/lua/silk/core +core_DATA = silkmvc/core/OOP.lua \ + silkmvc/core/std.lua \ + silkmvc/core/extra_mime.lua \ + silkmvc/core/utils.lua \ + silkmvc/core/api.lua \ + silkmvc/core/cif.lua \ + silkmvc/core/sqlite.lua + +# lua libraris & modules +SUBDIRS = . modules + +EXTRA_DIST = README.md silkmvc diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..2f329bc --- /dev/null +++ b/configure.ac @@ -0,0 +1,87 @@ +# initialise autoconf and set up some basic information about the program we’re packaging +AC_INIT([silk], [0.1.0], [xsang.le@gmail.com]) + +# We’re going to use automake for this project +# [subdir-objects] if needed +AM_INIT_AUTOMAKE([subdir-objects]) + +# dependencies +# C compiler +AC_PROG_CC +# libtool for linking +AC_PROG_LIBTOOL + + +# check if sqlite3 header exists +has_sqlite=no +AC_CHECK_HEADER([sqlite3.h],[ + AC_DEFINE([USE_DB], [1],[Use sqlite3]) + has_sqlite=yes + # check if the library exists +],[]) +AC_CHECK_LIB([sqlite3],[sqlite3_open],[],[ + if test "$has_sqlite" = "yes"; then + AC_MSG_ERROR([Unable to find sqlite3 shared library]) + fi +]) + +# check for lm +AC_CHECK_LIB([m],[cos],[],[ + AC_MSG_ERROR([unable to find libm]) +]) + +# check for libreadline +#AC_CHECK_HEADER([readline/readline.h],[],[ +# AC_MSG_ERROR([unable to find libreadline headers]) +#]) + +#AC_CHECK_LIB([readline],[read_history],[],[ +# AC_MSG_ERROR([unable to find libreadline]) +#]) + +AC_DEFINE([_GNU_SOURCE], [1],[Use GNU source]) +# AC_CANONICAL_HOST is needed to access the 'host_os' variable + +AC_CANONICAL_HOST +build_linux=no +build_windows=no +build_mac=no +# Detect the target system +case "${host_os}" in + linux*) + AC_DEFINE([LINUX], [1],[Linux system]) + build_linux=yes + ;; + darwin*) + build_mac=yes + AC_DEFINE([MACOS], [1],[MacOS system]) + ;; + *) + AC_MSG_ERROR(["OS $host_os is not supported"]) + ;; +esac + +if test "$build_linux" = "yes"; then + AC_CHECK_LIB([crypt],[crypt],[],[ + AC_MSG_ERROR([unable to find libcrypt]) + ]) +fi + +# case for window: +# cygwin*|mingw*) +# build_windows=yes +# ;; +# Pass the conditionals to automake +AM_CONDITIONAL([HAS_DB], [test "$has_sqlite" = "yes"]) +AM_CONDITIONAL([LINUX], [test "$build_linux" = "yes"]) +AM_CONDITIONAL([WINDOWS], [test "$build_windows" = "yes"]) +AM_CONDITIONAL([OSX], [test "$build_mac" = "yes"]) +# find all config files +AC_CONFIG_FILES([ + Makefile + modules/Makefile +]) + +# AC_SUBST([my_CPPFLAGS]) pass my_CPPFLAGS to the makefile.am +# output the script: +AC_OUTPUT diff --git a/modules/3rd/jsmn/LICENSE b/modules/3rd/jsmn/LICENSE new file mode 100755 index 0000000..c84fb2e --- /dev/null +++ b/modules/3rd/jsmn/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2010 Serge A. Zaitsev + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + diff --git a/modules/3rd/jsmn/README.md b/modules/3rd/jsmn/README.md new file mode 100755 index 0000000..105897b --- /dev/null +++ b/modules/3rd/jsmn/README.md @@ -0,0 +1,167 @@ + +JSMN +==== + +jsmn (pronounced like 'jasmine') is a minimalistic JSON parser in C. It can be +easily integrated into resource-limited or embedded projects. + +You can find more information about JSON format at [json.org][1] + +Library sources are available at https://github.com/zserge/jsmn + +The web page with some information about jsmn can be found at +[http://zserge.com/jsmn.html][2] + +Philosophy +---------- + +Most JSON parsers offer you a bunch of functions to load JSON data, parse it +and extract any value by its name. jsmn proves that checking the correctness of +every JSON packet or allocating temporary objects to store parsed JSON fields +often is an overkill. + +JSON format itself is extremely simple, so why should we complicate it? + +jsmn is designed to be **robust** (it should work fine even with erroneous +data), **fast** (it should parse data on the fly), **portable** (no superfluous +dependencies or non-standard C extensions). And of course, **simplicity** is a +key feature - simple code style, simple algorithm, simple integration into +other projects. + +Features +-------- + +* compatible with C89 +* no dependencies (even libc!) +* highly portable (tested on x86/amd64, ARM, AVR) +* about 200 lines of code +* extremely small code footprint +* API contains only 2 functions +* no dynamic memory allocation +* incremental single-pass parsing +* library code is covered with unit-tests + +Design +------ + +The rudimentary jsmn object is a **token**. Let's consider a JSON string: + + '{ "name" : "Jack", "age" : 27 }' + +It holds the following tokens: + +* Object: `{ "name" : "Jack", "age" : 27}` (the whole object) +* Strings: `"name"`, `"Jack"`, `"age"` (keys and some values) +* Number: `27` + +In jsmn, tokens do not hold any data, but point to token boundaries in JSON +string instead. In the example above jsmn will create tokens like: Object +[0..31], String [3..7], String [12..16], String [20..23], Number [27..29]. + +Every jsmn token has a type, which indicates the type of corresponding JSON +token. jsmn supports the following token types: + +* Object - a container of key-value pairs, e.g.: + `{ "foo":"bar", "x":0.3 }` +* Array - a sequence of values, e.g.: + `[ 1, 2, 3 ]` +* String - a quoted sequence of chars, e.g.: `"foo"` +* Primitive - a number, a boolean (`true`, `false`) or `null` + +Besides start/end positions, jsmn tokens for complex types (like arrays +or objects) also contain a number of child items, so you can easily follow +object hierarchy. + +This approach provides enough information for parsing any JSON data and makes +it possible to use zero-copy techniques. + +Install +------- + +To clone the repository you should have Git installed. Just run: + + $ git clone https://github.com/zserge/jsmn + +Repository layout is simple: jsmn.c and jsmn.h are library files, tests are in +the jsmn\_test.c, you will also find README, LICENSE and Makefile files inside. + +To build the library, run `make`. It is also recommended to run `make test`. +Let me know, if some tests fail. + +If build was successful, you should get a `libjsmn.a` library. +The header file you should include is called `"jsmn.h"`. + +API +--- + +Token types are described by `jsmntype_t`: + + typedef enum { + JSMN_UNDEFINED = 0, + JSMN_OBJECT = 1, + JSMN_ARRAY = 2, + JSMN_STRING = 3, + JSMN_PRIMITIVE = 4 + } jsmntype_t; + +**Note:** Unlike JSON data types, primitive tokens are not divided into +numbers, booleans and null, because one can easily tell the type using the +first character: + +* 't', 'f' - boolean +* 'n' - null +* '-', '0'..'9' - number + +Token is an object of `jsmntok_t` type: + + typedef struct { + jsmntype_t type; // Token type + int start; // Token start position + int end; // Token end position + int size; // Number of child (nested) tokens + } jsmntok_t; + +**Note:** string tokens point to the first character after +the opening quote and the previous symbol before final quote. This was made +to simplify string extraction from JSON data. + +All job is done by `jsmn_parser` object. You can initialize a new parser using: + + jsmn_parser parser; + jsmntok_t tokens[10]; + + jsmn_init(&parser); + + // js - pointer to JSON string + // tokens - an array of tokens available + // 10 - number of tokens available + jsmn_parse(&parser, js, strlen(js), tokens, 10); + +This will create a parser, and then it tries to parse up to 10 JSON tokens from +the `js` string. + +A non-negative return value of `jsmn_parse` is the number of tokens actually +used by the parser. +Passing NULL instead of the tokens array would not store parsing results, but +instead the function will return the value of tokens needed to parse the given +string. This can be useful if you don't know yet how many tokens to allocate. + +If something goes wrong, you will get an error. Error will be one of these: + +* `JSMN_ERROR_INVAL` - bad token, JSON string is corrupted +* `JSMN_ERROR_NOMEM` - not enough tokens, JSON string is too large +* `JSMN_ERROR_PART` - JSON string is too short, expecting more JSON data + +If you get `JSON_ERROR_NOMEM`, you can re-allocate more tokens and call +`jsmn_parse` once more. If you read json data from the stream, you can +periodically call `jsmn_parse` and check if return value is `JSON_ERROR_PART`. +You will get this error until you reach the end of JSON data. + +Other info +---------- + +This software is distributed under [MIT license](http://www.opensource.org/licenses/mit-license.php), + so feel free to integrate it in your commercial products. + +[1]: http://www.json.org/ +[2]: http://zserge.com/jsmn.html diff --git a/modules/3rd/jsmn/example/jsondump.c b/modules/3rd/jsmn/example/jsondump.c new file mode 100755 index 0000000..4fb10fd --- /dev/null +++ b/modules/3rd/jsmn/example/jsondump.c @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include "../jsmn.h" + +/* Function realloc_it() is a wrapper function for standart realloc() + * with one difference - it frees old memory pointer in case of realloc + * failure. Thus, DO NOT use old data pointer in anyway after call to + * realloc_it(). If your code has some kind of fallback algorithm if + * memory can't be re-allocated - use standart realloc() instead. + */ +static inline void *realloc_it(void *ptrmem, size_t size) { + void *p = realloc(ptrmem, size); + if (!p) { + free (ptrmem); + fprintf(stderr, "realloc(): errno=%d\n", errno); + } + return p; +} + +/* + * An example of reading JSON from stdin and printing its content to stdout. + * The output looks like YAML, but I'm not sure if it's really compatible. + */ + +static int dump(const char *js, jsmntok_t *t, size_t count, int indent) { + int i, j, k; + if (count == 0) { + return 0; + } + if (t->type == JSMN_PRIMITIVE) { + printf("%.*s", t->end - t->start, js+t->start); + return 1; + } else if (t->type == JSMN_STRING) { + printf("'%.*s'", t->end - t->start, js+t->start); + return 1; + } else if (t->type == JSMN_OBJECT) { + printf("\n"); + j = 0; + for (i = 0; i < t->size; i++) { + for (k = 0; k < indent; k++) printf(" "); + j += dump(js, t+1+j, count-j, indent+1); + printf(": "); + j += dump(js, t+1+j, count-j, indent+1); + printf("\n"); + } + return j+1; + } else if (t->type == JSMN_ARRAY) { + j = 0; + printf("\n"); + for (i = 0; i < t->size; i++) { + for (k = 0; k < indent-1; k++) printf(" "); + printf(" - "); + j += dump(js, t+1+j, count-j, indent+1); + printf("\n"); + } + return j+1; + } + return 0; +} + +int main() { + int r; + int eof_expected = 0; + char *js = NULL; + size_t jslen = 0; + char buf[BUFSIZ]; + + jsmn_parser p; + jsmntok_t *tok; + size_t tokcount = 2; + + /* Prepare parser */ + jsmn_init(&p); + + /* Allocate some tokens as a start */ + tok = malloc(sizeof(*tok) * tokcount); + if (tok == NULL) { + fprintf(stderr, "malloc(): errno=%d\n", errno); + return 3; + } + + for (;;) { + /* Read another chunk */ + r = fread(buf, 1, sizeof(buf), stdin); + if (r < 0) { + fprintf(stderr, "fread(): %d, errno=%d\n", r, errno); + return 1; + } + if (r == 0) { + if (eof_expected != 0) { + return 0; + } else { + fprintf(stderr, "fread(): unexpected EOF\n"); + return 2; + } + } + + js = realloc_it(js, jslen + r + 1); + if (js == NULL) { + return 3; + } + strncpy(js + jslen, buf, r); + jslen = jslen + r; + +again: + r = jsmn_parse(&p, js, jslen, tok, tokcount); + if (r < 0) { + if (r == JSMN_ERROR_NOMEM) { + tokcount = tokcount * 2; + tok = realloc_it(tok, sizeof(*tok) * tokcount); + if (tok == NULL) { + return 3; + } + goto again; + } + } else { + dump(js, tok, p.toknext, 0); + eof_expected = 1; + } + } + + return EXIT_SUCCESS; +} diff --git a/modules/3rd/jsmn/example/simple.c b/modules/3rd/jsmn/example/simple.c new file mode 100755 index 0000000..aeb9cf4 --- /dev/null +++ b/modules/3rd/jsmn/example/simple.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include "../jsmn.h" + +/* + * A small example of jsmn parsing when JSON structure is known and number of + * tokens is predictable. + */ + +static const char *JSON_STRING = + "{\"user\": \"johndoe\", \"admin\": false, \"uid\": 1000,\n " + "\"groups\": [\"users\", \"wheel\", \"audio\", \"video\"]}"; + +static int jsoneq(const char *json, jsmntok_t *tok, const char *s) { + if (tok->type == JSMN_STRING && (int) strlen(s) == tok->end - tok->start && + strncmp(json + tok->start, s, tok->end - tok->start) == 0) { + return 0; + } + return -1; +} + +int main() { + int i; + int r; + jsmn_parser p; + jsmntok_t t[128]; /* We expect no more than 128 tokens */ + + jsmn_init(&p); + r = jsmn_parse(&p, JSON_STRING, strlen(JSON_STRING), t, sizeof(t)/sizeof(t[0])); + if (r < 0) { + printf("Failed to parse JSON: %d\n", r); + return 1; + } + + /* Assume the top-level element is an object */ + if (r < 1 || t[0].type != JSMN_OBJECT) { + printf("Object expected\n"); + return 1; + } + + /* Loop over all keys of the root object */ + for (i = 1; i < r; i++) { + if (jsoneq(JSON_STRING, &t[i], "user") == 0) { + /* We may use strndup() to fetch string value */ + printf("- User: %.*s\n", t[i+1].end-t[i+1].start, + JSON_STRING + t[i+1].start); + i++; + } else if (jsoneq(JSON_STRING, &t[i], "admin") == 0) { + /* We may additionally check if the value is either "true" or "false" */ + printf("- Admin: %.*s\n", t[i+1].end-t[i+1].start, + JSON_STRING + t[i+1].start); + i++; + } else if (jsoneq(JSON_STRING, &t[i], "uid") == 0) { + /* We may want to do strtol() here to get numeric value */ + printf("- UID: %.*s\n", t[i+1].end-t[i+1].start, + JSON_STRING + t[i+1].start); + i++; + } else if (jsoneq(JSON_STRING, &t[i], "groups") == 0) { + int j; + printf("- Groups:\n"); + if (t[i+1].type != JSMN_ARRAY) { + continue; /* We expect groups to be an array of strings */ + } + for (j = 0; j < t[i+1].size; j++) { + jsmntok_t *g = &t[i+j+2]; + printf(" * %.*s\n", g->end - g->start, JSON_STRING + g->start); + } + i += t[i+1].size + 1; + } else { + printf("Unexpected key: %.*s\n", t[i].end-t[i].start, + JSON_STRING + t[i].start); + } + } + return EXIT_SUCCESS; +} diff --git a/modules/3rd/jsmn/jsmn.c b/modules/3rd/jsmn/jsmn.c new file mode 100755 index 0000000..bcd6392 --- /dev/null +++ b/modules/3rd/jsmn/jsmn.c @@ -0,0 +1,314 @@ +#include "jsmn.h" + +/** + * Allocates a fresh unused token from the token pull. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, + jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *tok; + if (parser->toknext >= num_tokens) { + return NULL; + } + tok = &tokens[parser->toknext++]; + tok->start = tok->end = -1; + tok->size = 0; +#ifdef JSMN_PARENT_LINKS + tok->parent = -1; +#endif + return tok; +} + +/** + * Fills token type and boundaries. + */ +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, + int start, int end) { + token->type = type; + token->start = start; + token->end = end; + token->size = 0; +} + +/** + * Fills next available token with JSON primitive. + */ +static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, + size_t len, jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *token; + int start; + + start = parser->pos; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + switch (js[parser->pos]) { +#ifndef JSMN_STRICT + /* In strict mode primitive must be followed by "," or "}" or "]" */ + case ':': +#endif + case '\t' : case '\r' : case '\n' : case ' ' : + case ',' : case ']' : case '}' : + goto found; + } + if (js[parser->pos] < 32 || js[parser->pos] >= 127) { + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } +#ifdef JSMN_STRICT + /* In strict mode primitive must be followed by a comma/object/array */ + parser->pos = start; + return JSMN_ERROR_PART; +#endif + +found: + if (tokens == NULL) { + parser->pos--; + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + parser->pos--; + return 0; +} + +/** + * Fills next token with JSON string. + */ +static int jsmn_parse_string(jsmn_parser *parser, const char *js, + size_t len, jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *token; + + int start = parser->pos; + + parser->pos++; + + /* Skip starting quote */ + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c = js[parser->pos]; + + /* Quote: end of string */ + if (c == '\"') { + if (tokens == NULL) { + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_STRING, start+1, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + return 0; + } + + /* Backslash: Quoted symbol expected */ + if (c == '\\' && parser->pos + 1 < len) { + int i; + parser->pos++; + switch (js[parser->pos]) { + /* Allowed escaped symbols */ + case '\"': case '/' : case '\\' : case 'b' : + case 'f' : case 'r' : case 'n' : case 't' : + break; + /* Allows escaped symbol \uXXXX */ + case 'u': + parser->pos++; + for(i = 0; i < 4 && parser->pos < len && js[parser->pos] != '\0'; i++) { + /* If it isn't a hex character we have an error */ + if(!((js[parser->pos] >= 48 && js[parser->pos] <= 57) || /* 0-9 */ + (js[parser->pos] >= 65 && js[parser->pos] <= 70) || /* A-F */ + (js[parser->pos] >= 97 && js[parser->pos] <= 102))) { /* a-f */ + parser->pos = start; + return JSMN_ERROR_INVAL; + } + parser->pos++; + } + parser->pos--; + break; + /* Unexpected symbol */ + default: + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } + } + parser->pos = start; + return JSMN_ERROR_PART; +} + +/** + * Parse JSON string and fill tokens. + */ +int jsmn_parse(jsmn_parser *parser, const char *js, size_t len, + jsmntok_t *tokens, unsigned int num_tokens) { + int r; + int i; + jsmntok_t *token; + int count = parser->toknext; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c; + jsmntype_t type; + + c = js[parser->pos]; + switch (c) { + case '{': case '[': + count++; + if (tokens == NULL) { + break; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) + return JSMN_ERROR_NOMEM; + if (parser->toksuper != -1) { + tokens[parser->toksuper].size++; +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + } + token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY); + token->start = parser->pos; + parser->toksuper = parser->toknext - 1; + break; + case '}': case ']': + if (tokens == NULL) + break; + type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY); +#ifdef JSMN_PARENT_LINKS + if (parser->toknext < 1) { + return JSMN_ERROR_INVAL; + } + token = &tokens[parser->toknext - 1]; + for (;;) { + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + token->end = parser->pos + 1; + parser->toksuper = token->parent; + break; + } + if (token->parent == -1) { + if(token->type != type || parser->toksuper == -1) { + return JSMN_ERROR_INVAL; + } + break; + } + token = &tokens[token->parent]; + } +#else + for (i = parser->toknext - 1; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + parser->toksuper = -1; + token->end = parser->pos + 1; + break; + } + } + /* Error if unmatched closing bracket */ + if (i == -1) return JSMN_ERROR_INVAL; + for (; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + parser->toksuper = i; + break; + } + } +#endif + break; + case '\"': + r = jsmn_parse_string(parser, js, len, tokens, num_tokens); + if (r < 0) return r; + count++; + if (parser->toksuper != -1 && tokens != NULL) + tokens[parser->toksuper].size++; + break; + case '\t' : case '\r' : case '\n' : case ' ': + break; + case ':': + parser->toksuper = parser->toknext - 1; + break; + case ',': + if (tokens != NULL && parser->toksuper != -1 && + tokens[parser->toksuper].type != JSMN_ARRAY && + tokens[parser->toksuper].type != JSMN_OBJECT) { +#ifdef JSMN_PARENT_LINKS + parser->toksuper = tokens[parser->toksuper].parent; +#else + for (i = parser->toknext - 1; i >= 0; i--) { + if (tokens[i].type == JSMN_ARRAY || tokens[i].type == JSMN_OBJECT) { + if (tokens[i].start != -1 && tokens[i].end == -1) { + parser->toksuper = i; + break; + } + } + } +#endif + } + break; +#ifdef JSMN_STRICT + /* In strict mode primitives are: numbers and booleans */ + case '-': case '0': case '1' : case '2': case '3' : case '4': + case '5': case '6': case '7' : case '8': case '9': + case 't': case 'f': case 'n' : + /* And they must not be keys of the object */ + if (tokens != NULL && parser->toksuper != -1) { + jsmntok_t *t = &tokens[parser->toksuper]; + if (t->type == JSMN_OBJECT || + (t->type == JSMN_STRING && t->size != 0)) { + return JSMN_ERROR_INVAL; + } + } +#else + /* In non-strict mode every unquoted value is a primitive */ + default: +#endif + r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); + if (r < 0) return r; + count++; + if (parser->toksuper != -1 && tokens != NULL) + tokens[parser->toksuper].size++; + break; + +#ifdef JSMN_STRICT + /* Unexpected char in strict mode */ + default: + return JSMN_ERROR_INVAL; +#endif + } + } + + if (tokens != NULL) { + for (i = parser->toknext - 1; i >= 0; i--) { + /* Unmatched opened object or array */ + if (tokens[i].start != -1 && tokens[i].end == -1) { + return JSMN_ERROR_PART; + } + } + } + + return count; +} + +/** + * Creates a new parser based over a given buffer with an array of tokens + * available. + */ +void jsmn_init(jsmn_parser *parser) { + parser->pos = 0; + parser->toknext = 0; + parser->toksuper = -1; +} + diff --git a/modules/3rd/jsmn/jsmn.h b/modules/3rd/jsmn/jsmn.h new file mode 100755 index 0000000..5a5200e --- /dev/null +++ b/modules/3rd/jsmn/jsmn.h @@ -0,0 +1,76 @@ +#ifndef __JSMN_H_ +#define __JSMN_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * JSON type identifier. Basic types are: + * o Object + * o Array + * o String + * o Other primitive: number, boolean (true/false) or null + */ +typedef enum { + JSMN_UNDEFINED = 0, + JSMN_OBJECT = 1, + JSMN_ARRAY = 2, + JSMN_STRING = 3, + JSMN_PRIMITIVE = 4 +} jsmntype_t; + +enum jsmnerr { + /* Not enough tokens were provided */ + JSMN_ERROR_NOMEM = -1, + /* Invalid character inside JSON string */ + JSMN_ERROR_INVAL = -2, + /* The string is not a full JSON packet, more bytes expected */ + JSMN_ERROR_PART = -3 +}; + +/** + * JSON token description. + * type type (object, array, string etc.) + * start start position in JSON data string + * end end position in JSON data string + */ +typedef struct { + jsmntype_t type; + int start; + int end; + int size; +#ifdef JSMN_PARENT_LINKS + int parent; +#endif +} jsmntok_t; + +/** + * JSON parser. Contains an array of token blocks available. Also stores + * the string being parsed now and current position in that string + */ +typedef struct { + unsigned int pos; /* offset in the JSON string */ + unsigned int toknext; /* next token to allocate */ + int toksuper; /* superior token node, e.g parent object or array */ +} jsmn_parser; + +/** + * Create JSON parser over an array of tokens + */ +void jsmn_init(jsmn_parser *parser); + +/** + * Run JSON parser. It parses a JSON data string into and array of tokens, each describing + * a single JSON object. + */ +int jsmn_parse(jsmn_parser *parser, const char *js, size_t len, + jsmntok_t *tokens, unsigned int num_tokens); + +#ifdef __cplusplus +} +#endif + +#endif /* __JSMN_H_ */ diff --git a/modules/3rd/jsmn/library.json b/modules/3rd/jsmn/library.json new file mode 100755 index 0000000..8e2f5c2 --- /dev/null +++ b/modules/3rd/jsmn/library.json @@ -0,0 +1,16 @@ +{ + "name": "jsmn", + "keywords": "json", + "description": "Minimalistic JSON parser/tokenizer in C. It can be easily integrated into resource-limited or embedded projects", + "repository": + { + "type": "git", + "url": "https://github.com/zserge/jsmn.git" + }, + "frameworks": "*", + "platforms": "*", + "examples": [ + "example/*.c" + ], + "exclude": "test" +} diff --git a/modules/3rd/jsmn/test/test.h b/modules/3rd/jsmn/test/test.h new file mode 100755 index 0000000..35f704f --- /dev/null +++ b/modules/3rd/jsmn/test/test.h @@ -0,0 +1,27 @@ +#ifndef __TEST_H__ +#define __TEST_H__ + +static int test_passed = 0; +static int test_failed = 0; + +/* Terminate current test with error */ +#define fail() return __LINE__ + +/* Successful end of the test case */ +#define done() return 0 + +/* Check single condition */ +#define check(cond) do { if (!(cond)) fail(); } while (0) + +/* Test runner */ +static void test(int (*func)(void), const char *name) { + int r = func(); + if (r == 0) { + test_passed++; + } else { + test_failed++; + printf("FAILED: %s (at line %d)\n", name, r); + } +} + +#endif /* __TEST_H__ */ diff --git a/modules/3rd/jsmn/test/tests.c b/modules/3rd/jsmn/test/tests.c new file mode 100755 index 0000000..d5f0c53 --- /dev/null +++ b/modules/3rd/jsmn/test/tests.c @@ -0,0 +1,398 @@ +#include +#include +#include +#include + +#include "test.h" +#include "testutil.h" + +int test_empty(void) { + check(parse("{}", 1, 1, + JSMN_OBJECT, 0, 2, 0)); + check(parse("[]", 1, 1, + JSMN_ARRAY, 0, 2, 0)); + check(parse("[{},{}]", 3, 3, + JSMN_ARRAY, 0, 7, 2, + JSMN_OBJECT, 1, 3, 0, + JSMN_OBJECT, 4, 6, 0)); + return 0; +} + +int test_object(void) { + check(parse("{\"a\":0}", 3, 3, + JSMN_OBJECT, 0, 7, 1, + JSMN_STRING, "a", 1, + JSMN_PRIMITIVE, "0")); + check(parse("{\"a\":[]}", 3, 3, + JSMN_OBJECT, 0, 8, 1, + JSMN_STRING, "a", 1, + JSMN_ARRAY, 5, 7, 0)); + check(parse("{\"a\":{},\"b\":{}}", 5, 5, + JSMN_OBJECT, -1, -1, 2, + JSMN_STRING, "a", 1, + JSMN_OBJECT, -1, -1, 0, + JSMN_STRING, "b", 1, + JSMN_OBJECT, -1, -1, 0)); + check(parse("{\n \"Day\": 26,\n \"Month\": 9,\n \"Year\": 12\n }", 7, 7, + JSMN_OBJECT, -1, -1, 3, + JSMN_STRING, "Day", 1, + JSMN_PRIMITIVE, "26", + JSMN_STRING, "Month", 1, + JSMN_PRIMITIVE, "9", + JSMN_STRING, "Year", 1, + JSMN_PRIMITIVE, "12")); + check(parse("{\"a\": 0, \"b\": \"c\"}", 5, 5, + JSMN_OBJECT, -1, -1, 2, + JSMN_STRING, "a", 1, + JSMN_PRIMITIVE, "0", + JSMN_STRING, "b", 1, + JSMN_STRING, "c", 0)); + +#ifdef JSMN_STRICT + check(parse("{\"a\"\n0}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"a\", 0}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"a\": {2}}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"a\": {2: 3}}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"a\": {\"a\": 2 3}}", JSMN_ERROR_INVAL, 5)); + /* FIXME */ + /*check(parse("{\"a\"}", JSMN_ERROR_INVAL, 2));*/ + /*check(parse("{\"a\": 1, \"b\"}", JSMN_ERROR_INVAL, 4));*/ + /*check(parse("{\"a\",\"b\":1}", JSMN_ERROR_INVAL, 4));*/ + /*check(parse("{\"a\":1,}", JSMN_ERROR_INVAL, 4));*/ + /*check(parse("{\"a\":\"b\":\"c\"}", JSMN_ERROR_INVAL, 4));*/ + /*check(parse("{,}", JSMN_ERROR_INVAL, 4));*/ +#endif + return 0; +} + +int test_array(void) { + /* FIXME */ + /*check(parse("[10}", JSMN_ERROR_INVAL, 3));*/ + /*check(parse("[1,,3]", JSMN_ERROR_INVAL, 3)*/ + check(parse("[10]", 2, 2, + JSMN_ARRAY, -1, -1, 1, + JSMN_PRIMITIVE, "10")); + check(parse("{\"a\": 1]", JSMN_ERROR_INVAL, 3)); + /* FIXME */ + /*check(parse("[\"a\": 1]", JSMN_ERROR_INVAL, 3));*/ + return 0; +} + +int test_primitive(void) { + check(parse("{\"boolVar\" : true }", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "boolVar", 1, + JSMN_PRIMITIVE, "true")); + check(parse("{\"boolVar\" : false }", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "boolVar", 1, + JSMN_PRIMITIVE, "false")); + check(parse("{\"nullVar\" : null }", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "nullVar", 1, + JSMN_PRIMITIVE, "null")); + check(parse("{\"intVar\" : 12}", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "intVar", 1, + JSMN_PRIMITIVE, "12")); + check(parse("{\"floatVar\" : 12.345}", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, + JSMN_PRIMITIVE, "12.345")); + return 0; +} + +int test_string(void) { + check(parse("{\"strVar\" : \"hello world\"}", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "strVar", 1, + JSMN_STRING, "hello world", 0)); + check(parse("{\"strVar\" : \"escapes: \\/\\r\\n\\t\\b\\f\\\"\\\\\"}", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "strVar", 1, + JSMN_STRING, "escapes: \\/\\r\\n\\t\\b\\f\\\"\\\\", 0)); + check(parse("{\"strVar\": \"\"}", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "strVar", 1, + JSMN_STRING, "", 0)); + check(parse("{\"a\":\"\\uAbcD\"}", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, + JSMN_STRING, "\\uAbcD", 0)); + check(parse("{\"a\":\"str\\u0000\"}", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, + JSMN_STRING, "str\\u0000", 0)); + check(parse("{\"a\":\"\\uFFFFstr\"}", 3, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, + JSMN_STRING, "\\uFFFFstr", 0)); + check(parse("{\"a\":[\"\\u0280\"]}", 4, 4, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, + JSMN_ARRAY, -1, -1, 1, + JSMN_STRING, "\\u0280", 0)); + + check(parse("{\"a\":\"str\\uFFGFstr\"}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"a\":\"str\\u@FfF\"}", JSMN_ERROR_INVAL, 3)); + check(parse("{{\"a\":[\"\\u028\"]}", JSMN_ERROR_INVAL, 4)); + return 0; +} + +int test_partial_string(void) { + int i; + int r; + jsmn_parser p; + jsmntok_t tok[5]; + const char *js = "{\"x\": \"va\\\\ue\", \"y\": \"value y\"}"; + + jsmn_init(&p); + for (i = 1; i <= strlen(js); i++) { + r = jsmn_parse(&p, js, i, tok, sizeof(tok)/sizeof(tok[0])); + if (i == strlen(js)) { + check(r == 5); + check(tokeq(js, tok, 5, + JSMN_OBJECT, -1, -1, 2, + JSMN_STRING, "x", 1, + JSMN_STRING, "va\\\\ue", 0, + JSMN_STRING, "y", 1, + JSMN_STRING, "value y", 0)); + } else { + check(r == JSMN_ERROR_PART); + } + } + return 0; +} + +int test_partial_array(void) { +#ifdef JSMN_STRICT + int r; + int i; + jsmn_parser p; + jsmntok_t tok[10]; + const char *js = "[ 1, true, [123, \"hello\"]]"; + + jsmn_init(&p); + for (i = 1; i <= strlen(js); i++) { + r = jsmn_parse(&p, js, i, tok, sizeof(tok)/sizeof(tok[0])); + if (i == strlen(js)) { + check(r == 6); + check(tokeq(js, tok, 6, + JSMN_ARRAY, -1, -1, 3, + JSMN_PRIMITIVE, "1", + JSMN_PRIMITIVE, "true", + JSMN_ARRAY, -1, -1, 2, + JSMN_PRIMITIVE, "123", + JSMN_STRING, "hello", 0)); + } else { + check(r == JSMN_ERROR_PART); + } + } +#endif + return 0; +} + +int test_array_nomem(void) { + int i; + int r; + jsmn_parser p; + jsmntok_t toksmall[10], toklarge[10]; + const char *js; + + js = " [ 1, true, [123, \"hello\"]]"; + + for (i = 0; i < 6; i++) { + jsmn_init(&p); + memset(toksmall, 0, sizeof(toksmall)); + memset(toklarge, 0, sizeof(toklarge)); + r = jsmn_parse(&p, js, strlen(js), toksmall, i); + check(r == JSMN_ERROR_NOMEM); + + memcpy(toklarge, toksmall, sizeof(toksmall)); + + r = jsmn_parse(&p, js, strlen(js), toklarge, 10); + check(r >= 0); + check(tokeq(js, toklarge, 4, + JSMN_ARRAY, -1, -1, 3, + JSMN_PRIMITIVE, "1", + JSMN_PRIMITIVE, "true", + JSMN_ARRAY, -1, -1, 2, + JSMN_PRIMITIVE, "123", + JSMN_STRING, "hello", 0)); + } + return 0; +} + +int test_unquoted_keys(void) { +#ifndef JSMN_STRICT + int r; + jsmn_parser p; + jsmntok_t tok[10]; + const char *js; + + jsmn_init(&p); + js = "key1: \"value\"\nkey2 : 123"; + + r = jsmn_parse(&p, js, strlen(js), tok, 10); + check(r >= 0); + check(tokeq(js, tok, 4, + JSMN_PRIMITIVE, "key1", + JSMN_STRING, "value", 0, + JSMN_PRIMITIVE, "key2", + JSMN_PRIMITIVE, "123")); +#endif + return 0; +} + +int test_issue_22(void) { + int r; + jsmn_parser p; + jsmntok_t tokens[128]; + const char *js; + + js = "{ \"height\":10, \"layers\":[ { \"data\":[6,6], \"height\":10, " + "\"name\":\"Calque de Tile 1\", \"opacity\":1, \"type\":\"tilelayer\", " + "\"visible\":true, \"width\":10, \"x\":0, \"y\":0 }], " + "\"orientation\":\"orthogonal\", \"properties\": { }, \"tileheight\":32, " + "\"tilesets\":[ { \"firstgid\":1, \"image\":\"..\\/images\\/tiles.png\", " + "\"imageheight\":64, \"imagewidth\":160, \"margin\":0, \"name\":\"Tiles\", " + "\"properties\":{}, \"spacing\":0, \"tileheight\":32, \"tilewidth\":32 }], " + "\"tilewidth\":32, \"version\":1, \"width\":10 }"; + jsmn_init(&p); + r = jsmn_parse(&p, js, strlen(js), tokens, 128); + check(r >= 0); + return 0; +} + +int test_issue_27(void) { + const char *js = + "{ \"name\" : \"Jack\", \"age\" : 27 } { \"name\" : \"Anna\", "; + check(parse(js, JSMN_ERROR_PART, 8)); + return 0; +} + +int test_input_length(void) { + const char *js; + int r; + jsmn_parser p; + jsmntok_t tokens[10]; + + js = "{\"a\": 0}garbage"; + + jsmn_init(&p); + r = jsmn_parse(&p, js, 8, tokens, 10); + check(r == 3); + check(tokeq(js, tokens, 3, + JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, + JSMN_PRIMITIVE, "0")); + return 0; +} + +int test_count(void) { + jsmn_parser p; + const char *js; + + js = "{}"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 1); + + js = "[]"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 1); + + js = "[[]]"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 2); + + js = "[[], []]"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 3); + + js = "[[], []]"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 3); + + js = "[[], [[]], [[], []]]"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 7); + + js = "[\"a\", [[], []]]"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 5); + + js = "[[], \"[], [[]]\", [[]]]"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 5); + + js = "[1, 2, 3]"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 4); + + js = "[1, 2, [3, \"a\"], null]"; + jsmn_init(&p); + check(jsmn_parse(&p, js, strlen(js), NULL, 0) == 7); + + return 0; +} + + +int test_nonstrict(void) { +#ifndef JSMN_STRICT + const char *js; + js = "a: 0garbage"; + check(parse(js, 2, 2, + JSMN_PRIMITIVE, "a", + JSMN_PRIMITIVE, "0garbage")); + + js = "Day : 26\nMonth : Sep\n\nYear: 12"; + check(parse(js, 6, 6, + JSMN_PRIMITIVE, "Day", + JSMN_PRIMITIVE, "26", + JSMN_PRIMITIVE, "Month", + JSMN_PRIMITIVE, "Sep", + JSMN_PRIMITIVE, "Year", + JSMN_PRIMITIVE, "12")); +#endif + return 0; +} + +int test_unmatched_brackets(void) { + const char *js; + js = "\"key 1\": 1234}"; + check(parse(js, JSMN_ERROR_INVAL, 2)); + js = "{\"key 1\": 1234"; + check(parse(js, JSMN_ERROR_PART, 3)); + js = "{\"key 1\": 1234}}"; + check(parse(js, JSMN_ERROR_INVAL, 3)); + js = "\"key 1\"}: 1234"; + check(parse(js, JSMN_ERROR_INVAL, 3)); + js = "\"key {1\": 1234"; + check(parse(js, 2, 2, + JSMN_STRING, "key {1", 1, + JSMN_PRIMITIVE, "1234")); + js = "{{\"key 1\": 1234}"; + check(parse(js, JSMN_ERROR_PART, 4)); + return 0; +} + +int main(void) { + test(test_empty, "test for a empty JSON objects/arrays"); + test(test_object, "test for a JSON objects"); + test(test_array, "test for a JSON arrays"); + test(test_primitive, "test primitive JSON data types"); + test(test_string, "test string JSON data types"); + + test(test_partial_string, "test partial JSON string parsing"); + test(test_partial_array, "test partial array reading"); + test(test_array_nomem, "test array reading with a smaller number of tokens"); + test(test_unquoted_keys, "test unquoted keys (like in JavaScript)"); + test(test_input_length, "test strings that are not null-terminated"); + test(test_issue_22, "test issue #22"); + test(test_issue_27, "test issue #27"); + test(test_count, "test tokens count estimation"); + test(test_nonstrict, "test for non-strict mode"); + test(test_unmatched_brackets, "test for unmatched brackets"); + printf("\nPASSED: %d\nFAILED: %d\n", test_passed, test_failed); + return (test_failed > 0); +} diff --git a/modules/3rd/jsmn/test/testutil.h b/modules/3rd/jsmn/test/testutil.h new file mode 100755 index 0000000..9a1eb2d --- /dev/null +++ b/modules/3rd/jsmn/test/testutil.h @@ -0,0 +1,94 @@ +#ifndef __TEST_UTIL_H__ +#define __TEST_UTIL_H__ + +#include "../jsmn.c" + +static int vtokeq(const char *s, jsmntok_t *t, int numtok, va_list ap) { + if (numtok > 0) { + int i, start, end, size; + int type; + char *value; + + size = -1; + value = NULL; + for (i = 0; i < numtok; i++) { + type = va_arg(ap, int); + if (type == JSMN_STRING) { + value = va_arg(ap, char *); + size = va_arg(ap, int); + start = end = -1; + } else if (type == JSMN_PRIMITIVE) { + value = va_arg(ap, char *); + start = end = size = -1; + } else { + start = va_arg(ap, int); + end = va_arg(ap, int); + size = va_arg(ap, int); + value = NULL; + } + if (t[i].type != type) { + printf("token %d type is %d, not %d\n", i, t[i].type, type); + return 0; + } + if (start != -1 && end != -1) { + if (t[i].start != start) { + printf("token %d start is %d, not %d\n", i, t[i].start, start); + return 0; + } + if (t[i].end != end ) { + printf("token %d end is %d, not %d\n", i, t[i].end, end); + return 0; + } + } + if (size != -1 && t[i].size != size) { + printf("token %d size is %d, not %d\n", i, t[i].size, size); + return 0; + } + + if (s != NULL && value != NULL) { + const char *p = s + t[i].start; + if (strlen(value) != t[i].end - t[i].start || + strncmp(p, value, t[i].end - t[i].start) != 0) { + printf("token %d value is %.*s, not %s\n", i, t[i].end-t[i].start, + s+t[i].start, value); + return 0; + } + } + } + } + return 1; +} + +static int tokeq(const char *s, jsmntok_t *tokens, int numtok, ...) { + int ok; + va_list args; + va_start(args, numtok); + ok = vtokeq(s, tokens, numtok, args); + va_end(args); + return ok; +} + +static int parse(const char *s, int status, int numtok, ...) { + int r; + int ok = 1; + va_list args; + jsmn_parser p; + jsmntok_t *t = malloc(numtok * sizeof(jsmntok_t)); + + jsmn_init(&p); + r = jsmn_parse(&p, s, strlen(s), t, numtok); + if (r != status) { + printf("status is %d, not %d\n", r, status); + return 0; + } + + if (status >= 0) { + va_start(args, numtok); + ok = vtokeq(s, t, numtok, args); + va_end(args); + } + free(t); + return ok; +} + +#endif /* __TEST_UTIL_H__ */ diff --git a/modules/3rd/md4c/entity.c b/modules/3rd/md4c/entity.c new file mode 100644 index 0000000..9991ca1 --- /dev/null +++ b/modules/3rd/md4c/entity.c @@ -0,0 +1,2190 @@ +/* + * MD4C: Markdown parser for C + * (http://github.com/mity/md4c) + * + * Copyright (c) 2016-2017 Martin Mitas + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "entity.h" +#include + + +/* The table is generated from https://html.spec.whatwg.org/entities.json */ +static const struct entity entity_table[] = { + { "Æ", { 198, 0 } }, + { "&", { 38, 0 } }, + { "Á", { 193, 0 } }, + { "Ă", { 258, 0 } }, + { "Â", { 194, 0 } }, + { "А", { 1040, 0 } }, + { "𝔄", { 120068, 0 } }, + { "À", { 192, 0 } }, + { "Α", { 913, 0 } }, + { "Ā", { 256, 0 } }, + { "⩓", { 10835, 0 } }, + { "Ą", { 260, 0 } }, + { "𝔸", { 120120, 0 } }, + { "⁡", { 8289, 0 } }, + { "Å", { 197, 0 } }, + { "𝒜", { 119964, 0 } }, + { "≔", { 8788, 0 } }, + { "Ã", { 195, 0 } }, + { "Ä", { 196, 0 } }, + { "∖", { 8726, 0 } }, + { "⫧", { 10983, 0 } }, + { "⌆", { 8966, 0 } }, + { "Б", { 1041, 0 } }, + { "∵", { 8757, 0 } }, + { "ℬ", { 8492, 0 } }, + { "Β", { 914, 0 } }, + { "𝔅", { 120069, 0 } }, + { "𝔹", { 120121, 0 } }, + { "˘", { 728, 0 } }, + { "ℬ", { 8492, 0 } }, + { "≎", { 8782, 0 } }, + { "Ч", { 1063, 0 } }, + { "©", { 169, 0 } }, + { "Ć", { 262, 0 } }, + { "⋒", { 8914, 0 } }, + { "ⅅ", { 8517, 0 } }, + { "ℭ", { 8493, 0 } }, + { "Č", { 268, 0 } }, + { "Ç", { 199, 0 } }, + { "Ĉ", { 264, 0 } }, + { "∰", { 8752, 0 } }, + { "Ċ", { 266, 0 } }, + { "¸", { 184, 0 } }, + { "·", { 183, 0 } }, + { "ℭ", { 8493, 0 } }, + { "Χ", { 935, 0 } }, + { "⊙", { 8857, 0 } }, + { "⊖", { 8854, 0 } }, + { "⊕", { 8853, 0 } }, + { "⊗", { 8855, 0 } }, + { "∲", { 8754, 0 } }, + { "”", { 8221, 0 } }, + { "’", { 8217, 0 } }, + { "∷", { 8759, 0 } }, + { "⩴", { 10868, 0 } }, + { "≡", { 8801, 0 } }, + { "∯", { 8751, 0 } }, + { "∮", { 8750, 0 } }, + { "ℂ", { 8450, 0 } }, + { "∐", { 8720, 0 } }, + { "∳", { 8755, 0 } }, + { "⨯", { 10799, 0 } }, + { "𝒞", { 119966, 0 } }, + { "⋓", { 8915, 0 } }, + { "≍", { 8781, 0 } }, + { "ⅅ", { 8517, 0 } }, + { "⤑", { 10513, 0 } }, + { "Ђ", { 1026, 0 } }, + { "Ѕ", { 1029, 0 } }, + { "Џ", { 1039, 0 } }, + { "‡", { 8225, 0 } }, + { "↡", { 8609, 0 } }, + { "⫤", { 10980, 0 } }, + { "Ď", { 270, 0 } }, + { "Д", { 1044, 0 } }, + { "∇", { 8711, 0 } }, + { "Δ", { 916, 0 } }, + { "𝔇", { 120071, 0 } }, + { "´", { 180, 0 } }, + { "˙", { 729, 0 } }, + { "˝", { 733, 0 } }, + { "`", { 96, 0 } }, + { "˜", { 732, 0 } }, + { "⋄", { 8900, 0 } }, + { "ⅆ", { 8518, 0 } }, + { "𝔻", { 120123, 0 } }, + { "¨", { 168, 0 } }, + { "⃜", { 8412, 0 } }, + { "≐", { 8784, 0 } }, + { "∯", { 8751, 0 } }, + { "¨", { 168, 0 } }, + { "⇓", { 8659, 0 } }, + { "⇐", { 8656, 0 } }, + { "⇔", { 8660, 0 } }, + { "⫤", { 10980, 0 } }, + { "⟸", { 10232, 0 } }, + { "⟺", { 10234, 0 } }, + { "⟹", { 10233, 0 } }, + { "⇒", { 8658, 0 } }, + { "⊨", { 8872, 0 } }, + { "⇑", { 8657, 0 } }, + { "⇕", { 8661, 0 } }, + { "∥", { 8741, 0 } }, + { "↓", { 8595, 0 } }, + { "⤓", { 10515, 0 } }, + { "⇵", { 8693, 0 } }, + { "̑", { 785, 0 } }, + { "⥐", { 10576, 0 } }, + { "⥞", { 10590, 0 } }, + { "↽", { 8637, 0 } }, + { "⥖", { 10582, 0 } }, + { "⥟", { 10591, 0 } }, + { "⇁", { 8641, 0 } }, + { "⥗", { 10583, 0 } }, + { "⊤", { 8868, 0 } }, + { "↧", { 8615, 0 } }, + { "⇓", { 8659, 0 } }, + { "𝒟", { 119967, 0 } }, + { "Đ", { 272, 0 } }, + { "Ŋ", { 330, 0 } }, + { "Ð", { 208, 0 } }, + { "É", { 201, 0 } }, + { "Ě", { 282, 0 } }, + { "Ê", { 202, 0 } }, + { "Э", { 1069, 0 } }, + { "Ė", { 278, 0 } }, + { "𝔈", { 120072, 0 } }, + { "È", { 200, 0 } }, + { "∈", { 8712, 0 } }, + { "Ē", { 274, 0 } }, + { "◻", { 9723, 0 } }, + { "▫", { 9643, 0 } }, + { "Ę", { 280, 0 } }, + { "𝔼", { 120124, 0 } }, + { "Ε", { 917, 0 } }, + { "⩵", { 10869, 0 } }, + { "≂", { 8770, 0 } }, + { "⇌", { 8652, 0 } }, + { "ℰ", { 8496, 0 } }, + { "⩳", { 10867, 0 } }, + { "Η", { 919, 0 } }, + { "Ë", { 203, 0 } }, + { "∃", { 8707, 0 } }, + { "ⅇ", { 8519, 0 } }, + { "Ф", { 1060, 0 } }, + { "𝔉", { 120073, 0 } }, + { "◼", { 9724, 0 } }, + { "▪", { 9642, 0 } }, + { "𝔽", { 120125, 0 } }, + { "∀", { 8704, 0 } }, + { "ℱ", { 8497, 0 } }, + { "ℱ", { 8497, 0 } }, + { "Ѓ", { 1027, 0 } }, + { ">", { 62, 0 } }, + { "Γ", { 915, 0 } }, + { "Ϝ", { 988, 0 } }, + { "Ğ", { 286, 0 } }, + { "Ģ", { 290, 0 } }, + { "Ĝ", { 284, 0 } }, + { "Г", { 1043, 0 } }, + { "Ġ", { 288, 0 } }, + { "𝔊", { 120074, 0 } }, + { "⋙", { 8921, 0 } }, + { "𝔾", { 120126, 0 } }, + { "≥", { 8805, 0 } }, + { "⋛", { 8923, 0 } }, + { "≧", { 8807, 0 } }, + { "⪢", { 10914, 0 } }, + { "≷", { 8823, 0 } }, + { "⩾", { 10878, 0 } }, + { "≳", { 8819, 0 } }, + { "𝒢", { 119970, 0 } }, + { "≫", { 8811, 0 } }, + { "Ъ", { 1066, 0 } }, + { "ˇ", { 711, 0 } }, + { "^", { 94, 0 } }, + { "Ĥ", { 292, 0 } }, + { "ℌ", { 8460, 0 } }, + { "ℋ", { 8459, 0 } }, + { "ℍ", { 8461, 0 } }, + { "─", { 9472, 0 } }, + { "ℋ", { 8459, 0 } }, + { "Ħ", { 294, 0 } }, + { "≎", { 8782, 0 } }, + { "≏", { 8783, 0 } }, + { "Е", { 1045, 0 } }, + { "IJ", { 306, 0 } }, + { "Ё", { 1025, 0 } }, + { "Í", { 205, 0 } }, + { "Î", { 206, 0 } }, + { "И", { 1048, 0 } }, + { "İ", { 304, 0 } }, + { "ℑ", { 8465, 0 } }, + { "Ì", { 204, 0 } }, + { "ℑ", { 8465, 0 } }, + { "Ī", { 298, 0 } }, + { "ⅈ", { 8520, 0 } }, + { "⇒", { 8658, 0 } }, + { "∬", { 8748, 0 } }, + { "∫", { 8747, 0 } }, + { "⋂", { 8898, 0 } }, + { "⁣", { 8291, 0 } }, + { "⁢", { 8290, 0 } }, + { "Į", { 302, 0 } }, + { "𝕀", { 120128, 0 } }, + { "Ι", { 921, 0 } }, + { "ℐ", { 8464, 0 } }, + { "Ĩ", { 296, 0 } }, + { "І", { 1030, 0 } }, + { "Ï", { 207, 0 } }, + { "Ĵ", { 308, 0 } }, + { "Й", { 1049, 0 } }, + { "𝔍", { 120077, 0 } }, + { "𝕁", { 120129, 0 } }, + { "𝒥", { 119973, 0 } }, + { "Ј", { 1032, 0 } }, + { "Є", { 1028, 0 } }, + { "Х", { 1061, 0 } }, + { "Ќ", { 1036, 0 } }, + { "Κ", { 922, 0 } }, + { "Ķ", { 310, 0 } }, + { "К", { 1050, 0 } }, + { "𝔎", { 120078, 0 } }, + { "𝕂", { 120130, 0 } }, + { "𝒦", { 119974, 0 } }, + { "Љ", { 1033, 0 } }, + { "<", { 60, 0 } }, + { "Ĺ", { 313, 0 } }, + { "Λ", { 923, 0 } }, + { "⟪", { 10218, 0 } }, + { "ℒ", { 8466, 0 } }, + { "↞", { 8606, 0 } }, + { "Ľ", { 317, 0 } }, + { "Ļ", { 315, 0 } }, + { "Л", { 1051, 0 } }, + { "⟨", { 10216, 0 } }, + { "←", { 8592, 0 } }, + { "⇤", { 8676, 0 } }, + { "⇆", { 8646, 0 } }, + { "⌈", { 8968, 0 } }, + { "⟦", { 10214, 0 } }, + { "⥡", { 10593, 0 } }, + { "⇃", { 8643, 0 } }, + { "⥙", { 10585, 0 } }, + { "⌊", { 8970, 0 } }, + { "↔", { 8596, 0 } }, + { "⥎", { 10574, 0 } }, + { "⊣", { 8867, 0 } }, + { "↤", { 8612, 0 } }, + { "⥚", { 10586, 0 } }, + { "⊲", { 8882, 0 } }, + { "⧏", { 10703, 0 } }, + { "⊴", { 8884, 0 } }, + { "⥑", { 10577, 0 } }, + { "⥠", { 10592, 0 } }, + { "↿", { 8639, 0 } }, + { "⥘", { 10584, 0 } }, + { "↼", { 8636, 0 } }, + { "⥒", { 10578, 0 } }, + { "⇐", { 8656, 0 } }, + { "⇔", { 8660, 0 } }, + { "⋚", { 8922, 0 } }, + { "≦", { 8806, 0 } }, + { "≶", { 8822, 0 } }, + { "⪡", { 10913, 0 } }, + { "⩽", { 10877, 0 } }, + { "≲", { 8818, 0 } }, + { "𝔏", { 120079, 0 } }, + { "⋘", { 8920, 0 } }, + { "⇚", { 8666, 0 } }, + { "Ŀ", { 319, 0 } }, + { "⟵", { 10229, 0 } }, + { "⟷", { 10231, 0 } }, + { "⟶", { 10230, 0 } }, + { "⟸", { 10232, 0 } }, + { "⟺", { 10234, 0 } }, + { "⟹", { 10233, 0 } }, + { "𝕃", { 120131, 0 } }, + { "↙", { 8601, 0 } }, + { "↘", { 8600, 0 } }, + { "ℒ", { 8466, 0 } }, + { "↰", { 8624, 0 } }, + { "Ł", { 321, 0 } }, + { "≪", { 8810, 0 } }, + { "⤅", { 10501, 0 } }, + { "М", { 1052, 0 } }, + { " ", { 8287, 0 } }, + { "ℳ", { 8499, 0 } }, + { "𝔐", { 120080, 0 } }, + { "∓", { 8723, 0 } }, + { "𝕄", { 120132, 0 } }, + { "ℳ", { 8499, 0 } }, + { "Μ", { 924, 0 } }, + { "Њ", { 1034, 0 } }, + { "Ń", { 323, 0 } }, + { "Ň", { 327, 0 } }, + { "Ņ", { 325, 0 } }, + { "Н", { 1053, 0 } }, + { "​", { 8203, 0 } }, + { "​", { 8203, 0 } }, + { "​", { 8203, 0 } }, + { "​", { 8203, 0 } }, + { "≫", { 8811, 0 } }, + { "≪", { 8810, 0 } }, + { " ", { 10, 0 } }, + { "𝔑", { 120081, 0 } }, + { "⁠", { 8288, 0 } }, + { " ", { 160, 0 } }, + { "ℕ", { 8469, 0 } }, + { "⫬", { 10988, 0 } }, + { "≢", { 8802, 0 } }, + { "≭", { 8813, 0 } }, + { "∦", { 8742, 0 } }, + { "∉", { 8713, 0 } }, + { "≠", { 8800, 0 } }, + { "≂̸", { 8770, 824 } }, + { "∄", { 8708, 0 } }, + { "≯", { 8815, 0 } }, + { "≱", { 8817, 0 } }, + { "≧̸", { 8807, 824 } }, + { "≫̸", { 8811, 824 } }, + { "≹", { 8825, 0 } }, + { "⩾̸", { 10878, 824 } }, + { "≵", { 8821, 0 } }, + { "≎̸", { 8782, 824 } }, + { "≏̸", { 8783, 824 } }, + { "⋪", { 8938, 0 } }, + { "⧏̸", { 10703, 824 } }, + { "⋬", { 8940, 0 } }, + { "≮", { 8814, 0 } }, + { "≰", { 8816, 0 } }, + { "≸", { 8824, 0 } }, + { "≪̸", { 8810, 824 } }, + { "⩽̸", { 10877, 824 } }, + { "≴", { 8820, 0 } }, + { "⪢̸", { 10914, 824 } }, + { "⪡̸", { 10913, 824 } }, + { "⊀", { 8832, 0 } }, + { "⪯̸", { 10927, 824 } }, + { "⋠", { 8928, 0 } }, + { "∌", { 8716, 0 } }, + { "⋫", { 8939, 0 } }, + { "⧐̸", { 10704, 824 } }, + { "⋭", { 8941, 0 } }, + { "⊏̸", { 8847, 824 } }, + { "⋢", { 8930, 0 } }, + { "⊐̸", { 8848, 824 } }, + { "⋣", { 8931, 0 } }, + { "⊂⃒", { 8834, 8402 } }, + { "⊈", { 8840, 0 } }, + { "⊁", { 8833, 0 } }, + { "⪰̸", { 10928, 824 } }, + { "⋡", { 8929, 0 } }, + { "≿̸", { 8831, 824 } }, + { "⊃⃒", { 8835, 8402 } }, + { "⊉", { 8841, 0 } }, + { "≁", { 8769, 0 } }, + { "≄", { 8772, 0 } }, + { "≇", { 8775, 0 } }, + { "≉", { 8777, 0 } }, + { "∤", { 8740, 0 } }, + { "𝒩", { 119977, 0 } }, + { "Ñ", { 209, 0 } }, + { "Ν", { 925, 0 } }, + { "Œ", { 338, 0 } }, + { "Ó", { 211, 0 } }, + { "Ô", { 212, 0 } }, + { "О", { 1054, 0 } }, + { "Ő", { 336, 0 } }, + { "𝔒", { 120082, 0 } }, + { "Ò", { 210, 0 } }, + { "Ō", { 332, 0 } }, + { "Ω", { 937, 0 } }, + { "Ο", { 927, 0 } }, + { "𝕆", { 120134, 0 } }, + { "“", { 8220, 0 } }, + { "‘", { 8216, 0 } }, + { "⩔", { 10836, 0 } }, + { "𝒪", { 119978, 0 } }, + { "Ø", { 216, 0 } }, + { "Õ", { 213, 0 } }, + { "⨷", { 10807, 0 } }, + { "Ö", { 214, 0 } }, + { "‾", { 8254, 0 } }, + { "⏞", { 9182, 0 } }, + { "⎴", { 9140, 0 } }, + { "⏜", { 9180, 0 } }, + { "∂", { 8706, 0 } }, + { "П", { 1055, 0 } }, + { "𝔓", { 120083, 0 } }, + { "Φ", { 934, 0 } }, + { "Π", { 928, 0 } }, + { "±", { 177, 0 } }, + { "ℌ", { 8460, 0 } }, + { "ℙ", { 8473, 0 } }, + { "⪻", { 10939, 0 } }, + { "≺", { 8826, 0 } }, + { "⪯", { 10927, 0 } }, + { "≼", { 8828, 0 } }, + { "≾", { 8830, 0 } }, + { "″", { 8243, 0 } }, + { "∏", { 8719, 0 } }, + { "∷", { 8759, 0 } }, + { "∝", { 8733, 0 } }, + { "𝒫", { 119979, 0 } }, + { "Ψ", { 936, 0 } }, + { """, { 34, 0 } }, + { "𝔔", { 120084, 0 } }, + { "ℚ", { 8474, 0 } }, + { "𝒬", { 119980, 0 } }, + { "⤐", { 10512, 0 } }, + { "®", { 174, 0 } }, + { "Ŕ", { 340, 0 } }, + { "⟫", { 10219, 0 } }, + { "↠", { 8608, 0 } }, + { "⤖", { 10518, 0 } }, + { "Ř", { 344, 0 } }, + { "Ŗ", { 342, 0 } }, + { "Р", { 1056, 0 } }, + { "ℜ", { 8476, 0 } }, + { "∋", { 8715, 0 } }, + { "⇋", { 8651, 0 } }, + { "⥯", { 10607, 0 } }, + { "ℜ", { 8476, 0 } }, + { "Ρ", { 929, 0 } }, + { "⟩", { 10217, 0 } }, + { "→", { 8594, 0 } }, + { "⇥", { 8677, 0 } }, + { "⇄", { 8644, 0 } }, + { "⌉", { 8969, 0 } }, + { "⟧", { 10215, 0 } }, + { "⥝", { 10589, 0 } }, + { "⇂", { 8642, 0 } }, + { "⥕", { 10581, 0 } }, + { "⌋", { 8971, 0 } }, + { "⊢", { 8866, 0 } }, + { "↦", { 8614, 0 } }, + { "⥛", { 10587, 0 } }, + { "⊳", { 8883, 0 } }, + { "⧐", { 10704, 0 } }, + { "⊵", { 8885, 0 } }, + { "⥏", { 10575, 0 } }, + { "⥜", { 10588, 0 } }, + { "↾", { 8638, 0 } }, + { "⥔", { 10580, 0 } }, + { "⇀", { 8640, 0 } }, + { "⥓", { 10579, 0 } }, + { "⇒", { 8658, 0 } }, + { "ℝ", { 8477, 0 } }, + { "⥰", { 10608, 0 } }, + { "⇛", { 8667, 0 } }, + { "ℛ", { 8475, 0 } }, + { "↱", { 8625, 0 } }, + { "⧴", { 10740, 0 } }, + { "Щ", { 1065, 0 } }, + { "Ш", { 1064, 0 } }, + { "Ь", { 1068, 0 } }, + { "Ś", { 346, 0 } }, + { "⪼", { 10940, 0 } }, + { "Š", { 352, 0 } }, + { "Ş", { 350, 0 } }, + { "Ŝ", { 348, 0 } }, + { "С", { 1057, 0 } }, + { "𝔖", { 120086, 0 } }, + { "↓", { 8595, 0 } }, + { "←", { 8592, 0 } }, + { "→", { 8594, 0 } }, + { "↑", { 8593, 0 } }, + { "Σ", { 931, 0 } }, + { "∘", { 8728, 0 } }, + { "𝕊", { 120138, 0 } }, + { "√", { 8730, 0 } }, + { "□", { 9633, 0 } }, + { "⊓", { 8851, 0 } }, + { "⊏", { 8847, 0 } }, + { "⊑", { 8849, 0 } }, + { "⊐", { 8848, 0 } }, + { "⊒", { 8850, 0 } }, + { "⊔", { 8852, 0 } }, + { "𝒮", { 119982, 0 } }, + { "⋆", { 8902, 0 } }, + { "⋐", { 8912, 0 } }, + { "⋐", { 8912, 0 } }, + { "⊆", { 8838, 0 } }, + { "≻", { 8827, 0 } }, + { "⪰", { 10928, 0 } }, + { "≽", { 8829, 0 } }, + { "≿", { 8831, 0 } }, + { "∋", { 8715, 0 } }, + { "∑", { 8721, 0 } }, + { "⋑", { 8913, 0 } }, + { "⊃", { 8835, 0 } }, + { "⊇", { 8839, 0 } }, + { "⋑", { 8913, 0 } }, + { "Þ", { 222, 0 } }, + { "™", { 8482, 0 } }, + { "Ћ", { 1035, 0 } }, + { "Ц", { 1062, 0 } }, + { " ", { 9, 0 } }, + { "Τ", { 932, 0 } }, + { "Ť", { 356, 0 } }, + { "Ţ", { 354, 0 } }, + { "Т", { 1058, 0 } }, + { "𝔗", { 120087, 0 } }, + { "∴", { 8756, 0 } }, + { "Θ", { 920, 0 } }, + { "  ", { 8287, 8202 } }, + { " ", { 8201, 0 } }, + { "∼", { 8764, 0 } }, + { "≃", { 8771, 0 } }, + { "≅", { 8773, 0 } }, + { "≈", { 8776, 0 } }, + { "𝕋", { 120139, 0 } }, + { "⃛", { 8411, 0 } }, + { "𝒯", { 119983, 0 } }, + { "Ŧ", { 358, 0 } }, + { "Ú", { 218, 0 } }, + { "↟", { 8607, 0 } }, + { "⥉", { 10569, 0 } }, + { "Ў", { 1038, 0 } }, + { "Ŭ", { 364, 0 } }, + { "Û", { 219, 0 } }, + { "У", { 1059, 0 } }, + { "Ű", { 368, 0 } }, + { "𝔘", { 120088, 0 } }, + { "Ù", { 217, 0 } }, + { "Ū", { 362, 0 } }, + { "_", { 95, 0 } }, + { "⏟", { 9183, 0 } }, + { "⎵", { 9141, 0 } }, + { "⏝", { 9181, 0 } }, + { "⋃", { 8899, 0 } }, + { "⊎", { 8846, 0 } }, + { "Ų", { 370, 0 } }, + { "𝕌", { 120140, 0 } }, + { "↑", { 8593, 0 } }, + { "⤒", { 10514, 0 } }, + { "⇅", { 8645, 0 } }, + { "↕", { 8597, 0 } }, + { "⥮", { 10606, 0 } }, + { "⊥", { 8869, 0 } }, + { "↥", { 8613, 0 } }, + { "⇑", { 8657, 0 } }, + { "⇕", { 8661, 0 } }, + { "↖", { 8598, 0 } }, + { "↗", { 8599, 0 } }, + { "ϒ", { 978, 0 } }, + { "Υ", { 933, 0 } }, + { "Ů", { 366, 0 } }, + { "𝒰", { 119984, 0 } }, + { "Ũ", { 360, 0 } }, + { "Ü", { 220, 0 } }, + { "⊫", { 8875, 0 } }, + { "⫫", { 10987, 0 } }, + { "В", { 1042, 0 } }, + { "⊩", { 8873, 0 } }, + { "⫦", { 10982, 0 } }, + { "⋁", { 8897, 0 } }, + { "‖", { 8214, 0 } }, + { "‖", { 8214, 0 } }, + { "∣", { 8739, 0 } }, + { "|", { 124, 0 } }, + { "❘", { 10072, 0 } }, + { "≀", { 8768, 0 } }, + { " ", { 8202, 0 } }, + { "𝔙", { 120089, 0 } }, + { "𝕍", { 120141, 0 } }, + { "𝒱", { 119985, 0 } }, + { "⊪", { 8874, 0 } }, + { "Ŵ", { 372, 0 } }, + { "⋀", { 8896, 0 } }, + { "𝔚", { 120090, 0 } }, + { "𝕎", { 120142, 0 } }, + { "𝒲", { 119986, 0 } }, + { "𝔛", { 120091, 0 } }, + { "Ξ", { 926, 0 } }, + { "𝕏", { 120143, 0 } }, + { "𝒳", { 119987, 0 } }, + { "Я", { 1071, 0 } }, + { "Ї", { 1031, 0 } }, + { "Ю", { 1070, 0 } }, + { "Ý", { 221, 0 } }, + { "Ŷ", { 374, 0 } }, + { "Ы", { 1067, 0 } }, + { "𝔜", { 120092, 0 } }, + { "𝕐", { 120144, 0 } }, + { "𝒴", { 119988, 0 } }, + { "Ÿ", { 376, 0 } }, + { "Ж", { 1046, 0 } }, + { "Ź", { 377, 0 } }, + { "Ž", { 381, 0 } }, + { "З", { 1047, 0 } }, + { "Ż", { 379, 0 } }, + { "​", { 8203, 0 } }, + { "Ζ", { 918, 0 } }, + { "ℨ", { 8488, 0 } }, + { "ℤ", { 8484, 0 } }, + { "𝒵", { 119989, 0 } }, + { "á", { 225, 0 } }, + { "ă", { 259, 0 } }, + { "∾", { 8766, 0 } }, + { "∾̳", { 8766, 819 } }, + { "∿", { 8767, 0 } }, + { "â", { 226, 0 } }, + { "´", { 180, 0 } }, + { "а", { 1072, 0 } }, + { "æ", { 230, 0 } }, + { "⁡", { 8289, 0 } }, + { "𝔞", { 120094, 0 } }, + { "à", { 224, 0 } }, + { "ℵ", { 8501, 0 } }, + { "ℵ", { 8501, 0 } }, + { "α", { 945, 0 } }, + { "ā", { 257, 0 } }, + { "⨿", { 10815, 0 } }, + { "&", { 38, 0 } }, + { "∧", { 8743, 0 } }, + { "⩕", { 10837, 0 } }, + { "⩜", { 10844, 0 } }, + { "⩘", { 10840, 0 } }, + { "⩚", { 10842, 0 } }, + { "∠", { 8736, 0 } }, + { "⦤", { 10660, 0 } }, + { "∠", { 8736, 0 } }, + { "∡", { 8737, 0 } }, + { "⦨", { 10664, 0 } }, + { "⦩", { 10665, 0 } }, + { "⦪", { 10666, 0 } }, + { "⦫", { 10667, 0 } }, + { "⦬", { 10668, 0 } }, + { "⦭", { 10669, 0 } }, + { "⦮", { 10670, 0 } }, + { "⦯", { 10671, 0 } }, + { "∟", { 8735, 0 } }, + { "⊾", { 8894, 0 } }, + { "⦝", { 10653, 0 } }, + { "∢", { 8738, 0 } }, + { "Å", { 197, 0 } }, + { "⍼", { 9084, 0 } }, + { "ą", { 261, 0 } }, + { "𝕒", { 120146, 0 } }, + { "≈", { 8776, 0 } }, + { "⩰", { 10864, 0 } }, + { "⩯", { 10863, 0 } }, + { "≊", { 8778, 0 } }, + { "≋", { 8779, 0 } }, + { "'", { 39, 0 } }, + { "≈", { 8776, 0 } }, + { "≊", { 8778, 0 } }, + { "å", { 229, 0 } }, + { "𝒶", { 119990, 0 } }, + { "*", { 42, 0 } }, + { "≈", { 8776, 0 } }, + { "≍", { 8781, 0 } }, + { "ã", { 227, 0 } }, + { "ä", { 228, 0 } }, + { "∳", { 8755, 0 } }, + { "⨑", { 10769, 0 } }, + { "⫭", { 10989, 0 } }, + { "≌", { 8780, 0 } }, + { "϶", { 1014, 0 } }, + { "‵", { 8245, 0 } }, + { "∽", { 8765, 0 } }, + { "⋍", { 8909, 0 } }, + { "⊽", { 8893, 0 } }, + { "⌅", { 8965, 0 } }, + { "⌅", { 8965, 0 } }, + { "⎵", { 9141, 0 } }, + { "⎶", { 9142, 0 } }, + { "≌", { 8780, 0 } }, + { "б", { 1073, 0 } }, + { "„", { 8222, 0 } }, + { "∵", { 8757, 0 } }, + { "∵", { 8757, 0 } }, + { "⦰", { 10672, 0 } }, + { "϶", { 1014, 0 } }, + { "ℬ", { 8492, 0 } }, + { "β", { 946, 0 } }, + { "ℶ", { 8502, 0 } }, + { "≬", { 8812, 0 } }, + { "𝔟", { 120095, 0 } }, + { "⋂", { 8898, 0 } }, + { "◯", { 9711, 0 } }, + { "⋃", { 8899, 0 } }, + { "⨀", { 10752, 0 } }, + { "⨁", { 10753, 0 } }, + { "⨂", { 10754, 0 } }, + { "⨆", { 10758, 0 } }, + { "★", { 9733, 0 } }, + { "▽", { 9661, 0 } }, + { "△", { 9651, 0 } }, + { "⨄", { 10756, 0 } }, + { "⋁", { 8897, 0 } }, + { "⋀", { 8896, 0 } }, + { "⤍", { 10509, 0 } }, + { "⧫", { 10731, 0 } }, + { "▪", { 9642, 0 } }, + { "▴", { 9652, 0 } }, + { "▾", { 9662, 0 } }, + { "◂", { 9666, 0 } }, + { "▸", { 9656, 0 } }, + { "␣", { 9251, 0 } }, + { "▒", { 9618, 0 } }, + { "░", { 9617, 0 } }, + { "▓", { 9619, 0 } }, + { "█", { 9608, 0 } }, + { "=⃥", { 61, 8421 } }, + { "≡⃥", { 8801, 8421 } }, + { "⌐", { 8976, 0 } }, + { "𝕓", { 120147, 0 } }, + { "⊥", { 8869, 0 } }, + { "⊥", { 8869, 0 } }, + { "⋈", { 8904, 0 } }, + { "╗", { 9559, 0 } }, + { "╔", { 9556, 0 } }, + { "╖", { 9558, 0 } }, + { "╓", { 9555, 0 } }, + { "═", { 9552, 0 } }, + { "╦", { 9574, 0 } }, + { "╩", { 9577, 0 } }, + { "╤", { 9572, 0 } }, + { "╧", { 9575, 0 } }, + { "╝", { 9565, 0 } }, + { "╚", { 9562, 0 } }, + { "╜", { 9564, 0 } }, + { "╙", { 9561, 0 } }, + { "║", { 9553, 0 } }, + { "╬", { 9580, 0 } }, + { "╣", { 9571, 0 } }, + { "╠", { 9568, 0 } }, + { "╫", { 9579, 0 } }, + { "╢", { 9570, 0 } }, + { "╟", { 9567, 0 } }, + { "⧉", { 10697, 0 } }, + { "╕", { 9557, 0 } }, + { "╒", { 9554, 0 } }, + { "┐", { 9488, 0 } }, + { "┌", { 9484, 0 } }, + { "─", { 9472, 0 } }, + { "╥", { 9573, 0 } }, + { "╨", { 9576, 0 } }, + { "┬", { 9516, 0 } }, + { "┴", { 9524, 0 } }, + { "⊟", { 8863, 0 } }, + { "⊞", { 8862, 0 } }, + { "⊠", { 8864, 0 } }, + { "╛", { 9563, 0 } }, + { "╘", { 9560, 0 } }, + { "┘", { 9496, 0 } }, + { "└", { 9492, 0 } }, + { "│", { 9474, 0 } }, + { "╪", { 9578, 0 } }, + { "╡", { 9569, 0 } }, + { "╞", { 9566, 0 } }, + { "┼", { 9532, 0 } }, + { "┤", { 9508, 0 } }, + { "├", { 9500, 0 } }, + { "‵", { 8245, 0 } }, + { "˘", { 728, 0 } }, + { "¦", { 166, 0 } }, + { "𝒷", { 119991, 0 } }, + { "⁏", { 8271, 0 } }, + { "∽", { 8765, 0 } }, + { "⋍", { 8909, 0 } }, + { "\", { 92, 0 } }, + { "⧅", { 10693, 0 } }, + { "⟈", { 10184, 0 } }, + { "•", { 8226, 0 } }, + { "•", { 8226, 0 } }, + { "≎", { 8782, 0 } }, + { "⪮", { 10926, 0 } }, + { "≏", { 8783, 0 } }, + { "≏", { 8783, 0 } }, + { "ć", { 263, 0 } }, + { "∩", { 8745, 0 } }, + { "⩄", { 10820, 0 } }, + { "⩉", { 10825, 0 } }, + { "⩋", { 10827, 0 } }, + { "⩇", { 10823, 0 } }, + { "⩀", { 10816, 0 } }, + { "∩︀", { 8745, 65024 } }, + { "⁁", { 8257, 0 } }, + { "ˇ", { 711, 0 } }, + { "⩍", { 10829, 0 } }, + { "č", { 269, 0 } }, + { "ç", { 231, 0 } }, + { "ĉ", { 265, 0 } }, + { "⩌", { 10828, 0 } }, + { "⩐", { 10832, 0 } }, + { "ċ", { 267, 0 } }, + { "¸", { 184, 0 } }, + { "⦲", { 10674, 0 } }, + { "¢", { 162, 0 } }, + { "·", { 183, 0 } }, + { "𝔠", { 120096, 0 } }, + { "ч", { 1095, 0 } }, + { "✓", { 10003, 0 } }, + { "✓", { 10003, 0 } }, + { "χ", { 967, 0 } }, + { "○", { 9675, 0 } }, + { "⧃", { 10691, 0 } }, + { "ˆ", { 710, 0 } }, + { "≗", { 8791, 0 } }, + { "↺", { 8634, 0 } }, + { "↻", { 8635, 0 } }, + { "®", { 174, 0 } }, + { "Ⓢ", { 9416, 0 } }, + { "⊛", { 8859, 0 } }, + { "⊚", { 8858, 0 } }, + { "⊝", { 8861, 0 } }, + { "≗", { 8791, 0 } }, + { "⨐", { 10768, 0 } }, + { "⫯", { 10991, 0 } }, + { "⧂", { 10690, 0 } }, + { "♣", { 9827, 0 } }, + { "♣", { 9827, 0 } }, + { ":", { 58, 0 } }, + { "≔", { 8788, 0 } }, + { "≔", { 8788, 0 } }, + { ",", { 44, 0 } }, + { "@", { 64, 0 } }, + { "∁", { 8705, 0 } }, + { "∘", { 8728, 0 } }, + { "∁", { 8705, 0 } }, + { "ℂ", { 8450, 0 } }, + { "≅", { 8773, 0 } }, + { "⩭", { 10861, 0 } }, + { "∮", { 8750, 0 } }, + { "𝕔", { 120148, 0 } }, + { "∐", { 8720, 0 } }, + { "©", { 169, 0 } }, + { "℗", { 8471, 0 } }, + { "↵", { 8629, 0 } }, + { "✗", { 10007, 0 } }, + { "𝒸", { 119992, 0 } }, + { "⫏", { 10959, 0 } }, + { "⫑", { 10961, 0 } }, + { "⫐", { 10960, 0 } }, + { "⫒", { 10962, 0 } }, + { "⋯", { 8943, 0 } }, + { "⤸", { 10552, 0 } }, + { "⤵", { 10549, 0 } }, + { "⋞", { 8926, 0 } }, + { "⋟", { 8927, 0 } }, + { "↶", { 8630, 0 } }, + { "⤽", { 10557, 0 } }, + { "∪", { 8746, 0 } }, + { "⩈", { 10824, 0 } }, + { "⩆", { 10822, 0 } }, + { "⩊", { 10826, 0 } }, + { "⊍", { 8845, 0 } }, + { "⩅", { 10821, 0 } }, + { "∪︀", { 8746, 65024 } }, + { "↷", { 8631, 0 } }, + { "⤼", { 10556, 0 } }, + { "⋞", { 8926, 0 } }, + { "⋟", { 8927, 0 } }, + { "⋎", { 8910, 0 } }, + { "⋏", { 8911, 0 } }, + { "¤", { 164, 0 } }, + { "↶", { 8630, 0 } }, + { "↷", { 8631, 0 } }, + { "⋎", { 8910, 0 } }, + { "⋏", { 8911, 0 } }, + { "∲", { 8754, 0 } }, + { "∱", { 8753, 0 } }, + { "⌭", { 9005, 0 } }, + { "⇓", { 8659, 0 } }, + { "⥥", { 10597, 0 } }, + { "†", { 8224, 0 } }, + { "ℸ", { 8504, 0 } }, + { "↓", { 8595, 0 } }, + { "‐", { 8208, 0 } }, + { "⊣", { 8867, 0 } }, + { "⤏", { 10511, 0 } }, + { "˝", { 733, 0 } }, + { "ď", { 271, 0 } }, + { "д", { 1076, 0 } }, + { "ⅆ", { 8518, 0 } }, + { "‡", { 8225, 0 } }, + { "⇊", { 8650, 0 } }, + { "⩷", { 10871, 0 } }, + { "°", { 176, 0 } }, + { "δ", { 948, 0 } }, + { "⦱", { 10673, 0 } }, + { "⥿", { 10623, 0 } }, + { "𝔡", { 120097, 0 } }, + { "⇃", { 8643, 0 } }, + { "⇂", { 8642, 0 } }, + { "⋄", { 8900, 0 } }, + { "⋄", { 8900, 0 } }, + { "♦", { 9830, 0 } }, + { "♦", { 9830, 0 } }, + { "¨", { 168, 0 } }, + { "ϝ", { 989, 0 } }, + { "⋲", { 8946, 0 } }, + { "÷", { 247, 0 } }, + { "÷", { 247, 0 } }, + { "⋇", { 8903, 0 } }, + { "⋇", { 8903, 0 } }, + { "ђ", { 1106, 0 } }, + { "⌞", { 8990, 0 } }, + { "⌍", { 8973, 0 } }, + { "$", { 36, 0 } }, + { "𝕕", { 120149, 0 } }, + { "˙", { 729, 0 } }, + { "≐", { 8784, 0 } }, + { "≑", { 8785, 0 } }, + { "∸", { 8760, 0 } }, + { "∔", { 8724, 0 } }, + { "⊡", { 8865, 0 } }, + { "⌆", { 8966, 0 } }, + { "↓", { 8595, 0 } }, + { "⇊", { 8650, 0 } }, + { "⇃", { 8643, 0 } }, + { "⇂", { 8642, 0 } }, + { "⤐", { 10512, 0 } }, + { "⌟", { 8991, 0 } }, + { "⌌", { 8972, 0 } }, + { "𝒹", { 119993, 0 } }, + { "ѕ", { 1109, 0 } }, + { "⧶", { 10742, 0 } }, + { "đ", { 273, 0 } }, + { "⋱", { 8945, 0 } }, + { "▿", { 9663, 0 } }, + { "▾", { 9662, 0 } }, + { "⇵", { 8693, 0 } }, + { "⥯", { 10607, 0 } }, + { "⦦", { 10662, 0 } }, + { "џ", { 1119, 0 } }, + { "⟿", { 10239, 0 } }, + { "⩷", { 10871, 0 } }, + { "≑", { 8785, 0 } }, + { "é", { 233, 0 } }, + { "⩮", { 10862, 0 } }, + { "ě", { 283, 0 } }, + { "≖", { 8790, 0 } }, + { "ê", { 234, 0 } }, + { "≕", { 8789, 0 } }, + { "э", { 1101, 0 } }, + { "ė", { 279, 0 } }, + { "ⅇ", { 8519, 0 } }, + { "≒", { 8786, 0 } }, + { "𝔢", { 120098, 0 } }, + { "⪚", { 10906, 0 } }, + { "è", { 232, 0 } }, + { "⪖", { 10902, 0 } }, + { "⪘", { 10904, 0 } }, + { "⪙", { 10905, 0 } }, + { "⏧", { 9191, 0 } }, + { "ℓ", { 8467, 0 } }, + { "⪕", { 10901, 0 } }, + { "⪗", { 10903, 0 } }, + { "ē", { 275, 0 } }, + { "∅", { 8709, 0 } }, + { "∅", { 8709, 0 } }, + { "∅", { 8709, 0 } }, + { " ", { 8196, 0 } }, + { " ", { 8197, 0 } }, + { " ", { 8195, 0 } }, + { "ŋ", { 331, 0 } }, + { " ", { 8194, 0 } }, + { "ę", { 281, 0 } }, + { "𝕖", { 120150, 0 } }, + { "⋕", { 8917, 0 } }, + { "⧣", { 10723, 0 } }, + { "⩱", { 10865, 0 } }, + { "ε", { 949, 0 } }, + { "ε", { 949, 0 } }, + { "ϵ", { 1013, 0 } }, + { "≖", { 8790, 0 } }, + { "≕", { 8789, 0 } }, + { "≂", { 8770, 0 } }, + { "⪖", { 10902, 0 } }, + { "⪕", { 10901, 0 } }, + { "=", { 61, 0 } }, + { "≟", { 8799, 0 } }, + { "≡", { 8801, 0 } }, + { "⩸", { 10872, 0 } }, + { "⧥", { 10725, 0 } }, + { "≓", { 8787, 0 } }, + { "⥱", { 10609, 0 } }, + { "ℯ", { 8495, 0 } }, + { "≐", { 8784, 0 } }, + { "≂", { 8770, 0 } }, + { "η", { 951, 0 } }, + { "ð", { 240, 0 } }, + { "ë", { 235, 0 } }, + { "€", { 8364, 0 } }, + { "!", { 33, 0 } }, + { "∃", { 8707, 0 } }, + { "ℰ", { 8496, 0 } }, + { "ⅇ", { 8519, 0 } }, + { "≒", { 8786, 0 } }, + { "ф", { 1092, 0 } }, + { "♀", { 9792, 0 } }, + { "ffi", { 64259, 0 } }, + { "ff", { 64256, 0 } }, + { "ffl", { 64260, 0 } }, + { "𝔣", { 120099, 0 } }, + { "fi", { 64257, 0 } }, + { "fj", { 102, 106 } }, + { "♭", { 9837, 0 } }, + { "fl", { 64258, 0 } }, + { "▱", { 9649, 0 } }, + { "ƒ", { 402, 0 } }, + { "𝕗", { 120151, 0 } }, + { "∀", { 8704, 0 } }, + { "⋔", { 8916, 0 } }, + { "⫙", { 10969, 0 } }, + { "⨍", { 10765, 0 } }, + { "½", { 189, 0 } }, + { "½", { 189, 0 } }, + { "⅓", { 8531, 0 } }, + { "¼", { 188, 0 } }, + { "¼", { 188, 0 } }, + { "⅕", { 8533, 0 } }, + { "⅙", { 8537, 0 } }, + { "⅛", { 8539, 0 } }, + { "⅔", { 8532, 0 } }, + { "⅖", { 8534, 0 } }, + { "¾", { 190, 0 } }, + { "¾", { 190, 0 } }, + { "⅗", { 8535, 0 } }, + { "⅜", { 8540, 0 } }, + { "⅘", { 8536, 0 } }, + { "⅚", { 8538, 0 } }, + { "⅝", { 8541, 0 } }, + { "⅞", { 8542, 0 } }, + { "⁄", { 8260, 0 } }, + { "⌢", { 8994, 0 } }, + { "𝒻", { 119995, 0 } }, + { "≧", { 8807, 0 } }, + { "⪌", { 10892, 0 } }, + { "ǵ", { 501, 0 } }, + { "γ", { 947, 0 } }, + { "ϝ", { 989, 0 } }, + { "⪆", { 10886, 0 } }, + { "ğ", { 287, 0 } }, + { "ĝ", { 285, 0 } }, + { "г", { 1075, 0 } }, + { "ġ", { 289, 0 } }, + { "≥", { 8805, 0 } }, + { "⋛", { 8923, 0 } }, + { "≥", { 8805, 0 } }, + { "≧", { 8807, 0 } }, + { "⩾", { 10878, 0 } }, + { "⩾", { 10878, 0 } }, + { "⪩", { 10921, 0 } }, + { "⪀", { 10880, 0 } }, + { "⪂", { 10882, 0 } }, + { "⪄", { 10884, 0 } }, + { "⋛︀", { 8923, 65024 } }, + { "⪔", { 10900, 0 } }, + { "𝔤", { 120100, 0 } }, + { "≫", { 8811, 0 } }, + { "⋙", { 8921, 0 } }, + { "ℷ", { 8503, 0 } }, + { "ѓ", { 1107, 0 } }, + { "≷", { 8823, 0 } }, + { "⪒", { 10898, 0 } }, + { "⪥", { 10917, 0 } }, + { "⪤", { 10916, 0 } }, + { "≩", { 8809, 0 } }, + { "⪊", { 10890, 0 } }, + { "⪊", { 10890, 0 } }, + { "⪈", { 10888, 0 } }, + { "⪈", { 10888, 0 } }, + { "≩", { 8809, 0 } }, + { "⋧", { 8935, 0 } }, + { "𝕘", { 120152, 0 } }, + { "`", { 96, 0 } }, + { "ℊ", { 8458, 0 } }, + { "≳", { 8819, 0 } }, + { "⪎", { 10894, 0 } }, + { "⪐", { 10896, 0 } }, + { ">", { 62, 0 } }, + { "⪧", { 10919, 0 } }, + { "⩺", { 10874, 0 } }, + { "⋗", { 8919, 0 } }, + { "⦕", { 10645, 0 } }, + { "⩼", { 10876, 0 } }, + { "⪆", { 10886, 0 } }, + { "⥸", { 10616, 0 } }, + { "⋗", { 8919, 0 } }, + { "⋛", { 8923, 0 } }, + { "⪌", { 10892, 0 } }, + { "≷", { 8823, 0 } }, + { "≳", { 8819, 0 } }, + { "≩︀", { 8809, 65024 } }, + { "≩︀", { 8809, 65024 } }, + { "⇔", { 8660, 0 } }, + { " ", { 8202, 0 } }, + { "½", { 189, 0 } }, + { "ℋ", { 8459, 0 } }, + { "ъ", { 1098, 0 } }, + { "↔", { 8596, 0 } }, + { "⥈", { 10568, 0 } }, + { "↭", { 8621, 0 } }, + { "ℏ", { 8463, 0 } }, + { "ĥ", { 293, 0 } }, + { "♥", { 9829, 0 } }, + { "♥", { 9829, 0 } }, + { "…", { 8230, 0 } }, + { "⊹", { 8889, 0 } }, + { "𝔥", { 120101, 0 } }, + { "⤥", { 10533, 0 } }, + { "⤦", { 10534, 0 } }, + { "⇿", { 8703, 0 } }, + { "∻", { 8763, 0 } }, + { "↩", { 8617, 0 } }, + { "↪", { 8618, 0 } }, + { "𝕙", { 120153, 0 } }, + { "―", { 8213, 0 } }, + { "𝒽", { 119997, 0 } }, + { "ℏ", { 8463, 0 } }, + { "ħ", { 295, 0 } }, + { "⁃", { 8259, 0 } }, + { "‐", { 8208, 0 } }, + { "í", { 237, 0 } }, + { "⁣", { 8291, 0 } }, + { "î", { 238, 0 } }, + { "и", { 1080, 0 } }, + { "е", { 1077, 0 } }, + { "¡", { 161, 0 } }, + { "⇔", { 8660, 0 } }, + { "𝔦", { 120102, 0 } }, + { "ì", { 236, 0 } }, + { "ⅈ", { 8520, 0 } }, + { "⨌", { 10764, 0 } }, + { "∭", { 8749, 0 } }, + { "⧜", { 10716, 0 } }, + { "℩", { 8489, 0 } }, + { "ij", { 307, 0 } }, + { "ī", { 299, 0 } }, + { "ℑ", { 8465, 0 } }, + { "ℐ", { 8464, 0 } }, + { "ℑ", { 8465, 0 } }, + { "ı", { 305, 0 } }, + { "⊷", { 8887, 0 } }, + { "Ƶ", { 437, 0 } }, + { "∈", { 8712, 0 } }, + { "℅", { 8453, 0 } }, + { "∞", { 8734, 0 } }, + { "⧝", { 10717, 0 } }, + { "ı", { 305, 0 } }, + { "∫", { 8747, 0 } }, + { "⊺", { 8890, 0 } }, + { "ℤ", { 8484, 0 } }, + { "⊺", { 8890, 0 } }, + { "⨗", { 10775, 0 } }, + { "⨼", { 10812, 0 } }, + { "ё", { 1105, 0 } }, + { "į", { 303, 0 } }, + { "𝕚", { 120154, 0 } }, + { "ι", { 953, 0 } }, + { "⨼", { 10812, 0 } }, + { "¿", { 191, 0 } }, + { "𝒾", { 119998, 0 } }, + { "∈", { 8712, 0 } }, + { "⋹", { 8953, 0 } }, + { "⋵", { 8949, 0 } }, + { "⋴", { 8948, 0 } }, + { "⋳", { 8947, 0 } }, + { "∈", { 8712, 0 } }, + { "⁢", { 8290, 0 } }, + { "ĩ", { 297, 0 } }, + { "і", { 1110, 0 } }, + { "ï", { 239, 0 } }, + { "ĵ", { 309, 0 } }, + { "й", { 1081, 0 } }, + { "𝔧", { 120103, 0 } }, + { "ȷ", { 567, 0 } }, + { "𝕛", { 120155, 0 } }, + { "𝒿", { 119999, 0 } }, + { "ј", { 1112, 0 } }, + { "є", { 1108, 0 } }, + { "κ", { 954, 0 } }, + { "ϰ", { 1008, 0 } }, + { "ķ", { 311, 0 } }, + { "к", { 1082, 0 } }, + { "𝔨", { 120104, 0 } }, + { "ĸ", { 312, 0 } }, + { "х", { 1093, 0 } }, + { "ќ", { 1116, 0 } }, + { "𝕜", { 120156, 0 } }, + { "𝓀", { 120000, 0 } }, + { "⇚", { 8666, 0 } }, + { "⇐", { 8656, 0 } }, + { "⤛", { 10523, 0 } }, + { "⤎", { 10510, 0 } }, + { "≦", { 8806, 0 } }, + { "⪋", { 10891, 0 } }, + { "⥢", { 10594, 0 } }, + { "ĺ", { 314, 0 } }, + { "⦴", { 10676, 0 } }, + { "ℒ", { 8466, 0 } }, + { "λ", { 955, 0 } }, + { "⟨", { 10216, 0 } }, + { "⦑", { 10641, 0 } }, + { "⟨", { 10216, 0 } }, + { "⪅", { 10885, 0 } }, + { "«", { 171, 0 } }, + { "←", { 8592, 0 } }, + { "⇤", { 8676, 0 } }, + { "⤟", { 10527, 0 } }, + { "⤝", { 10525, 0 } }, + { "↩", { 8617, 0 } }, + { "↫", { 8619, 0 } }, + { "⤹", { 10553, 0 } }, + { "⥳", { 10611, 0 } }, + { "↢", { 8610, 0 } }, + { "⪫", { 10923, 0 } }, + { "⤙", { 10521, 0 } }, + { "⪭", { 10925, 0 } }, + { "⪭︀", { 10925, 65024 } }, + { "⤌", { 10508, 0 } }, + { "❲", { 10098, 0 } }, + { "{", { 123, 0 } }, + { "[", { 91, 0 } }, + { "⦋", { 10635, 0 } }, + { "⦏", { 10639, 0 } }, + { "⦍", { 10637, 0 } }, + { "ľ", { 318, 0 } }, + { "ļ", { 316, 0 } }, + { "⌈", { 8968, 0 } }, + { "{", { 123, 0 } }, + { "л", { 1083, 0 } }, + { "⤶", { 10550, 0 } }, + { "“", { 8220, 0 } }, + { "„", { 8222, 0 } }, + { "⥧", { 10599, 0 } }, + { "⥋", { 10571, 0 } }, + { "↲", { 8626, 0 } }, + { "≤", { 8804, 0 } }, + { "←", { 8592, 0 } }, + { "↢", { 8610, 0 } }, + { "↽", { 8637, 0 } }, + { "↼", { 8636, 0 } }, + { "⇇", { 8647, 0 } }, + { "↔", { 8596, 0 } }, + { "⇆", { 8646, 0 } }, + { "⇋", { 8651, 0 } }, + { "↭", { 8621, 0 } }, + { "⋋", { 8907, 0 } }, + { "⋚", { 8922, 0 } }, + { "≤", { 8804, 0 } }, + { "≦", { 8806, 0 } }, + { "⩽", { 10877, 0 } }, + { "⩽", { 10877, 0 } }, + { "⪨", { 10920, 0 } }, + { "⩿", { 10879, 0 } }, + { "⪁", { 10881, 0 } }, + { "⪃", { 10883, 0 } }, + { "⋚︀", { 8922, 65024 } }, + { "⪓", { 10899, 0 } }, + { "⪅", { 10885, 0 } }, + { "⋖", { 8918, 0 } }, + { "⋚", { 8922, 0 } }, + { "⪋", { 10891, 0 } }, + { "≶", { 8822, 0 } }, + { "≲", { 8818, 0 } }, + { "⥼", { 10620, 0 } }, + { "⌊", { 8970, 0 } }, + { "𝔩", { 120105, 0 } }, + { "≶", { 8822, 0 } }, + { "⪑", { 10897, 0 } }, + { "↽", { 8637, 0 } }, + { "↼", { 8636, 0 } }, + { "⥪", { 10602, 0 } }, + { "▄", { 9604, 0 } }, + { "љ", { 1113, 0 } }, + { "≪", { 8810, 0 } }, + { "⇇", { 8647, 0 } }, + { "⌞", { 8990, 0 } }, + { "⥫", { 10603, 0 } }, + { "◺", { 9722, 0 } }, + { "ŀ", { 320, 0 } }, + { "⎰", { 9136, 0 } }, + { "⎰", { 9136, 0 } }, + { "≨", { 8808, 0 } }, + { "⪉", { 10889, 0 } }, + { "⪉", { 10889, 0 } }, + { "⪇", { 10887, 0 } }, + { "⪇", { 10887, 0 } }, + { "≨", { 8808, 0 } }, + { "⋦", { 8934, 0 } }, + { "⟬", { 10220, 0 } }, + { "⇽", { 8701, 0 } }, + { "⟦", { 10214, 0 } }, + { "⟵", { 10229, 0 } }, + { "⟷", { 10231, 0 } }, + { "⟼", { 10236, 0 } }, + { "⟶", { 10230, 0 } }, + { "↫", { 8619, 0 } }, + { "↬", { 8620, 0 } }, + { "⦅", { 10629, 0 } }, + { "𝕝", { 120157, 0 } }, + { "⨭", { 10797, 0 } }, + { "⨴", { 10804, 0 } }, + { "∗", { 8727, 0 } }, + { "_", { 95, 0 } }, + { "◊", { 9674, 0 } }, + { "◊", { 9674, 0 } }, + { "⧫", { 10731, 0 } }, + { "(", { 40, 0 } }, + { "⦓", { 10643, 0 } }, + { "⇆", { 8646, 0 } }, + { "⌟", { 8991, 0 } }, + { "⇋", { 8651, 0 } }, + { "⥭", { 10605, 0 } }, + { "‎", { 8206, 0 } }, + { "⊿", { 8895, 0 } }, + { "‹", { 8249, 0 } }, + { "𝓁", { 120001, 0 } }, + { "↰", { 8624, 0 } }, + { "≲", { 8818, 0 } }, + { "⪍", { 10893, 0 } }, + { "⪏", { 10895, 0 } }, + { "[", { 91, 0 } }, + { "‘", { 8216, 0 } }, + { "‚", { 8218, 0 } }, + { "ł", { 322, 0 } }, + { "<", { 60, 0 } }, + { "⪦", { 10918, 0 } }, + { "⩹", { 10873, 0 } }, + { "⋖", { 8918, 0 } }, + { "⋋", { 8907, 0 } }, + { "⋉", { 8905, 0 } }, + { "⥶", { 10614, 0 } }, + { "⩻", { 10875, 0 } }, + { "⦖", { 10646, 0 } }, + { "◃", { 9667, 0 } }, + { "⊴", { 8884, 0 } }, + { "◂", { 9666, 0 } }, + { "⥊", { 10570, 0 } }, + { "⥦", { 10598, 0 } }, + { "≨︀", { 8808, 65024 } }, + { "≨︀", { 8808, 65024 } }, + { "∺", { 8762, 0 } }, + { "¯", { 175, 0 } }, + { "♂", { 9794, 0 } }, + { "✠", { 10016, 0 } }, + { "✠", { 10016, 0 } }, + { "↦", { 8614, 0 } }, + { "↦", { 8614, 0 } }, + { "↧", { 8615, 0 } }, + { "↤", { 8612, 0 } }, + { "↥", { 8613, 0 } }, + { "▮", { 9646, 0 } }, + { "⨩", { 10793, 0 } }, + { "м", { 1084, 0 } }, + { "—", { 8212, 0 } }, + { "∡", { 8737, 0 } }, + { "𝔪", { 120106, 0 } }, + { "℧", { 8487, 0 } }, + { "µ", { 181, 0 } }, + { "∣", { 8739, 0 } }, + { "*", { 42, 0 } }, + { "⫰", { 10992, 0 } }, + { "·", { 183, 0 } }, + { "−", { 8722, 0 } }, + { "⊟", { 8863, 0 } }, + { "∸", { 8760, 0 } }, + { "⨪", { 10794, 0 } }, + { "⫛", { 10971, 0 } }, + { "…", { 8230, 0 } }, + { "∓", { 8723, 0 } }, + { "⊧", { 8871, 0 } }, + { "𝕞", { 120158, 0 } }, + { "∓", { 8723, 0 } }, + { "𝓂", { 120002, 0 } }, + { "∾", { 8766, 0 } }, + { "μ", { 956, 0 } }, + { "⊸", { 8888, 0 } }, + { "⊸", { 8888, 0 } }, + { "⋙̸", { 8921, 824 } }, + { "≫⃒", { 8811, 8402 } }, + { "≫̸", { 8811, 824 } }, + { "⇍", { 8653, 0 } }, + { "⇎", { 8654, 0 } }, + { "⋘̸", { 8920, 824 } }, + { "≪⃒", { 8810, 8402 } }, + { "≪̸", { 8810, 824 } }, + { "⇏", { 8655, 0 } }, + { "⊯", { 8879, 0 } }, + { "⊮", { 8878, 0 } }, + { "∇", { 8711, 0 } }, + { "ń", { 324, 0 } }, + { "∠⃒", { 8736, 8402 } }, + { "≉", { 8777, 0 } }, + { "⩰̸", { 10864, 824 } }, + { "≋̸", { 8779, 824 } }, + { "ʼn", { 329, 0 } }, + { "≉", { 8777, 0 } }, + { "♮", { 9838, 0 } }, + { "♮", { 9838, 0 } }, + { "ℕ", { 8469, 0 } }, + { " ", { 160, 0 } }, + { "≎̸", { 8782, 824 } }, + { "≏̸", { 8783, 824 } }, + { "⩃", { 10819, 0 } }, + { "ň", { 328, 0 } }, + { "ņ", { 326, 0 } }, + { "≇", { 8775, 0 } }, + { "⩭̸", { 10861, 824 } }, + { "⩂", { 10818, 0 } }, + { "н", { 1085, 0 } }, + { "–", { 8211, 0 } }, + { "≠", { 8800, 0 } }, + { "⇗", { 8663, 0 } }, + { "⤤", { 10532, 0 } }, + { "↗", { 8599, 0 } }, + { "↗", { 8599, 0 } }, + { "≐̸", { 8784, 824 } }, + { "≢", { 8802, 0 } }, + { "⤨", { 10536, 0 } }, + { "≂̸", { 8770, 824 } }, + { "∄", { 8708, 0 } }, + { "∄", { 8708, 0 } }, + { "𝔫", { 120107, 0 } }, + { "≧̸", { 8807, 824 } }, + { "≱", { 8817, 0 } }, + { "≱", { 8817, 0 } }, + { "≧̸", { 8807, 824 } }, + { "⩾̸", { 10878, 824 } }, + { "⩾̸", { 10878, 824 } }, + { "≵", { 8821, 0 } }, + { "≯", { 8815, 0 } }, + { "≯", { 8815, 0 } }, + { "⇎", { 8654, 0 } }, + { "↮", { 8622, 0 } }, + { "⫲", { 10994, 0 } }, + { "∋", { 8715, 0 } }, + { "⋼", { 8956, 0 } }, + { "⋺", { 8954, 0 } }, + { "∋", { 8715, 0 } }, + { "њ", { 1114, 0 } }, + { "⇍", { 8653, 0 } }, + { "≦̸", { 8806, 824 } }, + { "↚", { 8602, 0 } }, + { "‥", { 8229, 0 } }, + { "≰", { 8816, 0 } }, + { "↚", { 8602, 0 } }, + { "↮", { 8622, 0 } }, + { "≰", { 8816, 0 } }, + { "≦̸", { 8806, 824 } }, + { "⩽̸", { 10877, 824 } }, + { "⩽̸", { 10877, 824 } }, + { "≮", { 8814, 0 } }, + { "≴", { 8820, 0 } }, + { "≮", { 8814, 0 } }, + { "⋪", { 8938, 0 } }, + { "⋬", { 8940, 0 } }, + { "∤", { 8740, 0 } }, + { "𝕟", { 120159, 0 } }, + { "¬", { 172, 0 } }, + { "∉", { 8713, 0 } }, + { "⋹̸", { 8953, 824 } }, + { "⋵̸", { 8949, 824 } }, + { "∉", { 8713, 0 } }, + { "⋷", { 8951, 0 } }, + { "⋶", { 8950, 0 } }, + { "∌", { 8716, 0 } }, + { "∌", { 8716, 0 } }, + { "⋾", { 8958, 0 } }, + { "⋽", { 8957, 0 } }, + { "∦", { 8742, 0 } }, + { "∦", { 8742, 0 } }, + { "⫽⃥", { 11005, 8421 } }, + { "∂̸", { 8706, 824 } }, + { "⨔", { 10772, 0 } }, + { "⊀", { 8832, 0 } }, + { "⋠", { 8928, 0 } }, + { "⪯̸", { 10927, 824 } }, + { "⊀", { 8832, 0 } }, + { "⪯̸", { 10927, 824 } }, + { "⇏", { 8655, 0 } }, + { "↛", { 8603, 0 } }, + { "⤳̸", { 10547, 824 } }, + { "↝̸", { 8605, 824 } }, + { "↛", { 8603, 0 } }, + { "⋫", { 8939, 0 } }, + { "⋭", { 8941, 0 } }, + { "⊁", { 8833, 0 } }, + { "⋡", { 8929, 0 } }, + { "⪰̸", { 10928, 824 } }, + { "𝓃", { 120003, 0 } }, + { "∤", { 8740, 0 } }, + { "∦", { 8742, 0 } }, + { "≁", { 8769, 0 } }, + { "≄", { 8772, 0 } }, + { "≄", { 8772, 0 } }, + { "∤", { 8740, 0 } }, + { "∦", { 8742, 0 } }, + { "⋢", { 8930, 0 } }, + { "⋣", { 8931, 0 } }, + { "⊄", { 8836, 0 } }, + { "⫅̸", { 10949, 824 } }, + { "⊈", { 8840, 0 } }, + { "⊂⃒", { 8834, 8402 } }, + { "⊈", { 8840, 0 } }, + { "⫅̸", { 10949, 824 } }, + { "⊁", { 8833, 0 } }, + { "⪰̸", { 10928, 824 } }, + { "⊅", { 8837, 0 } }, + { "⫆̸", { 10950, 824 } }, + { "⊉", { 8841, 0 } }, + { "⊃⃒", { 8835, 8402 } }, + { "⊉", { 8841, 0 } }, + { "⫆̸", { 10950, 824 } }, + { "≹", { 8825, 0 } }, + { "ñ", { 241, 0 } }, + { "≸", { 8824, 0 } }, + { "⋪", { 8938, 0 } }, + { "⋬", { 8940, 0 } }, + { "⋫", { 8939, 0 } }, + { "⋭", { 8941, 0 } }, + { "ν", { 957, 0 } }, + { "#", { 35, 0 } }, + { "№", { 8470, 0 } }, + { " ", { 8199, 0 } }, + { "⊭", { 8877, 0 } }, + { "⤄", { 10500, 0 } }, + { "≍⃒", { 8781, 8402 } }, + { "⊬", { 8876, 0 } }, + { "≥⃒", { 8805, 8402 } }, + { ">⃒", { 62, 8402 } }, + { "⧞", { 10718, 0 } }, + { "⤂", { 10498, 0 } }, + { "≤⃒", { 8804, 8402 } }, + { "<⃒", { 60, 8402 } }, + { "⊴⃒", { 8884, 8402 } }, + { "⤃", { 10499, 0 } }, + { "⊵⃒", { 8885, 8402 } }, + { "∼⃒", { 8764, 8402 } }, + { "⇖", { 8662, 0 } }, + { "⤣", { 10531, 0 } }, + { "↖", { 8598, 0 } }, + { "↖", { 8598, 0 } }, + { "⤧", { 10535, 0 } }, + { "Ⓢ", { 9416, 0 } }, + { "ó", { 243, 0 } }, + { "⊛", { 8859, 0 } }, + { "⊚", { 8858, 0 } }, + { "ô", { 244, 0 } }, + { "о", { 1086, 0 } }, + { "⊝", { 8861, 0 } }, + { "ő", { 337, 0 } }, + { "⨸", { 10808, 0 } }, + { "⊙", { 8857, 0 } }, + { "⦼", { 10684, 0 } }, + { "œ", { 339, 0 } }, + { "⦿", { 10687, 0 } }, + { "𝔬", { 120108, 0 } }, + { "˛", { 731, 0 } }, + { "ò", { 242, 0 } }, + { "⧁", { 10689, 0 } }, + { "⦵", { 10677, 0 } }, + { "Ω", { 937, 0 } }, + { "∮", { 8750, 0 } }, + { "↺", { 8634, 0 } }, + { "⦾", { 10686, 0 } }, + { "⦻", { 10683, 0 } }, + { "‾", { 8254, 0 } }, + { "⧀", { 10688, 0 } }, + { "ō", { 333, 0 } }, + { "ω", { 969, 0 } }, + { "ο", { 959, 0 } }, + { "⦶", { 10678, 0 } }, + { "⊖", { 8854, 0 } }, + { "𝕠", { 120160, 0 } }, + { "⦷", { 10679, 0 } }, + { "⦹", { 10681, 0 } }, + { "⊕", { 8853, 0 } }, + { "∨", { 8744, 0 } }, + { "↻", { 8635, 0 } }, + { "⩝", { 10845, 0 } }, + { "ℴ", { 8500, 0 } }, + { "ℴ", { 8500, 0 } }, + { "ª", { 170, 0 } }, + { "º", { 186, 0 } }, + { "⊶", { 8886, 0 } }, + { "⩖", { 10838, 0 } }, + { "⩗", { 10839, 0 } }, + { "⩛", { 10843, 0 } }, + { "ℴ", { 8500, 0 } }, + { "ø", { 248, 0 } }, + { "⊘", { 8856, 0 } }, + { "õ", { 245, 0 } }, + { "⊗", { 8855, 0 } }, + { "⨶", { 10806, 0 } }, + { "ö", { 246, 0 } }, + { "⌽", { 9021, 0 } }, + { "∥", { 8741, 0 } }, + { "¶", { 182, 0 } }, + { "∥", { 8741, 0 } }, + { "⫳", { 10995, 0 } }, + { "⫽", { 11005, 0 } }, + { "∂", { 8706, 0 } }, + { "п", { 1087, 0 } }, + { "%", { 37, 0 } }, + { ".", { 46, 0 } }, + { "‰", { 8240, 0 } }, + { "⊥", { 8869, 0 } }, + { "‱", { 8241, 0 } }, + { "𝔭", { 120109, 0 } }, + { "φ", { 966, 0 } }, + { "ϕ", { 981, 0 } }, + { "ℳ", { 8499, 0 } }, + { "☎", { 9742, 0 } }, + { "π", { 960, 0 } }, + { "⋔", { 8916, 0 } }, + { "ϖ", { 982, 0 } }, + { "ℏ", { 8463, 0 } }, + { "ℎ", { 8462, 0 } }, + { "ℏ", { 8463, 0 } }, + { "+", { 43, 0 } }, + { "⨣", { 10787, 0 } }, + { "⊞", { 8862, 0 } }, + { "⨢", { 10786, 0 } }, + { "∔", { 8724, 0 } }, + { "⨥", { 10789, 0 } }, + { "⩲", { 10866, 0 } }, + { "±", { 177, 0 } }, + { "⨦", { 10790, 0 } }, + { "⨧", { 10791, 0 } }, + { "±", { 177, 0 } }, + { "⨕", { 10773, 0 } }, + { "𝕡", { 120161, 0 } }, + { "£", { 163, 0 } }, + { "≺", { 8826, 0 } }, + { "⪳", { 10931, 0 } }, + { "⪷", { 10935, 0 } }, + { "≼", { 8828, 0 } }, + { "⪯", { 10927, 0 } }, + { "≺", { 8826, 0 } }, + { "⪷", { 10935, 0 } }, + { "≼", { 8828, 0 } }, + { "⪯", { 10927, 0 } }, + { "⪹", { 10937, 0 } }, + { "⪵", { 10933, 0 } }, + { "⋨", { 8936, 0 } }, + { "≾", { 8830, 0 } }, + { "′", { 8242, 0 } }, + { "ℙ", { 8473, 0 } }, + { "⪵", { 10933, 0 } }, + { "⪹", { 10937, 0 } }, + { "⋨", { 8936, 0 } }, + { "∏", { 8719, 0 } }, + { "⌮", { 9006, 0 } }, + { "⌒", { 8978, 0 } }, + { "⌓", { 8979, 0 } }, + { "∝", { 8733, 0 } }, + { "∝", { 8733, 0 } }, + { "≾", { 8830, 0 } }, + { "⊰", { 8880, 0 } }, + { "𝓅", { 120005, 0 } }, + { "ψ", { 968, 0 } }, + { " ", { 8200, 0 } }, + { "𝔮", { 120110, 0 } }, + { "⨌", { 10764, 0 } }, + { "𝕢", { 120162, 0 } }, + { "⁗", { 8279, 0 } }, + { "𝓆", { 120006, 0 } }, + { "ℍ", { 8461, 0 } }, + { "⨖", { 10774, 0 } }, + { "?", { 63, 0 } }, + { "≟", { 8799, 0 } }, + { """, { 34, 0 } }, + { "⇛", { 8667, 0 } }, + { "⇒", { 8658, 0 } }, + { "⤜", { 10524, 0 } }, + { "⤏", { 10511, 0 } }, + { "⥤", { 10596, 0 } }, + { "∽̱", { 8765, 817 } }, + { "ŕ", { 341, 0 } }, + { "√", { 8730, 0 } }, + { "⦳", { 10675, 0 } }, + { "⟩", { 10217, 0 } }, + { "⦒", { 10642, 0 } }, + { "⦥", { 10661, 0 } }, + { "⟩", { 10217, 0 } }, + { "»", { 187, 0 } }, + { "→", { 8594, 0 } }, + { "⥵", { 10613, 0 } }, + { "⇥", { 8677, 0 } }, + { "⤠", { 10528, 0 } }, + { "⤳", { 10547, 0 } }, + { "⤞", { 10526, 0 } }, + { "↪", { 8618, 0 } }, + { "↬", { 8620, 0 } }, + { "⥅", { 10565, 0 } }, + { "⥴", { 10612, 0 } }, + { "↣", { 8611, 0 } }, + { "↝", { 8605, 0 } }, + { "⤚", { 10522, 0 } }, + { "∶", { 8758, 0 } }, + { "ℚ", { 8474, 0 } }, + { "⤍", { 10509, 0 } }, + { "❳", { 10099, 0 } }, + { "}", { 125, 0 } }, + { "]", { 93, 0 } }, + { "⦌", { 10636, 0 } }, + { "⦎", { 10638, 0 } }, + { "⦐", { 10640, 0 } }, + { "ř", { 345, 0 } }, + { "ŗ", { 343, 0 } }, + { "⌉", { 8969, 0 } }, + { "}", { 125, 0 } }, + { "р", { 1088, 0 } }, + { "⤷", { 10551, 0 } }, + { "⥩", { 10601, 0 } }, + { "”", { 8221, 0 } }, + { "”", { 8221, 0 } }, + { "↳", { 8627, 0 } }, + { "ℜ", { 8476, 0 } }, + { "ℛ", { 8475, 0 } }, + { "ℜ", { 8476, 0 } }, + { "ℝ", { 8477, 0 } }, + { "▭", { 9645, 0 } }, + { "®", { 174, 0 } }, + { "⥽", { 10621, 0 } }, + { "⌋", { 8971, 0 } }, + { "𝔯", { 120111, 0 } }, + { "⇁", { 8641, 0 } }, + { "⇀", { 8640, 0 } }, + { "⥬", { 10604, 0 } }, + { "ρ", { 961, 0 } }, + { "ϱ", { 1009, 0 } }, + { "→", { 8594, 0 } }, + { "↣", { 8611, 0 } }, + { "⇁", { 8641, 0 } }, + { "⇀", { 8640, 0 } }, + { "⇄", { 8644, 0 } }, + { "⇌", { 8652, 0 } }, + { "⇉", { 8649, 0 } }, + { "↝", { 8605, 0 } }, + { "⋌", { 8908, 0 } }, + { "˚", { 730, 0 } }, + { "≓", { 8787, 0 } }, + { "⇄", { 8644, 0 } }, + { "⇌", { 8652, 0 } }, + { "‏", { 8207, 0 } }, + { "⎱", { 9137, 0 } }, + { "⎱", { 9137, 0 } }, + { "⫮", { 10990, 0 } }, + { "⟭", { 10221, 0 } }, + { "⇾", { 8702, 0 } }, + { "⟧", { 10215, 0 } }, + { "⦆", { 10630, 0 } }, + { "𝕣", { 120163, 0 } }, + { "⨮", { 10798, 0 } }, + { "⨵", { 10805, 0 } }, + { ")", { 41, 0 } }, + { "⦔", { 10644, 0 } }, + { "⨒", { 10770, 0 } }, + { "⇉", { 8649, 0 } }, + { "›", { 8250, 0 } }, + { "𝓇", { 120007, 0 } }, + { "↱", { 8625, 0 } }, + { "]", { 93, 0 } }, + { "’", { 8217, 0 } }, + { "’", { 8217, 0 } }, + { "⋌", { 8908, 0 } }, + { "⋊", { 8906, 0 } }, + { "▹", { 9657, 0 } }, + { "⊵", { 8885, 0 } }, + { "▸", { 9656, 0 } }, + { "⧎", { 10702, 0 } }, + { "⥨", { 10600, 0 } }, + { "℞", { 8478, 0 } }, + { "ś", { 347, 0 } }, + { "‚", { 8218, 0 } }, + { "≻", { 8827, 0 } }, + { "⪴", { 10932, 0 } }, + { "⪸", { 10936, 0 } }, + { "š", { 353, 0 } }, + { "≽", { 8829, 0 } }, + { "⪰", { 10928, 0 } }, + { "ş", { 351, 0 } }, + { "ŝ", { 349, 0 } }, + { "⪶", { 10934, 0 } }, + { "⪺", { 10938, 0 } }, + { "⋩", { 8937, 0 } }, + { "⨓", { 10771, 0 } }, + { "≿", { 8831, 0 } }, + { "с", { 1089, 0 } }, + { "⋅", { 8901, 0 } }, + { "⊡", { 8865, 0 } }, + { "⩦", { 10854, 0 } }, + { "⇘", { 8664, 0 } }, + { "⤥", { 10533, 0 } }, + { "↘", { 8600, 0 } }, + { "↘", { 8600, 0 } }, + { "§", { 167, 0 } }, + { ";", { 59, 0 } }, + { "⤩", { 10537, 0 } }, + { "∖", { 8726, 0 } }, + { "∖", { 8726, 0 } }, + { "✶", { 10038, 0 } }, + { "𝔰", { 120112, 0 } }, + { "⌢", { 8994, 0 } }, + { "♯", { 9839, 0 } }, + { "щ", { 1097, 0 } }, + { "ш", { 1096, 0 } }, + { "∣", { 8739, 0 } }, + { "∥", { 8741, 0 } }, + { "­", { 173, 0 } }, + { "σ", { 963, 0 } }, + { "ς", { 962, 0 } }, + { "ς", { 962, 0 } }, + { "∼", { 8764, 0 } }, + { "⩪", { 10858, 0 } }, + { "≃", { 8771, 0 } }, + { "≃", { 8771, 0 } }, + { "⪞", { 10910, 0 } }, + { "⪠", { 10912, 0 } }, + { "⪝", { 10909, 0 } }, + { "⪟", { 10911, 0 } }, + { "≆", { 8774, 0 } }, + { "⨤", { 10788, 0 } }, + { "⥲", { 10610, 0 } }, + { "←", { 8592, 0 } }, + { "∖", { 8726, 0 } }, + { "⨳", { 10803, 0 } }, + { "⧤", { 10724, 0 } }, + { "∣", { 8739, 0 } }, + { "⌣", { 8995, 0 } }, + { "⪪", { 10922, 0 } }, + { "⪬", { 10924, 0 } }, + { "⪬︀", { 10924, 65024 } }, + { "ь", { 1100, 0 } }, + { "/", { 47, 0 } }, + { "⧄", { 10692, 0 } }, + { "⌿", { 9023, 0 } }, + { "𝕤", { 120164, 0 } }, + { "♠", { 9824, 0 } }, + { "♠", { 9824, 0 } }, + { "∥", { 8741, 0 } }, + { "⊓", { 8851, 0 } }, + { "⊓︀", { 8851, 65024 } }, + { "⊔", { 8852, 0 } }, + { "⊔︀", { 8852, 65024 } }, + { "⊏", { 8847, 0 } }, + { "⊑", { 8849, 0 } }, + { "⊏", { 8847, 0 } }, + { "⊑", { 8849, 0 } }, + { "⊐", { 8848, 0 } }, + { "⊒", { 8850, 0 } }, + { "⊐", { 8848, 0 } }, + { "⊒", { 8850, 0 } }, + { "□", { 9633, 0 } }, + { "□", { 9633, 0 } }, + { "▪", { 9642, 0 } }, + { "▪", { 9642, 0 } }, + { "→", { 8594, 0 } }, + { "𝓈", { 120008, 0 } }, + { "∖", { 8726, 0 } }, + { "⌣", { 8995, 0 } }, + { "⋆", { 8902, 0 } }, + { "☆", { 9734, 0 } }, + { "★", { 9733, 0 } }, + { "ϵ", { 1013, 0 } }, + { "ϕ", { 981, 0 } }, + { "¯", { 175, 0 } }, + { "⊂", { 8834, 0 } }, + { "⫅", { 10949, 0 } }, + { "⪽", { 10941, 0 } }, + { "⊆", { 8838, 0 } }, + { "⫃", { 10947, 0 } }, + { "⫁", { 10945, 0 } }, + { "⫋", { 10955, 0 } }, + { "⊊", { 8842, 0 } }, + { "⪿", { 10943, 0 } }, + { "⥹", { 10617, 0 } }, + { "⊂", { 8834, 0 } }, + { "⊆", { 8838, 0 } }, + { "⫅", { 10949, 0 } }, + { "⊊", { 8842, 0 } }, + { "⫋", { 10955, 0 } }, + { "⫇", { 10951, 0 } }, + { "⫕", { 10965, 0 } }, + { "⫓", { 10963, 0 } }, + { "≻", { 8827, 0 } }, + { "⪸", { 10936, 0 } }, + { "≽", { 8829, 0 } }, + { "⪰", { 10928, 0 } }, + { "⪺", { 10938, 0 } }, + { "⪶", { 10934, 0 } }, + { "⋩", { 8937, 0 } }, + { "≿", { 8831, 0 } }, + { "∑", { 8721, 0 } }, + { "♪", { 9834, 0 } }, + { "¹", { 185, 0 } }, + { "¹", { 185, 0 } }, + { "²", { 178, 0 } }, + { "²", { 178, 0 } }, + { "³", { 179, 0 } }, + { "³", { 179, 0 } }, + { "⊃", { 8835, 0 } }, + { "⫆", { 10950, 0 } }, + { "⪾", { 10942, 0 } }, + { "⫘", { 10968, 0 } }, + { "⊇", { 8839, 0 } }, + { "⫄", { 10948, 0 } }, + { "⟉", { 10185, 0 } }, + { "⫗", { 10967, 0 } }, + { "⥻", { 10619, 0 } }, + { "⫂", { 10946, 0 } }, + { "⫌", { 10956, 0 } }, + { "⊋", { 8843, 0 } }, + { "⫀", { 10944, 0 } }, + { "⊃", { 8835, 0 } }, + { "⊇", { 8839, 0 } }, + { "⫆", { 10950, 0 } }, + { "⊋", { 8843, 0 } }, + { "⫌", { 10956, 0 } }, + { "⫈", { 10952, 0 } }, + { "⫔", { 10964, 0 } }, + { "⫖", { 10966, 0 } }, + { "⇙", { 8665, 0 } }, + { "⤦", { 10534, 0 } }, + { "↙", { 8601, 0 } }, + { "↙", { 8601, 0 } }, + { "⤪", { 10538, 0 } }, + { "ß", { 223, 0 } }, + { "⌖", { 8982, 0 } }, + { "τ", { 964, 0 } }, + { "⎴", { 9140, 0 } }, + { "ť", { 357, 0 } }, + { "ţ", { 355, 0 } }, + { "т", { 1090, 0 } }, + { "⃛", { 8411, 0 } }, + { "⌕", { 8981, 0 } }, + { "𝔱", { 120113, 0 } }, + { "∴", { 8756, 0 } }, + { "∴", { 8756, 0 } }, + { "θ", { 952, 0 } }, + { "ϑ", { 977, 0 } }, + { "ϑ", { 977, 0 } }, + { "≈", { 8776, 0 } }, + { "∼", { 8764, 0 } }, + { " ", { 8201, 0 } }, + { "≈", { 8776, 0 } }, + { "∼", { 8764, 0 } }, + { "þ", { 254, 0 } }, + { "˜", { 732, 0 } }, + { "×", { 215, 0 } }, + { "⊠", { 8864, 0 } }, + { "⨱", { 10801, 0 } }, + { "⨰", { 10800, 0 } }, + { "∭", { 8749, 0 } }, + { "⤨", { 10536, 0 } }, + { "⊤", { 8868, 0 } }, + { "⌶", { 9014, 0 } }, + { "⫱", { 10993, 0 } }, + { "𝕥", { 120165, 0 } }, + { "⫚", { 10970, 0 } }, + { "⤩", { 10537, 0 } }, + { "‴", { 8244, 0 } }, + { "™", { 8482, 0 } }, + { "▵", { 9653, 0 } }, + { "▿", { 9663, 0 } }, + { "◃", { 9667, 0 } }, + { "⊴", { 8884, 0 } }, + { "≜", { 8796, 0 } }, + { "▹", { 9657, 0 } }, + { "⊵", { 8885, 0 } }, + { "◬", { 9708, 0 } }, + { "≜", { 8796, 0 } }, + { "⨺", { 10810, 0 } }, + { "⨹", { 10809, 0 } }, + { "⧍", { 10701, 0 } }, + { "⨻", { 10811, 0 } }, + { "⏢", { 9186, 0 } }, + { "𝓉", { 120009, 0 } }, + { "ц", { 1094, 0 } }, + { "ћ", { 1115, 0 } }, + { "ŧ", { 359, 0 } }, + { "≬", { 8812, 0 } }, + { "↞", { 8606, 0 } }, + { "↠", { 8608, 0 } }, + { "⇑", { 8657, 0 } }, + { "⥣", { 10595, 0 } }, + { "ú", { 250, 0 } }, + { "↑", { 8593, 0 } }, + { "ў", { 1118, 0 } }, + { "ŭ", { 365, 0 } }, + { "û", { 251, 0 } }, + { "у", { 1091, 0 } }, + { "⇅", { 8645, 0 } }, + { "ű", { 369, 0 } }, + { "⥮", { 10606, 0 } }, + { "⥾", { 10622, 0 } }, + { "𝔲", { 120114, 0 } }, + { "ù", { 249, 0 } }, + { "↿", { 8639, 0 } }, + { "↾", { 8638, 0 } }, + { "▀", { 9600, 0 } }, + { "⌜", { 8988, 0 } }, + { "⌜", { 8988, 0 } }, + { "⌏", { 8975, 0 } }, + { "◸", { 9720, 0 } }, + { "ū", { 363, 0 } }, + { "¨", { 168, 0 } }, + { "ų", { 371, 0 } }, + { "𝕦", { 120166, 0 } }, + { "↑", { 8593, 0 } }, + { "↕", { 8597, 0 } }, + { "↿", { 8639, 0 } }, + { "↾", { 8638, 0 } }, + { "⊎", { 8846, 0 } }, + { "υ", { 965, 0 } }, + { "ϒ", { 978, 0 } }, + { "υ", { 965, 0 } }, + { "⇈", { 8648, 0 } }, + { "⌝", { 8989, 0 } }, + { "⌝", { 8989, 0 } }, + { "⌎", { 8974, 0 } }, + { "ů", { 367, 0 } }, + { "◹", { 9721, 0 } }, + { "𝓊", { 120010, 0 } }, + { "⋰", { 8944, 0 } }, + { "ũ", { 361, 0 } }, + { "▵", { 9653, 0 } }, + { "▴", { 9652, 0 } }, + { "⇈", { 8648, 0 } }, + { "ü", { 252, 0 } }, + { "⦧", { 10663, 0 } }, + { "⇕", { 8661, 0 } }, + { "⫨", { 10984, 0 } }, + { "⫩", { 10985, 0 } }, + { "⊨", { 8872, 0 } }, + { "⦜", { 10652, 0 } }, + { "ϵ", { 1013, 0 } }, + { "ϰ", { 1008, 0 } }, + { "∅", { 8709, 0 } }, + { "ϕ", { 981, 0 } }, + { "ϖ", { 982, 0 } }, + { "∝", { 8733, 0 } }, + { "↕", { 8597, 0 } }, + { "ϱ", { 1009, 0 } }, + { "ς", { 962, 0 } }, + { "⊊︀", { 8842, 65024 } }, + { "⫋︀", { 10955, 65024 } }, + { "⊋︀", { 8843, 65024 } }, + { "⫌︀", { 10956, 65024 } }, + { "ϑ", { 977, 0 } }, + { "⊲", { 8882, 0 } }, + { "⊳", { 8883, 0 } }, + { "в", { 1074, 0 } }, + { "⊢", { 8866, 0 } }, + { "∨", { 8744, 0 } }, + { "⊻", { 8891, 0 } }, + { "≚", { 8794, 0 } }, + { "⋮", { 8942, 0 } }, + { "|", { 124, 0 } }, + { "|", { 124, 0 } }, + { "𝔳", { 120115, 0 } }, + { "⊲", { 8882, 0 } }, + { "⊂⃒", { 8834, 8402 } }, + { "⊃⃒", { 8835, 8402 } }, + { "𝕧", { 120167, 0 } }, + { "∝", { 8733, 0 } }, + { "⊳", { 8883, 0 } }, + { "𝓋", { 120011, 0 } }, + { "⫋︀", { 10955, 65024 } }, + { "⊊︀", { 8842, 65024 } }, + { "⫌︀", { 10956, 65024 } }, + { "⊋︀", { 8843, 65024 } }, + { "⦚", { 10650, 0 } }, + { "ŵ", { 373, 0 } }, + { "⩟", { 10847, 0 } }, + { "∧", { 8743, 0 } }, + { "≙", { 8793, 0 } }, + { "℘", { 8472, 0 } }, + { "𝔴", { 120116, 0 } }, + { "𝕨", { 120168, 0 } }, + { "℘", { 8472, 0 } }, + { "≀", { 8768, 0 } }, + { "≀", { 8768, 0 } }, + { "𝓌", { 120012, 0 } }, + { "⋂", { 8898, 0 } }, + { "◯", { 9711, 0 } }, + { "⋃", { 8899, 0 } }, + { "▽", { 9661, 0 } }, + { "𝔵", { 120117, 0 } }, + { "⟺", { 10234, 0 } }, + { "⟷", { 10231, 0 } }, + { "ξ", { 958, 0 } }, + { "⟸", { 10232, 0 } }, + { "⟵", { 10229, 0 } }, + { "⟼", { 10236, 0 } }, + { "⋻", { 8955, 0 } }, + { "⨀", { 10752, 0 } }, + { "𝕩", { 120169, 0 } }, + { "⨁", { 10753, 0 } }, + { "⨂", { 10754, 0 } }, + { "⟹", { 10233, 0 } }, + { "⟶", { 10230, 0 } }, + { "𝓍", { 120013, 0 } }, + { "⨆", { 10758, 0 } }, + { "⨄", { 10756, 0 } }, + { "△", { 9651, 0 } }, + { "⋁", { 8897, 0 } }, + { "⋀", { 8896, 0 } }, + { "ý", { 253, 0 } }, + { "я", { 1103, 0 } }, + { "ŷ", { 375, 0 } }, + { "ы", { 1099, 0 } }, + { "¥", { 165, 0 } }, + { "𝔶", { 120118, 0 } }, + { "ї", { 1111, 0 } }, + { "𝕪", { 120170, 0 } }, + { "𝓎", { 120014, 0 } }, + { "ю", { 1102, 0 } }, + { "ÿ", { 255, 0 } }, + { "ź", { 378, 0 } }, + { "ž", { 382, 0 } }, + { "з", { 1079, 0 } }, + { "ż", { 380, 0 } }, + { "ℨ", { 8488, 0 } }, + { "ζ", { 950, 0 } }, + { "𝔷", { 120119, 0 } }, + { "ж", { 1078, 0 } }, + { "⇝", { 8669, 0 } }, + { "𝕫", { 120171, 0 } }, + { "𝓏", { 120015, 0 } }, + { "‍", { 8205, 0 } }, + { "‌", { 8204, 0 } } +}; + + +struct entity_key { + const char* name; + size_t name_size; +}; + +static int +entity_cmp(const void* p_key, const void* p_entity) +{ + struct entity_key* key = (struct entity_key*) p_key; + struct entity* ent = (struct entity*) p_entity; + + return strncmp(key->name, ent->name, key->name_size); +} + +const struct entity* +entity_lookup(const char* name, size_t name_size) +{ + struct entity_key key = { name, name_size }; + + return bsearch(&key, + entity_table, + sizeof(entity_table) / sizeof(entity_table[0]), + sizeof(struct entity), + entity_cmp); +} diff --git a/modules/3rd/md4c/entity.h b/modules/3rd/md4c/entity.h new file mode 100644 index 0000000..36395fe --- /dev/null +++ b/modules/3rd/md4c/entity.h @@ -0,0 +1,42 @@ +/* + * MD4C: Markdown parser for C + * (http://github.com/mity/md4c) + * + * Copyright (c) 2016-2019 Martin Mitas + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef MD4C_ENTITY_H +#define MD4C_ENTITY_H + +#include + + +/* Most entities are formed by single Unicode codepoint, few by two codepoints. + * Single-codepoint entities have codepoints[1] set to zero. */ +struct entity { + const char* name; + unsigned codepoints[2]; +}; + +const struct entity* entity_lookup(const char* name, size_t name_size); + + +#endif /* MD4C_ENTITY_H */ diff --git a/modules/3rd/md4c/md4c-html.c b/modules/3rd/md4c/md4c-html.c new file mode 100644 index 0000000..03162c5 --- /dev/null +++ b/modules/3rd/md4c/md4c-html.c @@ -0,0 +1,750 @@ +/* + * MD4C: Markdown parser for C + * (http://github.com/mity/md4c) + * + * Copyright (c) 2016-2019 Martin Mitas + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#include "md4c-html.h" +#include "entity.h" + +static int hd_cnt[6] = {0, 0, 0, 0, 0, 0}; + +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L +/* C89/90 or old compilers in general may not understand "inline". */ +#if defined __GNUC__ +#define inline __inline__ +#elif defined _MSC_VER +#define inline __inline +#else +#define inline +#endif +#endif + +#ifdef _WIN32 +#define snprintf _snprintf +#endif + +typedef struct MD_HTML_tag MD_HTML; +struct MD_HTML_tag +{ + void (*process_output)(const MD_CHAR *, MD_SIZE, void *); + void *userdata; + unsigned flags; + int image_nesting_level; + char escape_map[256]; +}; + +#define NEED_HTML_ESC_FLAG 0x1 +#define NEED_URL_ESC_FLAG 0x2 + +/***************************************** + *** HTML rendering helper functions *** + *****************************************/ + +#define ISDIGIT(ch) ('0' <= (ch) && (ch) <= '9') +#define ISLOWER(ch) ('a' <= (ch) && (ch) <= 'z') +#define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z') +#define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch)) + +static inline void +render_verbatim(MD_HTML *r, const MD_CHAR *text, MD_SIZE size) +{ + r->process_output(text, size, r->userdata); +} + +/* Keep this as a macro. Most compiler should then be smart enough to replace + * the strlen() call with a compile-time constant if the string is a C literal. */ +#define RENDER_VERBATIM(r, verbatim) \ + render_verbatim((r), (verbatim), (MD_SIZE)(strlen(verbatim))) + +static void +render_html_escaped(MD_HTML *r, const MD_CHAR *data, MD_SIZE size) +{ + MD_OFFSET beg = 0; + MD_OFFSET off = 0; + +/* Some characters need to be escaped in normal HTML text. */ +#define NEED_HTML_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG) + + while (1) + { + /* Optimization: Use some loop unrolling. */ + while (off + 3 < size && !NEED_HTML_ESC(data[off + 0]) && !NEED_HTML_ESC(data[off + 1]) && !NEED_HTML_ESC(data[off + 2]) && !NEED_HTML_ESC(data[off + 3])) + off += 4; + while (off < size && !NEED_HTML_ESC(data[off])) + off++; + + if (off > beg) + render_verbatim(r, data + beg, off - beg); + + if (off < size) + { + switch (data[off]) + { + case '&': + RENDER_VERBATIM(r, "&"); + break; + case '<': + RENDER_VERBATIM(r, "<"); + break; + case '>': + RENDER_VERBATIM(r, ">"); + break; + case '"': + RENDER_VERBATIM(r, """); + break; + } + off++; + } + else + { + break; + } + beg = off; + } +} + +static void +render_url_escaped(MD_HTML *r, const MD_CHAR *data, MD_SIZE size) +{ + static const MD_CHAR hex_chars[] = "0123456789ABCDEF"; + MD_OFFSET beg = 0; + MD_OFFSET off = 0; + +/* Some characters need to be escaped in URL attributes. */ +#define NEED_URL_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG) + + while (1) + { + while (off < size && !NEED_URL_ESC(data[off])) + off++; + if (off > beg) + render_verbatim(r, data + beg, off - beg); + + if (off < size) + { + char hex[3]; + + switch (data[off]) + { + case '&': + RENDER_VERBATIM(r, "&"); + break; + default: + hex[0] = '%'; + hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf]; + hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf]; + render_verbatim(r, hex, 3); + break; + } + off++; + } + else + { + break; + } + + beg = off; + } +} + +static unsigned +hex_val(char ch) +{ + if ('0' <= ch && ch <= '9') + return ch - '0'; + if ('A' <= ch && ch <= 'Z') + return ch - 'A' + 10; + else + return ch - 'a' + 10; +} + +static void +render_utf8_codepoint(MD_HTML *r, unsigned codepoint, + void (*fn_append)(MD_HTML *, const MD_CHAR *, MD_SIZE)) +{ + static const MD_CHAR utf8_replacement_char[] = {0xef, 0xbf, 0xbd}; + + unsigned char utf8[4]; + size_t n; + + if (codepoint <= 0x7f) + { + n = 1; + utf8[0] = codepoint; + } + else if (codepoint <= 0x7ff) + { + n = 2; + utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f); + utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f); + } + else if (codepoint <= 0xffff) + { + n = 3; + utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf); + utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f); + utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f); + } + else + { + n = 4; + utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7); + utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f); + utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f); + utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f); + } + + if (0 < codepoint && codepoint <= 0x10ffff) + fn_append(r, (char *)utf8, n); + else + fn_append(r, utf8_replacement_char, 3); +} + +/* Translate entity to its UTF-8 equivalent, or output the verbatim one + * if such entity is unknown (or if the translation is disabled). */ +static void +render_entity(MD_HTML *r, const MD_CHAR *text, MD_SIZE size, + void (*fn_append)(MD_HTML *, const MD_CHAR *, MD_SIZE)) +{ + if (r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) + { + render_verbatim(r, text, size); + return; + } + + /* We assume UTF-8 output is what is desired. */ + if (size > 3 && text[1] == '#') + { + unsigned codepoint = 0; + + if (text[2] == 'x' || text[2] == 'X') + { + /* Hexadecimal entity (e.g. "�")). */ + MD_SIZE i; + for (i = 3; i < size - 1; i++) + codepoint = 16 * codepoint + hex_val(text[i]); + } + else + { + /* Decimal entity (e.g. "&1234;") */ + MD_SIZE i; + for (i = 2; i < size - 1; i++) + codepoint = 10 * codepoint + (text[i] - '0'); + } + + render_utf8_codepoint(r, codepoint, fn_append); + return; + } + else + { + /* Named entity (e.g. " "). */ + const struct entity *ent; + + ent = entity_lookup(text, size); + if (ent != NULL) + { + render_utf8_codepoint(r, ent->codepoints[0], fn_append); + if (ent->codepoints[1]) + render_utf8_codepoint(r, ent->codepoints[1], fn_append); + return; + } + } + + fn_append(r, text, size); +} + +static void +render_attribute(MD_HTML *r, const MD_ATTRIBUTE *attr, + void (*fn_append)(MD_HTML *, const MD_CHAR *, MD_SIZE)) +{ + int i; + + for (i = 0; attr->substr_offsets[i] < attr->size; i++) + { + MD_TEXTTYPE type = attr->substr_types[i]; + MD_OFFSET off = attr->substr_offsets[i]; + MD_SIZE size = attr->substr_offsets[i + 1] - off; + const MD_CHAR *text = attr->text + off; + + switch (type) + { + case MD_TEXT_NULLCHAR: + render_utf8_codepoint(r, 0x0000, render_verbatim); + break; + case MD_TEXT_ENTITY: + render_entity(r, text, size, fn_append); + break; + default: + fn_append(r, text, size); + break; + } + } +} + +static void +render_open_ol_block(MD_HTML *r, const MD_BLOCK_OL_DETAIL *det) +{ + char buf[64]; + + if (det->start == 1) + { + RENDER_VERBATIM(r, "
    \n"); + return; + } + + snprintf(buf, sizeof(buf), "
      \n", det->start); + RENDER_VERBATIM(r, buf); +} + +static void +render_open_li_block(MD_HTML *r, const MD_BLOCK_LI_DETAIL *det) +{ + if (det->is_task) + { + RENDER_VERBATIM(r, "
    1. " + "task_mark == 'x' || det->task_mark == 'X') + RENDER_VERBATIM(r, " checked"); + RENDER_VERBATIM(r, ">"); + } + else + { + RENDER_VERBATIM(r, "
    2. "); + } +} + +static void +render_open_code_block(MD_HTML *r, const MD_BLOCK_CODE_DETAIL *det) +{ + RENDER_VERBATIM(r, "
      lang.text != NULL)
      +    {
      +        RENDER_VERBATIM(r, " class=\"language-");
      +        render_attribute(r, &det->lang, render_html_escaped);
      +        RENDER_VERBATIM(r, "\"");
      +    }
      +
      +    RENDER_VERBATIM(r, ">");
      +}
      +
      +static void
      +render_open_td_block(MD_HTML *r, const MD_CHAR *cell_type, const MD_BLOCK_TD_DETAIL *det)
      +{
      +    RENDER_VERBATIM(r, "<");
      +    RENDER_VERBATIM(r, cell_type);
      +
      +    switch (det->align)
      +    {
      +    case MD_ALIGN_LEFT:
      +        RENDER_VERBATIM(r, " align=\"left\">");
      +        break;
      +    case MD_ALIGN_CENTER:
      +        RENDER_VERBATIM(r, " align=\"center\">");
      +        break;
      +    case MD_ALIGN_RIGHT:
      +        RENDER_VERBATIM(r, " align=\"right\">");
      +        break;
      +    default:
      +        RENDER_VERBATIM(r, ">");
      +        break;
      +    }
      +}
      +
      +static void
      +render_open_a_span(MD_HTML *r, const MD_SPAN_A_DETAIL *det)
      +{
      +    RENDER_VERBATIM(r, "href, render_url_escaped);
      +
      +    if (det->title.text != NULL)
      +    {
      +        RENDER_VERBATIM(r, "\" title=\"");
      +        render_attribute(r, &det->title, render_html_escaped);
      +    }
      +
      +    RENDER_VERBATIM(r, "\">");
      +}
      +
      +static void
      +render_open_img_span(MD_HTML *r, const MD_SPAN_IMG_DETAIL *det)
      +{
      +    RENDER_VERBATIM(r, "src, render_url_escaped);
      +
      +    RENDER_VERBATIM(r, "\" alt=\"");
      +
      +    r->image_nesting_level++;
      +}
      +
      +static void
      +render_close_img_span(MD_HTML *r, const MD_SPAN_IMG_DETAIL *det)
      +{
      +    if (det->title.text != NULL)
      +    {
      +        RENDER_VERBATIM(r, "\" title=\"");
      +        render_attribute(r, &det->title, render_html_escaped);
      +    }
      +
      +    RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\" />" : "\">");
      +
      +    r->image_nesting_level--;
      +}
      +
      +static void
      +render_open_wikilink_span(MD_HTML *r, const MD_SPAN_WIKILINK_DETAIL *det)
      +{
      +    RENDER_VERBATIM(r, "target, render_html_escaped);
      +
      +    RENDER_VERBATIM(r, "\">");
      +}
      +
      +/**************************************
      + ***  HTML renderer implementation  ***
      + **************************************/
      +
      +static int
      +enter_block_callback(MD_BLOCKTYPE type, void *detail, void *userdata)
      +{
      +    static const MD_CHAR *head[6] = {"

      ", "

      ", "

      ", "

      ", "

      ", "
      "}; + MD_HTML *r = (MD_HTML *)userdata; + char buf[32]; + switch (type) + { + case MD_BLOCK_DOC: /* noop */ + break; + case MD_BLOCK_QUOTE: + RENDER_VERBATIM(r, "
      \n"); + break; + case MD_BLOCK_UL: + RENDER_VERBATIM(r, "
        \n"); + break; + case MD_BLOCK_OL: + render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL *)detail); + break; + case MD_BLOCK_LI: + render_open_li_block(r, (const MD_BLOCK_LI_DETAIL *)detail); + break; + case MD_BLOCK_HR: + RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "
        \n" : "
        \n"); + break; + case MD_BLOCK_H: + hd_cnt[((MD_BLOCK_H_DETAIL *)detail)->level - 1]++; + snprintf(buf, 32, "
        ", ((MD_BLOCK_H_DETAIL *)detail)->level, hd_cnt[((MD_BLOCK_H_DETAIL *)detail)->level - 1]); + RENDER_VERBATIM(r, buf); + RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL *)detail)->level - 1]); + break; + case MD_BLOCK_CODE: + render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL *)detail); + break; + case MD_BLOCK_HTML: /* noop */ + break; + case MD_BLOCK_P: + RENDER_VERBATIM(r, "

        "); + break; + case MD_BLOCK_TABLE: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_THEAD: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_TBODY: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_TR: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_TH: + render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL *)detail); + break; + case MD_BLOCK_TD: + render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL *)detail); + break; + } + + return 0; +} + +static int +leave_block_callback(MD_BLOCKTYPE type, void *detail, void *userdata) +{ + static const MD_CHAR *head[6] = {"\n", "\n", "\n", "\n", "\n", "\n"}; + MD_HTML *r = (MD_HTML *)userdata; + switch (type) + { + case MD_BLOCK_DOC: /*noop*/ + break; + case MD_BLOCK_QUOTE: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_UL: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_OL: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_LI: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_HR: /*noop*/ + break; + case MD_BLOCK_H: + RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL *)detail)->level - 1]); + break; + case MD_BLOCK_CODE: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_HTML: /* noop */ + break; + case MD_BLOCK_P: + RENDER_VERBATIM(r, "

        \n"); + break; + case MD_BLOCK_TABLE: + RENDER_VERBATIM(r, "
        \n"); + break; + case MD_BLOCK_THEAD: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_TBODY: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_TR: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_TH: + RENDER_VERBATIM(r, "\n"); + break; + case MD_BLOCK_TD: + RENDER_VERBATIM(r, "\n"); + break; + } + + return 0; +} + +static int +enter_span_callback(MD_SPANTYPE type, void *detail, void *userdata) +{ + MD_HTML *r = (MD_HTML *)userdata; + + if (r->image_nesting_level > 0) + { + /* We are inside a Markdown image label. Markdown allows to use any + * emphasis and other rich contents in that context similarly as in + * any link label. + * + * However, unlike in the case of links (where that contents becomes + * contents of the ... tag), in the case of images the contents + * is supposed to fall into the attribute alt: .... + * + * In that context we naturally cannot output nested HTML tags. So lets + * suppress them and only output the plain text (i.e. what falls into + * text() callback). + * + * This make-it-a-plain-text approach is the recommended practice by + * CommonMark specification (for HTML output). + */ + return 0; + } + + switch (type) + { + case MD_SPAN_EM: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_STRONG: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_U: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_A: + render_open_a_span(r, (MD_SPAN_A_DETAIL *)detail); + break; + case MD_SPAN_IMG: + render_open_img_span(r, (MD_SPAN_IMG_DETAIL *)detail); + break; + case MD_SPAN_CODE: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_DEL: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_LATEXMATH: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_LATEXMATH_DISPLAY: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_WIKILINK: + render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL *)detail); + break; + } + + return 0; +} + +static int +leave_span_callback(MD_SPANTYPE type, void *detail, void *userdata) +{ + MD_HTML *r = (MD_HTML *)userdata; + + if (r->image_nesting_level > 0) + { + /* Ditto as in enter_span_callback(), except we have to allow the + * end of the tag. */ + if (r->image_nesting_level == 1 && type == MD_SPAN_IMG) + render_close_img_span(r, (MD_SPAN_IMG_DETAIL *)detail); + return 0; + } + + switch (type) + { + case MD_SPAN_EM: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_STRONG: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_U: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_A: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_IMG: /*noop, handled above*/ + break; + case MD_SPAN_CODE: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_DEL: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_LATEXMATH: /*fall through*/ + case MD_SPAN_LATEXMATH_DISPLAY: + RENDER_VERBATIM(r, ""); + break; + case MD_SPAN_WIKILINK: + RENDER_VERBATIM(r, ""); + break; + } + + return 0; +} + +static int +text_callback(MD_TEXTTYPE type, const MD_CHAR *text, MD_SIZE size, void *userdata) +{ + MD_HTML *r = (MD_HTML *)userdata; + + switch (type) + { + case MD_TEXT_NULLCHAR: + render_utf8_codepoint(r, 0x0000, render_verbatim); + break; + case MD_TEXT_BR: + RENDER_VERBATIM(r, (r->image_nesting_level == 0 + ? ((r->flags & MD_HTML_FLAG_XHTML) ? "
        \n" : "
        \n") + : " ")); + break; + case MD_TEXT_SOFTBR: + RENDER_VERBATIM(r, (r->image_nesting_level == 0 ? "\n" : " ")); + break; + case MD_TEXT_HTML: + render_verbatim(r, text, size); + break; + case MD_TEXT_ENTITY: + render_entity(r, text, size, render_html_escaped); + break; + default: + render_html_escaped(r, text, size); + break; + } + + return 0; +} + +static void +debug_log_callback(const char *msg, void *userdata) +{ + MD_HTML *r = (MD_HTML *)userdata; + if (r->flags & MD_HTML_FLAG_DEBUG) + fprintf(stderr, "MD4C: %s\n", msg); +} + +int md_html(const MD_CHAR *input, MD_SIZE input_size, + void (*process_output)(const MD_CHAR *, MD_SIZE, void *), + void *userdata, unsigned parser_flags, unsigned renderer_flags) +{ + MD_HTML render = {process_output, userdata, renderer_flags, 0, {0}}; + int i; + + MD_PARSER parser = { + 0, + parser_flags, + enter_block_callback, + leave_block_callback, + enter_span_callback, + leave_span_callback, + text_callback, + debug_log_callback, + NULL}; + + /* Build map of characters which need escaping. */ + for (i = 0; i < 256; i++) + { + unsigned char ch = (unsigned char)i; + + if (strchr("\"&<>", ch) != NULL) + render.escape_map[i] |= NEED_HTML_ESC_FLAG; + + if (!ISALNUM(ch) && strchr("-_.+!*(),%#@?=;:/,+$", ch) == NULL) + render.escape_map[i] |= NEED_URL_ESC_FLAG; + } + + /* Consider skipping UTF-8 byte order mark (BOM). */ + if (renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM && sizeof(MD_CHAR) == 1) + { + static const MD_CHAR bom[3] = {0xef, 0xbb, 0xbf}; + if (input_size >= sizeof(bom) && memcmp(input, bom, sizeof(bom)) == 0) + { + input += sizeof(bom); + input_size -= sizeof(bom); + } + } + + return md_parse(input, input_size, &parser, (void *)&render); +} + +void reset_hd_cnt() +{ + for (size_t i = 0; i < 6; i++) + { + hd_cnt[i] = 0; + } +} \ No newline at end of file diff --git a/modules/3rd/md4c/md4c-html.h b/modules/3rd/md4c/md4c-html.h new file mode 100644 index 0000000..f8ed912 --- /dev/null +++ b/modules/3rd/md4c/md4c-html.h @@ -0,0 +1,70 @@ +/* + * MD4C: Markdown parser for C + * (http://github.com/mity/md4c) + * + * Copyright (c) 2016-2017 Martin Mitas + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef MD4C_HTML_H +#define MD4C_HTML_H + +#include "md4c.h" + +#ifdef __cplusplus + extern "C" { +#endif + + +/* If set, debug output from md_parse() is sent to stderr. */ +#define MD_HTML_FLAG_DEBUG 0x0001 +#define MD_HTML_FLAG_VERBATIM_ENTITIES 0x0002 +#define MD_HTML_FLAG_SKIP_UTF8_BOM 0x0004 +#define MD_HTML_FLAG_XHTML 0x0008 + + +/* Render Markdown into HTML. + * + * Note only contents of tag is generated. Caller must generate + * HTML header/footer manually before/after calling md_html(). + * + * Params input and input_size specify the Markdown input. + * Callback process_output() gets called with chunks of HTML output. + * (Typical implementation may just output the bytes to a file or append to + * some buffer). + * Param userdata is just propgated back to process_output() callback. + * Param parser_flags are flags from md4c.h propagated to md_parse(). + * Param render_flags is bitmask of MD_HTML_FLAG_xxxx. + * + * Returns -1 on error (if md_parse() fails.) + * Returns 0 on success. + */ +int md_html(const MD_CHAR* input, MD_SIZE input_size, + void (*process_output)(const MD_CHAR*, MD_SIZE, void*), + void* userdata, unsigned parser_flags, unsigned renderer_flags); + + +void reset_hd_cnt(); + +#ifdef __cplusplus + } /* extern "C" { */ +#endif + +#endif /* MD4C_HTML_H */ diff --git a/modules/3rd/md4c/md4c.c b/modules/3rd/md4c/md4c.c new file mode 100644 index 0000000..6324446 --- /dev/null +++ b/modules/3rd/md4c/md4c.c @@ -0,0 +1,6306 @@ +/* + * MD4C: Markdown parser for C + * (http://github.com/mity/md4c) + * + * Copyright (c) 2016-2020 Martin Mitas + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "md4c.h" + +#include +#include +#include +#include + + +/***************************** + *** Miscellaneous Stuff *** + *****************************/ + +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L + /* C89/90 or old compilers in general may not understand "inline". */ + #if defined __GNUC__ + #define inline __inline__ + #elif defined _MSC_VER + #define inline __inline + #else + #define inline + #endif +#endif + +/* Make the UTF-8 support the default. */ +#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16 + #define MD4C_USE_UTF8 +#endif + +/* Magic for making wide literals with MD4C_USE_UTF16. */ +#ifdef _T + #undef _T +#endif +#if defined MD4C_USE_UTF16 + #define _T(x) L##x +#else + #define _T(x) x +#endif + +/* Misc. macros. */ +#define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0])) + +#define STRINGIZE_(x) #x +#define STRINGIZE(x) STRINGIZE_(x) + +#ifndef TRUE + #define TRUE 1 + #define FALSE 0 +#endif + + +/************************ + *** Internal Types *** + ************************/ + +/* These are omnipresent so lets save some typing. */ +#define CHAR MD_CHAR +#define SZ MD_SIZE +#define OFF MD_OFFSET + +typedef struct MD_MARK_tag MD_MARK; +typedef struct MD_BLOCK_tag MD_BLOCK; +typedef struct MD_CONTAINER_tag MD_CONTAINER; +typedef struct MD_REF_DEF_tag MD_REF_DEF; + + +/* During analyzes of inline marks, we need to manage some "mark chains", + * of (yet unresolved) openers. This structure holds start/end of the chain. + * The chain internals are then realized through MD_MARK::prev and ::next. + */ +typedef struct MD_MARKCHAIN_tag MD_MARKCHAIN; +struct MD_MARKCHAIN_tag { + int head; /* Index of first mark in the chain, or -1 if empty. */ + int tail; /* Index of last mark in the chain, or -1 if empty. */ +}; + +/* Context propagated through all the parsing. */ +typedef struct MD_CTX_tag MD_CTX; +struct MD_CTX_tag { + /* Immutable stuff (parameters of md_parse()). */ + const CHAR* text; + SZ size; + MD_PARSER parser; + void* userdata; + + /* When this is true, it allows some optimizations. */ + int doc_ends_with_newline; + + /* Helper temporary growing buffer. */ + CHAR* buffer; + unsigned alloc_buffer; + + /* Reference definitions. */ + MD_REF_DEF* ref_defs; + int n_ref_defs; + int alloc_ref_defs; + void** ref_def_hashtable; + int ref_def_hashtable_size; + + /* Stack of inline/span markers. + * This is only used for parsing a single block contents but by storing it + * here we may reuse the stack for subsequent blocks; i.e. we have fewer + * (re)allocations. */ + MD_MARK* marks; + int n_marks; + int alloc_marks; + +#if defined MD4C_USE_UTF16 + char mark_char_map[128]; +#else + char mark_char_map[256]; +#endif + + /* For resolving of inline spans. */ + MD_MARKCHAIN mark_chains[13]; +#define PTR_CHAIN (ctx->mark_chains[0]) +#define TABLECELLBOUNDARIES (ctx->mark_chains[1]) +#define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2]) +#define ASTERISK_OPENERS_extraword_mod3_1 (ctx->mark_chains[3]) +#define ASTERISK_OPENERS_extraword_mod3_2 (ctx->mark_chains[4]) +#define ASTERISK_OPENERS_intraword_mod3_0 (ctx->mark_chains[5]) +#define ASTERISK_OPENERS_intraword_mod3_1 (ctx->mark_chains[6]) +#define ASTERISK_OPENERS_intraword_mod3_2 (ctx->mark_chains[7]) +#define UNDERSCORE_OPENERS (ctx->mark_chains[8]) +#define TILDE_OPENERS_1 (ctx->mark_chains[9]) +#define TILDE_OPENERS_2 (ctx->mark_chains[10]) +#define BRACKET_OPENERS (ctx->mark_chains[11]) +#define DOLLAR_OPENERS (ctx->mark_chains[12]) +#define OPENERS_CHAIN_FIRST 2 +#define OPENERS_CHAIN_LAST 12 + + int n_table_cell_boundaries; + + /* For resolving links. */ + int unresolved_link_head; + int unresolved_link_tail; + + /* For resolving raw HTML. */ + OFF html_comment_horizon; + OFF html_proc_instr_horizon; + OFF html_decl_horizon; + OFF html_cdata_horizon; + + /* For block analysis. + * Notes: + * -- It holds MD_BLOCK as well as MD_LINE structures. After each + * MD_BLOCK, its (multiple) MD_LINE(s) follow. + * -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used + * instead of MD_LINE(s). + */ + void* block_bytes; + MD_BLOCK* current_block; + int n_block_bytes; + int alloc_block_bytes; + + /* For container block analysis. */ + MD_CONTAINER* containers; + int n_containers; + int alloc_containers; + + /* Minimal indentation to call the block "indented code block". */ + unsigned code_indent_offset; + + /* Contextual info for line analysis. */ + SZ code_fence_length; /* For checking closing fence length. */ + int html_block_type; /* For checking closing raw HTML condition. */ + int last_line_has_list_loosening_effect; + int last_list_item_starts_with_two_blank_lines; +}; + +enum MD_LINETYPE_tag { + MD_LINE_BLANK, + MD_LINE_HR, + MD_LINE_ATXHEADER, + MD_LINE_SETEXTHEADER, + MD_LINE_SETEXTUNDERLINE, + MD_LINE_INDENTEDCODE, + MD_LINE_FENCEDCODE, + MD_LINE_HTML, + MD_LINE_TEXT, + MD_LINE_TABLE, + MD_LINE_TABLEUNDERLINE +}; +typedef enum MD_LINETYPE_tag MD_LINETYPE; + +typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS; +struct MD_LINE_ANALYSIS_tag { + MD_LINETYPE type : 16; + unsigned data : 16; + OFF beg; + OFF end; + unsigned indent; /* Indentation level. */ +}; + +typedef struct MD_LINE_tag MD_LINE; +struct MD_LINE_tag { + OFF beg; + OFF end; +}; + +typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE; +struct MD_VERBATIMLINE_tag { + OFF beg; + OFF end; + OFF indent; +}; + + +/******************* + *** Debugging *** + *******************/ + +#define MD_LOG(msg) \ + do { \ + if(ctx->parser.debug_log != NULL) \ + ctx->parser.debug_log((msg), ctx->userdata); \ + } while(0) + +#ifdef DEBUG + #define MD_ASSERT(cond) \ + do { \ + if(!(cond)) { \ + MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": " \ + "Assertion '" STRINGIZE(cond) "' failed."); \ + exit(1); \ + } \ + } while(0) + + #define MD_UNREACHABLE() MD_ASSERT(1 == 0) +#else + #ifdef __GNUC__ + #define MD_ASSERT(cond) do { if(!(cond)) __builtin_unreachable(); } while(0) + #define MD_UNREACHABLE() do { __builtin_unreachable(); } while(0) + #elif defined _MSC_VER && _MSC_VER > 120 + #define MD_ASSERT(cond) do { __assume(cond); } while(0) + #define MD_UNREACHABLE() do { __assume(0); } while(0) + #else + #define MD_ASSERT(cond) do {} while(0) + #define MD_UNREACHABLE() do {} while(0) + #endif +#endif + + +/***************** + *** Helpers *** + *****************/ + +/* Character accessors. */ +#define CH(off) (ctx->text[(off)]) +#define STR(off) (ctx->text + (off)) + +/* Character classification. + * Note we assume ASCII compatibility of code points < 128 here. */ +#define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max)) +#define ISANYOF_(ch, palette) (md_strchr((palette), (ch)) != NULL) +#define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2)) +#define ISANYOF3_(ch, ch1, ch2, ch3) ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3)) +#define ISASCII_(ch) ((unsigned)(ch) <= 127) +#define ISBLANK_(ch) (ISANYOF2_((ch), _T(' '), _T('\t'))) +#define ISNEWLINE_(ch) (ISANYOF2_((ch), _T('\r'), _T('\n'))) +#define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f'))) +#define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127) +#define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126)) +#define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z'))) +#define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z'))) +#define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch)) +#define ISDIGIT_(ch) (ISIN_(ch, _T('0'), _T('9'))) +#define ISXDIGIT_(ch) (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f'))) +#define ISALNUM_(ch) (ISALPHA_(ch) || ISDIGIT_(ch)) + +#define ISANYOF(off, palette) ISANYOF_(CH(off), (palette)) +#define ISANYOF2(off, ch1, ch2) ISANYOF2_(CH(off), (ch1), (ch2)) +#define ISANYOF3(off, ch1, ch2, ch3) ISANYOF3_(CH(off), (ch1), (ch2), (ch3)) +#define ISASCII(off) ISASCII_(CH(off)) +#define ISBLANK(off) ISBLANK_(CH(off)) +#define ISNEWLINE(off) ISNEWLINE_(CH(off)) +#define ISWHITESPACE(off) ISWHITESPACE_(CH(off)) +#define ISCNTRL(off) ISCNTRL_(CH(off)) +#define ISPUNCT(off) ISPUNCT_(CH(off)) +#define ISUPPER(off) ISUPPER_(CH(off)) +#define ISLOWER(off) ISLOWER_(CH(off)) +#define ISALPHA(off) ISALPHA_(CH(off)) +#define ISDIGIT(off) ISDIGIT_(CH(off)) +#define ISXDIGIT(off) ISXDIGIT_(CH(off)) +#define ISALNUM(off) ISALNUM_(CH(off)) + + +#if defined MD4C_USE_UTF16 + #define md_strchr wcschr +#else + #define md_strchr strchr +#endif + + +/* Case insensitive check of string equality. */ +static inline int +md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n) +{ + OFF i; + for(i = 0; i < n; i++) { + CHAR ch1 = s1[i]; + CHAR ch2 = s2[i]; + + if(ISLOWER_(ch1)) + ch1 += ('A'-'a'); + if(ISLOWER_(ch2)) + ch2 += ('A'-'a'); + if(ch1 != ch2) + return FALSE; + } + return TRUE; +} + +static inline int +md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n) +{ + return memcmp(s1, s2, n * sizeof(CHAR)) == 0; +} + +static int +md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size) +{ + OFF off = 0; + int ret = 0; + + while(1) { + while(off < size && str[off] != _T('\0')) + off++; + + if(off > 0) { + ret = ctx->parser.text(type, str, off, ctx->userdata); + if(ret != 0) + return ret; + + str += off; + size -= off; + off = 0; + } + + if(off >= size) + return 0; + + ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata); + if(ret != 0) + return ret; + off++; + } +} + + +#define MD_CHECK(func) \ + do { \ + ret = (func); \ + if(ret < 0) \ + goto abort; \ + } while(0) + + +#define MD_TEMP_BUFFER(sz) \ + do { \ + if(sz > ctx->alloc_buffer) { \ + CHAR* new_buffer; \ + SZ new_size = ((sz) + (sz) / 2 + 128) & ~127; \ + \ + new_buffer = realloc(ctx->buffer, new_size); \ + if(new_buffer == NULL) { \ + MD_LOG("realloc() failed."); \ + ret = -1; \ + goto abort; \ + } \ + \ + ctx->buffer = new_buffer; \ + ctx->alloc_buffer = new_size; \ + } \ + } while(0) + + +#define MD_ENTER_BLOCK(type, arg) \ + do { \ + ret = ctx->parser.enter_block((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from enter_block() callback."); \ + goto abort; \ + } \ + } while(0) + +#define MD_LEAVE_BLOCK(type, arg) \ + do { \ + ret = ctx->parser.leave_block((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from leave_block() callback."); \ + goto abort; \ + } \ + } while(0) + +#define MD_ENTER_SPAN(type, arg) \ + do { \ + ret = ctx->parser.enter_span((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from enter_span() callback."); \ + goto abort; \ + } \ + } while(0) + +#define MD_LEAVE_SPAN(type, arg) \ + do { \ + ret = ctx->parser.leave_span((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from leave_span() callback."); \ + goto abort; \ + } \ + } while(0) + +#define MD_TEXT(type, str, size) \ + do { \ + if(size > 0) { \ + ret = ctx->parser.text((type), (str), (size), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from text() callback."); \ + goto abort; \ + } \ + } \ + } while(0) + +#define MD_TEXT_INSECURE(type, str, size) \ + do { \ + if(size > 0) { \ + ret = md_text_with_null_replacement(ctx, type, str, size); \ + if(ret != 0) { \ + MD_LOG("Aborted from text() callback."); \ + goto abort; \ + } \ + } \ + } while(0) + + + +/************************* + *** Unicode Support *** + *************************/ + +typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO; +struct MD_UNICODE_FOLD_INFO_tag { + unsigned codepoints[3]; + int n_codepoints; +}; + + +#if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8 + /* Binary search over sorted "map" of codepoints. Consecutive sequences + * of codepoints may be encoded in the map by just using the + * (MIN_CODEPOINT | 0x40000000) and (MAX_CODEPOINT | 0x80000000). + * + * Returns index of the found record in the map (in the case of ranges, + * the minimal value is used); or -1 on failure. */ + static int + md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size) + { + int beg, end; + int pivot_beg, pivot_end; + + beg = 0; + end = (int) map_size-1; + while(beg <= end) { + /* Pivot may be a range, not just a single value. */ + pivot_beg = pivot_end = (beg + end) / 2; + if(map[pivot_end] & 0x40000000) + pivot_end++; + if(map[pivot_beg] & 0x80000000) + pivot_beg--; + + if(codepoint < (map[pivot_beg] & 0x00ffffff)) + end = pivot_beg - 1; + else if(codepoint > (map[pivot_end] & 0x00ffffff)) + beg = pivot_end + 1; + else + return pivot_beg; + } + + return -1; + } + + static int + md_is_unicode_whitespace__(unsigned codepoint) + { +#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000) +#define S(cp) (cp) + /* Unicode "Zs" category. + * (generated by scripts/build_whitespace_map.py) */ + static const unsigned WHITESPACE_MAP[] = { + S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000) + }; +#undef R +#undef S + + /* The ASCII ones are the most frequently used ones, also CommonMark + * specification requests few more in this range. */ + if(codepoint <= 0x7f) + return ISWHITESPACE_(codepoint); + + return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0); + } + + static int + md_is_unicode_punct__(unsigned codepoint) + { +#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000) +#define S(cp) (cp) + /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories. + * (generated by scripts/build_punct_map.py) */ + static const unsigned PUNCT_MAP[] = { + R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040), + R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7), + S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0), + S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f), + R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e), + R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f), + R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4), + R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c), + R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a), + R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60), + R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027), + R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e), + R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef), + R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70), + R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f), + S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e), + R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f), + S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1), + S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68), + R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b), + R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102), + S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f), + R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59), + R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175), + R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9), + R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643), + R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46), + R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), + S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44), + R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f) + }; +#undef R +#undef S + + /* The ASCII ones are the most frequently used ones, also CommonMark + * specification requests few more in this range. */ + if(codepoint <= 0x7f) + return ISPUNCT_(codepoint); + + return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0); + } + + static void + md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info) + { +#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000) +#define S(cp) (cp) + /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories. + * (generated by scripts/build_punct_map.py) */ + static const unsigned FOLD_MAP_1[] = { + R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136), + R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182), + S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190), + S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f), + R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2), + S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8), + S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7), + R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241), + S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f), + S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab), + S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1), + S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f), + R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e), + R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81), + S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba), + R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d), + R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f), + R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8), + S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8), + S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183), + R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b), + S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2), + S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e), + S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792), + R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2), + S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5), + R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2), + R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921) + }; + static const unsigned FOLD_MAP_1_DATA[] = { + 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148, + 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257, + 0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275, + 0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292, + 0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3, + 0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242, + 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af, + 0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0, + 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, + 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, + 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a, + 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, + 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, + 0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0, + 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, + 0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, + 0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641, + 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c, + 0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d, + 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41, + 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922, + 0x1e943 + }; + static const unsigned FOLD_MAP_2[] = { + S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99), + S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f), + R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2), + S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3), + S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13), + S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17) + }; + static const unsigned FOLD_MAP_2_DATA[] = { + 0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308, + 0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9, + 0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9, + 0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342, + 0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342, + 0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9, + 0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565, + 0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d + }; + static const unsigned FOLD_MAP_3[] = { + S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3), + S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04) + }; + static const unsigned FOLD_MAP_3_DATA[] = { + 0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301, + 0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300, + 0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301, + 0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c + }; +#undef R +#undef S + static const struct { + const unsigned* map; + const unsigned* data; + size_t map_size; + int n_codepoints; + } FOLD_MAP_LIST[] = { + { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 }, + { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 }, + { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 } + }; + + int i; + + /* Fast path for ASCII characters. */ + if(codepoint <= 0x7f) { + info->codepoints[0] = codepoint; + if(ISUPPER_(codepoint)) + info->codepoints[0] += 'a' - 'A'; + info->n_codepoints = 1; + return; + } + + /* Try to locate the codepoint in any of the maps. */ + for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) { + int index; + + index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size); + if(index >= 0) { + /* Found the mapping. */ + int n_codepoints = FOLD_MAP_LIST[i].n_codepoints; + const unsigned* map = FOLD_MAP_LIST[i].map; + const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints); + + memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints); + info->n_codepoints = n_codepoints; + + if(FOLD_MAP_LIST[i].map[index] != codepoint) { + /* The found mapping maps whole range of codepoints, + * i.e. we have to offset info->codepoints[0] accordingly. */ + if((map[index] & 0x00ffffff)+1 == codepoints[0]) { + /* Alternating type of the range. */ + info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0); + } else { + /* Range to range kind of mapping. */ + info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff)); + } + } + + return; + } + } + + /* No mapping found. Map the codepoint to itself. */ + info->codepoints[0] = codepoint; + info->n_codepoints = 1; + } +#endif + + +#if defined MD4C_USE_UTF16 + #define IS_UTF16_SURROGATE_HI(word) (((WORD)(word) & 0xfc00) == 0xd800) + #define IS_UTF16_SURROGATE_LO(word) (((WORD)(word) & 0xfc00) == 0xdc00) + #define UTF16_DECODE_SURROGATE(hi, lo) (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0))) + + static unsigned + md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size) + { + if(IS_UTF16_SURROGATE_HI(str[0])) { + if(1 < str_size && IS_UTF16_SURROGATE_LO(str[1])) { + if(p_size != NULL) + *p_size = 2; + return UTF16_DECODE_SURROGATE(str[0], str[1]); + } + } + + if(p_size != NULL) + *p_size = 1; + return str[0]; + } + + static unsigned + md_decode_utf16le_before__(MD_CTX* ctx, OFF off) + { + if(off > 2 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1))) + return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1)); + + return CH(off); + } + + /* No whitespace uses surrogates, so no decoding needed here. */ + #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint) + #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(CH(off)) + #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(CH((off)-1)) + + #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL)) + #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off)) + + static inline int + md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size) + { + return md_decode_utf16le__(str+off, str_size-off, p_char_size); + } +#elif defined MD4C_USE_UTF8 + #define IS_UTF8_LEAD1(byte) ((unsigned char)(byte) <= 0x7f) + #define IS_UTF8_LEAD2(byte) (((unsigned char)(byte) & 0xe0) == 0xc0) + #define IS_UTF8_LEAD3(byte) (((unsigned char)(byte) & 0xf0) == 0xe0) + #define IS_UTF8_LEAD4(byte) (((unsigned char)(byte) & 0xf8) == 0xf0) + #define IS_UTF8_TAIL(byte) (((unsigned char)(byte) & 0xc0) == 0x80) + + static unsigned + md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size) + { + if(!IS_UTF8_LEAD1(str[0])) { + if(IS_UTF8_LEAD2(str[0])) { + if(1 < str_size && IS_UTF8_TAIL(str[1])) { + if(p_size != NULL) + *p_size = 2; + + return (((unsigned int)str[0] & 0x1f) << 6) | + (((unsigned int)str[1] & 0x3f) << 0); + } + } else if(IS_UTF8_LEAD3(str[0])) { + if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) { + if(p_size != NULL) + *p_size = 3; + + return (((unsigned int)str[0] & 0x0f) << 12) | + (((unsigned int)str[1] & 0x3f) << 6) | + (((unsigned int)str[2] & 0x3f) << 0); + } + } else if(IS_UTF8_LEAD4(str[0])) { + if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) { + if(p_size != NULL) + *p_size = 4; + + return (((unsigned int)str[0] & 0x07) << 18) | + (((unsigned int)str[1] & 0x3f) << 12) | + (((unsigned int)str[2] & 0x3f) << 6) | + (((unsigned int)str[3] & 0x3f) << 0); + } + } + } + + if(p_size != NULL) + *p_size = 1; + return (unsigned) str[0]; + } + + static unsigned + md_decode_utf8_before__(MD_CTX* ctx, OFF off) + { + if(!IS_UTF8_LEAD1(CH(off-1))) { + if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1))) + return (((unsigned int)CH(off-2) & 0x1f) << 6) | + (((unsigned int)CH(off-1) & 0x3f) << 0); + + if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1))) + return (((unsigned int)CH(off-3) & 0x0f) << 12) | + (((unsigned int)CH(off-2) & 0x3f) << 6) | + (((unsigned int)CH(off-1) & 0x3f) << 0); + + if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1))) + return (((unsigned int)CH(off-4) & 0x07) << 18) | + (((unsigned int)CH(off-3) & 0x3f) << 12) | + (((unsigned int)CH(off-2) & 0x3f) << 6) | + (((unsigned int)CH(off-1) & 0x3f) << 0); + } + + return (unsigned) CH(off-1); + } + + #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint) + #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL)) + #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off)) + + #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL)) + #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf8_before__(ctx, off)) + + static inline unsigned + md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size) + { + return md_decode_utf8__(str+off, str_size-off, p_char_size); + } +#else + #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint) + #define ISUNICODEWHITESPACE(off) ISWHITESPACE(off) + #define ISUNICODEWHITESPACEBEFORE(off) ISWHITESPACE((off)-1) + + #define ISUNICODEPUNCT(off) ISPUNCT(off) + #define ISUNICODEPUNCTBEFORE(off) ISPUNCT((off)-1) + + static inline void + md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info) + { + info->codepoints[0] = codepoint; + if(ISUPPER_(codepoint)) + info->codepoints[0] += 'a' - 'A'; + info->n_codepoints = 1; + } + + static inline unsigned + md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size) + { + *p_size = 1; + return (unsigned) str[off]; + } +#endif + + +/************************************* + *** Helper string manipulations *** + *************************************/ + +/* Fill buffer with copy of the string between 'beg' and 'end' but replace any + * line breaks with given replacement character. + * + * NOTE: Caller is responsible to make sure the buffer is large enough. + * (Given the output is always shorter then input, (end - beg) is good idea + * what the caller should allocate.) + */ +static void +md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines, + CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size) +{ + CHAR* ptr = buffer; + int line_index = 0; + OFF off = beg; + + while(1) { + const MD_LINE* line = &lines[line_index]; + OFF line_end = line->end; + if(end < line_end) + line_end = end; + + while(off < line_end) { + *ptr = CH(off); + ptr++; + off++; + } + + if(off >= end) { + *p_size = ptr - buffer; + return; + } + + *ptr = line_break_replacement_char; + ptr++; + + line_index++; + off = lines[line_index].beg; + } +} + +/* Wrapper of md_merge_lines() which allocates new buffer for the output string. + */ +static int +md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines, + CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size) +{ + CHAR* buffer; + + buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg)); + if(buffer == NULL) { + MD_LOG("malloc() failed."); + return -1; + } + + md_merge_lines(ctx, beg, end, lines, n_lines, + line_break_replacement_char, buffer, p_size); + + *p_str = buffer; + return 0; +} + +static OFF +md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size) +{ + SZ char_size; + unsigned codepoint; + + while(off < size) { + codepoint = md_decode_unicode(label, off, size, &char_size); + if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off])) + break; + off += char_size; + } + + return off; +} + + +/****************************** + *** Recognizing raw HTML *** + ******************************/ + +/* md_is_html_tag() may be called when processing inlines (inline raw HTML) + * or when breaking document to blocks (checking for start of HTML block type 7). + * + * When breaking document to blocks, we do not yet know line boundaries, but + * in that case the whole tag has to live on a single line. We distinguish this + * by n_lines == 0. + */ +static int +md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + int attr_state; + OFF off = beg; + OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size; + int i = 0; + + MD_ASSERT(CH(beg) == _T('<')); + + if(off + 1 >= line_end) + return FALSE; + off++; + + /* For parsing attributes, we need a little state automaton below. + * State -1: no attributes are allowed. + * State 0: attribute could follow after some whitespace. + * State 1: after a whitespace (attribute name may follow). + * State 2: after attribute name ('=' MAY follow). + * State 3: after '=' (value specification MUST follow). + * State 41: in middle of unquoted attribute value. + * State 42: in middle of single-quoted attribute value. + * State 43: in middle of double-quoted attribute value. + */ + attr_state = 0; + + if(CH(off) == _T('/')) { + /* Closer tag "". No attributes may be present. */ + attr_state = -1; + off++; + } + + /* Tag name */ + if(off >= line_end || !ISALPHA(off)) + return FALSE; + off++; + while(off < line_end && (ISALNUM(off) || CH(off) == _T('-'))) + off++; + + /* (Optional) attributes (if not closer), (optional) '/' (if not closer) + * and final '>'. */ + while(1) { + while(off < line_end && !ISNEWLINE(off)) { + if(attr_state > 40) { + if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) { + attr_state = 0; + off--; /* Put the char back for re-inspection in the new state. */ + } else if(attr_state == 42 && CH(off) == _T('\'')) { + attr_state = 0; + } else if(attr_state == 43 && CH(off) == _T('"')) { + attr_state = 0; + } + off++; + } else if(ISWHITESPACE(off)) { + if(attr_state == 0) + attr_state = 1; + off++; + } else if(attr_state <= 2 && CH(off) == _T('>')) { + /* End. */ + goto done; + } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) { + /* End with digraph '/>' */ + off++; + goto done; + } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) { + off++; + /* Attribute name */ + while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-")))) + off++; + attr_state = 2; + } else if(attr_state == 2 && CH(off) == _T('=')) { + /* Attribute assignment sign */ + off++; + attr_state = 3; + } else if(attr_state == 3) { + /* Expecting start of attribute value. */ + if(CH(off) == _T('"')) + attr_state = 43; + else if(CH(off) == _T('\'')) + attr_state = 42; + else if(!ISANYOF(off, _T("\"'=<>`")) && !ISNEWLINE(off)) + attr_state = 41; + else + return FALSE; + off++; + } else { + /* Anything unexpected. */ + return FALSE; + } + } + + /* We have to be on a single line. See definition of start condition + * of HTML block, type 7. */ + if(n_lines == 0) + return FALSE; + + i++; + if(i >= n_lines) + return FALSE; + + off = lines[i].beg; + line_end = lines[i].end; + + if(attr_state == 0 || attr_state == 41) + attr_state = 1; + + if(off >= max_end) + return FALSE; + } + +done: + if(off >= max_end) + return FALSE; + + *p_end = off+1; + return TRUE; +} + +static int +md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len, + const MD_LINE* lines, int n_lines, + OFF beg, OFF max_end, OFF* p_end, + OFF* p_scan_horizon) +{ + OFF off = beg; + int i = 0; + + if(off < *p_scan_horizon && *p_scan_horizon >= max_end - len) { + /* We have already scanned the range up to the max_end so we know + * there is nothing to see. */ + return FALSE; + } + + while(TRUE) { + while(off + len <= lines[i].end && off + len <= max_end) { + if(md_ascii_eq(STR(off), str, len)) { + /* Success. */ + *p_end = off + len; + return TRUE; + } + off++; + } + + i++; + if(off >= max_end || i >= n_lines) { + /* Failure. */ + *p_scan_horizon = off; + return FALSE; + } + + off = lines[i].beg; + } +} + +static int +md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + MD_ASSERT(CH(beg) == _T('<')); + + if(off + 4 >= lines[0].end) + return FALSE; + if(CH(off+1) != _T('!') || CH(off+2) != _T('-') || CH(off+3) != _T('-')) + return FALSE; + off += 4; + + /* ">" and "->" must not follow the opening. */ + if(off < lines[0].end && CH(off) == _T('>')) + return FALSE; + if(off+1 < lines[0].end && CH(off) == _T('-') && CH(off+1) == _T('>')) + return FALSE; + + /* HTML comment must not contain "--", so we scan just for "--" instead + * of "-->" and verify manually that '>' follows. */ + if(md_scan_for_html_closer(ctx, _T("--"), 2, + lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon)) + { + if(*p_end < max_end && CH(*p_end) == _T('>')) { + *p_end = *p_end + 1; + return TRUE; + } + } + + return FALSE; +} + +static int +md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + if(off + 2 >= lines[0].end) + return FALSE; + if(CH(off+1) != _T('?')) + return FALSE; + off += 2; + + return md_scan_for_html_closer(ctx, _T("?>"), 2, + lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon); +} + +static int +md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + if(off + 2 >= lines[0].end) + return FALSE; + if(CH(off+1) != _T('!')) + return FALSE; + off += 2; + + /* Declaration name. */ + if(off >= lines[0].end || !ISALPHA(off)) + return FALSE; + off++; + while(off < lines[0].end && ISALPHA(off)) + off++; + if(off < lines[0].end && !ISWHITESPACE(off)) + return FALSE; + + return md_scan_for_html_closer(ctx, _T(">"), 1, + lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon); +} + +static int +md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + static const CHAR open_str[] = _T("= lines[0].end) + return FALSE; + if(memcmp(STR(off), open_str, open_size) != 0) + return FALSE; + off += open_size; + + if(lines[n_lines-1].end < max_end) + max_end = lines[n_lines-1].end - 2; + + return md_scan_for_html_closer(ctx, _T("]]>"), 3, + lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon); +} + +static int +md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + MD_ASSERT(CH(beg) == _T('<')); + return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end)); +} + + +/**************************** + *** Recognizing Entity *** + ****************************/ + +static int +md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8) + off++; + + if(1 <= off - beg && off - beg <= 6) { + *p_end = off; + return TRUE; + } else { + return FALSE; + } +} + +static int +md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + while(off < max_end && ISDIGIT_(text[off]) && off - beg <= 8) + off++; + + if(1 <= off - beg && off - beg <= 7) { + *p_end = off; + return TRUE; + } else { + return FALSE; + } +} + +static int +md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + if(off < max_end && ISALPHA_(text[off])) + off++; + else + return FALSE; + + while(off < max_end && ISALNUM_(text[off]) && off - beg <= 48) + off++; + + if(2 <= off - beg && off - beg <= 48) { + *p_end = off; + return TRUE; + } else { + return FALSE; + } +} + +static int +md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) +{ + int is_contents; + OFF off = beg; + + MD_ASSERT(text[off] == _T('&')); + off++; + + if(off+2 < max_end && text[off] == _T('#') && (text[off+1] == _T('x') || text[off+1] == _T('X'))) + is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off); + else if(off+1 < max_end && text[off] == _T('#')) + is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off); + else + is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off); + + if(is_contents && off < max_end && text[off] == _T(';')) { + *p_end = off+1; + return TRUE; + } else { + return FALSE; + } +} + +static inline int +md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) +{ + return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end); +} + + +/****************************** + *** Attribute Management *** + ******************************/ + +typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD; +struct MD_ATTRIBUTE_BUILD_tag { + CHAR* text; + MD_TEXTTYPE* substr_types; + OFF* substr_offsets; + int substr_count; + int substr_alloc; + MD_TEXTTYPE trivial_types[1]; + OFF trivial_offsets[2]; +}; + + +#define MD_BUILD_ATTR_NO_ESCAPES 0x0001 + +static int +md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build, + MD_TEXTTYPE type, OFF off) +{ + if(build->substr_count >= build->substr_alloc) { + MD_TEXTTYPE* new_substr_types; + OFF* new_substr_offsets; + + build->substr_alloc = (build->substr_alloc > 0 + ? build->substr_alloc + build->substr_alloc / 2 + : 8); + new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types, + build->substr_alloc * sizeof(MD_TEXTTYPE)); + if(new_substr_types == NULL) { + MD_LOG("realloc() failed."); + return -1; + } + /* Note +1 to reserve space for final offset (== raw_size). */ + new_substr_offsets = (OFF*) realloc(build->substr_offsets, + (build->substr_alloc+1) * sizeof(OFF)); + if(new_substr_offsets == NULL) { + MD_LOG("realloc() failed."); + free(new_substr_types); + return -1; + } + + build->substr_types = new_substr_types; + build->substr_offsets = new_substr_offsets; + } + + build->substr_types[build->substr_count] = type; + build->substr_offsets[build->substr_count] = off; + build->substr_count++; + return 0; +} + +static void +md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build) +{ + if(build->substr_alloc > 0) { + free(build->text); + free(build->substr_types); + free(build->substr_offsets); + } +} + +static int +md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, + unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build) +{ + OFF raw_off, off; + int is_trivial; + int ret = 0; + + memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD)); + + /* If there is no backslash and no ampersand, build trivial attribute + * without any malloc(). */ + is_trivial = TRUE; + for(raw_off = 0; raw_off < raw_size; raw_off++) { + if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) { + is_trivial = FALSE; + break; + } + } + + if(is_trivial) { + build->text = (CHAR*) (raw_size ? raw_text : NULL); + build->substr_types = build->trivial_types; + build->substr_offsets = build->trivial_offsets; + build->substr_count = 1; + build->substr_alloc = 0; + build->trivial_types[0] = MD_TEXT_NORMAL; + build->trivial_offsets[0] = 0; + build->trivial_offsets[1] = raw_size; + off = raw_size; + } else { + build->text = (CHAR*) malloc(raw_size * sizeof(CHAR)); + if(build->text == NULL) { + MD_LOG("malloc() failed."); + goto abort; + } + + raw_off = 0; + off = 0; + + while(raw_off < raw_size) { + if(raw_text[raw_off] == _T('\0')) { + MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off)); + memcpy(build->text + off, raw_text + raw_off, 1); + off++; + raw_off++; + continue; + } + + if(raw_text[raw_off] == _T('&')) { + OFF ent_end; + + if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) { + MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off)); + memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off); + off += ent_end - raw_off; + raw_off = ent_end; + continue; + } + } + + if(build->substr_count == 0 || build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL) + MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off)); + + if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) && + raw_text[raw_off] == _T('\\') && raw_off+1 < raw_size && + (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1]))) + raw_off++; + + build->text[off++] = raw_text[raw_off++]; + } + build->substr_offsets[build->substr_count] = off; + } + + attr->text = build->text; + attr->size = off; + attr->substr_offsets = build->substr_offsets; + attr->substr_types = build->substr_types; + return 0; + +abort: + md_free_attribute(ctx, build); + return -1; +} + + +/********************************************* + *** Dictionary of Reference Definitions *** + *********************************************/ + +#define MD_FNV1A_BASE 2166136261U +#define MD_FNV1A_PRIME 16777619U + +static inline unsigned +md_fnv1a(unsigned base, const void* data, size_t n) +{ + const unsigned char* buf = (const unsigned char*) data; + unsigned hash = base; + size_t i; + + for(i = 0; i < n; i++) { + hash ^= buf[i]; + hash *= MD_FNV1A_PRIME; + } + + return hash; +} + + +struct MD_REF_DEF_tag { + CHAR* label; + CHAR* title; + unsigned hash; + SZ label_size; + SZ title_size; + OFF dest_beg; + OFF dest_end; + unsigned char label_needs_free : 1; + unsigned char title_needs_free : 1; +}; + +/* Label equivalence is quite complicated with regards to whitespace and case + * folding. This complicates computing a hash of it as well as direct comparison + * of two labels. */ + +static unsigned +md_link_label_hash(const CHAR* label, SZ size) +{ + unsigned hash = MD_FNV1A_BASE; + OFF off; + unsigned codepoint; + int is_whitespace = FALSE; + + off = md_skip_unicode_whitespace(label, 0, size); + while(off < size) { + SZ char_size; + + codepoint = md_decode_unicode(label, off, size, &char_size); + is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]); + + if(is_whitespace) { + codepoint = ' '; + hash = md_fnv1a(hash, &codepoint, sizeof(unsigned)); + off = md_skip_unicode_whitespace(label, off, size); + } else { + MD_UNICODE_FOLD_INFO fold_info; + + md_get_unicode_fold_info(codepoint, &fold_info); + hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned)); + off += char_size; + } + } + + return hash; +} + +static OFF +md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size, + MD_UNICODE_FOLD_INFO* fold_info) +{ + unsigned codepoint; + SZ char_size; + + if(off >= size) { + /* Treat end of a link label as a whitespace. */ + goto whitespace; + } + + if(ISNEWLINE_(label[off])) { + /* Treat new lines as a whitespace. */ + off++; + goto whitespace; + } + + codepoint = md_decode_unicode(label, off, size, &char_size); + off += char_size; + if(ISUNICODEWHITESPACE_(codepoint)) { + /* Treat all whitespace as equivalent */ + goto whitespace; + } + + /* Get real folding info. */ + md_get_unicode_fold_info(codepoint, fold_info); + return off; + +whitespace: + fold_info->codepoints[0] = _T(' '); + fold_info->n_codepoints = 1; + return md_skip_unicode_whitespace(label, off, size); +} + +static int +md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size) +{ + OFF a_off; + OFF b_off; + int a_reached_end = FALSE; + int b_reached_end = FALSE; + MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 }; + MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 }; + OFF a_fi_off = 0; + OFF b_fi_off = 0; + int cmp; + + a_off = md_skip_unicode_whitespace(a_label, 0, a_size); + b_off = md_skip_unicode_whitespace(b_label, 0, b_size); + while(!a_reached_end || !b_reached_end) { + /* If needed, load fold info for next char. */ + if(a_fi_off >= a_fi.n_codepoints) { + a_fi_off = 0; + a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi); + a_reached_end = (a_off >= a_size); + } + if(b_fi_off >= b_fi.n_codepoints) { + b_fi_off = 0; + b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi); + b_reached_end = (b_off >= b_size); + } + + cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off]; + if(cmp != 0) + return cmp; + + a_fi_off++; + b_fi_off++; + } + + return 0; +} + +typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST; +struct MD_REF_DEF_LIST_tag { + int n_ref_defs; + int alloc_ref_defs; + MD_REF_DEF* ref_defs[]; /* Valid items always point into ctx->ref_defs[] */ +}; + +static int +md_ref_def_cmp(const void* a, const void* b) +{ + const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a; + const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b; + + if(a_ref->hash < b_ref->hash) + return -1; + else if(a_ref->hash > b_ref->hash) + return +1; + else + return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size); +} + +static int +md_ref_def_cmp_for_sort(const void* a, const void* b) +{ + int cmp; + + cmp = md_ref_def_cmp(a, b); + + /* Ensure stability of the sorting. */ + if(cmp == 0) { + const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a; + const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b; + + if(a_ref < b_ref) + cmp = -1; + else if(a_ref > b_ref) + cmp = +1; + else + cmp = 0; + } + + return cmp; +} + +static int +md_build_ref_def_hashtable(MD_CTX* ctx) +{ + int i, j; + + if(ctx->n_ref_defs == 0) + return 0; + + ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4; + ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*)); + if(ctx->ref_def_hashtable == NULL) { + MD_LOG("malloc() failed."); + goto abort; + } + memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*)); + + /* Each member of ctx->ref_def_hashtable[] can be: + * -- NULL, + * -- pointer to the MD_REF_DEF in ctx->ref_defs[], or + * -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to + * such MD_REF_DEFs. + */ + for(i = 0; i < ctx->n_ref_defs; i++) { + MD_REF_DEF* def = &ctx->ref_defs[i]; + void* bucket; + MD_REF_DEF_LIST* list; + + def->hash = md_link_label_hash(def->label, def->label_size); + bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size]; + + if(bucket == NULL) { + /* The bucket is empty. Make it just point to the def. */ + ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def; + continue; + } + + if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) { + /* The bucket already contains one ref. def. Lets see whether it + * is the same label (ref. def. duplicate) or different one + * (hash conflict). */ + MD_REF_DEF* old_def = (MD_REF_DEF*) bucket; + + if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) { + /* Duplicate label: Ignore this ref. def. */ + continue; + } + + /* Make the bucket complex, i.e. able to hold more ref. defs. */ + list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*)); + if(list == NULL) { + MD_LOG("malloc() failed."); + goto abort; + } + list->ref_defs[0] = old_def; + list->ref_defs[1] = def; + list->n_ref_defs = 2; + list->alloc_ref_defs = 2; + ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list; + continue; + } + + /* Append the def to the complex bucket list. + * + * Note in this case we ignore potential duplicates to avoid expensive + * iterating over the complex bucket. Below, we revisit all the complex + * buckets and handle it more cheaply after the complex bucket contents + * is sorted. */ + list = (MD_REF_DEF_LIST*) bucket; + if(list->n_ref_defs >= list->alloc_ref_defs) { + int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2; + MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list, + sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*)); + if(list_tmp == NULL) { + MD_LOG("realloc() failed."); + goto abort; + } + list = list_tmp; + list->alloc_ref_defs = alloc_ref_defs; + ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list; + } + + list->ref_defs[list->n_ref_defs] = def; + list->n_ref_defs++; + } + + /* Sort the complex buckets so we can use bsearch() with them. */ + for(i = 0; i < ctx->ref_def_hashtable_size; i++) { + void* bucket = ctx->ref_def_hashtable[i]; + MD_REF_DEF_LIST* list; + + if(bucket == NULL) + continue; + if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) + continue; + + list = (MD_REF_DEF_LIST*) bucket; + qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort); + + /* Disable all duplicates in the complex bucket by forcing all such + * records to point to the 1st such ref. def. I.e. no matter which + * record is found during the lookup, it will always point to the right + * ref. def. in ctx->ref_defs[]. */ + for(j = 1; j < list->n_ref_defs; j++) { + if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0) + list->ref_defs[j] = list->ref_defs[j-1]; + } + } + + return 0; + +abort: + return -1; +} + +static void +md_free_ref_def_hashtable(MD_CTX* ctx) +{ + if(ctx->ref_def_hashtable != NULL) { + int i; + + for(i = 0; i < ctx->ref_def_hashtable_size; i++) { + void* bucket = ctx->ref_def_hashtable[i]; + if(bucket == NULL) + continue; + if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) + continue; + free(bucket); + } + + free(ctx->ref_def_hashtable); + } +} + +static const MD_REF_DEF* +md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size) +{ + unsigned hash; + void* bucket; + + if(ctx->ref_def_hashtable_size == 0) + return NULL; + + hash = md_link_label_hash(label, label_size); + bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size]; + + if(bucket == NULL) { + return NULL; + } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) { + const MD_REF_DEF* def = (MD_REF_DEF*) bucket; + + if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0) + return def; + else + return NULL; + } else { + MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket; + MD_REF_DEF key_buf; + const MD_REF_DEF* key = &key_buf; + const MD_REF_DEF** ret; + + key_buf.label = (CHAR*) label; + key_buf.label_size = label_size; + key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size); + + ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs, + list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp); + if(ret != NULL) + return *ret; + else + return NULL; + } +} + + +/*************************** + *** Recognizing Links *** + ***************************/ + +/* Note this code is partially shared between processing inlines and blocks + * as reference definitions and links share some helper parser functions. + */ + +typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR; +struct MD_LINK_ATTR_tag { + OFF dest_beg; + OFF dest_end; + + CHAR* title; + SZ title_size; + int title_needs_free; +}; + + +static int +md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, + OFF* p_end, int* p_beg_line_index, int* p_end_line_index, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + OFF contents_beg = 0; + OFF contents_end = 0; + int line_index = 0; + int len = 0; + + if(CH(off) != _T('[')) + return FALSE; + off++; + + while(1) { + OFF line_end = lines[line_index].end; + + while(off < line_end) { + if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) { + if(contents_end == 0) { + contents_beg = off; + *p_beg_line_index = line_index; + } + contents_end = off + 2; + off += 2; + } else if(CH(off) == _T('[')) { + return FALSE; + } else if(CH(off) == _T(']')) { + if(contents_beg < contents_end) { + /* Success. */ + *p_contents_beg = contents_beg; + *p_contents_end = contents_end; + *p_end = off+1; + *p_end_line_index = line_index; + return TRUE; + } else { + /* Link label must have some non-whitespace contents. */ + return FALSE; + } + } else { + unsigned codepoint; + SZ char_size; + + codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size); + if(!ISUNICODEWHITESPACE_(codepoint)) { + if(contents_end == 0) { + contents_beg = off; + *p_beg_line_index = line_index; + } + contents_end = off + char_size; + } + + off += char_size; + } + + len++; + if(len > 999) + return FALSE; + } + + line_index++; + len++; + if(line_index < n_lines) + off = lines[line_index].beg; + else + break; + } + + return FALSE; +} + +static int +md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + + if(off >= max_end || CH(off) != _T('<')) + return FALSE; + off++; + + while(off < max_end) { + if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) { + off += 2; + continue; + } + + if(ISNEWLINE(off) || CH(off) == _T('<')) + return FALSE; + + if(CH(off) == _T('>')) { + /* Success. */ + *p_contents_beg = beg+1; + *p_contents_end = off; + *p_end = off+1; + return TRUE; + } + + off++; + } + + return FALSE; +} + +static int +md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + int parenthesis_level = 0; + + while(off < max_end) { + if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) { + off += 2; + continue; + } + + if(ISWHITESPACE(off) || ISCNTRL(off)) + break; + + /* Link destination may include balanced pairs of unescaped '(' ')'. + * Note we limit the maximal nesting level by 32 to protect us from + * https://github.com/jgm/cmark/issues/214 */ + if(CH(off) == _T('(')) { + parenthesis_level++; + if(parenthesis_level > 32) + return FALSE; + } else if(CH(off) == _T(')')) { + if(parenthesis_level == 0) + break; + parenthesis_level--; + } + + off++; + } + + if(parenthesis_level != 0 || off == beg) + return FALSE; + + /* Success. */ + *p_contents_beg = beg; + *p_contents_end = off; + *p_end = off; + return TRUE; +} + +static inline int +md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, + OFF* p_contents_beg, OFF* p_contents_end) +{ + if(CH(beg) == _T('<')) + return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); + else + return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); +} + +static int +md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, + OFF* p_end, int* p_beg_line_index, int* p_end_line_index, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + CHAR closer_char; + int line_index = 0; + + /* White space with up to one line break. */ + while(off < lines[line_index].end && ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + if(off == beg) + return FALSE; + + *p_beg_line_index = line_index; + + /* First char determines how to detect end of it. */ + switch(CH(off)) { + case _T('"'): closer_char = _T('"'); break; + case _T('\''): closer_char = _T('\''); break; + case _T('('): closer_char = _T(')'); break; + default: return FALSE; + } + off++; + + *p_contents_beg = off; + + while(line_index < n_lines) { + OFF line_end = lines[line_index].end; + + while(off < line_end) { + if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) { + off++; + } else if(CH(off) == closer_char) { + /* Success. */ + *p_contents_end = off; + *p_end = off+1; + *p_end_line_index = line_index; + return TRUE; + } else if(closer_char == _T(')') && CH(off) == _T('(')) { + /* ()-style title cannot contain (unescaped '(')) */ + return FALSE; + } + + off++; + } + + line_index++; + } + + return FALSE; +} + +/* Returns 0 if it is not a reference definition. + * + * Returns N > 0 if it is a reference definition. N then corresponds to the + * number of lines forming it). In this case the definition is stored for + * resolving any links referring to it. + * + * Returns -1 in case of an error (out of memory). + */ +static int +md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + OFF label_contents_beg; + OFF label_contents_end; + int label_contents_line_index = -1; + int label_is_multiline = FALSE; + OFF dest_contents_beg; + OFF dest_contents_end; + OFF title_contents_beg; + OFF title_contents_end; + int title_contents_line_index; + int title_is_multiline = FALSE; + OFF off; + int line_index = 0; + int tmp_line_index; + MD_REF_DEF* def = NULL; + int ret = 0; + + /* Link label. */ + if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg, + &off, &label_contents_line_index, &line_index, + &label_contents_beg, &label_contents_end)) + return FALSE; + label_is_multiline = (label_contents_line_index != line_index); + + /* Colon. */ + if(off >= lines[line_index].end || CH(off) != _T(':')) + return FALSE; + off++; + + /* Optional white space with up to one line break. */ + while(off < lines[line_index].end && ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + + /* Link destination. */ + if(!md_is_link_destination(ctx, off, lines[line_index].end, + &off, &dest_contents_beg, &dest_contents_end)) + return FALSE; + + /* (Optional) title. Note we interpret it as an title only if nothing + * more follows on its last line. */ + if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, + &off, &title_contents_line_index, &tmp_line_index, + &title_contents_beg, &title_contents_end) + && off >= lines[line_index + tmp_line_index].end) + { + title_is_multiline = (tmp_line_index != title_contents_line_index); + title_contents_line_index += line_index; + line_index += tmp_line_index; + } else { + /* Not a title. */ + title_is_multiline = FALSE; + title_contents_beg = off; + title_contents_end = off; + title_contents_line_index = 0; + } + + /* Nothing more can follow on the last line. */ + if(off < lines[line_index].end) + return FALSE; + + /* So, it _is_ a reference definition. Remember it. */ + if(ctx->n_ref_defs >= ctx->alloc_ref_defs) { + MD_REF_DEF* new_defs; + + ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0 + ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2 + : 16); + new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF)); + if(new_defs == NULL) { + MD_LOG("realloc() failed."); + goto abort; + } + + ctx->ref_defs = new_defs; + } + def = &ctx->ref_defs[ctx->n_ref_defs]; + memset(def, 0, sizeof(MD_REF_DEF)); + + if(label_is_multiline) { + MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end, + lines + label_contents_line_index, n_lines - label_contents_line_index, + _T(' '), &def->label, &def->label_size)); + def->label_needs_free = TRUE; + } else { + def->label = (CHAR*) STR(label_contents_beg); + def->label_size = label_contents_end - label_contents_beg; + } + + if(title_is_multiline) { + MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, + lines + title_contents_line_index, n_lines - title_contents_line_index, + _T('\n'), &def->title, &def->title_size)); + def->title_needs_free = TRUE; + } else { + def->title = (CHAR*) STR(title_contents_beg); + def->title_size = title_contents_end - title_contents_beg; + } + + def->dest_beg = dest_contents_beg; + def->dest_end = dest_contents_end; + + /* Success. */ + ctx->n_ref_defs++; + return line_index + 1; + +abort: + /* Failure. */ + if(def != NULL && def->label_needs_free) + free(def->label); + if(def != NULL && def->title_needs_free) + free(def->title); + return ret; +} + +static int +md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + OFF beg, OFF end, MD_LINK_ATTR* attr) +{ + const MD_REF_DEF* def; + const MD_LINE* beg_line; + const MD_LINE* end_line; + CHAR* label; + SZ label_size; + int ret; + + MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!')); + MD_ASSERT(CH(end-1) == _T(']')); + + beg += (CH(beg) == _T('!') ? 2 : 1); + end--; + + /* Find lines corresponding to the beg and end positions. */ + MD_ASSERT(lines[0].beg <= beg); + beg_line = lines; + while(beg >= beg_line->end) + beg_line++; + + MD_ASSERT(end <= lines[n_lines-1].end); + end_line = beg_line; + while(end >= end_line->end) + end_line++; + + if(beg_line != end_line) { + MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line, + n_lines - (beg_line - lines), _T(' '), &label, &label_size)); + } else { + label = (CHAR*) STR(beg); + label_size = end - beg; + } + + def = md_lookup_ref_def(ctx, label, label_size); + if(def != NULL) { + attr->dest_beg = def->dest_beg; + attr->dest_end = def->dest_end; + attr->title = def->title; + attr->title_size = def->title_size; + attr->title_needs_free = FALSE; + } + + if(beg_line != end_line) + free(label); + + ret = (def != NULL); + +abort: + return ret; +} + +static int +md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + OFF beg, OFF* p_end, MD_LINK_ATTR* attr) +{ + int line_index = 0; + int tmp_line_index; + OFF title_contents_beg; + OFF title_contents_end; + int title_contents_line_index; + int title_is_multiline; + OFF off = beg; + int ret = FALSE; + + while(off >= lines[line_index].end) + line_index++; + + MD_ASSERT(CH(off) == _T('(')); + off++; + + /* Optional white space with up to one line break. */ + while(off < lines[line_index].end && ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end && ISNEWLINE(off)) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + + /* Link destination may be omitted, but only when not also having a title. */ + if(off < ctx->size && CH(off) == _T(')')) { + attr->dest_beg = off; + attr->dest_end = off; + attr->title = NULL; + attr->title_size = 0; + attr->title_needs_free = FALSE; + off++; + *p_end = off; + return TRUE; + } + + /* Link destination. */ + if(!md_is_link_destination(ctx, off, lines[line_index].end, + &off, &attr->dest_beg, &attr->dest_end)) + return FALSE; + + /* (Optional) title. */ + if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, + &off, &title_contents_line_index, &tmp_line_index, + &title_contents_beg, &title_contents_end)) + { + title_is_multiline = (tmp_line_index != title_contents_line_index); + title_contents_line_index += line_index; + line_index += tmp_line_index; + } else { + /* Not a title. */ + title_is_multiline = FALSE; + title_contents_beg = off; + title_contents_end = off; + title_contents_line_index = 0; + } + + /* Optional whitespace followed with final ')'. */ + while(off < lines[line_index].end && ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end && ISNEWLINE(off)) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + if(CH(off) != _T(')')) + goto abort; + off++; + + if(title_contents_beg >= title_contents_end) { + attr->title = NULL; + attr->title_size = 0; + attr->title_needs_free = FALSE; + } else if(!title_is_multiline) { + attr->title = (CHAR*) STR(title_contents_beg); + attr->title_size = title_contents_end - title_contents_beg; + attr->title_needs_free = FALSE; + } else { + MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, + lines + title_contents_line_index, n_lines - title_contents_line_index, + _T('\n'), &attr->title, &attr->title_size)); + attr->title_needs_free = TRUE; + } + + *p_end = off; + ret = TRUE; + +abort: + return ret; +} + +static void +md_free_ref_defs(MD_CTX* ctx) +{ + int i; + + for(i = 0; i < ctx->n_ref_defs; i++) { + MD_REF_DEF* def = &ctx->ref_defs[i]; + + if(def->label_needs_free) + free(def->label); + if(def->title_needs_free) + free(def->title); + } + + free(ctx->ref_defs); +} + + +/****************************************** + *** Processing Inlines (a.k.a Spans) *** + ******************************************/ + +/* We process inlines in few phases: + * + * (1) We go through the block text and collect all significant characters + * which may start/end a span or some other significant position into + * ctx->marks[]. Core of this is what md_collect_marks() does. + * + * We also do some very brief preliminary context-less analysis, whether + * it might be opener or closer (e.g. of an emphasis span). + * + * This speeds the other steps as we do not need to re-iterate over all + * characters anymore. + * + * (2) We analyze each potential mark types, in order by their precedence. + * + * In each md_analyze_XXX() function, we re-iterate list of the marks, + * skipping already resolved regions (in preceding precedences) and try to + * resolve them. + * + * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark + * them as resolved. + * + * (2.2) For range-type marks, we analyze whether the mark could be closer + * and, if yes, whether there is some preceding opener it could satisfy. + * + * If not we check whether it could be really an opener and if yes, we + * remember it so subsequent closers may resolve it. + * + * (3) Finally, when all marks were analyzed, we render the block contents + * by calling MD_RENDERER::text() callback, interrupting by ::enter_span() + * or ::close_span() whenever we reach a resolved mark. + */ + + +/* The mark structure. + * + * '\\': Maybe escape sequence. + * '\0': NULL char. + * '*': Maybe (strong) emphasis start/end. + * '_': Maybe (strong) emphasis start/end. + * '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH). + * '`': Maybe code span start/end. + * '&': Maybe start of entity. + * ';': Maybe end of entity. + * '<': Maybe start of raw HTML or autolink. + * '>': Maybe end of raw HTML or autolink. + * '[': Maybe start of link label or link text. + * '!': Equivalent of '[' for image. + * ']': Maybe end of link label or link text. + * '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS). + * ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS). + * '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS). + * 'D': Dummy mark, it reserves a space for splitting a previous mark + * (e.g. emphasis) or to make more space for storing some special data + * related to the preceding mark (e.g. link). + * + * Note that not all instances of these chars in the text imply creation of the + * structure. Only those which have (or may have, after we see more context) + * the special meaning. + * + * (Keep this struct as small as possible to fit as much of them into CPU + * cache line.) + */ +struct MD_MARK_tag { + OFF beg; + OFF end; + + /* For unresolved openers, 'prev' and 'next' form the chain of open openers + * of given type 'ch'. + * + * During resolving, we disconnect from the chain and point to the + * corresponding counterpart so opener points to its closer and vice versa. + */ + int prev; + int next; + CHAR ch; + unsigned char flags; +}; + +/* Mark flags (these apply to ALL mark types). */ +#define MD_MARK_POTENTIAL_OPENER 0x01 /* Maybe opener. */ +#define MD_MARK_POTENTIAL_CLOSER 0x02 /* Maybe closer. */ +#define MD_MARK_OPENER 0x04 /* Definitely opener. */ +#define MD_MARK_CLOSER 0x08 /* Definitely closer. */ +#define MD_MARK_RESOLVED 0x10 /* Resolved in any definite way. */ + +/* Mark flags specific for various mark types (so they can share bits). */ +#define MD_MARK_EMPH_INTRAWORD 0x20 /* Helper for the "rule of 3". */ +#define MD_MARK_EMPH_MOD3_0 0x40 +#define MD_MARK_EMPH_MOD3_1 0x80 +#define MD_MARK_EMPH_MOD3_2 (0x40 | 0x80) +#define MD_MARK_EMPH_MOD3_MASK (0x40 | 0x80) +#define MD_MARK_AUTOLINK 0x20 /* Distinguisher for '<', '>'. */ +#define MD_MARK_VALIDPERMISSIVEAUTOLINK 0x20 /* For permissive autolinks. */ + +static MD_MARKCHAIN* +md_asterisk_chain(MD_CTX* ctx, unsigned flags) +{ + switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) { + case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_intraword_mod3_0; + case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_intraword_mod3_1; + case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_intraword_mod3_2; + case MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_extraword_mod3_0; + case MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_extraword_mod3_1; + case MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_extraword_mod3_2; + default: MD_UNREACHABLE(); + } + return NULL; +} + +static MD_MARKCHAIN* +md_mark_chain(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + + switch(mark->ch) { + case _T('*'): return md_asterisk_chain(ctx, mark->flags); + case _T('_'): return &UNDERSCORE_OPENERS; + case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2; + case _T('['): return &BRACKET_OPENERS; + case _T('|'): return &TABLECELLBOUNDARIES; + default: return NULL; + } +} + +static MD_MARK* +md_push_mark(MD_CTX* ctx) +{ + if(ctx->n_marks >= ctx->alloc_marks) { + MD_MARK* new_marks; + + ctx->alloc_marks = (ctx->alloc_marks > 0 + ? ctx->alloc_marks + ctx->alloc_marks / 2 + : 64); + new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK)); + if(new_marks == NULL) { + MD_LOG("realloc() failed."); + return NULL; + } + + ctx->marks = new_marks; + } + + return &ctx->marks[ctx->n_marks++]; +} + +#define PUSH_MARK_() \ + do { \ + mark = md_push_mark(ctx); \ + if(mark == NULL) { \ + ret = -1; \ + goto abort; \ + } \ + } while(0) + +#define PUSH_MARK(ch_, beg_, end_, flags_) \ + do { \ + PUSH_MARK_(); \ + mark->beg = (beg_); \ + mark->end = (end_); \ + mark->prev = -1; \ + mark->next = -1; \ + mark->ch = (char)(ch_); \ + mark->flags = (flags_); \ + } while(0) + + +static void +md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index) +{ + if(chain->tail >= 0) + ctx->marks[chain->tail].next = mark_index; + else + chain->head = mark_index; + + ctx->marks[mark_index].prev = chain->tail; + ctx->marks[mark_index].next = -1; + chain->tail = mark_index; +} + +/* Sometimes, we need to store a pointer into the mark. It is quite rare + * so we do not bother to make MD_MARK use union, and it can only happen + * for dummy marks. */ +static inline void +md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + MD_ASSERT(mark->ch == 'D'); + + /* Check only members beg and end are misused for this. */ + MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF)); + memcpy(mark, &ptr, sizeof(void*)); +} + +static inline void* +md_mark_get_ptr(MD_CTX* ctx, int mark_index) +{ + void* ptr; + MD_MARK* mark = &ctx->marks[mark_index]; + MD_ASSERT(mark->ch == 'D'); + memcpy(&ptr, mark, sizeof(void*)); + return ptr; +} + +static void +md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index) +{ + MD_MARK* opener = &ctx->marks[opener_index]; + MD_MARK* closer = &ctx->marks[closer_index]; + + /* Remove opener from the list of openers. */ + if(chain != NULL) { + if(opener->prev >= 0) + ctx->marks[opener->prev].next = opener->next; + else + chain->head = opener->next; + + if(opener->next >= 0) + ctx->marks[opener->next].prev = opener->prev; + else + chain->tail = opener->prev; + } + + /* Interconnect opener and closer and mark both as resolved. */ + opener->next = closer_index; + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + closer->prev = opener_index; + closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; +} + + +#define MD_ROLLBACK_ALL 0 +#define MD_ROLLBACK_CROSSING 1 + +/* In the range ctx->marks[opener_index] ... [closer_index], undo some or all + * resolvings accordingly to these rules: + * + * (1) All openers BEFORE the range corresponding to any closer inside the + * range are un-resolved and they are re-added to their respective chains + * of unresolved openers. This ensures we can reuse the opener for closers + * AFTER the range. + * + * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range + * are discarded. + * + * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled + * in (1) are discarded. I.e. pairs of openers and closers which are both + * inside the range are retained as well as any unpaired marks. + */ +static void +md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how) +{ + int i; + int mark_index; + + /* Cut all unresolved openers at the mark index. */ + for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) { + MD_MARKCHAIN* chain = &ctx->mark_chains[i]; + + while(chain->tail >= opener_index) + chain->tail = ctx->marks[chain->tail].prev; + + if(chain->tail >= 0) + ctx->marks[chain->tail].next = -1; + else + chain->head = -1; + } + + /* Go backwards so that unresolved openers are re-added into their + * respective chains, in the right order. */ + mark_index = closer_index - 1; + while(mark_index > opener_index) { + MD_MARK* mark = &ctx->marks[mark_index]; + int mark_flags = mark->flags; + int discard_flag = (how == MD_ROLLBACK_ALL); + + if(mark->flags & MD_MARK_CLOSER) { + int mark_opener_index = mark->prev; + + /* Undo opener BEFORE the range. */ + if(mark_opener_index < opener_index) { + MD_MARK* mark_opener = &ctx->marks[mark_opener_index]; + MD_MARKCHAIN* chain; + + mark_opener->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); + chain = md_mark_chain(ctx, opener_index); + if(chain != NULL) { + md_mark_chain_append(ctx, chain, mark_opener_index); + discard_flag = 1; + } + } + } + + /* And reset our flags. */ + if(discard_flag) + mark->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); + + /* Jump as far as we can over unresolved or non-interesting marks. */ + switch(how) { + case MD_ROLLBACK_CROSSING: + if((mark_flags & MD_MARK_CLOSER) && mark->prev > opener_index) { + /* If we are closer with opener INSIDE the range, there may + * not be any other crosser inside the subrange. */ + mark_index = mark->prev; + break; + } + /* Pass through. */ + default: + mark_index--; + break; + } + } +} + +static void +md_build_mark_char_map(MD_CTX* ctx) +{ + memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map)); + + ctx->mark_char_map['\\'] = 1; + ctx->mark_char_map['*'] = 1; + ctx->mark_char_map['_'] = 1; + ctx->mark_char_map['`'] = 1; + ctx->mark_char_map['&'] = 1; + ctx->mark_char_map[';'] = 1; + ctx->mark_char_map['<'] = 1; + ctx->mark_char_map['>'] = 1; + ctx->mark_char_map['['] = 1; + ctx->mark_char_map['!'] = 1; + ctx->mark_char_map[']'] = 1; + ctx->mark_char_map['\0'] = 1; + + if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH) + ctx->mark_char_map['~'] = 1; + + if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS) + ctx->mark_char_map['$'] = 1; + + if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS) + ctx->mark_char_map['@'] = 1; + + if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS) + ctx->mark_char_map[':'] = 1; + + if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS) + ctx->mark_char_map['.'] = 1; + + if((ctx->parser.flags & MD_FLAG_TABLES) || (ctx->parser.flags & MD_FLAG_WIKILINKS)) + ctx->mark_char_map['|'] = 1; + + if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) { + int i; + + for(i = 0; i < (int) sizeof(ctx->mark_char_map); i++) { + if(ISWHITESPACE_(i)) + ctx->mark_char_map[i] = 1; + } + } +} + +/* We limit code span marks to lower than 32 backticks. This solves the + * pathologic case of too many openers, each of different length: Their + * resolving would be then O(n^2). */ +#define CODESPAN_MARK_MAXLEN 32 + +static int +md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, + OFF* p_opener_beg, OFF* p_opener_end, + OFF* p_closer_beg, OFF* p_closer_end, + OFF last_potential_closers[CODESPAN_MARK_MAXLEN], + int* p_reached_paragraph_end) +{ + OFF opener_beg = beg; + OFF opener_end; + OFF closer_beg; + OFF closer_end; + SZ mark_len; + OFF line_end; + int has_space_after_opener = FALSE; + int has_eol_after_opener = FALSE; + int has_space_before_closer = FALSE; + int has_eol_before_closer = FALSE; + int has_only_space = TRUE; + int line_index = 0; + + line_end = lines[0].end; + opener_end = opener_beg; + while(opener_end < line_end && CH(opener_end) == _T('`')) + opener_end++; + has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(' ')); + has_eol_after_opener = (opener_end == line_end); + + /* The caller needs to know end of the opening mark even if we fail. */ + *p_opener_end = opener_end; + + mark_len = opener_end - opener_beg; + if(mark_len > CODESPAN_MARK_MAXLEN) + return FALSE; + + /* Check whether we already know there is no closer of this length. + * If so, re-scan does no sense. This fixes issue #59. */ + if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end || + (*p_reached_paragraph_end && last_potential_closers[mark_len-1] < opener_end)) + return FALSE; + + closer_beg = opener_end; + closer_end = opener_end; + + /* Find closer mark. */ + while(TRUE) { + while(closer_beg < line_end && CH(closer_beg) != _T('`')) { + if(CH(closer_beg) != _T(' ')) + has_only_space = FALSE; + closer_beg++; + } + closer_end = closer_beg; + while(closer_end < line_end && CH(closer_end) == _T('`')) + closer_end++; + + if(closer_end - closer_beg == mark_len) { + /* Success. */ + has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' ')); + has_eol_before_closer = (closer_beg == lines[line_index].beg); + break; + } + + if(closer_end - closer_beg > 0) { + /* We have found a back-tick which is not part of the closer. */ + has_only_space = FALSE; + + /* But if we eventually fail, remember it as a potential closer + * of its own length for future attempts. This mitigates needs for + * rescans. */ + if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) { + if(closer_beg > last_potential_closers[closer_end - closer_beg - 1]) + last_potential_closers[closer_end - closer_beg - 1] = closer_beg; + } + } + + if(closer_end >= line_end) { + line_index++; + if(line_index >= n_lines) { + /* Reached end of the paragraph and still nothing. */ + *p_reached_paragraph_end = TRUE; + return FALSE; + } + /* Try on the next line. */ + line_end = lines[line_index].end; + closer_beg = lines[line_index].beg; + } else { + closer_beg = closer_end; + } + } + + /* If there is a space or a new line both after and before the opener + * (and if the code span is not made of spaces only), consume one initial + * and one trailing space as part of the marks. */ + if(!has_only_space && + (has_space_after_opener || has_eol_after_opener) && + (has_space_before_closer || has_eol_before_closer)) + { + if(has_space_after_opener) + opener_end++; + else + opener_end = lines[1].beg; + + if(has_space_before_closer) + closer_beg--; + else { + closer_beg = lines[line_index-1].end; + /* We need to eat the preceding "\r\n" but not any line trailing + * spaces. */ + while(closer_beg < ctx->size && ISBLANK(closer_beg)) + closer_beg++; + } + } + + *p_opener_beg = opener_beg; + *p_opener_end = opener_end; + *p_closer_beg = closer_beg; + *p_closer_end = closer_end; + return TRUE; +} + +static int +md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg+1; + + MD_ASSERT(CH(beg) == _T('<')); + + /* Check for scheme. */ + if(off >= max_end || !ISASCII(off)) + return FALSE; + off++; + while(1) { + if(off >= max_end) + return FALSE; + if(off - beg > 32) + return FALSE; + if(CH(off) == _T(':') && off - beg >= 3) + break; + if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.')) + return FALSE; + off++; + } + + /* Check the path after the scheme. */ + while(off < max_end && CH(off) != _T('>')) { + if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<')) + return FALSE; + off++; + } + + if(off >= max_end) + return FALSE; + + MD_ASSERT(CH(off) == _T('>')); + *p_end = off+1; + return TRUE; +} + +static int +md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg + 1; + int label_len; + + MD_ASSERT(CH(beg) == _T('<')); + + /* The code should correspond to this regexp: + /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+ + @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? + (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ + */ + + /* Username (before '@'). */ + while(off < max_end && (ISALNUM(off) || ISANYOF(off, _T(".!#$%&'*+/=?^_`{|}~-")))) + off++; + if(off <= beg+1) + return FALSE; + + /* '@' */ + if(off >= max_end || CH(off) != _T('@')) + return FALSE; + off++; + + /* Labels delimited with '.'; each label is sequence of 1 - 63 alnum + * characters or '-', but '-' is not allowed as first or last char. */ + label_len = 0; + while(off < max_end) { + if(ISALNUM(off)) + label_len++; + else if(CH(off) == _T('-') && label_len > 0) + label_len++; + else if(CH(off) == _T('.') && label_len > 0 && CH(off-1) != _T('-')) + label_len = 0; + else + break; + + if(label_len > 63) + return FALSE; + + off++; + } + + if(label_len <= 0 || off >= max_end || CH(off) != _T('>') || CH(off-1) == _T('-')) + return FALSE; + + *p_end = off+1; + return TRUE; +} + +static int +md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto) +{ + if(md_is_autolink_uri(ctx, beg, max_end, p_end)) { + *p_missing_mailto = FALSE; + return TRUE; + } + + if(md_is_autolink_email(ctx, beg, max_end, p_end)) { + *p_missing_mailto = TRUE; + return TRUE; + } + + return FALSE; +} + +static int +md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode) +{ + int i; + int ret = 0; + MD_MARK* mark; + OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 }; + int codespan_scanned_till_paragraph_end = FALSE; + + for(i = 0; i < n_lines; i++) { + const MD_LINE* line = &lines[i]; + OFF off = line->beg; + OFF line_end = line->end; + + while(TRUE) { + CHAR ch; + +#ifdef MD4C_USE_UTF16 + /* For UTF-16, mark_char_map[] covers only ASCII. */ + #define IS_MARK_CHAR(off) ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map)) && \ + (ctx->mark_char_map[(unsigned char) CH(off)])) +#else + /* For 8-bit encodings, mark_char_map[] covers all 256 elements. */ + #define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)]) +#endif + + /* Optimization: Use some loop unrolling. */ + while(off + 3 < line_end && !IS_MARK_CHAR(off+0) && !IS_MARK_CHAR(off+1) + && !IS_MARK_CHAR(off+2) && !IS_MARK_CHAR(off+3)) + off += 4; + while(off < line_end && !IS_MARK_CHAR(off+0)) + off++; + + if(off >= line_end) + break; + + ch = CH(off); + + /* A backslash escape. + * It can go beyond line->end as it may involve escaped new + * line to form a hard break. */ + if(ch == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) { + /* Hard-break cannot be on the last line of the block. */ + if(!ISNEWLINE(off+1) || i+1 < n_lines) + PUSH_MARK(ch, off, off+2, MD_MARK_RESOLVED); + off += 2; + continue; + } + + /* A potential (string) emphasis start/end. */ + if(ch == _T('*') || ch == _T('_')) { + OFF tmp = off+1; + int left_level; /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */ + int right_level; /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */ + + while(tmp < line_end && CH(tmp) == ch) + tmp++; + + if(off == line->beg || ISUNICODEWHITESPACEBEFORE(off)) + left_level = 0; + else if(ISUNICODEPUNCTBEFORE(off)) + left_level = 1; + else + left_level = 2; + + if(tmp == line_end || ISUNICODEWHITESPACE(tmp)) + right_level = 0; + else if(ISUNICODEPUNCT(tmp)) + right_level = 1; + else + right_level = 2; + + /* Intra-word underscore doesn't have special meaning. */ + if(ch == _T('_') && left_level == 2 && right_level == 2) { + left_level = 0; + right_level = 0; + } + + if(left_level != 0 || right_level != 0) { + unsigned flags = 0; + + if(left_level > 0 && left_level >= right_level) + flags |= MD_MARK_POTENTIAL_CLOSER; + if(right_level > 0 && right_level >= left_level) + flags |= MD_MARK_POTENTIAL_OPENER; + if(left_level == 2 && right_level == 2) + flags |= MD_MARK_EMPH_INTRAWORD; + + /* For "the rule of three" we need to remember the original + * size of the mark (modulo three), before we potentially + * split the mark when being later resolved partially by some + * shorter closer. */ + switch((tmp - off) % 3) { + case 0: flags |= MD_MARK_EMPH_MOD3_0; break; + case 1: flags |= MD_MARK_EMPH_MOD3_1; break; + case 2: flags |= MD_MARK_EMPH_MOD3_2; break; + } + + PUSH_MARK(ch, off, tmp, flags); + + /* During resolving, multiple asterisks may have to be + * split into independent span start/ends. Consider e.g. + * "**foo* bar*". Therefore we push also some empty dummy + * marks to have enough space for that. */ + off++; + while(off < tmp) { + PUSH_MARK('D', off, off, 0); + off++; + } + continue; + } + + off = tmp; + continue; + } + + /* A potential code span start/end. */ + if(ch == _T('`')) { + OFF opener_beg, opener_end; + OFF closer_beg, closer_end; + int is_code_span; + + is_code_span = md_is_code_span(ctx, lines + i, n_lines - i, off, + &opener_beg, &opener_end, &closer_beg, &closer_end, + codespan_last_potential_closers, + &codespan_scanned_till_paragraph_end); + if(is_code_span) { + PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED); + PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + + off = closer_end; + + /* Advance the current line accordingly. */ + while(off > line_end) { + i++; + line++; + line_end = line->end; + } + continue; + } + + off = opener_end; + continue; + } + + /* A potential entity start. */ + if(ch == _T('&')) { + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER); + off++; + continue; + } + + /* A potential entity end. */ + if(ch == _T(';')) { + /* We surely cannot be entity unless the previous mark is '&'. */ + if(ctx->n_marks > 0 && ctx->marks[ctx->n_marks-1].ch == _T('&')) + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER); + + off++; + continue; + } + + /* A potential autolink or raw HTML start/end. */ + if(ch == _T('<')) { + int is_autolink; + OFF autolink_end; + int missing_mailto; + + if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) { + int is_html; + OFF html_end; + + /* Given the nature of the raw HTML, we have to recognize + * it here. Doing so later in md_analyze_lt_gt() could + * open can of worms of quadratic complexity. */ + is_html = md_is_html_any(ctx, lines + i, n_lines - i, off, + lines[n_lines-1].end, &html_end); + if(is_html) { + PUSH_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED); + PUSH_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + off = html_end; + + /* Advance the current line accordingly. */ + while(off > line_end) { + i++; + line++; + line_end = line->end; + } + continue; + } + } + + is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end, + &autolink_end, &missing_mailto); + if(is_autolink) { + PUSH_MARK((missing_mailto ? _T('@') : _T('<')), off, off+1, + MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK); + PUSH_MARK(_T('>'), autolink_end-1, autolink_end, + MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + off = autolink_end; + continue; + } + + off++; + continue; + } + + /* A potential link or its part. */ + if(ch == _T('[') || (ch == _T('!') && off+1 < line_end && CH(off+1) == _T('['))) { + OFF tmp = (ch == _T('[') ? off+1 : off+2); + PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER); + off = tmp; + /* Two dummies to make enough place for data we need if it is + * a link. */ + PUSH_MARK('D', off, off, 0); + PUSH_MARK('D', off, off, 0); + continue; + } + if(ch == _T(']')) { + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER); + off++; + continue; + } + + /* A potential permissive e-mail autolink. */ + if(ch == _T('@')) { + if(line->beg + 1 <= off && ISALNUM(off-1) && + off + 3 < line->end && ISALNUM(off+1)) + { + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER); + /* Push a dummy as a reserve for a closer. */ + PUSH_MARK('D', off, off, 0); + } + + off++; + continue; + } + + /* A potential permissive URL autolink. */ + if(ch == _T(':')) { + static struct { + const CHAR* scheme; + SZ scheme_size; + const CHAR* suffix; + SZ suffix_size; + } scheme_map[] = { + /* In the order from the most frequently used, arguably. */ + { _T("http"), 4, _T("//"), 2 }, + { _T("https"), 5, _T("//"), 2 }, + { _T("ftp"), 3, _T("//"), 2 } + }; + int scheme_index; + + for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) { + const CHAR* scheme = scheme_map[scheme_index].scheme; + const SZ scheme_size = scheme_map[scheme_index].scheme_size; + const CHAR* suffix = scheme_map[scheme_index].suffix; + const SZ suffix_size = scheme_map[scheme_index].suffix_size; + + if(line->beg + scheme_size <= off && md_ascii_eq(STR(off-scheme_size), scheme, scheme_size) && + (line->beg + scheme_size == off || ISWHITESPACE(off-scheme_size-1) || ISANYOF(off-scheme_size-1, _T("*_~(["))) && + off + 1 + suffix_size < line->end && md_ascii_eq(STR(off+1), suffix, suffix_size)) + { + PUSH_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER); + /* Push a dummy as a reserve for a closer. */ + PUSH_MARK('D', off, off, 0); + off += 1 + suffix_size; + continue; + } + } + + off++; + continue; + } + + /* A potential permissive WWW autolink. */ + if(ch == _T('.')) { + if(line->beg + 3 <= off && md_ascii_eq(STR(off-3), _T("www"), 3) && + (line->beg + 3 == off || ISWHITESPACE(off-4) || ISANYOF(off-4, _T("*_~(["))) && + off + 1 < line_end) + { + PUSH_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER); + /* Push a dummy as a reserve for a closer. */ + PUSH_MARK('D', off, off, 0); + off++; + continue; + } + + off++; + continue; + } + + /* A potential table cell boundary or wiki link label delimiter. */ + if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T('|')) { + PUSH_MARK(ch, off, off+1, 0); + off++; + continue; + } + + /* A potential strikethrough start/end. */ + if(ch == _T('~')) { + OFF tmp = off+1; + + while(tmp < line_end && CH(tmp) == _T('~')) + tmp++; + + if(tmp - off < 3) { + unsigned flags = 0; + + if(tmp < line_end && !ISUNICODEWHITESPACE(tmp)) + flags |= MD_MARK_POTENTIAL_OPENER; + if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off)) + flags |= MD_MARK_POTENTIAL_CLOSER; + if(flags != 0) + PUSH_MARK(ch, off, tmp, flags); + } + + off = tmp; + continue; + } + + /* A potential equation start/end */ + if(ch == _T('$')) { + /* We can have at most two consecutive $ signs, + * where two dollar signs signify a display equation. */ + OFF tmp = off+1; + + while(tmp < line_end && CH(tmp) == _T('$')) + tmp++; + + if (tmp - off <= 2) + PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER); + off = tmp; + continue; + } + + /* Turn non-trivial whitespace into single space. */ + if(ISWHITESPACE_(ch)) { + OFF tmp = off+1; + + while(tmp < line_end && ISWHITESPACE(tmp)) + tmp++; + + if(tmp - off > 1 || ch != _T(' ')) + PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED); + + off = tmp; + continue; + } + + /* NULL character. */ + if(ch == _T('\0')) { + PUSH_MARK(ch, off, off+1, MD_MARK_RESOLVED); + off++; + continue; + } + + off++; + } + } + + /* Add a dummy mark at the end of the mark vector to simplify + * process_inlines(). */ + PUSH_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED); + +abort: + return ret; +} + +static void +md_analyze_bracket(MD_CTX* ctx, int mark_index) +{ + /* We cannot really resolve links here as for that we would need + * more context. E.g. a following pair of brackets (reference link), + * or enclosing pair of brackets (if the inner is the link, the outer + * one cannot be.) + * + * Therefore we here only construct a list of resolved '[' ']' pairs + * ordered by position of the closer. This allows ur to analyze what is + * or is not link in the right order, from inside to outside in case + * of nested brackets. + * + * The resolving itself is deferred into md_resolve_links(). + */ + + MD_MARK* mark = &ctx->marks[mark_index]; + + if(mark->flags & MD_MARK_POTENTIAL_OPENER) { + md_mark_chain_append(ctx, &BRACKET_OPENERS, mark_index); + return; + } + + if(BRACKET_OPENERS.tail >= 0) { + /* Pop the opener from the chain. */ + int opener_index = BRACKET_OPENERS.tail; + MD_MARK* opener = &ctx->marks[opener_index]; + if(opener->prev >= 0) + ctx->marks[opener->prev].next = -1; + else + BRACKET_OPENERS.head = -1; + BRACKET_OPENERS.tail = opener->prev; + + /* Interconnect the opener and closer. */ + opener->next = mark_index; + mark->prev = opener_index; + + /* Add the pair into chain of potential links for md_resolve_links(). + * Note we misuse opener->prev for this as opener->next points to its + * closer. */ + if(ctx->unresolved_link_tail >= 0) + ctx->marks[ctx->unresolved_link_tail].prev = opener_index; + else + ctx->unresolved_link_head = opener_index; + ctx->unresolved_link_tail = opener_index; + opener->prev = -1; + } +} + +/* Forward declaration. */ +static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + int mark_beg, int mark_end); + +static int +md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + int opener_index = ctx->unresolved_link_head; + OFF last_link_beg = 0; + OFF last_link_end = 0; + OFF last_img_beg = 0; + OFF last_img_end = 0; + + while(opener_index >= 0) { + MD_MARK* opener = &ctx->marks[opener_index]; + int closer_index = opener->next; + MD_MARK* closer = &ctx->marks[closer_index]; + int next_index = opener->prev; + MD_MARK* next_opener; + MD_MARK* next_closer; + MD_LINK_ATTR attr; + int is_link = FALSE; + + if(next_index >= 0) { + next_opener = &ctx->marks[next_index]; + next_closer = &ctx->marks[next_opener->next]; + } else { + next_opener = NULL; + next_closer = NULL; + } + + /* If nested ("[ [ ] ]"), we need to make sure that: + * - The outer does not end inside of (...) belonging to the inner. + * - The outer cannot be link if the inner is link (i.e. not image). + * + * (Note we here analyze from inner to outer as the marks are ordered + * by closer->beg.) + */ + if((opener->beg < last_link_beg && closer->end < last_link_end) || + (opener->beg < last_img_beg && closer->end < last_img_end) || + (opener->beg < last_link_end && opener->ch == '[')) + { + opener_index = next_index; + continue; + } + + /* Recognize and resolve wiki links. + * Wiki-links maybe '[[destination]]' or '[[destination|label]]'. + */ + if ((ctx->parser.flags & MD_FLAG_WIKILINKS) && + (opener->end - opener->beg == 1) && /* not image */ + next_opener != NULL && /* double '[' opener */ + next_opener->ch == '[' && + (next_opener->beg == opener->beg - 1) && + (next_opener->end - next_opener->beg == 1) && + next_closer != NULL && /* double ']' closer */ + next_closer->ch == ']' && + (next_closer->beg == closer->beg + 1) && + (next_closer->end - next_closer->beg == 1)) + { + MD_MARK* delim = NULL; + int delim_index; + OFF dest_beg, dest_end; + + is_link = TRUE; + + /* We don't allow destination to be longer than 100 characters. + * Lets scan to see whether there is '|'. (If not then the whole + * wiki-link has to be below the 100 characters.) */ + delim_index = opener_index + 1; + while(delim_index < closer_index) { + MD_MARK* m = &ctx->marks[delim_index]; + if(m->ch == '|') { + delim = m; + break; + } + if(m->ch != 'D' && m->beg - opener->end > 100) + break; + delim_index++; + } + dest_beg = opener->end; + dest_end = (delim != NULL) ? delim->beg : closer->beg; + if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100) + is_link = FALSE; + + /* There may not be any new line in the destination. */ + if(is_link) { + OFF off; + for(off = dest_beg; off < dest_end; off++) { + if(ISNEWLINE(off)) { + is_link = FALSE; + break; + } + } + } + + if(is_link) { + if(delim != NULL) { + if(delim->end < closer->beg) { + opener->end = delim->beg; + } else { + /* The pipe is just before the closer: [[foo|]] */ + closer->beg = delim->beg; + delim = NULL; + } + } + + opener->beg = next_opener->beg; + opener->next = closer_index; + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + + closer->end = next_closer->end; + closer->prev = opener_index; + closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; + + last_link_beg = opener->beg; + last_link_end = closer->end; + + if(delim != NULL) { + delim->flags |= MD_MARK_RESOLVED; + md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL); + md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index); + } else { + md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL); + } + + opener_index = next_opener->prev; + continue; + } + } + + if(next_opener != NULL && next_opener->beg == closer->end) { + if(next_closer->beg > closer->end + 1) { + /* Might be full reference link. */ + is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr); + } else { + /* Might be shortcut reference link. */ + is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr); + } + + if(is_link < 0) + return -1; + + if(is_link) { + /* Eat the 2nd "[...]". */ + closer->end = next_closer->end; + } + } else { + if(closer->end < ctx->size && CH(closer->end) == _T('(')) { + /* Might be inline link. */ + OFF inline_link_end = UINT_MAX; + + is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr); + if(is_link < 0) + return -1; + + /* Check the closing ')' is not inside an already resolved range + * (i.e. a range with a higher priority), e.g. a code span. */ + if(is_link) { + int i = closer_index + 1; + + while(i < ctx->n_marks) { + MD_MARK* mark = &ctx->marks[i]; + + if(mark->beg >= inline_link_end) + break; + if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) { + if(ctx->marks[mark->next].beg >= inline_link_end) { + /* Cancel the link status. */ + if(attr.title_needs_free) + free(attr.title); + is_link = FALSE; + break; + } + + i = mark->next + 1; + } else { + i++; + } + } + } + + if(is_link) { + /* Eat the "(...)" */ + closer->end = inline_link_end; + } + } + + if(!is_link) { + /* Might be collapsed reference link. */ + is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr); + if(is_link < 0) + return -1; + } + } + + if(is_link) { + /* Resolve the brackets as a link. */ + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; + + /* If it is a link, we store the destination and title in the two + * dummy marks after the opener. */ + MD_ASSERT(ctx->marks[opener_index+1].ch == 'D'); + ctx->marks[opener_index+1].beg = attr.dest_beg; + ctx->marks[opener_index+1].end = attr.dest_end; + + MD_ASSERT(ctx->marks[opener_index+2].ch == 'D'); + md_mark_store_ptr(ctx, opener_index+2, attr.title); + /* The title might or might not have been allocated for us. */ + if(attr.title_needs_free) + md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+2); + ctx->marks[opener_index+2].prev = attr.title_size; + + if(opener->ch == '[') { + last_link_beg = opener->beg; + last_link_end = closer->end; + } else { + last_img_beg = opener->beg; + last_img_end = closer->end; + } + + md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index); + } + + opener_index = next_index; + } + + return 0; +} + +/* Analyze whether the mark '&' starts a HTML entity. + * If so, update its flags as well as flags of corresponding closer ';'. */ +static void +md_analyze_entity(MD_CTX* ctx, int mark_index) +{ + MD_MARK* opener = &ctx->marks[mark_index]; + MD_MARK* closer; + OFF off; + + /* Cannot be entity if there is no closer as the next mark. + * (Any other mark between would mean strange character which cannot be + * part of the entity. + * + * So we can do all the work on '&' and do not call this later for the + * closing mark ';'. + */ + if(mark_index + 1 >= ctx->n_marks) + return; + closer = &ctx->marks[mark_index+1]; + if(closer->ch != ';') + return; + + if(md_is_entity(ctx, opener->beg, closer->end, &off)) { + MD_ASSERT(off == closer->end); + + md_resolve_range(ctx, NULL, mark_index, mark_index+1); + opener->end = closer->end; + } +} + +static void +md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + mark->flags |= MD_MARK_RESOLVED; + + md_mark_chain_append(ctx, &TABLECELLBOUNDARIES, mark_index); + ctx->n_table_cell_boundaries++; +} + +/* Split a longer mark into two. The new mark takes the given count of + * characters. May only be called if an adequate number of dummy 'D' marks + * follows. + */ +static int +md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + int new_mark_index = mark_index + (mark->end - mark->beg - n); + MD_MARK* dummy = &ctx->marks[new_mark_index]; + + MD_ASSERT(mark->end - mark->beg > n); + MD_ASSERT(dummy->ch == 'D'); + + memcpy(dummy, mark, sizeof(MD_MARK)); + mark->end -= n; + dummy->beg = mark->end; + + return new_mark_index; +} + +static void +md_analyze_emph(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index); + + /* If we can be a closer, try to resolve with the preceding opener. */ + if(mark->flags & MD_MARK_POTENTIAL_CLOSER) { + MD_MARK* opener = NULL; + int opener_index; + + if(mark->ch == _T('*')) { + MD_MARKCHAIN* opener_chains[6]; + int i, n_opener_chains; + unsigned flags = mark->flags; + + /* Apply the "rule of three". */ + n_opener_chains = 0; + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0; + if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2) + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1; + if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1) + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2; + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0; + if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2) + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1; + if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1) + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2; + + /* Opener is the most recent mark from the allowed chains. */ + for(i = 0; i < n_opener_chains; i++) { + if(opener_chains[i]->tail >= 0) { + int tmp_index = opener_chains[i]->tail; + MD_MARK* tmp_mark = &ctx->marks[tmp_index]; + if(opener == NULL || tmp_mark->end > opener->end) { + opener_index = tmp_index; + opener = tmp_mark; + } + } + } + } else { + /* Simple emph. mark */ + if(chain->tail >= 0) { + opener_index = chain->tail; + opener = &ctx->marks[opener_index]; + } + } + + /* Resolve, if we have found matching opener. */ + if(opener != NULL) { + SZ opener_size = opener->end - opener->beg; + SZ closer_size = mark->end - mark->beg; + MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index); + + if(opener_size > closer_size) { + opener_index = md_split_emph_mark(ctx, opener_index, closer_size); + md_mark_chain_append(ctx, opener_chain, opener_index); + } else if(opener_size < closer_size) { + md_split_emph_mark(ctx, mark_index, closer_size - opener_size); + } + + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING); + md_resolve_range(ctx, opener_chain, opener_index, mark_index); + return; + } + } + + /* If we could not resolve as closer, we may be yet be an opener. */ + if(mark->flags & MD_MARK_POTENTIAL_OPENER) + md_mark_chain_append(ctx, chain, mark_index); +} + +static void +md_analyze_tilde(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index); + + /* We attempt to be Github Flavored Markdown compatible here. GFM accepts + * only tildes sequences of length 1 and 2, and the length of the opener + * and closer has to match. */ + + if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && chain->head >= 0) { + int opener_index = chain->head; + + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING); + md_resolve_range(ctx, chain, opener_index, mark_index); + return; + } + + if(mark->flags & MD_MARK_POTENTIAL_OPENER) + md_mark_chain_append(ctx, chain, mark_index); +} + +static void +md_analyze_dollar(MD_CTX* ctx, int mark_index) +{ + /* This should mimic the way inline equations work in LaTeX, so there + * can only ever be one item in the chain (i.e. the dollars can't be + * nested). This is basically the same as the md_analyze_tilde function, + * except that we require matching openers and closers to be of the same + * length. + * + * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */ + if(DOLLAR_OPENERS.head >= 0) { + /* If the potential closer has a non-matching number of $, discard */ + MD_MARK* open = &ctx->marks[DOLLAR_OPENERS.head]; + MD_MARK* close = &ctx->marks[mark_index]; + + int opener_index = DOLLAR_OPENERS.head; + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL); + if (open->end - open->beg == close->end - close->beg) { + /* We are the matching closer */ + md_resolve_range(ctx, &DOLLAR_OPENERS, opener_index, mark_index); + } else { + /* We don't match the opener, so discard old opener and insert as opener */ + md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index); + } + } else { + /* No unmatched openers, so we are opener */ + md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index); + } +} + +static void +md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index) +{ + MD_MARK* opener = &ctx->marks[mark_index]; + int closer_index = mark_index + 1; + MD_MARK* closer = &ctx->marks[closer_index]; + MD_MARK* next_resolved_mark; + OFF off = opener->end; + int n_dots = FALSE; + int has_underscore_in_last_seg = FALSE; + int has_underscore_in_next_to_last_seg = FALSE; + int n_opened_parenthesis = 0; + + /* Check for domain. */ + while(off < ctx->size) { + if(ISALNUM(off) || CH(off) == _T('-')) { + off++; + } else if(CH(off) == _T('.')) { + /* We must see at least one period. */ + n_dots++; + has_underscore_in_next_to_last_seg = has_underscore_in_last_seg; + has_underscore_in_last_seg = FALSE; + off++; + } else if(CH(off) == _T('_')) { + /* No underscore may be present in the last two domain segments. */ + has_underscore_in_last_seg = TRUE; + off++; + } else { + break; + } + } + if(off > opener->end && CH(off-1) == _T('.')) { + off--; + n_dots--; + } + if(off <= opener->end || n_dots == 0 || has_underscore_in_next_to_last_seg || has_underscore_in_last_seg) + return; + + /* Check for path. */ + next_resolved_mark = closer + 1; + while(next_resolved_mark->ch == 'D' || !(next_resolved_mark->flags & MD_MARK_RESOLVED)) + next_resolved_mark++; + while(off < next_resolved_mark->beg && CH(off) != _T('<') && !ISWHITESPACE(off) && !ISNEWLINE(off)) { + /* Parenthesis must be balanced. */ + if(CH(off) == _T('(')) { + n_opened_parenthesis++; + } else if(CH(off) == _T(')')) { + if(n_opened_parenthesis > 0) + n_opened_parenthesis--; + else + break; + } + + off++; + } + /* These cannot be last char In such case they are more likely normal + * punctuation. */ + if(ISANYOF(off-1, _T("?!.,:*_~"))) + off--; + + /* Ok. Lets call it auto-link. Adapt opener and create closer to zero + * length so all the contents becomes the link text. */ + MD_ASSERT(closer->ch == 'D'); + opener->end = opener->beg; + closer->ch = opener->ch; + closer->beg = off; + closer->end = off; + md_resolve_range(ctx, NULL, mark_index, closer_index); +} + +/* The permissive autolinks do not have to be enclosed in '<' '>' but we + * instead impose stricter rules what is understood as an e-mail address + * here. Actually any non-alphanumeric characters with exception of '.' + * are prohibited both in username and after '@'. */ +static void +md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index) +{ + MD_MARK* opener = &ctx->marks[mark_index]; + int closer_index; + MD_MARK* closer; + OFF beg = opener->beg; + OFF end = opener->end; + int dot_count = 0; + + MD_ASSERT(CH(beg) == _T('@')); + + /* Scan for name before '@'. */ + while(beg > 0 && (ISALNUM(beg-1) || ISANYOF(beg-1, _T(".-_+")))) + beg--; + + /* Scan for domain after '@'. */ + while(end < ctx->size && (ISALNUM(end) || ISANYOF(end, _T(".-_")))) { + if(CH(end) == _T('.')) + dot_count++; + end++; + } + if(CH(end-1) == _T('.')) { /* Final '.' not part of it. */ + dot_count--; + end--; + } + else if(ISANYOF2(end-1, _T('-'), _T('_'))) /* These are forbidden at the end. */ + return; + if(CH(end-1) == _T('@') || dot_count == 0) + return; + + /* Ok. Lets call it auto-link. Adapt opener and create closer to zero + * length so all the contents becomes the link text. */ + closer_index = mark_index + 1; + closer = &ctx->marks[closer_index]; + MD_ASSERT(closer->ch == 'D'); + + opener->beg = beg; + opener->end = beg; + closer->ch = opener->ch; + closer->beg = end; + closer->end = end; + md_resolve_range(ctx, NULL, mark_index, closer_index); +} + +static inline void +md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + int mark_beg, int mark_end, const CHAR* mark_chars) +{ + int i = mark_beg; + + while(i < mark_end) { + MD_MARK* mark = &ctx->marks[i]; + + /* Skip resolved spans. */ + if(mark->flags & MD_MARK_RESOLVED) { + if(mark->flags & MD_MARK_OPENER) { + MD_ASSERT(i < mark->next); + i = mark->next + 1; + } else { + i++; + } + continue; + } + + /* Skip marks we do not want to deal with. */ + if(!ISANYOF_(mark->ch, mark_chars)) { + i++; + continue; + } + + /* Analyze the mark. */ + switch(mark->ch) { + case '[': /* Pass through. */ + case '!': /* Pass through. */ + case ']': md_analyze_bracket(ctx, i); break; + case '&': md_analyze_entity(ctx, i); break; + case '|': md_analyze_table_cell_boundary(ctx, i); break; + case '_': /* Pass through. */ + case '*': md_analyze_emph(ctx, i); break; + case '~': md_analyze_tilde(ctx, i); break; + case '$': md_analyze_dollar(ctx, i); break; + case '.': /* Pass through. */ + case ':': md_analyze_permissive_url_autolink(ctx, i); break; + case '@': md_analyze_permissive_email_autolink(ctx, i); break; + } + + i++; + } +} + +/* Analyze marks (build ctx->marks). */ +static int +md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode) +{ + int ret; + + /* Reset the previously collected stack of marks. */ + ctx->n_marks = 0; + + /* Collect all marks. */ + MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode)); + + /* We analyze marks in few groups to handle their precedence. */ + /* (1) Entities; code spans; autolinks; raw HTML. */ + md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("&")); + + /* (2) Links. */ + md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!")); + MD_CHECK(md_resolve_links(ctx, lines, n_lines)); + BRACKET_OPENERS.head = -1; + BRACKET_OPENERS.tail = -1; + ctx->unresolved_link_head = -1; + ctx->unresolved_link_tail = -1; + + if(table_mode) { + /* (3) Analyze table cell boundaries. + * Note we reset TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(), + * not after, because caller may need it. */ + MD_ASSERT(n_lines == 1); + TABLECELLBOUNDARIES.head = -1; + TABLECELLBOUNDARIES.tail = -1; + ctx->n_table_cell_boundaries = 0; + md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("|")); + return ret; + } + + /* (4) Emphasis and strong emphasis; permissive autolinks. */ + md_analyze_link_contents(ctx, lines, n_lines, 0, ctx->n_marks); + +abort: + return ret; +} + +static void +md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + int mark_beg, int mark_end) +{ + int i; + + md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:.")); + + for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) { + ctx->mark_chains[i].head = -1; + ctx->mark_chains[i].tail = -1; + } +} + +static int +md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type, + const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest, + const CHAR* title, SZ title_size) +{ + MD_ATTRIBUTE_BUILD href_build = { 0 }; + MD_ATTRIBUTE_BUILD title_build = { 0 }; + MD_SPAN_A_DETAIL det; + int ret = 0; + + /* Note we here rely on fact that MD_SPAN_A_DETAIL and + * MD_SPAN_IMG_DETAIL are binary-compatible. */ + memset(&det, 0, sizeof(MD_SPAN_A_DETAIL)); + MD_CHECK(md_build_attribute(ctx, dest, dest_size, + (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0), + &det.href, &href_build)); + MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build)); + + if(enter) + MD_ENTER_SPAN(type, &det); + else + MD_LEAVE_SPAN(type, &det); + +abort: + md_free_attribute(ctx, &href_build); + md_free_attribute(ctx, &title_build); + return ret; +} + +static int +md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size) +{ + MD_ATTRIBUTE_BUILD target_build = { 0 }; + MD_SPAN_WIKILINK_DETAIL det; + int ret = 0; + + memset(&det, 0, sizeof(MD_SPAN_WIKILINK_DETAIL)); + MD_CHECK(md_build_attribute(ctx, target, target_size, 0, &det.target, &target_build)); + + if (enter) + MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det); + else + MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det); + +abort: + md_free_attribute(ctx, &target_build); + return ret; +} + + +/* Render the output, accordingly to the analyzed ctx->marks. */ +static int +md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + MD_TEXTTYPE text_type; + const MD_LINE* line = lines; + MD_MARK* prev_mark = NULL; + MD_MARK* mark; + OFF off = lines[0].beg; + OFF end = lines[n_lines-1].end; + int enforce_hardbreak = 0; + int ret = 0; + + /* Find first resolved mark. Note there is always at least one resolved + * mark, the dummy last one after the end of the latest line we actually + * never really reach. This saves us of a lot of special checks and cases + * in this function. */ + mark = ctx->marks; + while(!(mark->flags & MD_MARK_RESOLVED)) + mark++; + + text_type = MD_TEXT_NORMAL; + + while(1) { + /* Process the text up to the next mark or end-of-line. */ + OFF tmp = (line->end < mark->beg ? line->end : mark->beg); + if(tmp > off) { + MD_TEXT(text_type, STR(off), tmp - off); + off = tmp; + } + + /* If reached the mark, process it and move to next one. */ + if(off >= mark->beg) { + switch(mark->ch) { + case '\\': /* Backslash escape. */ + if(ISNEWLINE(mark->beg+1)) + enforce_hardbreak = 1; + else + MD_TEXT(text_type, STR(mark->beg+1), 1); + break; + + case ' ': /* Non-trivial space. */ + MD_TEXT(text_type, _T(" "), 1); + break; + + case '`': /* Code span. */ + if(mark->flags & MD_MARK_OPENER) { + MD_ENTER_SPAN(MD_SPAN_CODE, NULL); + text_type = MD_TEXT_CODE; + } else { + MD_LEAVE_SPAN(MD_SPAN_CODE, NULL); + text_type = MD_TEXT_NORMAL; + } + break; + + case '_': /* Underline (or emphasis if we fall through). */ + if(ctx->parser.flags & MD_FLAG_UNDERLINE) { + if(mark->flags & MD_MARK_OPENER) { + while(off < mark->end) { + MD_ENTER_SPAN(MD_SPAN_U, NULL); + off++; + } + } else { + while(off < mark->end) { + MD_LEAVE_SPAN(MD_SPAN_U, NULL); + off++; + } + } + break; + } + /* Fall though. */ + + case '*': /* Emphasis, strong emphasis. */ + if(mark->flags & MD_MARK_OPENER) { + if((mark->end - off) % 2) { + MD_ENTER_SPAN(MD_SPAN_EM, NULL); + off++; + } + while(off + 1 < mark->end) { + MD_ENTER_SPAN(MD_SPAN_STRONG, NULL); + off += 2; + } + } else { + while(off + 1 < mark->end) { + MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL); + off += 2; + } + if((mark->end - off) % 2) { + MD_LEAVE_SPAN(MD_SPAN_EM, NULL); + off++; + } + } + break; + + case '~': + if(mark->flags & MD_MARK_OPENER) + MD_ENTER_SPAN(MD_SPAN_DEL, NULL); + else + MD_LEAVE_SPAN(MD_SPAN_DEL, NULL); + break; + + case '$': + if(mark->flags & MD_MARK_OPENER) { + MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL); + text_type = MD_TEXT_LATEXMATH; + } else { + MD_LEAVE_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL); + text_type = MD_TEXT_NORMAL; + } + break; + + case '[': /* Link, wiki link, image. */ + case '!': + case ']': + { + const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]); + const MD_MARK* closer = &ctx->marks[opener->next]; + const MD_MARK* dest_mark; + const MD_MARK* title_mark; + + if ((opener->ch == '[' && closer->ch == ']') && + opener->end - opener->beg >= 2 && + closer->end - closer->beg >= 2) + { + int has_label = (opener->end - opener->beg > 2); + SZ target_sz; + + if(has_label) + target_sz = opener->end - (opener->beg+2); + else + target_sz = closer->beg - opener->end; + + MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'), + has_label ? STR(opener->beg+2) : STR(opener->end), + target_sz)); + + break; + } + + dest_mark = opener+1; + MD_ASSERT(dest_mark->ch == 'D'); + title_mark = opener+2; + MD_ASSERT(title_mark->ch == 'D'); + + MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'), + (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A), + STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE, + md_mark_get_ptr(ctx, title_mark - ctx->marks), title_mark->prev)); + + /* link/image closer may span multiple lines. */ + if(mark->ch == ']') { + while(mark->end > line->end) + line++; + } + + break; + } + + case '<': + case '>': /* Autolink or raw HTML. */ + if(!(mark->flags & MD_MARK_AUTOLINK)) { + /* Raw HTML. */ + if(mark->flags & MD_MARK_OPENER) + text_type = MD_TEXT_HTML; + else + text_type = MD_TEXT_NORMAL; + break; + } + /* Pass through, if auto-link. */ + + case '@': /* Permissive e-mail autolink. */ + case ':': /* Permissive URL autolink. */ + case '.': /* Permissive WWW autolink. */ + { + MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]); + MD_MARK* closer = &ctx->marks[opener->next]; + const CHAR* dest = STR(opener->end); + SZ dest_size = closer->beg - opener->end; + + /* For permissive auto-links we do not know closer mark + * position at the time of md_collect_marks(), therefore + * it can be out-of-order in ctx->marks[]. + * + * With this flag, we make sure that we output the closer + * only if we processed the opener. */ + if(mark->flags & MD_MARK_OPENER) + closer->flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK; + + if(opener->ch == '@' || opener->ch == '.') { + dest_size += 7; + MD_TEMP_BUFFER(dest_size * sizeof(CHAR)); + memcpy(ctx->buffer, + (opener->ch == '@' ? _T("mailto:") : _T("http://")), + 7 * sizeof(CHAR)); + memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR)); + dest = ctx->buffer; + } + + if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK) + MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER), + MD_SPAN_A, dest, dest_size, TRUE, NULL, 0)); + break; + } + + case '&': /* Entity. */ + MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg); + break; + + case '\0': + MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1); + break; + + case 127: + goto abort; + } + + off = mark->end; + + /* Move to next resolved mark. */ + prev_mark = mark; + mark++; + while(!(mark->flags & MD_MARK_RESOLVED) || mark->beg < off) + mark++; + } + + /* If reached end of line, move to next one. */ + if(off >= line->end) { + /* If it is the last line, we are done. */ + if(off >= end) + break; + + if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) { + OFF tmp; + + MD_ASSERT(prev_mark != NULL); + MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$') && (prev_mark->flags & MD_MARK_OPENER)); + MD_ASSERT(ISANYOF2_(mark->ch, '`', '$') && (mark->flags & MD_MARK_CLOSER)); + + /* Inside a code span, trailing line whitespace has to be + * outputted. */ + tmp = off; + while(off < ctx->size && ISBLANK(off)) + off++; + if(off > tmp) + MD_TEXT(text_type, STR(tmp), off-tmp); + + /* and new lines are transformed into single spaces. */ + if(prev_mark->end < off && off < mark->beg) + MD_TEXT(text_type, _T(" "), 1); + } else if(text_type == MD_TEXT_HTML) { + /* Inside raw HTML, we output the new line verbatim, including + * any trailing spaces. */ + OFF tmp = off; + + while(tmp < end && ISBLANK(tmp)) + tmp++; + if(tmp > off) + MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off); + MD_TEXT(MD_TEXT_HTML, _T("\n"), 1); + } else { + /* Output soft or hard line break. */ + MD_TEXTTYPE break_type = MD_TEXT_SOFTBR; + + if(text_type == MD_TEXT_NORMAL) { + if(enforce_hardbreak) + break_type = MD_TEXT_BR; + else if((CH(line->end) == _T(' ') && CH(line->end+1) == _T(' '))) + break_type = MD_TEXT_BR; + } + + MD_TEXT(break_type, _T("\n"), 1); + } + + /* Move to the next line. */ + line++; + off = line->beg; + + enforce_hardbreak = 0; + } + } + +abort: + return ret; +} + + +/*************************** + *** Processing Tables *** + ***************************/ + +static void +md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align) +{ + static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER }; + OFF off = beg; + + while(n_align > 0) { + int index = 0; /* index into align_map[] */ + + while(CH(off) != _T('-')) + off++; + if(off > beg && CH(off-1) == _T(':')) + index |= 1; + while(off < end && CH(off) == _T('-')) + off++; + if(off < end && CH(off) == _T(':')) + index |= 2; + + *align = align_map[index]; + align++; + n_align--; + } + +} + +/* Forward declaration. */ +static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines); + +static int +md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end) +{ + MD_LINE line; + MD_BLOCK_TD_DETAIL det; + int ret = 0; + + while(beg < end && ISWHITESPACE(beg)) + beg++; + while(end > beg && ISWHITESPACE(end-1)) + end--; + + det.align = align; + line.beg = beg; + line.end = end; + + MD_ENTER_BLOCK(cell_type, &det); + MD_CHECK(md_process_normal_block_contents(ctx, &line, 1)); + MD_LEAVE_BLOCK(cell_type, &det); + +abort: + return ret; +} + +static int +md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end, + const MD_ALIGN* align, int col_count) +{ + MD_LINE line; + OFF* pipe_offs = NULL; + int i, j, k, n; + int ret = 0; + + line.beg = beg; + line.end = end; + + /* Break the line into table cells by identifying pipe characters who + * form the cell boundary. */ + MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE)); + + /* We have to remember the cell boundaries in local buffer because + * ctx->marks[] shall be reused during cell contents processing. */ + n = ctx->n_table_cell_boundaries + 2; + pipe_offs = (OFF*) malloc(n * sizeof(OFF)); + if(pipe_offs == NULL) { + MD_LOG("malloc() failed."); + ret = -1; + goto abort; + } + j = 0; + pipe_offs[j++] = beg; + for(i = TABLECELLBOUNDARIES.head; i >= 0; i = ctx->marks[i].next) { + MD_MARK* mark = &ctx->marks[i]; + pipe_offs[j++] = mark->end; + } + pipe_offs[j++] = end+1; + + /* Process cells. */ + MD_ENTER_BLOCK(MD_BLOCK_TR, NULL); + k = 0; + for(i = 0; i < j-1 && k < col_count; i++) { + if(pipe_offs[i] < pipe_offs[i+1]-1) + MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+1]-1)); + } + /* Make sure we call enough table cells even if the current table contains + * too few of them. */ + while(k < col_count) + MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], 0, 0)); + MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL); + +abort: + free(pipe_offs); + + /* Free any temporary memory blocks stored within some dummy marks. */ + for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next) + free(md_mark_get_ptr(ctx, i)); + PTR_CHAIN.head = -1; + PTR_CHAIN.tail = -1; + + return ret; +} + +static int +md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines) +{ + MD_ALIGN* align; + int i; + int ret = 0; + + /* At least two lines have to be present: The column headers and the line + * with the underlines. */ + MD_ASSERT(n_lines >= 2); + + align = malloc(col_count * sizeof(MD_ALIGN)); + if(align == NULL) { + MD_LOG("malloc() failed."); + ret = -1; + goto abort; + } + + md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align, col_count); + + MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL); + MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH, + lines[0].beg, lines[0].end, align, col_count)); + MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL); + + MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL); + for(i = 2; i < n_lines; i++) { + MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD, + lines[i].beg, lines[i].end, align, col_count)); + } + MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL); + +abort: + free(align); + return ret; +} + + +/************************** + *** Processing Block *** + **************************/ + +#define MD_BLOCK_CONTAINER_OPENER 0x01 +#define MD_BLOCK_CONTAINER_CLOSER 0x02 +#define MD_BLOCK_CONTAINER (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER) +#define MD_BLOCK_LOOSE_LIST 0x04 +#define MD_BLOCK_SETEXT_HEADER 0x08 + +struct MD_BLOCK_tag { + MD_BLOCKTYPE type : 8; + unsigned flags : 8; + + /* MD_BLOCK_H: Header level (1 - 6) + * MD_BLOCK_CODE: Non-zero if fenced, zero if indented. + * MD_BLOCK_LI: Task mark character (0 if not task list item, 'x', 'X' or ' '). + * MD_BLOCK_TABLE: Column count (as determined by the table underline). + */ + unsigned data : 16; + + /* Leaf blocks: Count of lines (MD_LINE or MD_VERBATIMLINE) on the block. + * MD_BLOCK_LI: Task mark offset in the input doc. + * MD_BLOCK_OL: Start item number. + */ + unsigned n_lines; +}; + +struct MD_CONTAINER_tag { + CHAR ch; + unsigned is_loose : 8; + unsigned is_task : 8; + unsigned start; + unsigned mark_indent; + unsigned contents_indent; + OFF block_byte_off; + OFF task_mark_off; +}; + + +static int +md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + int i; + int ret; + + MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE)); + MD_CHECK(md_process_inlines(ctx, lines, n_lines)); + +abort: + /* Free any temporary memory blocks stored within some dummy marks. */ + for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next) + free(md_mark_get_ptr(ctx, i)); + PTR_CHAIN.head = -1; + PTR_CHAIN.tail = -1; + + return ret; +} + +static int +md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines) +{ + static const CHAR indent_chunk_str[] = _T(" "); + static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - 1; + + int i; + int ret = 0; + + for(i = 0; i < n_lines; i++) { + const MD_VERBATIMLINE* line = &lines[i]; + int indent = line->indent; + + MD_ASSERT(indent >= 0); + + /* Output code indentation. */ + while(indent > (int) indent_chunk_size) { + MD_TEXT(text_type, indent_chunk_str, indent_chunk_size); + indent -= indent_chunk_size; + } + if(indent > 0) + MD_TEXT(text_type, indent_chunk_str, indent); + + /* Output the code line itself. */ + MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg); + + /* Enforce end-of-line. */ + MD_TEXT(text_type, _T("\n"), 1); + } + +abort: + return ret; +} + +static int +md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines) +{ + if(is_fenced) { + /* Skip the first line in case of fenced code: It is the fence. + * (Only the starting fence is present due to logic in md_analyze_line().) */ + lines++; + n_lines--; + } else { + /* Ignore blank lines at start/end of indented code block. */ + while(n_lines > 0 && lines[0].beg == lines[0].end) { + lines++; + n_lines--; + } + while(n_lines > 0 && lines[n_lines-1].beg == lines[n_lines-1].end) { + n_lines--; + } + } + + if(n_lines == 0) + return 0; + + return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines); +} + +static int +md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det, + MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build) +{ + const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1); + OFF beg = fence_line->beg; + OFF end = fence_line->end; + OFF lang_end; + CHAR fence_ch = CH(fence_line->beg); + int ret = 0; + + /* Skip the fence itself. */ + while(beg < ctx->size && CH(beg) == fence_ch) + beg++; + /* Trim initial spaces. */ + while(beg < ctx->size && CH(beg) == _T(' ')) + beg++; + + /* Trim trailing spaces. */ + while(end > beg && CH(end-1) == _T(' ')) + end--; + + /* Build info string attribute. */ + MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info, info_build)); + + /* Build info string attribute. */ + lang_end = beg; + while(lang_end < end && !ISWHITESPACE(lang_end)) + lang_end++; + MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang, lang_build)); + + det->fence_char = fence_ch; + +abort: + return ret; +} + +static int +md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) +{ + union { + MD_BLOCK_H_DETAIL header; + MD_BLOCK_CODE_DETAIL code; + } det; + MD_ATTRIBUTE_BUILD info_build; + MD_ATTRIBUTE_BUILD lang_build; + int is_in_tight_list; + int clean_fence_code_detail = FALSE; + int ret = 0; + + memset(&det, 0, sizeof(det)); + + if(ctx->n_containers == 0) + is_in_tight_list = FALSE; + else + is_in_tight_list = !ctx->containers[ctx->n_containers-1].is_loose; + + switch(block->type) { + case MD_BLOCK_H: + det.header.level = block->data; + break; + + case MD_BLOCK_CODE: + /* For fenced code block, we may need to set the info string. */ + if(block->data != 0) { + memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL)); + clean_fence_code_detail = TRUE; + MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build)); + } + break; + + default: + /* Noop. */ + break; + } + + if(!is_in_tight_list || block->type != MD_BLOCK_P) + MD_ENTER_BLOCK(block->type, (void*) &det); + + /* Process the block contents accordingly to is type. */ + switch(block->type) { + case MD_BLOCK_HR: + /* noop */ + break; + + case MD_BLOCK_CODE: + MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0), + (const MD_VERBATIMLINE*)(block + 1), block->n_lines)); + break; + + case MD_BLOCK_HTML: + MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML, + (const MD_VERBATIMLINE*)(block + 1), block->n_lines)); + break; + + case MD_BLOCK_TABLE: + MD_CHECK(md_process_table_block_contents(ctx, block->data, + (const MD_LINE*)(block + 1), block->n_lines)); + break; + + default: + MD_CHECK(md_process_normal_block_contents(ctx, + (const MD_LINE*)(block + 1), block->n_lines)); + break; + } + + if(!is_in_tight_list || block->type != MD_BLOCK_P) + MD_LEAVE_BLOCK(block->type, (void*) &det); + +abort: + if(clean_fence_code_detail) { + md_free_attribute(ctx, &info_build); + md_free_attribute(ctx, &lang_build); + } + return ret; +} + +static int +md_process_all_blocks(MD_CTX* ctx) +{ + int byte_off = 0; + int ret = 0; + + /* ctx->containers now is not needed for detection of lists and list items + * so we reuse it for tracking what lists are loose or tight. We rely + * on the fact the vector is large enough to hold the deepest nesting + * level of lists. */ + ctx->n_containers = 0; + + while(byte_off < ctx->n_block_bytes) { + MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + byte_off); + union { + MD_BLOCK_UL_DETAIL ul; + MD_BLOCK_OL_DETAIL ol; + MD_BLOCK_LI_DETAIL li; + } det; + + switch(block->type) { + case MD_BLOCK_UL: + det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE; + det.ul.mark = (CHAR) block->data; + break; + + case MD_BLOCK_OL: + det.ol.start = block->n_lines; + det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE; + det.ol.mark_delimiter = (CHAR) block->data; + break; + + case MD_BLOCK_LI: + det.li.is_task = (block->data != 0); + det.li.task_mark = (CHAR) block->data; + det.li.task_mark_offset = (OFF) block->n_lines; + break; + + default: + /* noop */ + break; + } + + if(block->flags & MD_BLOCK_CONTAINER) { + if(block->flags & MD_BLOCK_CONTAINER_CLOSER) { + MD_LEAVE_BLOCK(block->type, &det); + + if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE) + ctx->n_containers--; + } + + if(block->flags & MD_BLOCK_CONTAINER_OPENER) { + MD_ENTER_BLOCK(block->type, &det); + + if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) { + ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST); + ctx->n_containers++; + } else if(block->type == MD_BLOCK_QUOTE) { + /* This causes that any text in a block quote, even if + * nested inside a tight list item, is wrapped with + *

        ...

        . */ + ctx->containers[ctx->n_containers].is_loose = TRUE; + ctx->n_containers++; + } + } + } else { + MD_CHECK(md_process_leaf_block(ctx, block)); + + if(block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML) + byte_off += block->n_lines * sizeof(MD_VERBATIMLINE); + else + byte_off += block->n_lines * sizeof(MD_LINE); + } + + byte_off += sizeof(MD_BLOCK); + } + + ctx->n_block_bytes = 0; + +abort: + return ret; +} + + +/************************************ + *** Grouping Lines into Blocks *** + ************************************/ + +static void* +md_push_block_bytes(MD_CTX* ctx, int n_bytes) +{ + void* ptr; + + if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) { + void* new_block_bytes; + + ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0 + ? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2 + : 512); + new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes); + if(new_block_bytes == NULL) { + MD_LOG("realloc() failed."); + return NULL; + } + + /* Fix the ->current_block after the reallocation. */ + if(ctx->current_block != NULL) { + OFF off_current_block = (char*) ctx->current_block - (char*) ctx->block_bytes; + ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block); + } + + ctx->block_bytes = new_block_bytes; + } + + ptr = (char*)ctx->block_bytes + ctx->n_block_bytes; + ctx->n_block_bytes += n_bytes; + return ptr; +} + +static int +md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line) +{ + MD_BLOCK* block; + + MD_ASSERT(ctx->current_block == NULL); + + block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK)); + if(block == NULL) + return -1; + + switch(line->type) { + case MD_LINE_HR: + block->type = MD_BLOCK_HR; + break; + + case MD_LINE_ATXHEADER: + case MD_LINE_SETEXTHEADER: + block->type = MD_BLOCK_H; + break; + + case MD_LINE_FENCEDCODE: + case MD_LINE_INDENTEDCODE: + block->type = MD_BLOCK_CODE; + break; + + case MD_LINE_TEXT: + block->type = MD_BLOCK_P; + break; + + case MD_LINE_HTML: + block->type = MD_BLOCK_HTML; + break; + + case MD_LINE_BLANK: + case MD_LINE_SETEXTUNDERLINE: + case MD_LINE_TABLEUNDERLINE: + default: + MD_UNREACHABLE(); + break; + } + + block->flags = 0; + block->data = line->data; + block->n_lines = 0; + + ctx->current_block = block; + return 0; +} + +/* Eat from start of current (textual) block any reference definitions and + * remember them so we can resolve any links referring to them. + * + * (Reference definitions can only be at start of it as they cannot break + * a paragraph.) + */ +static int +md_consume_link_reference_definitions(MD_CTX* ctx) +{ + MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1); + int n_lines = ctx->current_block->n_lines; + int n = 0; + + /* Compute how many lines at the start of the block form one or more + * reference definitions. */ + while(n < n_lines) { + int n_link_ref_lines; + + n_link_ref_lines = md_is_link_reference_definition(ctx, + lines + n, n_lines - n); + /* Not a reference definition? */ + if(n_link_ref_lines == 0) + break; + + /* We fail if it is the ref. def. but it could not be stored due + * a memory allocation error. */ + if(n_link_ref_lines < 0) + return -1; + + n += n_link_ref_lines; + } + + /* If there was at least one reference definition, we need to remove + * its lines from the block, or perhaps even the whole block. */ + if(n > 0) { + if(n == n_lines) { + /* Remove complete block. */ + ctx->n_block_bytes -= n * sizeof(MD_LINE); + ctx->n_block_bytes -= sizeof(MD_BLOCK); + ctx->current_block = NULL; + } else { + /* Remove just some initial lines from the block. */ + memmove(lines, lines + n, (n_lines - n) * sizeof(MD_LINE)); + ctx->current_block->n_lines -= n; + ctx->n_block_bytes -= n * sizeof(MD_LINE); + } + } + + return 0; +} + +static int +md_end_current_block(MD_CTX* ctx) +{ + int ret = 0; + + if(ctx->current_block == NULL) + return ret; + + /* Check whether there is a reference definition. (We do this here instead + * of in md_analyze_line() because reference definition can take multiple + * lines.) */ + if(ctx->current_block->type == MD_BLOCK_P || + (ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER))) + { + MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1); + if(CH(lines[0].beg) == _T('[')) { + MD_CHECK(md_consume_link_reference_definitions(ctx)); + if(ctx->current_block == NULL) + return ret; + } + } + + if(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) { + int n_lines = ctx->current_block->n_lines; + + if(n_lines > 1) { + /* Get rid of the underline. */ + ctx->current_block->n_lines--; + ctx->n_block_bytes -= sizeof(MD_LINE); + } else { + /* Only the underline has left after eating the ref. defs. + * Keep the line as beginning of a new ordinary paragraph. */ + ctx->current_block->type = MD_BLOCK_P; + return 0; + } + } + + /* Mark we are not building any block anymore. */ + ctx->current_block = NULL; + +abort: + return ret; +} + +static int +md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis) +{ + MD_ASSERT(ctx->current_block != NULL); + + if(ctx->current_block->type == MD_BLOCK_CODE || ctx->current_block->type == MD_BLOCK_HTML) { + MD_VERBATIMLINE* line; + + line = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE)); + if(line == NULL) + return -1; + + line->indent = analysis->indent; + line->beg = analysis->beg; + line->end = analysis->end; + } else { + MD_LINE* line; + + line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE)); + if(line == NULL) + return -1; + + line->beg = analysis->beg; + line->end = analysis->end; + } + ctx->current_block->n_lines++; + + return 0; +} + +static int +md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start, + unsigned data, unsigned flags) +{ + MD_BLOCK* block; + int ret = 0; + + MD_CHECK(md_end_current_block(ctx)); + + block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK)); + if(block == NULL) + return -1; + + block->type = type; + block->flags = flags; + block->data = data; + block->n_lines = start; + +abort: + return ret; +} + + + +/*********************** + *** Line Analysis *** + ***********************/ + +static int +md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer) +{ + OFF off = beg + 1; + int n = 1; + + while(off < ctx->size && (CH(off) == CH(beg) || CH(off) == _T(' ') || CH(off) == _T('\t'))) { + if(CH(off) == CH(beg)) + n++; + off++; + } + + if(n < 3) { + *p_killer = off; + return FALSE; + } + + /* Nothing else can be present on the line. */ + if(off < ctx->size && !ISNEWLINE(off)) { + *p_killer = off; + return FALSE; + } + + *p_end = off; + return TRUE; +} + +static int +md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level) +{ + int n; + OFF off = beg + 1; + + while(off < ctx->size && CH(off) == _T('#') && off - beg < 7) + off++; + n = off - beg; + + if(n > 6) + return FALSE; + *p_level = n; + + if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size && + CH(off) != _T(' ') && CH(off) != _T('\t') && !ISNEWLINE(off)) + return FALSE; + + while(off < ctx->size && CH(off) == _T(' ')) + off++; + *p_beg = off; + *p_end = off; + return TRUE; +} + +static int +md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level) +{ + OFF off = beg + 1; + + while(off < ctx->size && CH(off) == CH(beg)) + off++; + + /* Optionally, space(s) can follow. */ + while(off < ctx->size && CH(off) == _T(' ')) + off++; + + /* But nothing more is allowed on the line. */ + if(off < ctx->size && !ISNEWLINE(off)) + return FALSE; + + *p_level = (CH(beg) == _T('=') ? 1 : 2); + *p_end = off; + return TRUE; +} + +static int +md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count) +{ + OFF off = beg; + int found_pipe = FALSE; + unsigned col_count = 0; + + if(off < ctx->size && CH(off) == _T('|')) { + found_pipe = TRUE; + off++; + while(off < ctx->size && ISWHITESPACE(off)) + off++; + } + + while(1) { + OFF cell_beg; + int delimited = FALSE; + + /* Cell underline ("-----", ":----", "----:" or ":----:") */ + cell_beg = off; + if(off < ctx->size && CH(off) == _T(':')) + off++; + while(off < ctx->size && CH(off) == _T('-')) + off++; + if(off < ctx->size && CH(off) == _T(':')) + off++; + if(off - cell_beg < 3) + return FALSE; + + col_count++; + + /* Pipe delimiter (optional at the end of line). */ + while(off < ctx->size && ISWHITESPACE(off)) + off++; + if(off < ctx->size && CH(off) == _T('|')) { + delimited = TRUE; + found_pipe = TRUE; + off++; + while(off < ctx->size && ISWHITESPACE(off)) + off++; + } + + /* Success, if we reach end of line. */ + if(off >= ctx->size || ISNEWLINE(off)) + break; + + if(!delimited) + return FALSE; + } + + if(!found_pipe) + return FALSE; + + *p_end = off; + *p_col_count = col_count; + return TRUE; +} + +static int +md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end) +{ + OFF off = beg; + + while(off < ctx->size && CH(off) == CH(beg)) + off++; + + /* Fence must have at least three characters. */ + if(off - beg < 3) + return FALSE; + + ctx->code_fence_length = off - beg; + + /* Optionally, space(s) can follow. */ + while(off < ctx->size && CH(off) == _T(' ')) + off++; + + /* Optionally, an info string can follow. */ + while(off < ctx->size && !ISNEWLINE(off)) { + /* Backtick-based fence must not contain '`' in the info string. */ + if(CH(beg) == _T('`') && CH(off) == _T('`')) + return FALSE; + off++; + } + + *p_end = off; + return TRUE; +} + +static int +md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end) +{ + OFF off = beg; + int ret = FALSE; + + /* Closing fence must have at least the same length and use same char as + * opening one. */ + while(off < ctx->size && CH(off) == ch) + off++; + if(off - beg < ctx->code_fence_length) + goto out; + + /* Optionally, space(s) can follow */ + while(off < ctx->size && CH(off) == _T(' ')) + off++; + + /* But nothing more is allowed on the line. */ + if(off < ctx->size && !ISNEWLINE(off)) + goto out; + + ret = TRUE; + +out: + /* Note we set *p_end even on failure: If we are not closing fence, caller + * would eat the line anyway without any parsing. */ + *p_end = off; + return ret; +} + +/* Returns type of the raw HTML block, or FALSE if it is not HTML block. + * (Refer to CommonMark specification for details about the types.) + */ +static int +md_is_html_block_start_condition(MD_CTX* ctx, OFF beg) +{ + typedef struct TAG_tag TAG; + struct TAG_tag { + const CHAR* name; + unsigned len : 8; + }; + + /* Type 6 is started by a long list of allowed tags. We use two-level + * tree to speed-up the search. */ +#ifdef X + #undef X +#endif +#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) } +#define Xend { NULL, 0 } + static const TAG t1[] = { X("script"), X("pre"), X("style"), Xend }; + + static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend }; + static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend }; + static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend }; + static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"), + X("div"), X("dl"), X("dt"), Xend }; + static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"), + X("form"), X("frame"), X("frameset"), Xend }; + static const TAG h6[] = { X("h1"), X("head"), X("header"), X("hr"), X("html"), Xend }; + static const TAG i6[] = { X("iframe"), Xend }; + static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend }; + static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend }; + static const TAG n6[] = { X("nav"), X("noframes"), Xend }; + static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend }; + static const TAG p6[] = { X("p"), X("param"), Xend }; + static const TAG s6[] = { X("section"), X("source"), X("summary"), Xend }; + static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"), + X("thead"), X("title"), X("tr"), X("track"), Xend }; + static const TAG u6[] = { X("ul"), Xend }; + static const TAG xx[] = { Xend }; +#undef X + + static const TAG* map6[26] = { + a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6, + n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx + }; + OFF off = beg + 1; + int i; + + /* Check for type 1: size) { + if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len)) + return 1; + } + } + + /* Check for type 2: "), 3, p_end) ? 2 : FALSE); + + case 3: + return (md_line_contains(ctx, beg, _T("?>"), 2, p_end) ? 3 : FALSE); + + case 4: + return (md_line_contains(ctx, beg, _T(">"), 1, p_end) ? 4 : FALSE); + + case 5: + return (md_line_contains(ctx, beg, _T("]]>"), 3, p_end) ? 5 : FALSE); + + case 6: /* Pass through */ + case 7: + *p_end = beg; + return (ISNEWLINE(beg) ? ctx->html_block_type : FALSE); + + default: + MD_UNREACHABLE(); + } + return FALSE; +} + + +static int +md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container) +{ + /* Block quote has no "items" like lists. */ + if(container->ch == _T('>')) + return FALSE; + + if(container->ch != pivot->ch) + return FALSE; + if(container->mark_indent > pivot->contents_indent) + return FALSE; + + return TRUE; +} + +static int +md_push_container(MD_CTX* ctx, const MD_CONTAINER* container) +{ + if(ctx->n_containers >= ctx->alloc_containers) { + MD_CONTAINER* new_containers; + + ctx->alloc_containers = (ctx->alloc_containers > 0 + ? ctx->alloc_containers + ctx->alloc_containers / 2 + : 16); + new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER)); + if(new_containers == NULL) { + MD_LOG("realloc() failed."); + return -1; + } + + ctx->containers = new_containers; + } + + memcpy(&ctx->containers[ctx->n_containers++], container, sizeof(MD_CONTAINER)); + return 0; +} + +static int +md_enter_child_containers(MD_CTX* ctx, int n_children, unsigned data) +{ + int i; + int ret = 0; + + for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) { + MD_CONTAINER* c = &ctx->containers[i]; + int is_ordered_list = FALSE; + + switch(c->ch) { + case _T(')'): + case _T('.'): + is_ordered_list = TRUE; + /* Pass through */ + + case _T('-'): + case _T('+'): + case _T('*'): + /* Remember offset in ctx->block_bytes so we can revisit the + * block if we detect it is a loose list. */ + md_end_current_block(ctx); + c->block_byte_off = ctx->n_block_bytes; + + MD_CHECK(md_push_container_bytes(ctx, + (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), + c->start, data, MD_BLOCK_CONTAINER_OPENER)); + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI, + c->task_mark_off, + (c->is_task ? CH(c->task_mark_off) : 0), + MD_BLOCK_CONTAINER_OPENER)); + break; + + case _T('>'): + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER)); + break; + + default: + MD_UNREACHABLE(); + break; + } + } + +abort: + return ret; +} + +static int +md_leave_child_containers(MD_CTX* ctx, int n_keep) +{ + int ret = 0; + + while(ctx->n_containers > n_keep) { + MD_CONTAINER* c = &ctx->containers[ctx->n_containers-1]; + int is_ordered_list = FALSE; + + switch(c->ch) { + case _T(')'): + case _T('.'): + is_ordered_list = TRUE; + /* Pass through */ + + case _T('-'): + case _T('+'): + case _T('*'): + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI, + c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : 0), + MD_BLOCK_CONTAINER_CLOSER)); + MD_CHECK(md_push_container_bytes(ctx, + (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 0, + c->ch, MD_BLOCK_CONTAINER_CLOSER)); + break; + + case _T('>'): + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, + 0, MD_BLOCK_CONTAINER_CLOSER)); + break; + + default: + MD_UNREACHABLE(); + break; + } + + ctx->n_containers--; + } + +abort: + return ret; +} + +static int +md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container) +{ + OFF off = beg; + OFF max_end; + + if(indent >= ctx->code_indent_offset) + return FALSE; + + /* Check for block quote mark. */ + if(off < ctx->size && CH(off) == _T('>')) { + off++; + p_container->ch = _T('>'); + p_container->is_loose = FALSE; + p_container->is_task = FALSE; + p_container->mark_indent = indent; + p_container->contents_indent = indent + 1; + *p_end = off; + return TRUE; + } + + /* Check for list item bullet mark. */ + if(off+1 < ctx->size && ISANYOF(off, _T("-+*")) && (ISBLANK(off+1) || ISNEWLINE(off+1))) { + p_container->ch = CH(off); + p_container->is_loose = FALSE; + p_container->is_task = FALSE; + p_container->mark_indent = indent; + p_container->contents_indent = indent + 1; + *p_end = off + 1; + return TRUE; + } + + /* Check for ordered list item marks. */ + max_end = off + 9; + if(max_end > ctx->size) + max_end = ctx->size; + p_container->start = 0; + while(off < max_end && ISDIGIT(off)) { + p_container->start = p_container->start * 10 + CH(off) - _T('0'); + off++; + } + if(off > beg && off+1 < ctx->size && + (CH(off) == _T('.') || CH(off) == _T(')')) && + (ISBLANK(off+1) || ISNEWLINE(off+1))) + { + p_container->ch = CH(off); + p_container->is_loose = FALSE; + p_container->is_task = FALSE; + p_container->mark_indent = indent; + p_container->contents_indent = indent + off - beg + 1; + *p_end = off + 1; + return TRUE; + } + + return FALSE; +} + +static unsigned +md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end) +{ + OFF off = beg; + unsigned indent = total_indent; + + while(off < ctx->size && ISBLANK(off)) { + if(CH(off) == _T('\t')) + indent = (indent + 4) & ~3; + else + indent++; + off++; + } + + *p_end = off; + return indent - total_indent; +} + +static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 }; + +/* Analyze type of the line and find some its properties. This serves as a + * main input for determining type and boundaries of a block. */ +static int +md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, + const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line) +{ + unsigned total_indent = 0; + int n_parents = 0; + int n_brothers = 0; + int n_children = 0; + MD_CONTAINER container = { 0 }; + int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect; + OFF off = beg; + OFF hr_killer = 0; + int ret = 0; + + line->indent = md_line_indentation(ctx, total_indent, off, &off); + total_indent += line->indent; + line->beg = off; + + /* Given the indentation and block quote marks '>', determine how many of + * the current containers are our parents. */ + while(n_parents < ctx->n_containers) { + MD_CONTAINER* c = &ctx->containers[n_parents]; + + if(c->ch == _T('>') && line->indent < ctx->code_indent_offset && + off < ctx->size && CH(off) == _T('>')) + { + /* Block quote mark. */ + off++; + total_indent++; + line->indent = md_line_indentation(ctx, total_indent, off, &off); + total_indent += line->indent; + + /* The optional 1st space after '>' is part of the block quote mark. */ + if(line->indent > 0) + line->indent--; + + line->beg = off; + + } else if(c->ch != _T('>') && line->indent >= c->contents_indent) { + /* List. */ + line->indent -= c->contents_indent; + } else { + break; + } + + n_parents++; + } + + if(off >= ctx->size || ISNEWLINE(off)) { + /* Blank line does not need any real indentation to be nested inside + * a list. */ + if(n_brothers + n_children == 0) { + while(n_parents < ctx->n_containers && ctx->containers[n_parents].ch != _T('>')) + n_parents++; + } + } + + while(TRUE) { + /* Check whether we are fenced code continuation. */ + if(pivot_line->type == MD_LINE_FENCEDCODE) { + line->beg = off; + + /* We are another MD_LINE_FENCEDCODE unless we are closing fence + * which we transform into MD_LINE_BLANK. */ + if(line->indent < ctx->code_indent_offset) { + if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) { + line->type = MD_LINE_BLANK; + ctx->last_line_has_list_loosening_effect = FALSE; + break; + } + } + + /* Change indentation accordingly to the initial code fence. */ + if(n_parents == ctx->n_containers) { + if(line->indent > pivot_line->indent) + line->indent -= pivot_line->indent; + else + line->indent = 0; + + line->type = MD_LINE_FENCEDCODE; + break; + } + } + + /* Check whether we are HTML block continuation. */ + if(pivot_line->type == MD_LINE_HTML && ctx->html_block_type > 0) { + int html_block_type; + + html_block_type = md_is_html_block_end_condition(ctx, off, &off); + if(html_block_type > 0) { + MD_ASSERT(html_block_type == ctx->html_block_type); + + /* Make sure this is the last line of the block. */ + ctx->html_block_type = 0; + + /* Some end conditions serve as blank lines at the same time. */ + if(html_block_type == 6 || html_block_type == 7) { + line->type = MD_LINE_BLANK; + line->indent = 0; + break; + } + } + + if(n_parents == ctx->n_containers) { + line->type = MD_LINE_HTML; + break; + } + } + + /* Check for blank line. */ + if(off >= ctx->size || ISNEWLINE(off)) { + if(pivot_line->type == MD_LINE_INDENTEDCODE && n_parents == ctx->n_containers) { + line->type = MD_LINE_INDENTEDCODE; + if(line->indent > ctx->code_indent_offset) + line->indent -= ctx->code_indent_offset; + else + line->indent = 0; + ctx->last_line_has_list_loosening_effect = FALSE; + } else { + line->type = MD_LINE_BLANK; + ctx->last_line_has_list_loosening_effect = (n_parents > 0 && + n_brothers + n_children == 0 && + ctx->containers[n_parents-1].ch != _T('>')); + + #if 1 + /* See https://github.com/mity/md4c/issues/6 + * + * This ugly checking tests we are in (yet empty) list item but not + * its very first line (with the list item mark). + * + * If we are such blank line, then any following non-blank line + * which would be part of this list item actually ends the list + * because "a list item can begin with at most one blank line." + */ + if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') && + n_brothers + n_children == 0 && ctx->current_block == NULL && + ctx->n_block_bytes > (int) sizeof(MD_BLOCK)) + { + MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK)); + if(top_block->type == MD_BLOCK_LI) + ctx->last_list_item_starts_with_two_blank_lines = TRUE; + } + #endif + } + break; + } else { + #if 1 + /* This is 2nd half of the hack. If the flag is set (that is there + * were 2nd blank line at the start of the list item) and we would also + * belonging to such list item, than interrupt the list. */ + ctx->last_line_has_list_loosening_effect = FALSE; + if(ctx->last_list_item_starts_with_two_blank_lines) { + if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') && + n_brothers + n_children == 0 && ctx->current_block == NULL && + ctx->n_block_bytes > (int) sizeof(MD_BLOCK)) + { + MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK)); + if(top_block->type == MD_BLOCK_LI) + n_parents--; + } + + ctx->last_list_item_starts_with_two_blank_lines = FALSE; + } + #endif + } + + /* Check whether we are Setext underline. */ + if(line->indent < ctx->code_indent_offset && pivot_line->type == MD_LINE_TEXT + && (CH(off) == _T('=') || CH(off) == _T('-')) + && (n_parents == ctx->n_containers)) + { + unsigned level; + + if(md_is_setext_underline(ctx, off, &off, &level)) { + line->type = MD_LINE_SETEXTUNDERLINE; + line->data = level; + break; + } + } + + /* Check for thematic break line. */ + if(line->indent < ctx->code_indent_offset && ISANYOF(off, _T("-_*")) && off >= hr_killer) { + if(md_is_hr_line(ctx, off, &off, &hr_killer)) { + line->type = MD_LINE_HR; + break; + } + } + + /* Check for "brother" container. I.e. whether we are another list item + * in already started list. */ + if(n_parents < ctx->n_containers && n_brothers + n_children == 0) { + OFF tmp; + + if(md_is_container_mark(ctx, line->indent, off, &tmp, &container) && + md_is_container_compatible(&ctx->containers[n_parents], &container)) + { + pivot_line = &md_dummy_blank_line; + + off = tmp; + + total_indent += container.contents_indent - container.mark_indent; + line->indent = md_line_indentation(ctx, total_indent, off, &off); + total_indent += line->indent; + line->beg = off; + + /* Some of the following whitespace actually still belongs to the mark. */ + if(off >= ctx->size || ISNEWLINE(off)) { + container.contents_indent++; + } else if(line->indent <= ctx->code_indent_offset) { + container.contents_indent += line->indent; + line->indent = 0; + } else { + container.contents_indent += 1; + line->indent--; + } + + ctx->containers[n_parents].mark_indent = container.mark_indent; + ctx->containers[n_parents].contents_indent = container.contents_indent; + + n_brothers++; + continue; + } + } + + /* Check for indented code. + * Note indented code block cannot interrupt a paragraph. */ + if(line->indent >= ctx->code_indent_offset && + (pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE)) + { + line->type = MD_LINE_INDENTEDCODE; + MD_ASSERT(line->indent >= ctx->code_indent_offset); + line->indent -= ctx->code_indent_offset; + line->data = 0; + break; + } + + /* Check for start of a new container block. */ + if(line->indent < ctx->code_indent_offset && + md_is_container_mark(ctx, line->indent, off, &off, &container)) + { + if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers && + (off >= ctx->size || ISNEWLINE(off)) && container.ch != _T('>')) + { + /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */ + } else if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers && + (container.ch == _T('.') || container.ch == _T(')')) && container.start != 1) + { + /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */ + } else { + total_indent += container.contents_indent - container.mark_indent; + line->indent = md_line_indentation(ctx, total_indent, off, &off); + total_indent += line->indent; + + line->beg = off; + line->data = container.ch; + + /* Some of the following whitespace actually still belongs to the mark. */ + if(off >= ctx->size || ISNEWLINE(off)) { + container.contents_indent++; + } else if(line->indent <= ctx->code_indent_offset) { + container.contents_indent += line->indent; + line->indent = 0; + } else { + container.contents_indent += 1; + line->indent--; + } + + if(n_brothers + n_children == 0) + pivot_line = &md_dummy_blank_line; + + if(n_children == 0) + MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers)); + + n_children++; + MD_CHECK(md_push_container(ctx, &container)); + continue; + } + } + + /* Check whether we are table continuation. */ + if(pivot_line->type == MD_LINE_TABLE && n_parents == ctx->n_containers) { + line->type = MD_LINE_TABLE; + break; + } + + /* Check for ATX header. */ + if(line->indent < ctx->code_indent_offset && CH(off) == _T('#')) { + unsigned level; + + if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) { + line->type = MD_LINE_ATXHEADER; + line->data = level; + break; + } + } + + /* Check whether we are starting code fence. */ + if(CH(off) == _T('`') || CH(off) == _T('~')) { + if(md_is_opening_code_fence(ctx, off, &off)) { + line->type = MD_LINE_FENCEDCODE; + line->data = 1; + break; + } + } + + /* Check for start of raw HTML block. */ + if(CH(off) == _T('<') && !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS)) + { + ctx->html_block_type = md_is_html_block_start_condition(ctx, off); + + /* HTML block type 7 cannot interrupt paragraph. */ + if(ctx->html_block_type == 7 && pivot_line->type == MD_LINE_TEXT) + ctx->html_block_type = 0; + + if(ctx->html_block_type > 0) { + /* The line itself also may immediately close the block. */ + if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) { + /* Make sure this is the last line of the block. */ + ctx->html_block_type = 0; + } + + line->type = MD_LINE_HTML; + break; + } + } + + /* Check for table underline. */ + if((ctx->parser.flags & MD_FLAG_TABLES) && pivot_line->type == MD_LINE_TEXT && + (CH(off) == _T('|') || CH(off) == _T('-') || CH(off) == _T(':')) && + n_parents == ctx->n_containers) + { + unsigned col_count; + + if(ctx->current_block != NULL && ctx->current_block->n_lines == 1 && + md_is_table_underline(ctx, off, &off, &col_count)) + { + line->data = col_count; + line->type = MD_LINE_TABLEUNDERLINE; + break; + } + } + + /* By default, we are normal text line. */ + line->type = MD_LINE_TEXT; + if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == 0) { + /* Lazy continuation. */ + n_parents = ctx->n_containers; + } + + /* Check for task mark. */ + if((ctx->parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > 0 && + ISANYOF_(ctx->containers[ctx->n_containers-1].ch, _T("-+*.)"))) + { + OFF tmp = off; + + while(tmp < ctx->size && tmp < off + 3 && ISBLANK(tmp)) + tmp++; + if(tmp + 2 < ctx->size && CH(tmp) == _T('[') && + ISANYOF(tmp+1, _T("xX ")) && CH(tmp+2) == _T(']') && + (tmp + 3 == ctx->size || ISBLANK(tmp+3) || ISNEWLINE(tmp+3))) + { + MD_CONTAINER* task_container = (n_children > 0 ? &ctx->containers[ctx->n_containers-1] : &container); + task_container->is_task = TRUE; + task_container->task_mark_off = tmp + 1; + off = tmp + 3; + while(ISWHITESPACE(off)) + off++; + line->beg = off; + } + } + + break; + } + + /* Scan for end of the line. + * + * Note this is quite a bottleneck of the parsing as we here iterate almost + * over compete document. + */ +#if defined __linux__ && !defined MD4C_USE_UTF16 + /* Recent glibc versions have superbly optimized strcspn(), even using + * vectorization if available. */ + if(ctx->doc_ends_with_newline && off < ctx->size) { + while(TRUE) { + off += (OFF) strcspn(STR(off), "\r\n"); + + /* strcspn() can stop on zero terminator; but that can appear + * anywhere in the Markfown input... */ + if(CH(off) == _T('\0')) + off++; + else + break; + } + } else +#endif + { + /* Optimization: Use some loop unrolling. */ + while(off + 3 < ctx->size && !ISNEWLINE(off+0) && !ISNEWLINE(off+1) + && !ISNEWLINE(off+2) && !ISNEWLINE(off+3)) + off += 4; + while(off < ctx->size && !ISNEWLINE(off)) + off++; + } + + /* Set end of the line. */ + line->end = off; + + /* But for ATX header, we should exclude the optional trailing mark. */ + if(line->type == MD_LINE_ATXHEADER) { + OFF tmp = line->end; + while(tmp > line->beg && CH(tmp-1) == _T(' ')) + tmp--; + while(tmp > line->beg && CH(tmp-1) == _T('#')) + tmp--; + if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)) + line->end = tmp; + } + + /* Trim trailing spaces. */ + if(line->type != MD_LINE_INDENTEDCODE && line->type != MD_LINE_FENCEDCODE) { + while(line->end > line->beg && CH(line->end-1) == _T(' ')) + line->end--; + } + + /* Eat also the new line. */ + if(off < ctx->size && CH(off) == _T('\r')) + off++; + if(off < ctx->size && CH(off) == _T('\n')) + off++; + + *p_end = off; + + /* If we belong to a list after seeing a blank line, the list is loose. */ + if(prev_line_has_list_loosening_effect && line->type != MD_LINE_BLANK && n_parents + n_brothers > 0) { + MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1]; + if(c->ch != _T('>')) { + MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off); + block->flags |= MD_BLOCK_LOOSE_LIST; + } + } + + /* Leave any containers we are not part of anymore. */ + if(n_children == 0 && n_parents + n_brothers < ctx->n_containers) + MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers)); + + /* Enter any container we found a mark for. */ + if(n_brothers > 0) { + MD_ASSERT(n_brothers == 1); + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI, + ctx->containers[n_parents].task_mark_off, + (ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : 0), + MD_BLOCK_CONTAINER_CLOSER)); + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI, + container.task_mark_off, + (container.is_task ? CH(container.task_mark_off) : 0), + MD_BLOCK_CONTAINER_OPENER)); + ctx->containers[n_parents].is_task = container.is_task; + ctx->containers[n_parents].task_mark_off = container.task_mark_off; + } + + if(n_children > 0) + MD_CHECK(md_enter_child_containers(ctx, n_children, line->data)); + +abort: + return ret; +} + +static int +md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line) +{ + const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line; + int ret = 0; + + /* Blank line ends current leaf block. */ + if(line->type == MD_LINE_BLANK) { + MD_CHECK(md_end_current_block(ctx)); + *p_pivot_line = &md_dummy_blank_line; + return 0; + } + + /* Some line types form block on their own. */ + if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) { + MD_CHECK(md_end_current_block(ctx)); + + /* Add our single-line block. */ + MD_CHECK(md_start_new_block(ctx, line)); + MD_CHECK(md_add_line_into_current_block(ctx, line)); + MD_CHECK(md_end_current_block(ctx)); + *p_pivot_line = &md_dummy_blank_line; + return 0; + } + + /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. */ + if(line->type == MD_LINE_SETEXTUNDERLINE) { + MD_ASSERT(ctx->current_block != NULL); + ctx->current_block->type = MD_BLOCK_H; + ctx->current_block->data = line->data; + ctx->current_block->flags |= MD_BLOCK_SETEXT_HEADER; + MD_CHECK(md_add_line_into_current_block(ctx, line)); + MD_CHECK(md_end_current_block(ctx)); + if(ctx->current_block == NULL) { + *p_pivot_line = &md_dummy_blank_line; + } else { + /* This happens if we have consumed all the body as link ref. defs. + * and downgraded the underline into start of a new paragraph block. */ + line->type = MD_LINE_TEXT; + *p_pivot_line = line; + } + return 0; + } + + /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */ + if(line->type == MD_LINE_TABLEUNDERLINE) { + MD_ASSERT(ctx->current_block != NULL); + MD_ASSERT(ctx->current_block->n_lines == 1); + ctx->current_block->type = MD_BLOCK_TABLE; + ctx->current_block->data = line->data; + MD_ASSERT(pivot_line != &md_dummy_blank_line); + ((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE; + MD_CHECK(md_add_line_into_current_block(ctx, line)); + return 0; + } + + /* The current block also ends if the line has different type. */ + if(line->type != pivot_line->type) + MD_CHECK(md_end_current_block(ctx)); + + /* The current line may start a new block. */ + if(ctx->current_block == NULL) { + MD_CHECK(md_start_new_block(ctx, line)); + *p_pivot_line = line; + } + + /* In all other cases the line is just a continuation of the current block. */ + MD_CHECK(md_add_line_into_current_block(ctx, line)); + +abort: + return ret; +} + +static int +md_process_doc(MD_CTX *ctx) +{ + const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line; + MD_LINE_ANALYSIS line_buf[2]; + MD_LINE_ANALYSIS* line = &line_buf[0]; + OFF off = 0; + int ret = 0; + + MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL); + + while(off < ctx->size) { + if(line == pivot_line) + line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]); + + MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line)); + MD_CHECK(md_process_line(ctx, &pivot_line, line)); + } + + md_end_current_block(ctx); + + MD_CHECK(md_build_ref_def_hashtable(ctx)); + + /* Process all blocks. */ + MD_CHECK(md_leave_child_containers(ctx, 0)); + MD_CHECK(md_process_all_blocks(ctx)); + + MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL); + +abort: + +#if 0 + /* Output some memory consumption statistics. */ + { + char buffer[256]; + sprintf(buffer, "Alloced %u bytes for block buffer.", + (unsigned)(ctx->alloc_block_bytes)); + MD_LOG(buffer); + + sprintf(buffer, "Alloced %u bytes for containers buffer.", + (unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER))); + MD_LOG(buffer); + + sprintf(buffer, "Alloced %u bytes for marks buffer.", + (unsigned)(ctx->alloc_marks * sizeof(MD_MARK))); + MD_LOG(buffer); + + sprintf(buffer, "Alloced %u bytes for aux. buffer.", + (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR))); + MD_LOG(buffer); + } +#endif + + return ret; +} + + +/******************** + *** Public API *** + ********************/ + +int +md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata) +{ + MD_CTX ctx; + int i; + int ret; + + if(parser->abi_version != 0) { + if(parser->debug_log != NULL) + parser->debug_log("Unsupported abi_version.", userdata); + return -1; + } + + /* Setup context structure. */ + memset(&ctx, 0, sizeof(MD_CTX)); + ctx.text = text; + ctx.size = size; + memcpy(&ctx.parser, parser, sizeof(MD_PARSER)); + ctx.userdata = userdata; + ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4; + md_build_mark_char_map(&ctx); + ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1])); + + /* Reset all unresolved opener mark chains. */ + for(i = 0; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) { + ctx.mark_chains[i].head = -1; + ctx.mark_chains[i].tail = -1; + } + ctx.unresolved_link_head = -1; + ctx.unresolved_link_tail = -1; + + /* All the work. */ + ret = md_process_doc(&ctx); + + /* Clean-up. */ + md_free_ref_defs(&ctx); + md_free_ref_def_hashtable(&ctx); + free(ctx.buffer); + free(ctx.marks); + free(ctx.block_bytes); + free(ctx.containers); + + return ret; +} diff --git a/modules/3rd/md4c/md4c.h b/modules/3rd/md4c/md4c.h new file mode 100644 index 0000000..8bba712 --- /dev/null +++ b/modules/3rd/md4c/md4c.h @@ -0,0 +1,397 @@ +/* + * MD4C: Markdown parser for C + * (http://github.com/mity/md4c) + * + * Copyright (c) 2016-2020 Martin Mitas + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef MD4C_H +#define MD4C_H + +#ifdef __cplusplus + extern "C" { +#endif + +#if defined MD4C_USE_UTF16 + /* Magic to support UTF-16. Note that in order to use it, you have to define + * the macro MD4C_USE_UTF16 both when building MD4C as well as when + * including this header in your code. */ + #ifdef _WIN32 + #include + typedef WCHAR MD_CHAR; + #else + #error MD4C_USE_UTF16 is only supported on Windows. + #endif +#else + typedef char MD_CHAR; +#endif + +typedef unsigned MD_SIZE; +typedef unsigned MD_OFFSET; + + +/* Block represents a part of document hierarchy structure like a paragraph + * or list item. + */ +typedef enum MD_BLOCKTYPE { + /* ... */ + MD_BLOCK_DOC = 0, + + /*
        ...
        */ + MD_BLOCK_QUOTE, + + /*
          ...
        + * Detail: Structure MD_BLOCK_UL_DETAIL. */ + MD_BLOCK_UL, + + /*
          ...
        + * Detail: Structure MD_BLOCK_OL_DETAIL. */ + MD_BLOCK_OL, + + /*
      • ...
      • + * Detail: Structure MD_BLOCK_LI_DETAIL. */ + MD_BLOCK_LI, + + /*
        */ + MD_BLOCK_HR, + + /*

        ...

        (for levels up to 6) + * Detail: Structure MD_BLOCK_H_DETAIL. */ + MD_BLOCK_H, + + /*
        ...
        + * Note the text lines within code blocks are terminated with '\n' + * instead of explicit MD_TEXT_BR. */ + MD_BLOCK_CODE, + + /* Raw HTML block. This itself does not correspond to any particular HTML + * tag. The contents of it _is_ raw HTML source intended to be put + * in verbatim form to the HTML output. */ + MD_BLOCK_HTML, + + /*

        ...

        */ + MD_BLOCK_P, + + /* ...
        and its contents. + * Detail: Structure MD_BLOCK_TD_DETAIL (used with MD_BLOCK_TH and MD_BLOCK_TD) + * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */ + MD_BLOCK_TABLE, + MD_BLOCK_THEAD, + MD_BLOCK_TBODY, + MD_BLOCK_TR, + MD_BLOCK_TH, + MD_BLOCK_TD +} MD_BLOCKTYPE; + +/* Span represents an in-line piece of a document which should be rendered with + * the same font, color and other attributes. A sequence of spans forms a block + * like paragraph or list item. */ +typedef enum MD_SPANTYPE { + /* ... */ + MD_SPAN_EM, + + /* ... */ + MD_SPAN_STRONG, + + /* ... + * Detail: Structure MD_SPAN_A_DETAIL. */ + MD_SPAN_A, + + /* ... + * Detail: Structure MD_SPAN_IMG_DETAIL. + * Note: Image text can contain nested spans and even nested images. + * If rendered into ALT attribute of HTML tag, it's responsibility + * of the parser to deal with it. + */ + MD_SPAN_IMG, + + /* ... */ + MD_SPAN_CODE, + + /* ... + * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled. + */ + MD_SPAN_DEL, + + /* For recognizing inline ($) and display ($$) equations + * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled. + */ + MD_SPAN_LATEXMATH, + MD_SPAN_LATEXMATH_DISPLAY, + + /* Wiki links + * Note: Recognized only when MD_FLAG_WIKILINKS is enabled. + */ + MD_SPAN_WIKILINK, + + /* ... + * Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */ + MD_SPAN_U +} MD_SPANTYPE; + +/* Text is the actual textual contents of span. */ +typedef enum MD_TEXTTYPE { + /* Normal text. */ + MD_TEXT_NORMAL = 0, + + /* NULL character. CommonMark requires replacing NULL character with + * the replacement char U+FFFD, so this allows caller to do that easily. */ + MD_TEXT_NULLCHAR, + + /* Line breaks. + * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE + * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */ + MD_TEXT_BR, /*
        (hard break) */ + MD_TEXT_SOFTBR, /* '\n' in source text where it is not semantically meaningful (soft break) */ + + /* Entity. + * (a) Named entity, e.g.   + * (Note MD4C does not have a list of known entities. + * Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is + * treated as a named entity.) + * (b) Numerical entity, e.g. Ӓ + * (c) Hexadecimal entity, e.g. ካ + * + * As MD4C is mostly encoding agnostic, application gets the verbatim + * entity text into the MD_PARSER::text_callback(). */ + MD_TEXT_ENTITY, + + /* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`). + * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and + * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this + * kind of text. */ + MD_TEXT_CODE, + + /* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not + * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used. + * The text contains verbatim '\n' for the new lines. */ + MD_TEXT_HTML, + + /* Text is inside an equation. This is processed the same way as inlined code + * spans (`code`). */ + MD_TEXT_LATEXMATH +} MD_TEXTTYPE; + + +/* Alignment enumeration. */ +typedef enum MD_ALIGN { + MD_ALIGN_DEFAULT = 0, /* When unspecified. */ + MD_ALIGN_LEFT, + MD_ALIGN_CENTER, + MD_ALIGN_RIGHT +} MD_ALIGN; + + +/* String attribute. + * + * This wraps strings which are outside of a normal text flow and which are + * propagated within various detailed structures, but which still may contain + * string portions of different types like e.g. entities. + * + * So, for example, lets consider this image: + * + * ![image alt text](http://example.org/image.png 'foo " bar') + * + * The image alt text is propagated as a normal text via the MD_PARSER::text() + * callback. However, the image title ('foo " bar') is propagated as + * MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title. + * + * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following: + * -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0) + * -- [1]: """ (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4) + * -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10) + * -- [3]: (n/a) (n/a ; substr_offsets[3] == 14) + * + * Note that these invariants are always guaranteed: + * -- substr_offsets[0] == 0 + * -- substr_offsets[LAST+1] == size + * -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR + * substrings can appear. This could change only of the specification + * changes. + */ +typedef struct MD_ATTRIBUTE { + const MD_CHAR* text; + MD_SIZE size; + const MD_TEXTTYPE* substr_types; + const MD_OFFSET* substr_offsets; +} MD_ATTRIBUTE; + + +/* Detailed info for MD_BLOCK_UL. */ +typedef struct MD_BLOCK_UL_DETAIL { + int is_tight; /* Non-zero if tight list, zero if loose. */ + MD_CHAR mark; /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */ +} MD_BLOCK_UL_DETAIL; + +/* Detailed info for MD_BLOCK_OL. */ +typedef struct MD_BLOCK_OL_DETAIL { + unsigned start; /* Start index of the ordered list. */ + int is_tight; /* Non-zero if tight list, zero if loose. */ + MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */ +} MD_BLOCK_OL_DETAIL; + +/* Detailed info for MD_BLOCK_LI. */ +typedef struct MD_BLOCK_LI_DETAIL { + int is_task; /* Can be non-zero only with MD_FLAG_TASKLISTS */ + MD_CHAR task_mark; /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */ + MD_OFFSET task_mark_offset; /* If is_task, then offset in the input of the char between '[' and ']'. */ +} MD_BLOCK_LI_DETAIL; + +/* Detailed info for MD_BLOCK_H. */ +typedef struct MD_BLOCK_H_DETAIL { + unsigned level; /* Header level (1 - 6) */ +} MD_BLOCK_H_DETAIL; + +/* Detailed info for MD_BLOCK_CODE. */ +typedef struct MD_BLOCK_CODE_DETAIL { + MD_ATTRIBUTE info; + MD_ATTRIBUTE lang; + MD_CHAR fence_char; /* The character used for fenced code block; or zero for indented code block. */ +} MD_BLOCK_CODE_DETAIL; + +/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */ +typedef struct MD_BLOCK_TD_DETAIL { + MD_ALIGN align; +} MD_BLOCK_TD_DETAIL; + +/* Detailed info for MD_SPAN_A. */ +typedef struct MD_SPAN_A_DETAIL { + MD_ATTRIBUTE href; + MD_ATTRIBUTE title; +} MD_SPAN_A_DETAIL; + +/* Detailed info for MD_SPAN_IMG. */ +typedef struct MD_SPAN_IMG_DETAIL { + MD_ATTRIBUTE src; + MD_ATTRIBUTE title; +} MD_SPAN_IMG_DETAIL; + +/* Detailed info for MD_SPAN_WIKILINK. */ +typedef struct MD_SPAN_WIKILINK { + MD_ATTRIBUTE target; +} MD_SPAN_WIKILINK_DETAIL; + +/* Flags specifying extensions/deviations from CommonMark specification. + * + * By default (when MD_PARSER::flags == 0), we follow CommonMark specification. + * The following flags may allow some extensions or deviations from it. + */ +#define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */ +#define MD_FLAG_PERMISSIVEATXHEADERS 0x0002 /* Do not require space in ATX headers ( ###header ) */ +#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x0004 /* Recognize URLs as autolinks even without '<', '>' */ +#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x0008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */ +#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x0010 /* Disable indented code blocks. (Only fenced code works.) */ +#define MD_FLAG_NOHTMLBLOCKS 0x0020 /* Disable raw HTML blocks. */ +#define MD_FLAG_NOHTMLSPANS 0x0040 /* Disable raw HTML (inline). */ +#define MD_FLAG_TABLES 0x0100 /* Enable tables extension. */ +#define MD_FLAG_STRIKETHROUGH 0x0200 /* Enable strikethrough extension. */ +#define MD_FLAG_PERMISSIVEWWWAUTOLINKS 0x0400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */ +#define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */ +#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */ +#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */ +#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */ + +#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS) +#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS) + +/* Convenient sets of flags corresponding to well-known Markdown dialects. + * + * Note we may only support subset of features of the referred dialect. + * The constant just enables those extensions which bring us as close as + * possible given what features we implement. + * + * ABI compatibility note: Meaning of these can change in time as new + * extensions, bringing the dialect closer to the original, are implemented. + */ +#define MD_DIALECT_COMMONMARK 0 +#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS) + +/* Parser structure. + */ +typedef struct MD_PARSER { + /* Reserved. Set to zero. + */ + unsigned abi_version; + + /* Dialect options. Bitmask of MD_FLAG_xxxx values. + */ + unsigned flags; + + /* Caller-provided rendering callbacks. + * + * For some block/span types, more detailed information is provided in a + * type-specific structure pointed by the argument 'detail'. + * + * The last argument of all callbacks, 'userdata', is just propagated from + * md_parse() and is available for any use by the application. + * + * Note any strings provided to the callbacks as their arguments or as + * members of any detail structure are generally not zero-terminated. + * Application has to take the respective size information into account. + * + * Any rendering callback may abort further parsing of the document by + * returning non-zero. + */ + int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/); + int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/); + + int (*enter_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/); + int (*leave_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/); + + int (*text)(MD_TEXTTYPE /*type*/, const MD_CHAR* /*text*/, MD_SIZE /*size*/, void* /*userdata*/); + + /* Debug callback. Optional (may be NULL). + * + * If provided and something goes wrong, this function gets called. + * This is intended for debugging and problem diagnosis for developers; + * it is not intended to provide any errors suitable for displaying to an + * end user. + */ + void (*debug_log)(const char* /*msg*/, void* /*userdata*/); + + /* Reserved. Set to NULL. + */ + void (*syntax)(void); +} MD_PARSER; + + +/* For backward compatibility. Do not use in new code. + */ +typedef MD_PARSER MD_RENDERER; + + +/* Parse the Markdown document stored in the string 'text' of size 'size'. + * The parser provides callbacks to be called during the parsing so the + * caller can render the document on the screen or convert the Markdown + * to another format. + * + * Zero is returned on success. If a runtime error occurs (e.g. a memory + * fails), -1 is returned. If the processing is aborted due any callback + * returning non-zero, the return value of the callback is returned. + */ +int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata); + + +#ifdef __cplusplus + } /* extern "C" { */ +#endif + +#endif /* MD4C_H */ diff --git a/modules/3rd/zip/main.c b/modules/3rd/zip/main.c new file mode 100644 index 0000000..6cd3112 --- /dev/null +++ b/modules/3rd/zip/main.c @@ -0,0 +1,50 @@ +#include +#include + +#include "zip.h" + +// callback function +int on_extract_entry(const char *filename, void *arg) { + static int i = 0; + int n = *(int *)arg; + printf("Extracted: %s (%d of %d)\n", filename, ++i, n); + + return 0; +} + +int main(int argc, const char** argv) { + /* + Create a new zip archive with default compression level (6) + */ + /* + struct zip_t *zip = zip_open("foo.zip", ZIP_DEFAULT_COMPRESSION_LEVEL, 0); + // we should check if zip is NULL + { + zip_entry_open(zip, "foo-1.txt"); + { + char *buf = "Some data here..."; + zip_entry_write(zip, buf, strlen(buf)); + } + zip_entry_close(zip); + + zip_entry_open(zip, "foo-2.txt"); + { + // merge 3 files into one entry and compress them on-the-fly. + zip_entry_fwrite(zip, "foo-2.1.txt"); + zip_entry_fwrite(zip, "foo-2.2.txt"); + zip_entry_fwrite(zip, "foo-2.3.txt"); + } + zip_entry_close(zip); + } + // always remember to close and release resources + zip_close(zip); + */ + + /* + Extract a zip archive into /tmp folder + */ + int arg = 5; + zip_extract(argv[1], "/Users/mrsang/Downloads/zip-master/src/tmp", on_extract_entry, &arg); + + return 0; +} \ No newline at end of file diff --git a/modules/3rd/zip/miniz.c b/modules/3rd/zip/miniz.c new file mode 100755 index 0000000..358143a --- /dev/null +++ b/modules/3rd/zip/miniz.c @@ -0,0 +1,4916 @@ +/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich , last updated Oct. 13, 2013 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). + + * Change History + 10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major release with Zip64 support (almost there!): + - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug (thanks kahmyong.moon@hp.com) which could cause locate files to not find files. This bug + would only have occured in earlier versions if you explicitly used this flag, OR if you used mz_zip_extract_archive_file_to_heap() or mz_zip_add_mem_to_archive_file_in_place() + (which used this flag). If you can't switch to v1.15 but want to fix this bug, just remove the uses of this flag from both helper funcs (and of course don't use the flag). + - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when pUser_read_buf is not NULL and compressed size is > uncompressed size + - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract compressed data from directory entries, to account for weird zipfiles which contain zero-size compressed data on dir entries. + Hopefully this fix won't cause any issues on weird zip archives, because it assumes the low 16-bits of zip external attributes are DOS attributes (which I believe they always are in practice). + - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the internal attributes, just the filename and external attributes + - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed + - Added cmake support for Linux builds which builds all the examples, tested with clang v3.3 and gcc v4.6. + - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti + - Merged MZ_FORCEINLINE fix from hdeanclark + - Fix include before config #ifdef, thanks emil.brink + - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping (super useful for OpenGL apps), and explicit control over the compression level (so you can + set it to 1 for real-time compression). + - Merged in some compiler fixes from paulharris's github repro. + - Retested this build under Windows (VS 2010, including static analysis), tcc 0.9.26, gcc v4.6 and clang v3.3. + - Added example6.c, which dumps an image of the mandelbrot set to a PNG file. + - Modified example2 to help test the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more. + - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix possible src file fclose() leak if alignment bytes+local header file write faiiled + - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): Was pushing the wrong central dir header offset, appears harmless in this release, but it became a problem in the zip64 branch + 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, #include (thanks fermtect). + 5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. + - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and re-ran a randomized regression test on ~500k files. + - Eliminated a bunch of warnings when compiling with GCC 32-bit/64. + - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze (static analysis) option and fixed all warnings (except for the silly + "Use of the comma-operator in a tested expression.." analysis warning, which I purposely use to work around a MSVC compiler warning). + - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and tested Linux executables. The codeblocks workspace is compatible with Linux+Win32/x64. + - Added miniz_tester solution/project, which is a useful little app derived from LZHAM's tester app that I use as part of the regression test. + - Ran miniz.c and tinfl.c through another series of regression testing on ~500,000 files and archives. + - Modified example5.c so it purposely disables a bunch of high-level functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the MINIZ_NO_STDIO bug report.) + - Fix ftell() usage in examples so they exit with an error on files which are too large (a limitation of the examples, not miniz itself). + 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple minor level_and_flags issues in the archive API's. + level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce Dawson for the feedback/bug report. + 5/28/11 v1.11 - Added statement from unlicense.org + 5/27/11 v1.10 - Substantial compressor optimizations: + - Level 1 is now ~4x faster than before. The L1 compressor's throughput now varies between 70-110MB/sec. on a + - Core i7 (actual throughput varies depending on the type of data, and x64 vs. x86). + - Improved baseline L2-L9 compression perf. Also, greatly improved compression perf. issues on some file types. + - Refactored the compression code for better readability and maintainability. + - Added level 10 compression level (L10 has slightly better ratio than level 9, but could have a potentially large + drop in throughput on some files). + 5/15/11 v1.09 - Initial stable release. + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function + coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. + Supports raw deflate streams or standard zlib streams with adler-32 checking. + + Limitations: + The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by + Alex Evans. Supports 1-4 bytes/pixel images. + + * ZIP archive API notes: + + The ZIP archive API's where designed with simplicity and efficiency in mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file information, read files from + existing archives, create new archives, append new files to existing archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), + or you can specify custom file read/write callbacks. + + - Archive reading: Just call this function to read a single file from a disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central + directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. + + - Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one example) can be used to identify + multiple versions of the same file in an archive. This function uses a simple linear search through the central + directory, so it's not very fast. + + Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The central directory image is written + all at once at the end of the archive file when the archive is finalized. + + The archive writer can optionally align each file's local header and file data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still + readable by any ZIP tool. + + - Archive appending: The simple way to add a single file to an archive is to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be appended to. + Note the appending is done in-place and is not an atomic operation, so if something goes wrong + during the operation it's possible the archive could be left without a central directory (although the local + file headers and file data will be fine, so the archive will be recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to + preserve into a new archive using using the mz_zip_writer_add_from_zip_reader() function (which compiles the + compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and + you're done. This is safe but requires a bunch of temporary disk space or heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an updated central directory to the + original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this method if anything goes wrong, though. + + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. + + * Important: For best perf. be sure to customize the below macros for your target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 + + * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz + uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files + (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). +*/ + +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +#include + +// Defines to completely disable specific portions of miniz.c: +// If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. + +// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. +//#define MINIZ_NO_STDIO + +// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or +// get/set file times, and the C run-time funcs that get/set times won't be called. +// The current downside is the times written to your archives will be from 1979. +//#define MINIZ_NO_TIME + +// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_APIS + +// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. +//#define MINIZ_NO_ZLIB_APIS + +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. +//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. +// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc +// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user +// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. +//#define MINIZ_NO_MALLOC + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) + // TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux + #define MINIZ_NO_TIME +#endif + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) + #include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------- zlib-style API Definitions. + +// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! +typedef unsigned long mz_ulong; + +// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +// Compression strategies. +enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; + +// Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + +// Heap allocation callbacks. +// Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +#define MZ_VERSION "9.1.15" +#define MZ_VERNUM 0x91F0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 15 +#define MZ_VER_SUBREVISION 0 + +// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). +enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; + +// Return status codes. MZ_PARAM_ERROR is non-standard. +enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; + +// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. +enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 }; + +// Window bits +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +// Compression/decompression stream struct. +typedef struct mz_stream_s +{ + const unsigned char *next_in; // pointer to next byte to read + unsigned int avail_in; // number of bytes available at next_in + mz_ulong total_in; // total number of bytes consumed so far + + unsigned char *next_out; // pointer to next byte to write + unsigned int avail_out; // number of bytes that can be written to next_out + mz_ulong total_out; // total number of bytes produced so far + + char *msg; // error msg (unused) + struct mz_internal_state *state; // internal state, allocated by zalloc/zfree + + mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc) + mz_free_func zfree; // optional heap free function (defaults to free) + void *opaque; // heap alloc function user pointer + + int data_type; // data_type (unused) + mz_ulong adler; // adler32 of the source or uncompressed data + mz_ulong reserved; // not used +} mz_stream; + +typedef mz_stream *mz_streamp; + +// Returns the version string of miniz.c. +const char *mz_version(void); + +// mz_deflateInit() initializes a compressor with default options: +// Parameters: +// pStream must point to an initialized mz_stream struct. +// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. +// level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. +// (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if the input parameters are bogus. +// MZ_MEM_ERROR on out of memory. +int mz_deflateInit(mz_streamp pStream, int level); + +// mz_deflateInit2() is like mz_deflate(), except with more control: +// Additional parameters: +// method must be MZ_DEFLATED +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) +// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + +// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). +int mz_deflateReset(mz_streamp pStream); + +// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. +// Return values: +// MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). +// MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) +int mz_deflate(mz_streamp pStream, int flush); + +// mz_deflateEnd() deinitializes a compressor: +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +int mz_deflateEnd(mz_streamp pStream); + +// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +// Single-call compression functions mz_compress() and mz_compress2(): +// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); + +// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). +mz_ulong mz_compressBound(mz_ulong source_len); + +// Initializes a decompressor. +int mz_inflateInit(mz_streamp pStream); + +// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. +// On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). +// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. +// Return values: +// MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. +// MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_DATA_ERROR if the deflate stream is invalid. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again +// with more input data, or with more room in the output buffer (except when using single call decompression, described above). +int mz_inflate(mz_streamp pStream, int flush); + +// Deinitializes a decompressor. +int mz_inflateEnd(mz_streamp pStream); + +// Single-call decompression. +// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + +// Returns a string description of the specified error code, or NULL if the error code is invalid. +const char *mz_error(int err); + +// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + typedef unsigned char Byte; + typedef unsigned int uInt; + typedef mz_ulong uLong; + typedef Byte Bytef; + typedef uInt uIntf; + typedef char charf; + typedef int intf; + typedef void *voidpf; + typedef uLong uLongf; + typedef void *voidp; + typedef void *const voidpc; + #define Z_NULL 0 + #define Z_NO_FLUSH MZ_NO_FLUSH + #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH + #define Z_SYNC_FLUSH MZ_SYNC_FLUSH + #define Z_FULL_FLUSH MZ_FULL_FLUSH + #define Z_FINISH MZ_FINISH + #define Z_BLOCK MZ_BLOCK + #define Z_OK MZ_OK + #define Z_STREAM_END MZ_STREAM_END + #define Z_NEED_DICT MZ_NEED_DICT + #define Z_ERRNO MZ_ERRNO + #define Z_STREAM_ERROR MZ_STREAM_ERROR + #define Z_DATA_ERROR MZ_DATA_ERROR + #define Z_MEM_ERROR MZ_MEM_ERROR + #define Z_BUF_ERROR MZ_BUF_ERROR + #define Z_VERSION_ERROR MZ_VERSION_ERROR + #define Z_PARAM_ERROR MZ_PARAM_ERROR + #define Z_NO_COMPRESSION MZ_NO_COMPRESSION + #define Z_BEST_SPEED MZ_BEST_SPEED + #define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION + #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION + #define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY + #define Z_FILTERED MZ_FILTERED + #define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY + #define Z_RLE MZ_RLE + #define Z_FIXED MZ_FIXED + #define Z_DEFLATED MZ_DEFLATED + #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS + #define alloc_func mz_alloc_func + #define free_func mz_free_func + #define internal_state mz_internal_state + #define z_stream mz_stream + #define deflateInit mz_deflateInit + #define deflateInit2 mz_deflateInit2 + #define deflateReset mz_deflateReset + #define deflate mz_deflate + #define deflateEnd mz_deflateEnd + #define deflateBound mz_deflateBound + #define compress mz_compress + #define compress2 mz_compress2 + #define compressBound mz_compressBound + #define inflateInit mz_inflateInit + #define inflateInit2 mz_inflateInit2 + #define inflate mz_inflate + #define inflateEnd mz_inflateEnd + #define uncompress mz_uncompress + #define crc32 mz_crc32 + #define adler32 mz_adler32 + #define MAX_WBITS 15 + #define MAX_MEM_LEVEL 9 + #define zError mz_error + #define ZLIB_VERSION MZ_VERSION + #define ZLIB_VERNUM MZ_VERNUM + #define ZLIB_VER_MAJOR MZ_VER_MAJOR + #define ZLIB_VER_MINOR MZ_VER_MINOR + #define ZLIB_VER_REVISION MZ_VER_REVISION + #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION + #define zlibVersion mz_version + #define zlib_version mz_version() +#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Types and macros + +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef long long mz_int64; +typedef unsigned long long mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message. +#ifdef _MSC_VER + #define MZ_MACRO_END while (0, 0) +#else + #define MZ_MACRO_END while (0) +#endif + +// ------------------- ZIP archive reading/writing + +#ifndef MINIZ_NO_ARCHIVE_APIS + +enum +{ + MZ_ZIP_MAX_IO_BUF_SIZE = 64*1024, + MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, + MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 +}; + +typedef struct +{ + mz_uint32 m_file_index; + mz_uint32 m_central_dir_ofs; + mz_uint16 m_version_made_by; + mz_uint16 m_version_needed; + mz_uint16 m_bit_flag; + mz_uint16 m_method; +#ifndef MINIZ_NO_TIME + time_t m_time; +#endif + mz_uint32 m_crc32; + mz_uint64 m_comp_size; + mz_uint64 m_uncomp_size; + mz_uint16 m_internal_attr; + mz_uint32 m_external_attr; + mz_uint64 m_local_header_ofs; + mz_uint32 m_comment_size; + char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; + char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; +} mz_zip_archive_file_stat; + +typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n); +typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n); + +struct mz_zip_internal_state_tag; +typedef struct mz_zip_internal_state_tag mz_zip_internal_state; + +typedef enum +{ + MZ_ZIP_MODE_INVALID = 0, + MZ_ZIP_MODE_READING = 1, + MZ_ZIP_MODE_WRITING = 2, + MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 +} mz_zip_mode; + +typedef struct mz_zip_archive_tag +{ + mz_uint64 m_archive_size; + mz_uint64 m_central_directory_file_ofs; + mz_uint m_total_files; + mz_zip_mode m_zip_mode; + + mz_uint m_file_offset_alignment; + + mz_alloc_func m_pAlloc; + mz_free_func m_pFree; + mz_realloc_func m_pRealloc; + void *m_pAlloc_opaque; + + mz_file_read_func m_pRead; + mz_file_write_func m_pWrite; + void *m_pIO_opaque; + + mz_zip_internal_state *m_pState; + +} mz_zip_archive; + +typedef enum +{ + MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, + MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, + MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 +} mz_zip_flags; + +// ZIP archive reading + +// Inits a ZIP archive reader. +// These functions read and validate the archive's central directory. +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags); +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags); +#endif + +// Returns the total number of files in the archive. +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); + +// Returns detailed information about an archive file entry. +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat); + +// Determines if an archive file entry is a directory entry. +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index); +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index); + +// Retrieves the filename of an archive file entry. +// Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size); + +// Attempts to locates a file in the archive's central directory. +// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH +// Returns -1 if the file cannot be found. +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + +// Extracts a archive file to a memory buffer using no memory allocation. +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); + +// Extracts a archive file to a memory buffer. +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags); + +// Extracts a archive file to a dynamically allocated heap buffer. +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags); +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags); + +// Extracts a archive file using a callback function to output the file's data. +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); + +#ifndef MINIZ_NO_STDIO +// Extracts a archive file to a disk file and sets its last accessed and modified times. +// This function only extracts files, not archive directory records. +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); +#endif + +// Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. +mz_bool mz_zip_reader_end(mz_zip_archive *pZip); + +// ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +// Inits a ZIP archive writer. +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); +#endif + +// Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. +// For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. +// For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). +// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. +// Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before +// the archive is finalized the file's central directory will be hosed. +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); + +// Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. +// To add a directory entry, call this method with an archive name ending in a forwardslash with empty buffer. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); + +#ifndef MINIZ_NO_STDIO +// Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); +#endif + +// Adds a file to an archive by fully cloning the data from another archive. +// This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields. +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index); + +// Finalizes the archive by writing the central directory records followed by the end of central directory record. +// After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). +// An archive must be manually finalized by calling this function for it to be valid. +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize); + +// Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. +// Note for the archive to be valid, it must have been finalized before ending. +mz_bool mz_zip_writer_end(mz_zip_archive *pZip); + +// Misc. high-level helper functions: + +// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + +// Reads a single file from an archive into a heap block. +// Returns NULL on failure. +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags); + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +// ------------------- Low-level Decompression API Definitions + +// Decompression flags used by tinfl_decompress(). +// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. +// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. +// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). +// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. +enum +{ + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +// High level decompression functions: +// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. +// On return: +// Function returns a pointer to the decompressed data, or NULL on failure. +// *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must call mz_free() on the returned block when it's no longer needed. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. +// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. +// Returns 1 on success or 0 on failure. +typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor; + +// Max size of LZ dictionary. +#define TINFL_LZ_DICT_SIZE 32768 + +// Return status. +typedef enum +{ + TINFL_STATUS_BAD_PARAM = -3, + TINFL_STATUS_ADLER32_MISMATCH = -2, + TINFL_STATUS_FAILED = -1, + TINFL_STATUS_DONE = 0, + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +// Initializes the decompressor to its initial state. +#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. +// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); + +// Internal/private bits follow. +enum +{ + TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS + #define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF + typedef mz_uint64 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (64) +#else + typedef mz_uint32 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag +{ + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +// ------------------- Low-level Compression API Definitions + +// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). +#define TDEFL_LESS_MEMORY 0 + +// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): +// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). +enum +{ + TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. +// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). +// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. +// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). +// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) +// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. +// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. +// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. +// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). +enum +{ + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +// High level compression functions: +// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of source block to compress. +// flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must free() the returned block when it's no longer needed. +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. +// Returns 0 on failure. +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// Compresses an image to a compressed PNG file in memory. +// On entry: +// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. +// The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. +// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL +// If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pLen_out will be set to the size of the PNG image file. +// The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); + +// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); + +// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 }; + +// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). +#if TDEFL_LESS_MEMORY +enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#else +enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#endif + +// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. +typedef enum +{ + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1, +} tdefl_status; + +// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums +typedef enum +{ + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +// tdefl's compression state structure. +typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +// Initializes the compressor. +// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. +// pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. +// If pBut_buf_func is NULL the user should always call the tdefl_compress() API. +// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); + +// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. +// tdefl_compress_buffer() always consumes the entire input buffer. +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros. +#ifndef MINIZ_NO_ZLIB_APIS +// Create tdefl_compress() flags given zlib-style compression parameters. +// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) +// window_bits may be -15 (raw deflate) or 15 (zlib) +// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); +#endif // #ifndef MINIZ_NO_ZLIB_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_INCLUDED + +// ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.) + +#ifndef MINIZ_HEADER_FILE_ONLY + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ? 1 : -1]; + +#include +#include + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC + #define MZ_MALLOC(x) NULL + #define MZ_FREE(x) (void)x, ((void)0) + #define MZ_REALLOC(p, x) NULL +#else + #define MZ_MALLOC(x) malloc(x) + #define MZ_FREE(x) free(x) + #define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a,b) (((a)>(b))?(a):(b)) +#define MZ_MIN(a,b) (((a)<(b))?(a):(b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + #define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) + #define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else + #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) + #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#ifdef _MSC_VER + #define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) + #define MZ_FORCEINLINE inline __attribute__((__always_inline__)) +#else + #define MZ_FORCEINLINE inline +#endif + +#ifdef __cplusplus + extern "C" { +#endif + +// ------------------- zlib-style API's + +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) +{ + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; + if (!ptr) return MZ_ADLER32_INIT; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + return (s2 << 16) + s1; +} + +// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) +{ + static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) return MZ_CRC32_INIT; + crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } + return ~crcu32; +} + +void mz_free(void *p) +{ + MZ_FREE(p); +} + +#ifndef MINIZ_NO_ZLIB_APIS + +static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } +static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); } +static void *def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } + +const char *mz_version(void) +{ + return MZ_VERSION; +} + +int mz_deflateInit(mz_streamp pStream, int level) +{ + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); +} + +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) +{ + tdefl_compressor *pComp; + mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) + { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; +} + +int mz_deflateReset(mz_streamp pStream) +{ + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) +{ + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR; + if (!pStream->avail_out) return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; + for ( ; ; ) + { + tdefl_status defl_status; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) + { + mz_status = MZ_STREAM_ERROR; + break; + } + else if (defl_status == TDEFL_STATUS_DONE) + { + mz_status = MZ_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; // Can't make forward progress without some input. + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) +{ + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) +{ + (void)pStream; + // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) + return MZ_MAX(128 + (source_len * 110) / 100, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) +{ + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) +{ + return mz_deflateBound(NULL, source_len); +} + +typedef struct +{ + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) +{ + inflate_state *pDecomp; + if (!pStream) return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); + if (!pDecomp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) +{ + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflate(mz_streamp pStream, int flush) +{ + inflate_state* pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + + pState = (inflate_state*)pStream->state; + if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; pState->m_first_call = 0; + if (pState->m_last_status < 0) return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) + { + // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. + decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) + { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + // flush != MZ_FINISH then we must assume there's more input. + if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) + { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK; + } + + for ( ; ; ) + { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. + else if (flush == MZ_FINISH) + { + // The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } + else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK; +} + +int mz_inflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) +{ + static struct { int m_err; const char *m_pDesc; } s_error_descs[] = + { + { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, + { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif //MINIZ_NO_ZLIB_APIS + +// ------------------- Low-level Decompression (completely independent from all compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN switch(r->m_state) { case 0: +#define TINFL_CR_RETURN(state_index, result) do { status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END +#define TINFL_CR_FINISH } + +// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never +// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario. +#define TINFL_GET_BYTE(state_index, c) do { \ + if (pIn_buf_cur >= pIn_buf_end) { \ + for ( ; ; ) { \ + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ + TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ + if (pIn_buf_cur < pIn_buf_end) { \ + c = *pIn_buf_cur++; \ + break; \ + } \ + } else { \ + c = 0; \ + break; \ + } \ + } \ + } else c = *pIn_buf_cur++; } MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END + +// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. +// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a +// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the +// bit buffer contains >=15 bits (deflate's max. Huffman code size). +#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ + do { \ + temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ + if (temp >= 0) { \ + code_len = temp >> 9; \ + if ((code_len) && (num_bits >= code_len)) \ + break; \ + } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \ + } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \ + } while (num_bits < 15); + +// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read +// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully +// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. +// The slow path is only executed at the very end of the input buffer. +#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \ + int temp; mz_uint code_len, c; \ + if (num_bits < 15) { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } else { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ + code_len = temp >> 9, temp &= 511; \ + else { \ + code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \ + } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) +{ + static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; + static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + static const int s_min_table_sizes[3] = { 257, 1, 4 }; + + tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; + + // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). + if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; } + + num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1U << (8U + (r->m_zhdr0 >> 4))))); + if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); } + } + + do + { + TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; + if (r->m_type == 0) + { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); } + while ((counter) && (num_bits)) + { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while (counter) + { + size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); } + while (pIn_buf_cur >= pIn_buf_end) + { + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) + { + TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); + } + else + { + TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); + } + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; + } + } + else if (r->m_type == 3) + { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } + else + { + if (r->m_type == 1) + { + mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i; + r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for ( i = 0; i <= 143; ++i) *p++ = 8; for ( ; i <= 255; ++i) *p++ = 9; for ( ; i <= 279; ++i) *p++ = 7; for ( ; i <= 287; ++i) *p++ = 8; + } + else + { + for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; } + r->m_table_sizes[2] = 19; + } + for ( ; (int)r->m_type >= 0; r->m_type--) + { + int tree_next, tree_cur; tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); } + if ((65536 != total) && (used_syms > 1)) + { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) + { + mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue; + cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; } + if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) + { + for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); ) + { + mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; } + if ((dist == 16) && (!counter)) + { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) + { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + } + } + for ( ; ; ) + { + mz_uint8 *pSrc; + for ( ; ; ) + { + if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) + { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) + break; + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)counter; + } + else + { + int sym2; mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; } +#else + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + counter = sym2; bit_buf >>= code_len; num_bits -= code_len; + if (counter & 256) + break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + bit_buf >>= code_len; num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) + { + pOut_buf_cur++; + counter = sym2; + break; + } + pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) break; + + num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; dist = s_dist_base[dist]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) + { + while (counter--) + { + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) + { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do + { + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) + { + if (counter) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + do + { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; pSrc += 3; + } while ((int)(counter -= 3) > 2); + if ((int)counter > 0) + { + pOut_buf_cur[0] = pSrc[0]; + if ((int)counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + TINFL_CR_FINISH + +common_exit: + r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) + { + const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +// Higher level helper functions. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0; + *pOut_len = 0; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + src_buf_ofs += src_buf_size; + *pOut_len += dst_buf_size; + if (status == TINFL_STATUS_DONE) break; + new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; + pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); + if (!pNew_buf) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity; + } + return pBuf; +} + +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp); + status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; +} + +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + int result = 0; + tinfl_decompressor decomp; + mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0; + if (!pDict) + return TINFL_STATUS_FAILED; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, + (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); + in_buf_ofs += in_buf_size; + if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) + break; + if (status != TINFL_STATUS_HAS_MORE_OUTPUT) + { + result = (status == TINFL_STATUS_DONE); + break; + } + dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); + } + MZ_FREE(pDict); + *pIn_buf_size = in_buf_ofs; + return result; +} + +// ------------------- Low-level Compression (independent from all decompression API's) + +// Purposely making these tables static for faster init and thread safety. +static const mz_uint16 s_tdefl_len_sym[256] = { + 257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272, + 273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276, + 277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278, + 279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280, + 281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281, + 282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282, + 283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283, + 284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 }; + +static const mz_uint8 s_tdefl_len_extra[256] = { + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 }; + +static const mz_uint8 s_tdefl_small_dist_sym[512] = { + 0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11, + 11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14, + 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14, + 14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 }; + +static const mz_uint8 s_tdefl_small_dist_extra[512] = { + 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7 }; + +static const mz_uint8 s_tdefl_large_dist_sym[128] = { + 0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26, + 26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28, + 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 }; + +static const mz_uint8 s_tdefl_large_dist_extra[128] = { + 0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 }; + +// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values. +typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq; +static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1) +{ + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const mz_uint32* pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } + for (i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + { tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; } + } + return pCur_syms; +} + +// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. +static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) +{ + int root, leaf, next, avbl, used, dpth; + if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } + A[0].m_key += A[1].m_key; root = 0; leaf = 2; + for (next=1; next < n-1; next++) + { + if (leaf>=n || A[root].m_key=n || (root=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) + { + while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } + while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; } + avbl = 2*used; dpth++; used = 0; + } +} + +// Limits canonical Huffman code table's max code size. +enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; +static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) +{ + int i; mz_uint32 total = 0; if (code_list_len <= 1) return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } + total--; + } +} + +static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) +{ + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes); + if (static_table) + { + for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++; + } + else + { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) + { + mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; + code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } +} + +#define TDEFL_PUT_BITS(b, l) do { \ + mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \ + d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \ + while (d->m_bits_in >= 8) { \ + if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ + *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ + d->m_bit_buffer >>= 8; \ + d->m_bits_in -= 8; \ + } \ +} MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \ + if (rle_repeat_count < 3) { \ + d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ + while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ + } else { \ + d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ +} rle_repeat_count = 0; } } + +#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \ + if (rle_z_count < 3) { \ + d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ + } else if (rle_z_count <= 10) { \ + d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ + } else { \ + d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ +} rle_z_count = 0; } } + +static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static void tdefl_start_dynamic_block(tdefl_compressor *d) +{ + int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; + mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) + { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; + } + else if (++rle_repeat_count == 6) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) { TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; ) + { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); + } +} + +static void tdefl_start_static_block(tdefl_compressor *d) +{ + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) *p++ = 8; + for ( ; i <= 255; ++i) *p++ = 9; + for ( ; i <= 279; ++i) *p++ = 7; + for ( ; i <= 287; ++i) *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); +} + +static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + + if (flags & 1) + { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + // This sequence coaxes MSVC into using cmov's vs. jmp's. + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) + return MZ_FALSE; + + *(mz_uint64*)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) + { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + if (flags & 1) + { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + if (match_dist < 512) + { + sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } + else + { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) +{ + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) +{ + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) + { + TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); + + // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. + if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) ) + { + mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) + { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) + { + TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); + } + } + // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. + else if (!comp_block_succeeded) + { + d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) + { + if (flush == TDEFL_FINISH) + { + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } + } + else + { + mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) + { + if (d->m_pPut_buf_func) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } + else if (pOutput_buf_start == d->m_output_buf) + { + int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) + { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } + else + { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p) +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; q = (const mz_uint16*)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; p = s; probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + if (!probe_len) + { + *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break; + } + else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; + if (probe_len > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +static mz_bool tdefl_compress_fast(tdefl_compressor *d) +{ + // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. + mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) +{ + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) +{ + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) +{ + const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) + { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. + if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) + { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) + { + mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++; + } + } + else + { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) + { + mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + break; + + // Simple lazy/greedy parsing state machine. + len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) + { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) + { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1; + } + } + else + { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) + { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) + { + if (cur_match_len > d->m_saved_match_len) + { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + } + else + { + tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0; + } + } + else if (!cur_match_dist) + tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + // Move the lookahead forward by len_to_move bytes. + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE); + // Check if it's time to flush the current LZ codes to the internal output buffer. + if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) ) + { + int n; + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) +{ + if (d->m_pIn_buf_size) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) + { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) +{ + if (!d) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) ) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) + { + if (!tdefl_compress_fast(d)) + return d->m_prev_return_status; + } + else +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + { + if (!tdefl_compress_normal(d)) + return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) + d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) + { + if (tdefl_flush_block(d, flush) < 0) + return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) +{ + MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; + d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) +{ + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) +{ + return d->m_adler32; +} + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; + pComp = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); + succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); + MZ_FREE(pComp); return succeeded; +} + +typedef struct +{ + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) +{ + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) + { + size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE; + do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); + pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; + p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; + } + memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; + *pOut_len = out_buf.m_size; return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) return 0; + out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; + return out_buf.m_size; +} + +#ifndef MINIZ_NO_ZLIB_APIS +static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + +// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) +{ + mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} +#endif //MINIZ_NO_ZLIB_APIS + +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) +#endif + +// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at +// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. +// This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) +{ + // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. + static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; + if (!pComp) return NULL; + MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } + // write dummy header + for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); + // compress image data + tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); + for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + // write real header + *pLen_out = out_buf.m_size-41; + { + static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; + mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52, + 0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0, + (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54}; + c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + // write footer (IDAT CRC-32, followed by IEND chunk) + if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4, *pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24); + // compute final size of file, grab compressed data buffer and return + *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; +} +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) +{ + // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); +} + +#ifdef _MSC_VER +#pragma warning (pop) +#endif + +// ------------------- .ZIP archive reading + +#ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef MINIZ_NO_STDIO + #define MZ_FILE void * +#else + #include + #include + + #if defined(_MSC_VER) || defined(__MINGW64__) + static FILE *mz_fopen(const char *pFilename, const char *pMode) + { + FILE* pFile = NULL; + fopen_s(&pFile, pFilename, pMode); + return pFile; + } + static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) + { + FILE* pFile = NULL; + if (freopen_s(&pFile, pPath, pMode, pStream)) + return NULL; + return pFile; + } + #ifndef MINIZ_NO_TIME + #include + #endif + #define MZ_FILE FILE + #define MZ_FOPEN mz_fopen + #define MZ_FCLOSE fclose + #define MZ_FREAD fread + #define MZ_FWRITE fwrite + #define MZ_FTELL64 _ftelli64 + #define MZ_FSEEK64 _fseeki64 + #define MZ_FILE_STAT_STRUCT _stat + #define MZ_FILE_STAT _stat + #define MZ_FFLUSH fflush + #define MZ_FREOPEN mz_freopen + #define MZ_DELETE_FILE remove + #elif defined(__MINGW32__) + #ifndef MINIZ_NO_TIME + #include + #endif + #define MZ_FILE FILE + #define MZ_FOPEN(f, m) fopen(f, m) + #define MZ_FCLOSE fclose + #define MZ_FREAD fread + #define MZ_FWRITE fwrite + #define MZ_FTELL64 ftello64 + #define MZ_FSEEK64 fseeko64 + #define MZ_FILE_STAT_STRUCT _stat + #define MZ_FILE_STAT _stat + #define MZ_FFLUSH fflush + #define MZ_FREOPEN(f, m, s) freopen(f, m, s) + #define MZ_DELETE_FILE remove + #elif defined(__TINYC__) + #ifndef MINIZ_NO_TIME + #include + #endif + #define MZ_FILE FILE + #define MZ_FOPEN(f, m) fopen(f, m) + #define MZ_FCLOSE fclose + #define MZ_FREAD fread + #define MZ_FWRITE fwrite + #define MZ_FTELL64 ftell + #define MZ_FSEEK64 fseek + #define MZ_FILE_STAT_STRUCT stat + #define MZ_FILE_STAT stat + #define MZ_FFLUSH fflush + #define MZ_FREOPEN(f, m, s) freopen(f, m, s) + #define MZ_DELETE_FILE remove + #elif defined(__GNUC__) && _LARGEFILE64_SOURCE + #ifndef MINIZ_NO_TIME + #include + #endif + #define MZ_FILE FILE + #define MZ_FOPEN(f, m) fopen64(f, m) + #define MZ_FCLOSE fclose + #define MZ_FREAD fread + #define MZ_FWRITE fwrite + #define MZ_FTELL64 ftello64 + #define MZ_FSEEK64 fseeko64 + #define MZ_FILE_STAT_STRUCT stat64 + #define MZ_FILE_STAT stat64 + #define MZ_FFLUSH fflush + #define MZ_FREOPEN(p, m, s) freopen64(p, m, s) + #define MZ_DELETE_FILE remove + #else + #ifndef MINIZ_NO_TIME + #include + #endif + #define MZ_FILE FILE + #define MZ_FOPEN(f, m) fopen(f, m) + #define MZ_FCLOSE fclose + #define MZ_FREAD fread + #define MZ_FWRITE fwrite + #define MZ_FTELL64 ftello + #define MZ_FSEEK64 fseeko + #define MZ_FILE_STAT_STRUCT stat + #define MZ_FILE_STAT stat + #define MZ_FFLUSH fflush + #define MZ_FREOPEN(f, m, s) freopen(f, m, s) + #define MZ_DELETE_FILE remove + #endif // #ifdef _MSC_VER +#endif // #ifdef MINIZ_NO_STDIO + +#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) + +// Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff. +enum +{ + // ZIP archive identifiers and record sizes + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, + // Central directory header record offsets + MZ_ZIP_CDH_SIG_OFS = 0, MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, MZ_ZIP_CDH_BIT_FLAG_OFS = 8, + MZ_ZIP_CDH_METHOD_OFS = 10, MZ_ZIP_CDH_FILE_TIME_OFS = 12, MZ_ZIP_CDH_FILE_DATE_OFS = 14, MZ_ZIP_CDH_CRC32_OFS = 16, + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, + MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, MZ_ZIP_CDH_DISK_START_OFS = 34, MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, + // Local directory header offsets + MZ_ZIP_LDH_SIG_OFS = 0, MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, MZ_ZIP_LDH_BIT_FLAG_OFS = 6, MZ_ZIP_LDH_METHOD_OFS = 8, MZ_ZIP_LDH_FILE_TIME_OFS = 10, + MZ_ZIP_LDH_FILE_DATE_OFS = 12, MZ_ZIP_LDH_CRC32_OFS = 14, MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, + MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, + // End of central directory offsets + MZ_ZIP_ECDH_SIG_OFS = 0, MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, +}; + +typedef struct +{ + void *m_p; + size_t m_size, m_capacity; + mz_uint m_element_size; +} mz_zip_array; + +struct mz_zip_internal_state_tag +{ + mz_zip_array m_central_dir; + mz_zip_array m_central_dir_offsets; + mz_zip_array m_sorted_central_dir_offsets; + MZ_FILE *m_pFile; + void *m_pMem; + size_t m_mem_size; + size_t m_mem_capacity; +}; + +#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size +#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[index] + +static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, mz_zip_array *pArray) +{ + pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); + memset(pArray, 0, sizeof(mz_zip_array)); +} + +static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, mz_zip_array *pArray, size_t min_new_capacity, mz_uint growing) +{ + void *pNew_p; size_t new_capacity = min_new_capacity; MZ_ASSERT(pArray->m_element_size); if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; + if (growing) { new_capacity = MZ_MAX(1, pArray->m_capacity); while (new_capacity < min_new_capacity) new_capacity *= 2; } + if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, new_capacity))) return MZ_FALSE; + pArray->m_p = pNew_p; pArray->m_capacity = new_capacity; + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_capacity, mz_uint growing) +{ + if (new_capacity > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) return MZ_FALSE; } + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_size, mz_uint growing) +{ + if (new_size > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) return MZ_FALSE; } + pArray->m_size = new_size; + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, mz_zip_array *pArray, size_t n) +{ + return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, mz_zip_array *pArray, const void *pElements, size_t n) +{ + size_t orig_size = pArray->m_size; if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) return MZ_FALSE; + memcpy((mz_uint8*)pArray->m_p + orig_size * pArray->m_element_size, pElements, n * pArray->m_element_size); + return MZ_TRUE; +} + +#ifndef MINIZ_NO_TIME +static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) +{ + struct tm tm; + memset(&tm, 0, sizeof(tm)); tm.tm_isdst = -1; + tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; tm.tm_mon = ((dos_date >> 5) & 15) - 1; tm.tm_mday = dos_date & 31; + tm.tm_hour = (dos_time >> 11) & 31; tm.tm_min = (dos_time >> 5) & 63; tm.tm_sec = (dos_time << 1) & 62; + return mktime(&tm); +} + +static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) +{ +#ifdef _MSC_VER + struct tm tm_struct; + struct tm *tm = &tm_struct; + errno_t err = localtime_s(tm, &time); + if (err) + { + *pDOS_date = 0; *pDOS_time = 0; + return; + } +#else + struct tm *tm = localtime(&time); +#endif + *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); + *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); +} +#endif + +#ifndef MINIZ_NO_STDIO +static mz_bool mz_zip_get_file_modified_time(const char *pFilename, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) +{ +#ifdef MINIZ_NO_TIME + (void)pFilename; *pDOS_date = *pDOS_time = 0; +#else + struct MZ_FILE_STAT_STRUCT file_stat; + // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000 bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. + if (MZ_FILE_STAT(pFilename, &file_stat) != 0) + return MZ_FALSE; + mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date); +#endif // #ifdef MINIZ_NO_TIME + return MZ_TRUE; +} + +#ifndef MINIZ_NO_TIME +static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, time_t modified_time) +{ + struct utimbuf t; t.actime = access_time; t.modtime = modified_time; + return !utime(pFilename, &t); +} +#endif // #ifndef MINIZ_NO_TIME +#endif // #ifndef MINIZ_NO_STDIO + +static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint32 flags) +{ + (void)flags; + if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_READING; + pZip->m_archive_size = 0; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index) +{ + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; + const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index)); + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) + { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) + break; + pL++; pR++; + } + return (pL == pE) ? (l_len < r_len) : (l < r); +} + +#define MZ_SWAP_UINT32(a, b) do { mz_uint32 t = a; a = b; b = t; } MZ_MACRO_END + +// Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.) +static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + int start = (size - 2) >> 1, end; + while (start >= 0) + { + int child, root = start; + for ( ; ; ) + { + if ((child = (root << 1) + 1) >= size) + break; + child += (((child + 1) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1]))); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; + } + start--; + } + + end = size - 1; + while (end > 0) + { + int child, root = 0; + MZ_SWAP_UINT32(pIndices[end], pIndices[0]); + for ( ; ; ) + { + if ((child = (root << 1) + 1) >= end) + break; + child += (((child + 1) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1])); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; + } + end--; + } +} + +static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, mz_uint32 flags) +{ + mz_uint cdir_size, num_this_disk, cdir_disk_index; + mz_uint64 cdir_ofs; + mz_int64 cur_file_ofs; + const mz_uint8 *p; + mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; mz_uint8 *pBuf = (mz_uint8 *)buf_u32; + mz_bool sort_central_dir = ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); + // Basic sanity checks - reject files which are too small, and check the first 4 bytes of the file to make sure a local header is there. + if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + // Find the end of central directory record by scanning the file from the end towards the beginning. + cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); + for ( ; ; ) + { + int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) + return MZ_FALSE; + for (i = n - 4; i >= 0; --i) + if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) + break; + if (i >= 0) + { + cur_file_ofs += i; + break; + } + if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) + return MZ_FALSE; + cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); + } + // Read and verify the end of central directory record. + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) || + ((pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) + return MZ_FALSE; + + num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); + cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); + if (((num_this_disk | cdir_disk_index) != 0) && ((num_this_disk != 1) || (cdir_disk_index != 1))) + return MZ_FALSE; + + if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + + cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); + if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) + return MZ_FALSE; + + pZip->m_central_directory_file_ofs = cdir_ofs; + + if (pZip->m_total_files) + { + mz_uint i, n; + + // Read the entire central directory into a heap block, and allocate another heap block to hold the unsorted central dir file record offsets, and another to hold the sorted indices. + if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) || + (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, pZip->m_total_files, MZ_FALSE))) + return MZ_FALSE; + + if (sort_central_dir) + { + if (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, pZip->m_total_files, MZ_FALSE)) + return MZ_FALSE; + } + + if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, cdir_size) != cdir_size) + return MZ_FALSE; + + // Now create an index into the central directory file records, do some basic sanity checking on each record, and check for zip64 entries (which are not yet supported). + p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; + for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) + { + mz_uint total_header_size, comp_size, decomp_size, disk_index; + if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) + return MZ_FALSE; + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, i) = (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); + if (sort_central_dir) + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = i; + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) || (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || (comp_size == 0xFFFFFFFF)) + return MZ_FALSE; + disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); + if ((disk_index != num_this_disk) && (disk_index != 1)) + return MZ_FALSE; + if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) + return MZ_FALSE; + if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > n) + return MZ_FALSE; + n -= total_header_size; p += total_header_size; + } + } + + if (sort_central_dir) + mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); + + return MZ_TRUE; +} + +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags) +{ + if ((!pZip) || (!pZip->m_pRead)) + return MZ_FALSE; + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + pZip->m_archive_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} + +static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + size_t s = (file_ofs >= pZip->m_archive_size) ? 0 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); + memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); + return s; +} + +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags) +{ + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + pZip->m_archive_size = size; + pZip->m_pRead = mz_zip_mem_read_func; + pZip->m_pIO_opaque = pZip; +#ifdef __cplusplus + pZip->m_pState->m_pMem = const_cast(pMem); +#else + pZip->m_pState->m_pMem = (void *)pMem; +#endif + pZip->m_pState->m_mem_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags) +{ + mz_uint64 file_size; + MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb"); + if (!pFile) + return MZ_FALSE; + if (MZ_FSEEK64(pFile, 0, SEEK_END)) + { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + file_size = MZ_FTELL64(pFile); + if (!mz_zip_reader_init_internal(pZip, flags)) + { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + pZip->m_pRead = mz_zip_file_read_func; + pZip->m_pIO_opaque = pZip; + pZip->m_pState->m_pFile = pFile; + pZip->m_archive_size = file_size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) +{ + return pZip ? pZip->m_total_files : 0; +} + +static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh(mz_zip_archive *pZip, mz_uint file_index) +{ + if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return NULL; + return &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); +} + +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index) +{ + mz_uint m_bit_flag; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) + return MZ_FALSE; + m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + return (m_bit_flag & 1); +} + +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index) +{ + mz_uint filename_len, external_attr; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) + return MZ_FALSE; + + // First see if the filename ends with a '/' character. + filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_len) + { + if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') + return MZ_TRUE; + } + + // Bugfix: This code was also checking if the internal attribute was non-zero, which wasn't correct. + // Most/all zip writers (hopefully) set DOS file/directory attributes in the low 16-bits, so check for the DOS directory flag and ignore the source OS ID in the created by field. + // FIXME: Remove this check? Is it necessary - we already check the filename. + external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + if ((external_attr & 0x10) != 0) + return MZ_TRUE; + + return MZ_FALSE; +} + +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat) +{ + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if ((!p) || (!pStat)) + return MZ_FALSE; + + // Unpack the central directory record. + pStat->m_file_index = file_index; + pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); + pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); + pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); + pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); +#ifndef MINIZ_NO_TIME + pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); +#endif + pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); + pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); + pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + + // Copy as much of the filename and comment as possible. + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); + memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); pStat->m_filename[n] = '\0'; + + n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); + pStat->m_comment_size = n; + memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), n); pStat->m_comment[n] = '\0'; + + return MZ_TRUE; +} + +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size) +{ + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) { if (filename_buf_size) pFilename[0] = '\0'; return 0; } + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_buf_size) + { + n = MZ_MIN(n, filename_buf_size - 1); + memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); + pFilename[n] = '\0'; + } + return n + 1; +} + +static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, const char *pB, mz_uint len, mz_uint flags) +{ + mz_uint i; + if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) + return 0 == memcmp(pA, pB, len); + for (i = 0; i < len; ++i) + if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) + return MZ_FALSE; + return MZ_TRUE; +} + +static MZ_FORCEINLINE int mz_zip_reader_filename_compare(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, mz_uint r_len) +{ + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) + { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) + break; + pL++; pR++; + } + return (pL == pE) ? (int)(l_len - r_len) : (l - r); +} + +static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, const char *pFilename) +{ + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + const mz_uint filename_len = (mz_uint)strlen(pFilename); + int l = 0, h = size - 1; + while (l <= h) + { + int m = (l + h) >> 1, file_index = pIndices[m], comp = mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, file_index, pFilename, filename_len); + if (!comp) + return file_index; + else if (comp < 0) + l = m + 1; + else + h = m - 1; + } + return -1; +} + +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags) +{ + mz_uint file_index; size_t name_len, comment_len; + if ((!pZip) || (!pZip->m_pState) || (!pName) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return -1; + if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size)) + return mz_zip_reader_locate_file_binary_search(pZip, pName); + name_len = strlen(pName); if (name_len > 0xFFFF) return -1; + comment_len = pComment ? strlen(pComment) : 0; if (comment_len > 0xFFFF) return -1; + for (file_index = 0; file_index < pZip->m_total_files; file_index++) + { + const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); + mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); + const char *pFilename = (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + if (filename_len < name_len) + continue; + if (comment_len) + { + mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), file_comment_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); + const char *pFile_comment = pFilename + filename_len + file_extra_len; + if ((file_comment_len != comment_len) || (!mz_zip_reader_string_equal(pComment, pFile_comment, file_comment_len, flags))) + continue; + } + if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) + { + int ofs = filename_len - 1; + do + { + if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || (pFilename[ofs] == ':')) + break; + } while (--ofs >= 0); + ofs++; + pFilename += ofs; filename_len -= ofs; + } + if ((filename_len == name_len) && (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags))) + return file_index; + } + return -1; +} + +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) +{ + int status = TINFL_STATUS_DONE; + mz_uint64 needed_size, cur_file_ofs, comp_remaining, out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; + mz_zip_archive_file_stat file_stat; + void *pRead_buf; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + tinfl_decompressor inflator; + + if ((buf_size) && (!pBuf)) + return MZ_FALSE; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + // Empty file, or a directory (but not always a directory - I've seen odd zips with directories that have compressed data which inflates to 0 bytes) + if (!file_stat.m_comp_size) + return MZ_TRUE; + + // Entry is a subdirectory (I've seen old zips with dir entries which have compressed deflate data which inflates to 0 bytes, but these entries claim to uncompress to 512 bytes in the headers). + // I'm torn how to handle this case - should it fail instead? + if (mz_zip_reader_is_file_a_directory(pZip, file_index)) + return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) + return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Ensure supplied output buffer is large enough. + needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size; + if (buf_size < needed_size) + return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) + { + // The file is stored or the caller has requested the compressed data. + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t)needed_size) != needed_size) + return MZ_FALSE; + return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32); + } + + // Decompress the file either directly from memory or from a file input buffer. + tinfl_init(&inflator); + + if (pZip->m_pState->m_pMem) + { + // Read directly from the archive in memory. + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } + else if (pUser_read_buf) + { + // Use a user provided read buffer. + if (!user_read_buf_size) + return MZ_FALSE; + pRead_buf = (mz_uint8 *)pUser_read_buf; + read_buf_size = user_read_buf_size; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + else + { + // Temporarily allocate a read buffer. + read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE); +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) +#endif + return MZ_FALSE; + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + do + { + size_t in_buf_size, out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress(&inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + out_buf_ofs += out_buf_size; + } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); + + if (status == TINFL_STATUS_DONE) + { + // Make sure the entire file was decompressed, and check its CRC. + if ((out_buf_ofs != file_stat.m_uncomp_size) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) +{ + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) + return MZ_FALSE; + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size); +} + +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags) +{ + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, NULL, 0); +} + +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags) +{ + return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, buf_size, flags, NULL, 0); +} + +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags) +{ + mz_uint64 comp_size, uncomp_size, alloc_size; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + void *pBuf; + + if (pSize) + *pSize = 0; + if (!p) + return NULL; + + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + + alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#endif + return NULL; + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) + return NULL; + + if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, flags)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return NULL; + } + + if (pSize) *pSize = (size_t)alloc_size; + return pBuf; +} + +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags) +{ + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) + { + if (pSize) *pSize = 0; + return MZ_FALSE; + } + return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); +} + +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags) +{ + int status = TINFL_STATUS_DONE; mz_uint file_crc32 = MZ_CRC32_INIT; + mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, cur_file_ofs; + mz_zip_archive_file_stat file_stat; + void *pRead_buf = NULL; void *pWrite_buf = NULL; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + // Empty file, or a directory (but not always a directory - I've seen odd zips with directories that have compressed data which inflates to 0 bytes) + if (!file_stat.m_comp_size) + return MZ_TRUE; + + // Entry is a subdirectory (I've seen old zips with dir entries which have compressed deflate data which inflates to 0 bytes, but these entries claim to uncompress to 512 bytes in the headers). + // I'm torn how to handle this case - should it fail instead? + if (mz_zip_reader_is_file_a_directory(pZip, file_index)) + return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) + return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + // Decompress the file either directly from memory or from a file input buffer. + if (pZip->m_pState->m_pMem) + { + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } + else + { + read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE); + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) + { + // The file is stored or the caller has requested the compressed data. + if (pZip->m_pState->m_pMem) + { +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF)) +#endif + return MZ_FALSE; + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) + status = TINFL_STATUS_FAILED; + else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)file_stat.m_comp_size); + cur_file_ofs += file_stat.m_comp_size; + out_buf_ofs += file_stat.m_comp_size; + comp_remaining = 0; + } + else + { + while (comp_remaining) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + + if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail); + + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + out_buf_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + } + } + } + else + { + tinfl_decompressor inflator; + tinfl_init(&inflator); + + if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE))) + status = TINFL_STATUS_FAILED; + else + { + do + { + mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress(&inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + + if (out_buf_size) + { + if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != out_buf_size) + { + status = TINFL_STATUS_FAILED; + break; + } + file_crc32 = (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); + if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) + { + status = TINFL_STATUS_FAILED; + break; + } + } + } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || (status == TINFL_STATUS_HAS_MORE_OUTPUT)); + } + } + + if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) + { + // Make sure the entire file was decompressed, and check its CRC. + if ((out_buf_ofs != file_stat.m_uncomp_size) || (file_crc32 != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if (!pZip->m_pState->m_pMem) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + if (pWrite_buf) + pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags) +{ + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) + return MZ_FALSE; + return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, flags); +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, const void *pBuf, size_t n) +{ + (void)ofs; return MZ_FWRITE(pBuf, 1, n, (MZ_FILE*)pOpaque); +} + +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags) +{ + mz_bool status; + mz_zip_archive_file_stat file_stat; + MZ_FILE *pFile; + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + pFile = MZ_FOPEN(pDst_filename, "wb"); + if (!pFile) + return MZ_FALSE; + status = mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags); + if (MZ_FCLOSE(pFile) == EOF) + return MZ_FALSE; +#ifndef MINIZ_NO_TIME + if (status) + mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); +#endif + return status; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_reader_end(mz_zip_archive *pZip) +{ + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + + if (pZip->m_pState) + { + mz_zip_internal_state *pState = pZip->m_pState; pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) + { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + } + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags) +{ + int file_index = mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags); + if (file_index < 0) + return MZ_FALSE; + return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); +} +#endif + +// ------------------- .ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +static void mz_write_le16(mz_uint8 *p, mz_uint16 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); } +static void mz_write_le32(mz_uint8 *p, mz_uint32 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); p[2] = (mz_uint8)(v >> 16); p[3] = (mz_uint8)(v >> 24); } +#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v)) +#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v)) + +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) +{ + if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (pZip->m_file_offset_alignment) + { + // Ensure user specified file offset alignment is a power of 2. + if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) + return MZ_FALSE; + } + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_archive_size = existing_size; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); + return MZ_TRUE; +} + +static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); +#ifdef _MSC_VER + if ((!n) || ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#else + if ((!n) || ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#endif + return 0; + if (new_size > pState->m_mem_capacity) + { + void *pNew_block; + size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); while (new_capacity < new_size) new_capacity *= 2; + if (NULL == (pNew_block = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) + return 0; + pState->m_pMem = pNew_block; pState->m_mem_capacity = new_capacity; + } + memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n); + pState->m_mem_size = (size_t)new_size; + return n; +} + +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size) +{ + pZip->m_pWrite = mz_zip_heap_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) + return MZ_FALSE; + if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning))) + { + if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, initial_allocation_size))) + { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_mem_capacity = initial_allocation_size; + } + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning) +{ + MZ_FILE *pFile; + pZip->m_pWrite = mz_zip_file_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) + return MZ_FALSE; + if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) + { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_pFile = pFile; + if (size_to_reserve_at_beginning) + { + mz_uint64 cur_ofs = 0; char buf[4096]; MZ_CLEAR_OBJ(buf); + do + { + size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) + { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + cur_ofs += n; size_to_reserve_at_beginning -= n; + } while (size_to_reserve_at_beginning); + } + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename) +{ + mz_zip_internal_state *pState; + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + // No sense in trying to write to an archive that's already at the support max size + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if (pState->m_pFile) + { +#ifdef MINIZ_NO_STDIO + pFilename; return MZ_FALSE; +#else + // Archive is being read from stdio - try to reopen as writable. + if (pZip->m_pIO_opaque != pZip) + return MZ_FALSE; + if (!pFilename) + return MZ_FALSE; + pZip->m_pWrite = mz_zip_file_write_func; + if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) + { + // The mz_zip_archive is now in a bogus state because pState->m_pFile is NULL, so just close it. + mz_zip_reader_end(pZip); + return MZ_FALSE; + } +#endif // #ifdef MINIZ_NO_STDIO + } + else if (pState->m_pMem) + { + // Archive lives in a memory block. Assume it's from the heap that we can resize using the realloc callback. + if (pZip->m_pIO_opaque != pZip) + return MZ_FALSE; + pState->m_mem_capacity = pState->m_mem_size; + pZip->m_pWrite = mz_zip_heap_write_func; + } + // Archive is being read via a user provided read function - make sure the user has specified a write function too. + else if (!pZip->m_pWrite) + return MZ_FALSE; + + // Start writing new files at the archive's current central directory location. + pZip->m_archive_size = pZip->m_central_directory_file_ofs; + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_central_directory_file_ofs = 0; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags) +{ + return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, level_and_flags, 0, 0); +} + +typedef struct +{ + mz_zip_archive *m_pZip; + mz_uint64 m_cur_archive_file_ofs; + mz_uint64 m_comp_size; +} mz_zip_writer_add_state; + +static mz_bool mz_zip_writer_add_put_buf_callback(const void* pBuf, int len, void *pUser) +{ + mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; + if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, pState->m_cur_archive_file_ofs, pBuf, len) != len) + return MZ_FALSE; + pState->m_cur_archive_file_ofs += len; + pState->m_comp_size += len; + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date) +{ + (void)pZip; + memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) +{ + (void)pZip; + memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, const void *pExtra, mz_uint16 extra_size, const void *pComment, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) +{ + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; + size_t orig_central_dir_size = pState->m_central_dir.m_size; + mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + + // No zip64 support yet + if ((local_header_ofs > 0xFFFFFFFF) || (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + comment_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_central_dir_header(pZip, central_dir_header, filename_size, extra_size, comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_header_ofs, ext_attributes)) + return MZ_FALSE; + + if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, ¢ral_dir_ofs, 1))) + { + // Try to push the central directory array back into its original state. + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) +{ + // Basic ZIP archive filename validity checks: Valid filenames cannot start with a forward slash, cannot contain a drive letter, and cannot use DOS-style backward slashes. + if (*pArchive_name == '/') + return MZ_FALSE; + while (*pArchive_name) + { + if ((*pArchive_name == '\\') || (*pArchive_name == ':')) + return MZ_FALSE; + pArchive_name++; + } + return MZ_TRUE; +} + +static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive *pZip) +{ + mz_uint32 n; + if (!pZip->m_file_offset_alignment) + return 0; + n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); + return (pZip->m_file_offset_alignment - n) & (pZip->m_file_offset_alignment - 1); +} + +static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, mz_uint64 cur_file_ofs, mz_uint32 n) +{ + char buf[4096]; + memset(buf, 0, MZ_MIN(sizeof(buf), n)); + while (n) + { + mz_uint32 s = MZ_MIN(sizeof(buf), n); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) + return MZ_FALSE; + cur_file_ofs += s; n -= s; + } + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32) +{ + mz_uint16 method = 0, dos_time = 0, dos_date = 0; + mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + tdefl_compressor *pComp = NULL; + mz_bool store_data_uncompressed; + mz_zip_internal_state *pState; + + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) || (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) + return MZ_FALSE; + // No zip64 support yet + if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + +#ifndef MINIZ_NO_TIME + { + time_t cur_time; time(&cur_time); + mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); + } +#endif // #ifndef MINIZ_NO_TIME + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) + return MZ_FALSE; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) + { + // Set DOS Subdirectory attribute bit. + ext_attributes |= 0x10; + // Subdirectories cannot contain data. + if ((buf_size) || (uncomp_size)) + return MZ_FALSE; + } + + // Try to do any allocations before writing to the archive, so if an allocation fails the file remains unmodified. (A good idea if we're doing an in-place modification.) + if ((!mz_zip_array_ensure_room(pZip, &pState->m_central_dir, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) + return MZ_FALSE; + + if ((!store_data_uncompressed) && (buf_size)) + { + if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) + return MZ_FALSE; + } + + if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + { + uncomp_crc32 = (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8*)pBuf, buf_size); + uncomp_size = buf_size; + if (uncomp_size <= 3) + { + level = 0; + store_data_uncompressed = MZ_TRUE; + } + } + + if (store_data_uncompressed) + { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + cur_archive_file_ofs += buf_size; + comp_size = buf_size; + + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) + method = MZ_DEFLATED; + } + else if (buf_size) + { + mz_zip_writer_add_state state; + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) || + (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pComp = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) +{ + mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; + mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + MZ_FILE *pSrc_file = NULL; + + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) + return MZ_FALSE; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) + return MZ_FALSE; + + pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); + if (!pSrc_file) + return MZ_FALSE; + MZ_FSEEK64(pSrc_file, 0, SEEK_END); + uncomp_size = MZ_FTELL64(pSrc_file); + MZ_FSEEK64(pSrc_file, 0, SEEK_SET); + + if (uncomp_size > 0xFFFFFFFF) + { + // No zip64 support yet + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + if (uncomp_size <= 3) + level = 0; + + if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (uncomp_size) + { + mz_uint64 uncomp_remaining = uncomp_size; + void *pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); + if (!pRead_buf) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + if (!level) + { + while (uncomp_remaining) + { + mz_uint n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); + if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); + uncomp_remaining -= n; + cur_archive_file_ofs += n; + } + comp_size = uncomp_size; + } + else + { + mz_bool result = MZ_FALSE; + mz_zip_writer_add_state state; + tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + for ( ; ; ) + { + size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, MZ_ZIP_MAX_IO_BUF_SIZE); + tdefl_status status; + + if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) + break; + + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); + uncomp_remaining -= in_buf_size; + + status = tdefl_compress_buffer(pComp, pRead_buf, in_buf_size, uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH); + if (status == TDEFL_STATUS_DONE) + { + result = MZ_TRUE; + break; + } + else if (status != TDEFL_STATUS_OKAY) + break; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + + if (!result) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + } + + MZ_FCLOSE(pSrc_file); pSrc_file = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index) +{ + mz_uint n, bit_flags, num_alignment_padding_bytes; + mz_uint64 comp_bytes_remaining, local_dir_header_ofs; + mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + size_t orig_central_dir_size; + mz_zip_internal_state *pState; + void *pBuf; const mz_uint8 *pSrc_central_header; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + if (NULL == (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) + return MZ_FALSE; + pState = pZip->m_pState; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + cur_src_file_ofs = MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + cur_dst_file_ofs = pZip->m_archive_size; + + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, num_alignment_padding_bytes)) + return MZ_FALSE; + cur_dst_file_ofs += num_alignment_padding_bytes; + local_dir_header_ofs = cur_dst_file_ofs; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + comp_bytes_remaining = n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)MZ_MAX(sizeof(mz_uint32) * 4, MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining))))) + return MZ_FALSE; + + while (comp_bytes_remaining) + { + n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_src_file_ofs += n; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_dst_file_ofs += n; + + comp_bytes_remaining -= n; + } + + bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); + if (bit_flags & 8) + { + // Copy data descriptor + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 4 : 3); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + cur_src_file_ofs += n; + cur_dst_file_ofs += n; + } + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + + // no zip64 support yet + if (cur_dst_file_ofs > 0xFFFFFFFF) + return MZ_FALSE; + + orig_central_dir_size = pState->m_central_dir.m_size; + + memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_dir_header_ofs); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) + return MZ_FALSE; + + n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + if (pState->m_central_dir.m_size > 0xFFFFFFFF) + return MZ_FALSE; + n = (mz_uint32)orig_central_dir_size; + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + pZip->m_total_files++; + pZip->m_archive_size = cur_dst_file_ofs; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState; + mz_uint64 central_dir_ofs, central_dir_size; + mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + + pState = pZip->m_pState; + + // no zip64 support yet + if ((pZip->m_total_files > 0xFFFF) || ((pZip->m_archive_size + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + central_dir_ofs = 0; + central_dir_size = 0; + if (pZip->m_total_files) + { + // Write central directory + central_dir_ofs = pZip->m_archive_size; + central_dir_size = pState->m_central_dir.m_size; + pZip->m_central_directory_file_ofs = central_dir_ofs; + if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, pState->m_central_dir.m_p, (size_t)central_dir_size) != central_dir_size) + return MZ_FALSE; + pZip->m_archive_size += central_dir_size; + } + + // Write end of central directory record + MZ_CLEAR_OBJ(hdr); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, pZip->m_total_files); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, sizeof(hdr)) != sizeof(hdr)) + return MZ_FALSE; +#ifndef MINIZ_NO_STDIO + if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) + return MZ_FALSE; +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_archive_size += sizeof(hdr); + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize) +{ + if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) + return MZ_FALSE; + if (pZip->m_pWrite != mz_zip_heap_write_func) + return MZ_FALSE; + if (!mz_zip_writer_finalize_archive(pZip)) + return MZ_FALSE; + + *pBuf = pZip->m_pState->m_pMem; + *pSize = pZip->m_pState->m_mem_size; + pZip->m_pState->m_pMem = NULL; + pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_end(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState; + mz_bool status = MZ_TRUE; + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) + return MZ_FALSE; + + pState = pZip->m_pState; + pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) + { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); + pState->m_pMem = NULL; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + return status; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) +{ + mz_bool status, created_new_archive = MZ_FALSE; + mz_zip_archive zip_archive; + struct MZ_FILE_STAT_STRUCT file_stat; + MZ_CLEAR_OBJ(zip_archive); + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || ((comment_size) && (!pComment)) || ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) + { + // Create a new archive. + if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) + return MZ_FALSE; + created_new_archive = MZ_TRUE; + } + else + { + // Append to an existing archive. + if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return MZ_FALSE; + if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) + { + mz_zip_reader_end(&zip_archive); + return MZ_FALSE; + } + } + status = mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, 0, 0); + // Always finalize, even if adding failed for some reason, so we have a valid central directory. (This may not always succeed, but we can try.) + if (!mz_zip_writer_finalize_archive(&zip_archive)) + status = MZ_FALSE; + if (!mz_zip_writer_end(&zip_archive)) + status = MZ_FALSE; + if ((!status) && (created_new_archive)) + { + // It's a new archive and something went wrong, so just delete it. + int ignoredStatus = MZ_DELETE_FILE(pZip_filename); + (void)ignoredStatus; + } + return status; +} + +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags) +{ + int file_index; + mz_zip_archive zip_archive; + void *p = NULL; + + if (pSize) + *pSize = 0; + + if ((!pZip_filename) || (!pArchive_name)) + return NULL; + + MZ_CLEAR_OBJ(zip_archive); + if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return NULL; + + if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL, flags)) >= 0) + p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); + + mz_zip_reader_end(&zip_archive); + return p; +} + +#endif // #ifndef MINIZ_NO_STDIO + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_FILE_ONLY + +/* + This is free and unencumbered software released into the public domain. + + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to +*/ diff --git a/modules/3rd/zip/zip.c b/modules/3rd/zip/zip.c new file mode 100755 index 0000000..565eb30 --- /dev/null +++ b/modules/3rd/zip/zip.c @@ -0,0 +1,471 @@ +/* + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. +*/ +#include +#include + +#include "miniz.c" +#include "zip.h" + +#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__ +/* Win32, OS/2, DOS */ +#define HAS_DEVICE(P) ((((P)[0] >= 'A' && (P)[0] <= 'Z') || ((P)[0] >= 'a' && (P)[0] <= 'z')) && (P)[1] == ':') +#define FILESYSTEM_PREFIX_LEN(P) (HAS_DEVICE (P) ? 2 : 0) +#define ISSLASH(C) ((C) == '/' || (C) == '\\') +#endif + +#ifndef FILESYSTEM_PREFIX_LEN +#define FILESYSTEM_PREFIX_LEN(P) 0 +#endif + +#ifndef ISSLASH +#define ISSLASH(C) ((C) == '/') +#endif + +#define cleanup(ptr) do { if (ptr) { free((void *)ptr); ptr = NULL; } } while (0) +#define strclone(ptr) ((ptr) ? strdup(ptr) : NULL) + +char *basename(const char *name) { + char const *p; + char const *base = name += FILESYSTEM_PREFIX_LEN (name); + int all_slashes = 1; + + for (p = name; *p; p++) { + if (ISSLASH(*p)) + base = p + 1; + else + all_slashes = 0; + } + + /* If NAME is all slashes, arrange to return `/'. */ + if (*base == '\0' && ISSLASH(*name) && all_slashes) + --base; + + return (char *)base; +} + +static int mkpath(const char *path) { + const int mode = 0755; + char const *p; + char npath[MAX_PATH + 1] = { 0 }; + int len = 0; + + for (p = path; *p && len < MAX_PATH; p++) { + if (ISSLASH(*p) && len > 0) { + if (mkdir(npath, mode) == -1) + if (errno != EEXIST) return -1; + } + npath[len++] = *p; + } + + return 0; +} + +struct zip_entry_t { + const char *name; + mz_uint64 uncomp_size; + mz_uint64 comp_size; + mz_uint32 uncomp_crc32; + mz_uint64 offset; + mz_uint8 header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + mz_uint64 header_offset; + mz_uint16 method; + mz_zip_writer_add_state state; + tdefl_compressor comp; +}; + +struct zip_t { + mz_zip_archive archive; + mz_uint level; + struct zip_entry_t entry; +}; + +struct zip_t *zip_open(const char *zipname, int level, int append) { + struct zip_t *zip = NULL; + struct MZ_FILE_STAT_STRUCT fstat; + + if (!zipname || strlen(zipname) < 1) { + // zip_t archive name is empty or NULL + return NULL; + } + + if (level < 0) level = MZ_DEFAULT_LEVEL; + if ((level & 0xF) > MZ_UBER_COMPRESSION) { + // Wrong compression level + return NULL; + } + + zip = (struct zip_t *)calloc((size_t)1, sizeof(struct zip_t)); + if (zip) { + zip->level = level; + + if (append && MZ_FILE_STAT(zipname, &fstat) == 0) { + // Append to an existing archive. + if (!mz_zip_reader_init_file(&(zip->archive), zipname, level | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) { + cleanup(zip); + return NULL; + } + + if (!mz_zip_writer_init_from_reader(&(zip->archive), zipname)) { + mz_zip_reader_end(&(zip->archive)); + cleanup(zip); + return NULL; + } + } else { + // Create a new archive. + if (!mz_zip_writer_init_file(&(zip->archive), zipname, 0)) { + // Cannot initialize zip_archive writer + cleanup(zip); + return NULL; + } + } + } + + return zip; +} + +void zip_close(struct zip_t *zip) { + if (zip) { + // Always finalize, even if adding failed for some reason, so we have a valid central directory. + mz_zip_writer_finalize_archive(&(zip->archive)); + mz_zip_writer_end(&(zip->archive)); + + cleanup(zip); + } +} + +int zip_entry_open(struct zip_t *zip, const char *entryname) { + size_t entrylen = 0; + mz_zip_archive *pzip = NULL; + mz_uint num_alignment_padding_bytes, level; + + if (!zip || !entryname) { + return -1; + } + + entrylen = strlen(entryname); + if (entrylen < 1) { + return -1; + } + + zip->entry.name = strclone(entryname); + if (!zip->entry.name) { + // Cannot parse zip entry name + return -1; + } + + zip->entry.comp_size = 0; + zip->entry.uncomp_size = 0; + zip->entry.uncomp_crc32 = MZ_CRC32_INIT; + zip->entry.offset = zip->archive.m_archive_size; + zip->entry.header_offset = zip->archive.m_archive_size; + memset(zip->entry.header, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE * sizeof(mz_uint8)); + zip->entry.method = 0; + + pzip = &(zip->archive); + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pzip); + + if (!pzip->m_pState || (pzip->m_zip_mode != MZ_ZIP_MODE_WRITING)) { + // Wrong zip mode + return -1; + } + if (zip->level & MZ_ZIP_FLAG_COMPRESSED_DATA) { + // Wrong zip compression level + return -1; + } + // no zip64 support yet + if ((pzip->m_total_files == 0xFFFF) || ((pzip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + entrylen) > 0xFFFFFFFF)) { + // No zip64 support yet + return -1; + } + if (!mz_zip_writer_write_zeros(pzip, zip->entry.offset, num_alignment_padding_bytes + sizeof(zip->entry.header))) { + // Cannot memset zip entry header + return -1; + } + + zip->entry.header_offset += num_alignment_padding_bytes; + if (pzip->m_file_offset_alignment) { MZ_ASSERT((zip->entry.header_offset & (pzip->m_file_offset_alignment - 1)) == 0); } + zip->entry.offset += num_alignment_padding_bytes + sizeof(zip->entry.header); + + if (pzip->m_pWrite(pzip->m_pIO_opaque, zip->entry.offset, zip->entry.name, entrylen) != entrylen) { + // Cannot write data to zip entry + return -1; + } + + zip->entry.offset += entrylen; + level = zip->level & 0xF; + if (level) { + zip->entry.state.m_pZip = pzip; + zip->entry.state.m_cur_archive_file_ofs = zip->entry.offset; + zip->entry.state.m_comp_size = 0; + + if (tdefl_init(&(zip->entry.comp), mz_zip_writer_add_put_buf_callback, &(zip->entry.state), tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) { + // Cannot initialize the zip compressor + return -1; + } + } + + return 0; +} + +int zip_entry_close(struct zip_t *zip) { + mz_zip_archive *pzip = NULL; + mz_uint level; + tdefl_status done; + mz_uint16 entrylen; + time_t t; + struct tm *tm; + mz_uint16 dos_time, dos_date; + + if (!zip) { + // zip_t handler is not initialized + return -1; + } + + pzip = &(zip->archive); + level = zip->level & 0xF; + if (level) { + done = tdefl_compress_buffer(&(zip->entry.comp), "", 0, TDEFL_FINISH); + if (done != TDEFL_STATUS_DONE && done != TDEFL_STATUS_OKAY) { + // Cannot flush compressed buffer + cleanup(zip->entry.name); + return -1; + } + zip->entry.comp_size = zip->entry.state.m_comp_size; + zip->entry.offset = zip->entry.state.m_cur_archive_file_ofs; + zip->entry.method = MZ_DEFLATED; + } + + entrylen = (mz_uint16)strlen(zip->entry.name); + t = time(NULL); + tm = localtime(&t); + dos_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); + dos_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); + + // no zip64 support yet + if ((zip->entry.comp_size > 0xFFFFFFFF) || (zip->entry.offset > 0xFFFFFFFF)) { + // No zip64 support, yet + cleanup(zip->entry.name); + return -1; + } + + if (!mz_zip_writer_create_local_dir_header(pzip, zip->entry.header, entrylen, 0, zip->entry.uncomp_size, zip->entry.comp_size, zip->entry.uncomp_crc32, zip->entry.method, 0, dos_time, dos_date)) { + // Cannot create zip entry header + cleanup(zip->entry.name); + return -1; + } + + if (pzip->m_pWrite(pzip->m_pIO_opaque, zip->entry.header_offset, zip->entry.header, sizeof(zip->entry.header)) != sizeof(zip->entry.header)) { + // Cannot write zip entry header + cleanup(zip->entry.name); + return -1; + } + + if (!mz_zip_writer_add_to_central_dir(pzip, zip->entry.name, entrylen, NULL, 0, "", 0, zip->entry.uncomp_size, zip->entry.comp_size, zip->entry.uncomp_crc32, zip->entry.method, 0, dos_time, dos_date, zip->entry.header_offset, 0)) { + // Cannot write to zip central dir + cleanup(zip->entry.name); + return -1; + } + + pzip->m_total_files++; + pzip->m_archive_size = zip->entry.offset; + + cleanup(zip->entry.name); + return 0; +} + +int zip_entry_write(struct zip_t *zip, const void *buf, size_t bufsize) { + mz_uint level; + mz_zip_archive *pzip = NULL; + tdefl_status status; + + if (!zip) { + // zip_t handler is not initialized + return -1; + } + + pzip = &(zip->archive); + if (buf && bufsize > 0) { + zip->entry.uncomp_size += bufsize; + zip->entry.uncomp_crc32 = (mz_uint32)mz_crc32(zip->entry.uncomp_crc32, (const mz_uint8 *)buf, bufsize); + + level = zip->level & 0xF; + if (!level) { + if ((pzip->m_pWrite(pzip->m_pIO_opaque, zip->entry.offset, buf, bufsize) != bufsize)) { + // Cannot write buffer + return -1; + } + zip->entry.offset += bufsize; + zip->entry.comp_size += bufsize; + } else { + status = tdefl_compress_buffer(&(zip->entry.comp), buf, bufsize, TDEFL_NO_FLUSH); + if (status != TDEFL_STATUS_DONE && status != TDEFL_STATUS_OKAY) { + // Cannot compress buffer + return -1; + } + } + } + + return 0; +} + +int zip_entry_fwrite(struct zip_t *zip, const char *filename) { + int status = 0; + size_t n = 0; + FILE *stream = NULL; + mz_uint8 buf[MZ_ZIP_MAX_IO_BUF_SIZE] = { 0 }; + + if (!zip) { + // zip_t handler is not initialized + return -1; + } + + stream = fopen(filename, "rb"); + if (!stream) { + // Cannot open filename + return -1; + } + + while((n = fread(buf, sizeof(mz_uint8), MZ_ZIP_MAX_IO_BUF_SIZE, stream)) > 0) { + if (zip_entry_write(zip, buf, n) < 0) { + status = -1; + break; + } + } + fclose(stream); + + return status; +} + +int zip_create(const char *zipname, const char *filenames[], size_t len) { + int status = 0; + size_t i; + mz_zip_archive zip_archive; + + if (!zipname || strlen(zipname) < 1) { + // zip_t archive name is empty or NULL + return -1; + } + + // Create a new archive. + if (!memset(&(zip_archive), 0, sizeof(zip_archive))) { + // Cannot memset zip archive + return -1; + } + + if (!mz_zip_writer_init_file(&zip_archive, zipname, 0)) { + // Cannot initialize zip_archive writer + return -1; + } + + for (i = 0; i < len; ++i) { + const char *name = filenames[i]; + if (!name) { + status = -1; + break; + } + + if (!mz_zip_writer_add_file(&zip_archive, basename(name), name, "", 0, ZIP_DEFAULT_COMPRESSION_LEVEL)) { + // Cannot add file to zip_archive + status = -1; + break; + } + } + + mz_zip_writer_finalize_archive(&zip_archive); + mz_zip_writer_end(&zip_archive); + return status; +} + +int zip_extract(const char *zipname, const char *dir, int (* on_extract)(const char *filename, void *arg), void *arg) { + int status = 0; + mz_uint i, n; + char path[MAX_PATH + 1] = { 0 }; + mz_zip_archive zip_archive; + mz_zip_archive_file_stat info; + size_t dirlen = 0; + + if (!memset(&(zip_archive), 0, sizeof(zip_archive))) { + // Cannot memset zip archive + return -1; + } + + if (!zipname || !dir) { + // Cannot parse zip archive name + return -1; + } + + dirlen = strlen(dir); + if (dirlen + 1 > MAX_PATH) { + return -1; + } + + // Now try to open the archive. + if (!mz_zip_reader_init_file(&zip_archive, zipname, 0)) { + // Cannot initialize zip_archive reader + status = -1; + goto finally; + } + + strcpy(path, dir); + if (!ISSLASH(path[dirlen -1])) { +#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__ + path[dirlen] = '\\'; +#else + path[dirlen] = '/'; +#endif + ++dirlen; + } + + // Get and print information about each file in the archive. + n = mz_zip_reader_get_num_files(&zip_archive); + for (i = 0; i < n; ++i) { + if (!mz_zip_reader_file_stat(&zip_archive, i, &info)) { + // Cannot get information about zip archive; + //printf("Cant get information about fimes"); + status = -1; + break; + } + + strncpy(&path[dirlen], info.m_filename, MAX_PATH - dirlen); + if (mkpath(path) < 0) { + // Cannot make a path + //printf("Cant create path\n"); + status = -1; + break; + } + + // verify if the path is directory first + // issue fixed by xsang.le@gmail.com + struct stat path_stat; + if(stat(path, &path_stat) != 0 || !(path_stat.st_mode & S_IFDIR)) + if (!mz_zip_reader_extract_to_file(&zip_archive, i, path, 0)) { + // Cannot extract zip archive to file + //printf("Cant extract %s\n", path); + status = -1; + break; + } + + if (on_extract) { + if (on_extract(path, arg) < 0) { + status = -1; + break; + } + } + } + + // Close the archive, freeing any resources it was using + if (!mz_zip_reader_end(&zip_archive)) { + // Cannot end zip reader + status = -1; + } + + finally: + return status; +} diff --git a/modules/3rd/zip/zip.h b/modules/3rd/zip/zip.h new file mode 100755 index 0000000..2bddc02 --- /dev/null +++ b/modules/3rd/zip/zip.h @@ -0,0 +1,90 @@ +/* + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. +*/ + +#pragma once +#ifndef ZIP_H +#define ZIP_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef MAX_PATH +#define MAX_PATH 32767 /* # chars in a path name including NULL */ +#endif + +#define ZIP_DEFAULT_COMPRESSION_LEVEL 6 + +/* This data structure is used throughout the library to represent zip archive - forward declaration. */ +struct zip_t; + +/* + Opens zip archive with compression level. + If append is 0 then new archive will be created, otherwise function will try to append to the specified zip archive, + instead of creating a new one. + Compression levels: 0-9 are the standard zlib-style levels. + Returns pointer to zip_t structure or NULL on error. +*/ +struct zip_t *zip_open(const char *zipname, int level, int append); + +/* Closes zip archive, releases resources - always finalize. */ +void zip_close(struct zip_t *zip); + +/* + Opens a new entry for writing in a zip archive. + Returns negative number (< 0) on error, 0 on success. +*/ +int zip_entry_open(struct zip_t *zip, const char *entryname); + +/* + Closes zip entry, flushes buffer and releases resources. + Returns negative number (< 0) on error, 0 on success. +*/ +int zip_entry_close(struct zip_t *zip); + +/* + Compresses an input buffer for the current zip entry. + Returns negative number (< 0) on error, 0 on success. +*/ +int zip_entry_write(struct zip_t *zip, const void *buf, size_t bufsize); + +/* + Compresses a file for the current zip entry. + Returns negative number (< 0) on error, 0 on success. +*/ +int zip_entry_fwrite(struct zip_t *zip, const char *filename); + +/* + Creates a new archive and puts len files into a single zip archive + Returns negative number (< 0) on error, 0 on success. +*/ +int zip_create(const char *zipname, const char *filenames[], size_t len); + +/* + Extracts a zip archive file into dir. + If on_extract_entry is not NULL, the callback will be called after successfully extracted each zip entry. + Returning a negative value from the callback will cause abort the extract and return an error. + + The last argument (void *arg) is optional, which you can use to pass some data to the on_extract_entry callback. + + Returns negative number (< 0) on error, 0 on success. +*/ +int zip_extract(const char *zipname, const char *dir, int (* on_extract_entry)(const char *filename, void *arg), void *arg); + +#ifdef __cplusplus +} +#endif + +#endif + + + diff --git a/modules/Makefile.am b/modules/Makefile.am new file mode 100644 index 0000000..7b05646 --- /dev/null +++ b/modules/Makefile.am @@ -0,0 +1,49 @@ +lib_LTLIBRARIES = ulib.la +ulib_la_LDFLAGS = -module -avoid-version -shared +ulib_la_SOURCES = 3rd/zip/zip.c ulib.c + + +lib_LTLIBRARIES += enc.la +enc_la_LDFLAGS = -module -avoid-version -shared +enc_la_SOURCES = enc.c base64.c sha1.c + +lib_LTLIBRARIES += slice.la +slice_la_LDFLAGS = -module -avoid-version -shared +slice_la_SOURCES = slice.c + +lib_LTLIBRARIES += stmr.la +stmr_la_LDFLAGS = -module -avoid-version -shared +stmr_la_SOURCES = stmr.c + +lib_LTLIBRARIES += json.la +json_la_LDFLAGS = -module -avoid-version -shared +json_la_SOURCES = 3rd/jsmn/jsmn.c json.c + + +lib_LTLIBRARIES += md.la +md_la_LDFLAGS = -module -avoid-version -shared +md_la_SOURCES = md.c 3rd/md4c/entity.c 3rd/md4c/md4c.c 3rd/md4c/md4c-html.c + + +if HAS_DB +lib_LTLIBRARIES += sqlitedb.la +sqlitedb_la_LDFLAGS = -module -avoid-version -shared +sqlitedb_la_SOURCES = sqlitedb.c +endif + +libdir=$(prefix)/lib/lua/ + +EXTRA_DIST = base64.h \ + sha1.h \ + 3rd/zip/miniz.c \ + 3rd/zip/zip.h \ + 3rd/jsmn/jsmn.h \ + 3rd/md4c/md4c-html.h \ + 3rd/md4c/entity.h \ + 3rd/md4c/md4c.h \ + lua/lualib.h \ + lua/lua54/luaconf.h \ + lua/lua54/lualib.h \ + lua/lua54/lua.h \ + lua/lua54/lauxlib.h + diff --git a/modules/base64.c b/modules/base64.c new file mode 100644 index 0000000..416c6d3 --- /dev/null +++ b/modules/base64.c @@ -0,0 +1,185 @@ +/* ==================================================================== + * Copyright (c) 1995-1999 The Apache Group. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." + * + * 4. The names "Apache Server" and "Apache Group" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." + * + * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Group and was originally based + * on public domain software written at the National Center for + * Supercomputing Applications, University of Illinois, Urbana-Champaign. + * For more information on the Apache Group and the Apache HTTP server + * project, please see . + * + */ + +/* Base64 encoder/decoder. Originally Apache file ap_base64.c + */ + +#include + +#include "base64.h" + +/* aaaack but it's fast and const should make it shared text page. */ +static const unsigned char pr2six[256] = +{ + /* ASCII table */ + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64, + 64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64, + 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 +}; + +int Base64decode_len(const char *bufcoded) +{ + int nbytesdecoded; + register const unsigned char *bufin; + register int nprbytes; + + bufin = (const unsigned char *) bufcoded; + while (pr2six[*(bufin++)] <= 63); + + nprbytes = (bufin - (const unsigned char *) bufcoded) - 1; + nbytesdecoded = ((nprbytes + 3) / 4) * 3; + + return nbytesdecoded + 1; +} + +int Base64decode(char *bufplain, const char *bufcoded) +{ + int nbytesdecoded; + register const unsigned char *bufin; + register unsigned char *bufout; + register int nprbytes; + + bufin = (const unsigned char *) bufcoded; + while (pr2six[*(bufin++)] <= 63); + nprbytes = (bufin - (const unsigned char *) bufcoded) - 1; + nbytesdecoded = ((nprbytes + 3) / 4) * 3; + + bufout = (unsigned char *) bufplain; + bufin = (const unsigned char *) bufcoded; + + while (nprbytes > 4) { + *(bufout++) = + (unsigned char) (pr2six[*bufin] << 2 | pr2six[bufin[1]] >> 4); + *(bufout++) = + (unsigned char) (pr2six[bufin[1]] << 4 | pr2six[bufin[2]] >> 2); + *(bufout++) = + (unsigned char) (pr2six[bufin[2]] << 6 | pr2six[bufin[3]]); + bufin += 4; + nprbytes -= 4; + } + + /* Note: (nprbytes == 1) would be an error, so just ingore that case */ + if (nprbytes > 1) { + *(bufout++) = + (unsigned char) (pr2six[*bufin] << 2 | pr2six[bufin[1]] >> 4); + } + if (nprbytes > 2) { + *(bufout++) = + (unsigned char) (pr2six[bufin[1]] << 4 | pr2six[bufin[2]] >> 2); + } + if (nprbytes > 3) { + *(bufout++) = + (unsigned char) (pr2six[bufin[2]] << 6 | pr2six[bufin[3]]); + } + + *(bufout++) = '\0'; + nbytesdecoded -= (4 - nprbytes) & 3; + return nbytesdecoded; +} + +static const char basis_64[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +int Base64encode_len(int len) +{ + return ((len + 2) / 3 * 4) + 1; +} + +int Base64encode(char *encoded, const char *string, int len) +{ + int i; + char *p; + + p = encoded; + for (i = 0; i < len - 2; i += 3) { + *p++ = basis_64[(string[i] >> 2) & 0x3F]; + *p++ = basis_64[((string[i] & 0x3) << 4) | + ((int) (string[i + 1] & 0xF0) >> 4)]; + *p++ = basis_64[((string[i + 1] & 0xF) << 2) | + ((int) (string[i + 2] & 0xC0) >> 6)]; + *p++ = basis_64[string[i + 2] & 0x3F]; + } + if (i < len) { + *p++ = basis_64[(string[i] >> 2) & 0x3F]; + if (i == (len - 1)) { + *p++ = basis_64[((string[i] & 0x3) << 4)]; + *p++ = '='; + } + else { + *p++ = basis_64[((string[i] & 0x3) << 4) | + ((int) (string[i + 1] & 0xF0) >> 4)]; + *p++ = basis_64[((string[i + 1] & 0xF) << 2)]; + } + *p++ = '='; + } + + *p++ = '\0'; + return p - encoded; +} diff --git a/modules/base64.h b/modules/base64.h new file mode 100644 index 0000000..4ab7d36 --- /dev/null +++ b/modules/base64.h @@ -0,0 +1,77 @@ +/* ==================================================================== + * Copyright (c) 1995-1999 The Apache Group. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." + * + * 4. The names "Apache Server" and "Apache Group" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." + * + * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Group and was originally based + * on public domain software written at the National Center for + * Supercomputing Applications, University of Illinois, Urbana-Champaign. + * For more information on the Apache Group and the Apache HTTP server + * project, please see . + * + */ + + + +#ifndef _BASE64_H_ +#define _BASE64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +int Base64encode_len(int len); +int Base64encode(char * coded_dst, const char *plain_src,int len_plain_src); + +int Base64decode_len(const char * coded_src); +int Base64decode(char * plain_dst, const char *coded_src); + +#ifdef __cplusplus +} +#endif + +#endif //_BASE64_H_ diff --git a/modules/enc.c b/modules/enc.c new file mode 100644 index 0000000..af365fc --- /dev/null +++ b/modules/enc.c @@ -0,0 +1,231 @@ +#include +#include +#include +#include +#include +#include "sha1.h" +#include "base64.h" + +#include "lua/lualib.h" + +#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c)))) + +static unsigned simple_hash(const char* key) +{ + unsigned hashval; + for (hashval = 0; *key != '\0'; key++) + hashval = *key + 31 * hashval; + return hashval; +} + +static void md5(uint8_t *initial_msg, size_t initial_len, char* buff) { + uint32_t h0, h1, h2, h3; + char tmp[80]; + uint8_t *msg = NULL; + uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, + 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, + 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, + 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21}; + // Use binary integer part of the sines of integers (in radians) as constants// Initialize variables: + uint32_t k[] = { + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, + 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, + 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, + 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391}; + + h0 = 0x67452301; + h1 = 0xefcdab89; + h2 = 0x98badcfe; + h3 = 0x10325476; + + // Pre-processing: adding a single 1 bit + //append "1" bit to message + /* Notice: the input bytes are considered as bits strings, + where the first bit is the most significant bit of the byte.[37] */ + + // Pre-processing: padding with zeros + //append "0" bit until message length in bit ≡ 448 (mod 512) + //append length mod (2 pow 64) to message + + int new_len; + for(new_len = initial_len*8 + 1; new_len%512!=448; new_len++); + new_len /= 8; + + msg = calloc(new_len + 64, 1); // also appends "0" bits + // (we alloc also 64 extra bytes...) + memcpy(msg, initial_msg, initial_len); + msg[initial_len] = 128; // write the "1" bit + + uint32_t bits_len = 8*initial_len; // note, we append the len + memcpy(msg + new_len, &bits_len, 4); // in bits at the end of the buffer + + // Process the message in successive 512-bit chunks: + //for each 512-bit chunk of message: + int offset; + for(offset=0; offsetdata; + len = vec->len; + } + if (len == 0) + { + lua_pushstring(L, ""); + return 1; + } + dst = (char *)malloc(((len * 4) / 3) + (len / 96) + 6); //(((4 * len / 3) + 3) & ~3)+1 + Base64encode(dst, s, len); + lua_pushstring(L, dst); + free(dst); + return 1; +} +static int l_base64_decode(lua_State *L) +{ + const char *s = luaL_checkstring(L, 1); + int len = Base64decode_len(s); + // decode data to a byte array + lua_new_slice(L, len); + slice_t *vec = NULL; + vec = lua_check_slice(L, 2); + len = Base64decode((char *)vec->data, s); + vec->len = len; + // lua_pushstring(L,dst); + // free(dst); + return 1; +} + +static const struct luaL_Reg enc[] = { + {"b64encode", l_base64_encode}, + {"b64decode", l_base64_decode}, + {"hash", l_simple_hash}, + {"md5", l_md5}, + {"sha1", l_sha1}, + {NULL, NULL}}; + +int luaopen_enc(lua_State *L) +{ + luaL_newlib(L, enc); + return 1; +} diff --git a/modules/json.c b/modules/json.c new file mode 100644 index 0000000..f7ed512 --- /dev/null +++ b/modules/json.c @@ -0,0 +1,264 @@ +#include "lua/lualib.h" +#include "3rd/jsmn/jsmn.h" + +#define MAXTOKEN 8192 + +// define unescape sequence + +static int token_to_object(lua_State *L, jsmntok_t* t, const char* s, int cid); +static int l_json_parser(lua_State *L, const char* s); + +//void header(int,const char*); +static int l_json_decode_s (lua_State *L) { + const char* s = luaL_checkstring(L,1); + return l_json_parser(L,s); +} + +static int l_json_decode_f (lua_State *L) { + // read the entire file + char * buffer = 0; + long length; + const char* ph = luaL_checkstring(L,1); + FILE * f = fopen (ph, "rb"); + + if (f) + { + fseek (f, 0, SEEK_END); + length = ftell (f); + fseek (f, 0, SEEK_SET); + buffer = malloc (length+1); + if (buffer) + { + int ret = fread(buffer, 1, length, f); + } + fclose (f); + } + + if (buffer) + { + buffer[length] = '\0'; + l_json_parser(L,buffer); + free(buffer); + return 1; + } + else + { + lua_pushnil(L); + return 1; + } + +} +static int process_token_object(lua_State* L, jsmntok_t* t, const char* s, int cid) +{ + lua_newtable(L); + int id = cid+1; + //printf("%d\n", t[cid].size); + for(int i = 0; i < t[cid].size; i++) + { + char*str = strndup(s+t[id].start, t[id].end-t[id].start); + lua_pushstring(L,str); + free(str); + id = token_to_object(L,t,s,id+1); + lua_settable(L, -3); + } + return id; +} +static int process_token_array(lua_State* L, jsmntok_t* t, const char* s, int cid) +{ + lua_newtable(L); + int id = cid+1; + for(int i = 1; i <= t[cid].size; i++) + { + lua_pushnumber(L,i); + id = token_to_object(L,t,s,id); + lua_settable(L, -3); + } + return id; +} + +/* +static void stackDump (lua_State *L) { + int i; + int top = lua_gettop(L); + for (i = 1; i <= top; i++) { + int t = lua_type(L, i); + switch (t) { + + case LUA_TSTRING: + printf("`%s' \n", lua_tostring(L, i)); + break; + + case LUA_TBOOLEAN: + printf(lua_toboolean(L, i) ? "true\n" : "false\n"); + break; + + case LUA_TNUMBER: + printf("%g\n", lua_tonumber(L, i)); + break; + + default: + printf("%s\n", lua_typename(L, t)); + break; + + } + printf(" "); + } + printf("\n"); + } +*/ +static int process_token_string(lua_State* L, jsmntok_t* t, const char* s, int cid) +{ + // un escape the string + char * str = (char*) malloc(t[cid].end-t[cid].start + 1); + int index = 0; + char c; + uint8_t escape = 0; + + for (int i = t[cid].start; i < t[cid].end; i++) + { + c = *(s+i); + if(c == '\\') + { + if(escape) + { + str[index] = c; + escape = 0; + index++; + } + else + { + escape = 1; + } + } + else + { + if(escape) + { + switch (c) + { + case 'b': + str[index] = '\b'; + break; + case 'f': + str[index] = '\f'; + break; + case 'n': + str[index] = '\n'; + break; + case 'r': + str[index] = '\r'; + break; + case 't': + str[index] = '\t'; + break; + default: + str[index] = c; + } + } + else + { + str[index] = c; + } + index++; + escape = 0; + } + } + str[index] = '\0'; + //strndup(s+t[cid].start, t[cid].end-t[cid].start); + // un escape the string + /*lua_getglobal(L, "utils"); + lua_getfield(L, -1, "unescape"); + lua_pushstring(L,str); + if (lua_pcall(L, 1, 1, 0) != 0) + printf("Error running function `unescape': %s\n",lua_tostring(L, -1)); + if(str) free(str); + str = (char*)luaL_checkstring(L,-1); + lua_settop(L, -3);*/ + lua_pushstring(L,str); + //stackDump(L); + //lua_pushstring(L, str); + //printf("%s\n",strndup(s+t[cid].start, t[cid].end-t[cid].start) ); + if(str) free(str); + return cid+1; +} +static int process_token_primitive(lua_State* L, jsmntok_t* t, const char* s, int cid) +{ + //printf("%s\n",strndup(s+t[cid].start, t[cid].end-t[cid].start) ); + char c = s[t[cid].start]; + char *str; + switch(c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '+': + case '-': + str = strndup(s+t[cid].start, t[cid].end-t[cid].start); + lua_pushnumber(L,atof(str)); + free(str); + break; + + case 't': lua_pushboolean(L,1); break; + case 'f': lua_pushboolean(L,0); break; + default: lua_pushnil(L);break; + + } + return cid+1; +} +static int token_to_object(lua_State *L, jsmntok_t* t, const char* s, int cid) +{ + switch(t[cid].type) + { + case JSMN_OBJECT: + return process_token_object(L,t,s,cid); + break; + case JSMN_ARRAY: + return process_token_array(L,t,s,cid); + break; + case JSMN_STRING: + return process_token_string(L,t,s,cid); + break; + + case JSMN_PRIMITIVE: + return process_token_primitive(L,t,s,cid); + break; + + default: + lua_pushnil(L); + return cid + t[cid].size; break; + + } +} + +static int l_json_parser(lua_State *L, const char* s) +{ + jsmn_parser p; + jsmntok_t t[MAXTOKEN]; + + jsmn_init(&p); + int r = jsmn_parse(&p, s, strlen(s), t, sizeof(t)/sizeof(t[0])); + if (r < 0) { + lua_pushnil(L); + return 0; + } + token_to_object(L,t,s,0); + return 1; +} +static const struct luaL_Reg _json [] = { + {"decodeString", l_json_decode_s}, + {"decodeFile", l_json_decode_f}, + {NULL,NULL} +}; + +int luaopen_json(lua_State *L) +{ + luaL_newlib(L, _json); + return 1; +} \ No newline at end of file diff --git a/modules/lua/lua54/lauxlib.h b/modules/lua/lua54/lauxlib.h new file mode 100644 index 0000000..5b977e2 --- /dev/null +++ b/modules/lua/lua54/lauxlib.h @@ -0,0 +1,301 @@ +/* +** $Id: lauxlib.h $ +** Auxiliary functions for building Lua libraries +** See Copyright Notice in lua.h +*/ + + +#ifndef lauxlib_h +#define lauxlib_h + + +#include +#include + +#include "luaconf.h" +#include "lua.h" + + +/* global table */ +#define LUA_GNAME "_G" + + +typedef struct luaL_Buffer luaL_Buffer; + + +/* extra error code for 'luaL_loadfilex' */ +#define LUA_ERRFILE (LUA_ERRERR+1) + + +/* key, in the registry, for table of loaded modules */ +#define LUA_LOADED_TABLE "_LOADED" + + +/* key, in the registry, for table of preloaded loaders */ +#define LUA_PRELOAD_TABLE "_PRELOAD" + + +typedef struct luaL_Reg { + const char *name; + lua_CFunction func; +} luaL_Reg; + + +#define LUAL_NUMSIZES (sizeof(lua_Integer)*16 + sizeof(lua_Number)) + +LUALIB_API void (luaL_checkversion_) (lua_State *L, lua_Number ver, size_t sz); +#define luaL_checkversion(L) \ + luaL_checkversion_(L, LUA_VERSION_NUM, LUAL_NUMSIZES) + +LUALIB_API int (luaL_getmetafield) (lua_State *L, int obj, const char *e); +LUALIB_API int (luaL_callmeta) (lua_State *L, int obj, const char *e); +LUALIB_API const char *(luaL_tolstring) (lua_State *L, int idx, size_t *len); +LUALIB_API int (luaL_argerror) (lua_State *L, int arg, const char *extramsg); +LUALIB_API int (luaL_typeerror) (lua_State *L, int arg, const char *tname); +LUALIB_API const char *(luaL_checklstring) (lua_State *L, int arg, + size_t *l); +LUALIB_API const char *(luaL_optlstring) (lua_State *L, int arg, + const char *def, size_t *l); +LUALIB_API lua_Number (luaL_checknumber) (lua_State *L, int arg); +LUALIB_API lua_Number (luaL_optnumber) (lua_State *L, int arg, lua_Number def); + +LUALIB_API lua_Integer (luaL_checkinteger) (lua_State *L, int arg); +LUALIB_API lua_Integer (luaL_optinteger) (lua_State *L, int arg, + lua_Integer def); + +LUALIB_API void (luaL_checkstack) (lua_State *L, int sz, const char *msg); +LUALIB_API void (luaL_checktype) (lua_State *L, int arg, int t); +LUALIB_API void (luaL_checkany) (lua_State *L, int arg); + +LUALIB_API int (luaL_newmetatable) (lua_State *L, const char *tname); +LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname); +LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname); +LUALIB_API void *(luaL_checkudata) (lua_State *L, int ud, const char *tname); + +LUALIB_API void (luaL_where) (lua_State *L, int lvl); +LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...); + +LUALIB_API int (luaL_checkoption) (lua_State *L, int arg, const char *def, + const char *const lst[]); + +LUALIB_API int (luaL_fileresult) (lua_State *L, int stat, const char *fname); +LUALIB_API int (luaL_execresult) (lua_State *L, int stat); + + +/* predefined references */ +#define LUA_NOREF (-2) +#define LUA_REFNIL (-1) + +LUALIB_API int (luaL_ref) (lua_State *L, int t); +LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); + +LUALIB_API int (luaL_loadfilex) (lua_State *L, const char *filename, + const char *mode); + +#define luaL_loadfile(L,f) luaL_loadfilex(L,f,NULL) + +LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz, + const char *name, const char *mode); +LUALIB_API int (luaL_loadstring) (lua_State *L, const char *s); + +LUALIB_API lua_State *(luaL_newstate) (void); + +LUALIB_API lua_Integer (luaL_len) (lua_State *L, int idx); + +LUALIB_API void (luaL_addgsub) (luaL_Buffer *b, const char *s, + const char *p, const char *r); +LUALIB_API const char *(luaL_gsub) (lua_State *L, const char *s, + const char *p, const char *r); + +LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup); + +LUALIB_API int (luaL_getsubtable) (lua_State *L, int idx, const char *fname); + +LUALIB_API void (luaL_traceback) (lua_State *L, lua_State *L1, + const char *msg, int level); + +LUALIB_API void (luaL_requiref) (lua_State *L, const char *modname, + lua_CFunction openf, int glb); + +/* +** =============================================================== +** some useful macros +** =============================================================== +*/ + + +#define luaL_newlibtable(L,l) \ + lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1) + +#define luaL_newlib(L,l) \ + (luaL_checkversion(L), luaL_newlibtable(L,l), luaL_setfuncs(L,l,0)) + +#define luaL_argcheck(L, cond,arg,extramsg) \ + ((void)(luai_likely(cond) || luaL_argerror(L, (arg), (extramsg)))) + +#define luaL_argexpected(L,cond,arg,tname) \ + ((void)(luai_likely(cond) || luaL_typeerror(L, (arg), (tname)))) + +#define luaL_checkstring(L,n) (luaL_checklstring(L, (n), NULL)) +#define luaL_optstring(L,n,d) (luaL_optlstring(L, (n), (d), NULL)) + +#define luaL_typename(L,i) lua_typename(L, lua_type(L,(i))) + +#define luaL_dofile(L, fn) \ + (luaL_loadfile(L, fn) || lua_pcall(L, 0, LUA_MULTRET, 0)) + +#define luaL_dostring(L, s) \ + (luaL_loadstring(L, s) || lua_pcall(L, 0, LUA_MULTRET, 0)) + +#define luaL_getmetatable(L,n) (lua_getfield(L, LUA_REGISTRYINDEX, (n))) + +#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) + +#define luaL_loadbuffer(L,s,sz,n) luaL_loadbufferx(L,s,sz,n,NULL) + + +/* +** Perform arithmetic operations on lua_Integer values with wrap-around +** semantics, as the Lua core does. +*/ +#define luaL_intop(op,v1,v2) \ + ((lua_Integer)((lua_Unsigned)(v1) op (lua_Unsigned)(v2))) + + +/* push the value used to represent failure/error */ +#define luaL_pushfail(L) lua_pushnil(L) + + +/* +** Internal assertions for in-house debugging +*/ +#if !defined(lua_assert) + +#if defined LUAI_ASSERT + #include + #define lua_assert(c) assert(c) +#else + #define lua_assert(c) ((void)0) +#endif + +#endif + + + +/* +** {====================================================== +** Generic Buffer manipulation +** ======================================================= +*/ + +struct luaL_Buffer { + char *b; /* buffer address */ + size_t size; /* buffer size */ + size_t n; /* number of characters in buffer */ + lua_State *L; + union { + LUAI_MAXALIGN; /* ensure maximum alignment for buffer */ + char b[LUAL_BUFFERSIZE]; /* initial buffer */ + } init; +}; + + +#define luaL_bufflen(bf) ((bf)->n) +#define luaL_buffaddr(bf) ((bf)->b) + + +#define luaL_addchar(B,c) \ + ((void)((B)->n < (B)->size || luaL_prepbuffsize((B), 1)), \ + ((B)->b[(B)->n++] = (c))) + +#define luaL_addsize(B,s) ((B)->n += (s)) + +#define luaL_buffsub(B,s) ((B)->n -= (s)) + +LUALIB_API void (luaL_buffinit) (lua_State *L, luaL_Buffer *B); +LUALIB_API char *(luaL_prepbuffsize) (luaL_Buffer *B, size_t sz); +LUALIB_API void (luaL_addlstring) (luaL_Buffer *B, const char *s, size_t l); +LUALIB_API void (luaL_addstring) (luaL_Buffer *B, const char *s); +LUALIB_API void (luaL_addvalue) (luaL_Buffer *B); +LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); +LUALIB_API void (luaL_pushresultsize) (luaL_Buffer *B, size_t sz); +LUALIB_API char *(luaL_buffinitsize) (lua_State *L, luaL_Buffer *B, size_t sz); + +#define luaL_prepbuffer(B) luaL_prepbuffsize(B, LUAL_BUFFERSIZE) + +/* }====================================================== */ + + + +/* +** {====================================================== +** File handles for IO library +** ======================================================= +*/ + +/* +** A file handle is a userdata with metatable 'LUA_FILEHANDLE' and +** initial structure 'luaL_Stream' (it may contain other fields +** after that initial structure). +*/ + +#define LUA_FILEHANDLE "FILE*" + + +typedef struct luaL_Stream { + FILE *f; /* stream (NULL for incompletely created streams) */ + lua_CFunction closef; /* to close stream (NULL for closed streams) */ +} luaL_Stream; + +/* }====================================================== */ + +/* +** {================================================================== +** "Abstraction Layer" for basic report of messages and errors +** =================================================================== +*/ + +/* print a string */ +#if !defined(lua_writestring) +#define lua_writestring(s,l) fwrite((s), sizeof(char), (l), stdout) +#endif + +/* print a newline and flush the output */ +#if !defined(lua_writeline) +#define lua_writeline() (lua_writestring("\n", 1), fflush(stdout)) +#endif + +/* print an error message */ +#if !defined(lua_writestringerror) +#define lua_writestringerror(s,p) \ + (fprintf(stderr, (s), (p)), fflush(stderr)) +#endif + +/* }================================================================== */ + + +/* +** {============================================================ +** Compatibility with deprecated conversions +** ============================================================= +*/ +#if defined(LUA_COMPAT_APIINTCASTS) + +#define luaL_checkunsigned(L,a) ((lua_Unsigned)luaL_checkinteger(L,a)) +#define luaL_optunsigned(L,a,d) \ + ((lua_Unsigned)luaL_optinteger(L,a,(lua_Integer)(d))) + +#define luaL_checkint(L,n) ((int)luaL_checkinteger(L, (n))) +#define luaL_optint(L,n,d) ((int)luaL_optinteger(L, (n), (d))) + +#define luaL_checklong(L,n) ((long)luaL_checkinteger(L, (n))) +#define luaL_optlong(L,n,d) ((long)luaL_optinteger(L, (n), (d))) + +#endif +/* }============================================================ */ + + + +#endif + + diff --git a/modules/lua/lua54/lua.h b/modules/lua/lua54/lua.h new file mode 100644 index 0000000..e661839 --- /dev/null +++ b/modules/lua/lua54/lua.h @@ -0,0 +1,518 @@ +/* +** $Id: lua.h $ +** Lua - A Scripting Language +** Lua.org, PUC-Rio, Brazil (http://www.lua.org) +** See Copyright Notice at the end of this file +*/ + + +#ifndef lua_h +#define lua_h + +#include +#include + + +#include "luaconf.h" + + +#define LUA_VERSION_MAJOR "5" +#define LUA_VERSION_MINOR "4" +#define LUA_VERSION_RELEASE "4" + +#define LUA_VERSION_NUM 504 +#define LUA_VERSION_RELEASE_NUM (LUA_VERSION_NUM * 100 + 4) + +#define LUA_VERSION "Lua " LUA_VERSION_MAJOR "." LUA_VERSION_MINOR +#define LUA_RELEASE LUA_VERSION "." LUA_VERSION_RELEASE +#define LUA_COPYRIGHT LUA_RELEASE " Copyright (C) 1994-2022 Lua.org, PUC-Rio" +#define LUA_AUTHORS "R. Ierusalimschy, L. H. de Figueiredo, W. Celes" + + +/* mark for precompiled code ('Lua') */ +#define LUA_SIGNATURE "\x1bLua" + +/* option for multiple returns in 'lua_pcall' and 'lua_call' */ +#define LUA_MULTRET (-1) + + +/* +** Pseudo-indices +** (-LUAI_MAXSTACK is the minimum valid index; we keep some free empty +** space after that to help overflow detection) +*/ +#define LUA_REGISTRYINDEX (-LUAI_MAXSTACK - 1000) +#define lua_upvalueindex(i) (LUA_REGISTRYINDEX - (i)) + + +/* thread status */ +#define LUA_OK 0 +#define LUA_YIELD 1 +#define LUA_ERRRUN 2 +#define LUA_ERRSYNTAX 3 +#define LUA_ERRMEM 4 +#define LUA_ERRERR 5 + + +typedef struct lua_State lua_State; + + +/* +** basic types +*/ +#define LUA_TNONE (-1) + +#define LUA_TNIL 0 +#define LUA_TBOOLEAN 1 +#define LUA_TLIGHTUSERDATA 2 +#define LUA_TNUMBER 3 +#define LUA_TSTRING 4 +#define LUA_TTABLE 5 +#define LUA_TFUNCTION 6 +#define LUA_TUSERDATA 7 +#define LUA_TTHREAD 8 + +#define LUA_NUMTYPES 9 + + + +/* minimum Lua stack available to a C function */ +#define LUA_MINSTACK 20 + + +/* predefined values in the registry */ +#define LUA_RIDX_MAINTHREAD 1 +#define LUA_RIDX_GLOBALS 2 +#define LUA_RIDX_LAST LUA_RIDX_GLOBALS + + +/* type of numbers in Lua */ +typedef LUA_NUMBER lua_Number; + + +/* type for integer functions */ +typedef LUA_INTEGER lua_Integer; + +/* unsigned integer type */ +typedef LUA_UNSIGNED lua_Unsigned; + +/* type for continuation-function contexts */ +typedef LUA_KCONTEXT lua_KContext; + + +/* +** Type for C functions registered with Lua +*/ +typedef int (*lua_CFunction) (lua_State *L); + +/* +** Type for continuation functions +*/ +typedef int (*lua_KFunction) (lua_State *L, int status, lua_KContext ctx); + + +/* +** Type for functions that read/write blocks when loading/dumping Lua chunks +*/ +typedef const char * (*lua_Reader) (lua_State *L, void *ud, size_t *sz); + +typedef int (*lua_Writer) (lua_State *L, const void *p, size_t sz, void *ud); + + +/* +** Type for memory-allocation functions +*/ +typedef void * (*lua_Alloc) (void *ud, void *ptr, size_t osize, size_t nsize); + + +/* +** Type for warning functions +*/ +typedef void (*lua_WarnFunction) (void *ud, const char *msg, int tocont); + + + + +/* +** generic extra include file +*/ +#if defined(LUA_USER_H) +#include LUA_USER_H +#endif + + +/* +** RCS ident string +*/ +extern const char lua_ident[]; + + +/* +** state manipulation +*/ +LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud); +LUA_API void (lua_close) (lua_State *L); +LUA_API lua_State *(lua_newthread) (lua_State *L); +LUA_API int (lua_resetthread) (lua_State *L); + +LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf); + + +LUA_API lua_Number (lua_version) (lua_State *L); + + +/* +** basic stack manipulation +*/ +LUA_API int (lua_absindex) (lua_State *L, int idx); +LUA_API int (lua_gettop) (lua_State *L); +LUA_API void (lua_settop) (lua_State *L, int idx); +LUA_API void (lua_pushvalue) (lua_State *L, int idx); +LUA_API void (lua_rotate) (lua_State *L, int idx, int n); +LUA_API void (lua_copy) (lua_State *L, int fromidx, int toidx); +LUA_API int (lua_checkstack) (lua_State *L, int n); + +LUA_API void (lua_xmove) (lua_State *from, lua_State *to, int n); + + +/* +** access functions (stack -> C) +*/ + +LUA_API int (lua_isnumber) (lua_State *L, int idx); +LUA_API int (lua_isstring) (lua_State *L, int idx); +LUA_API int (lua_iscfunction) (lua_State *L, int idx); +LUA_API int (lua_isinteger) (lua_State *L, int idx); +LUA_API int (lua_isuserdata) (lua_State *L, int idx); +LUA_API int (lua_type) (lua_State *L, int idx); +LUA_API const char *(lua_typename) (lua_State *L, int tp); + +LUA_API lua_Number (lua_tonumberx) (lua_State *L, int idx, int *isnum); +LUA_API lua_Integer (lua_tointegerx) (lua_State *L, int idx, int *isnum); +LUA_API int (lua_toboolean) (lua_State *L, int idx); +LUA_API const char *(lua_tolstring) (lua_State *L, int idx, size_t *len); +LUA_API lua_Unsigned (lua_rawlen) (lua_State *L, int idx); +LUA_API lua_CFunction (lua_tocfunction) (lua_State *L, int idx); +LUA_API void *(lua_touserdata) (lua_State *L, int idx); +LUA_API lua_State *(lua_tothread) (lua_State *L, int idx); +LUA_API const void *(lua_topointer) (lua_State *L, int idx); + + +/* +** Comparison and arithmetic functions +*/ + +#define LUA_OPADD 0 /* ORDER TM, ORDER OP */ +#define LUA_OPSUB 1 +#define LUA_OPMUL 2 +#define LUA_OPMOD 3 +#define LUA_OPPOW 4 +#define LUA_OPDIV 5 +#define LUA_OPIDIV 6 +#define LUA_OPBAND 7 +#define LUA_OPBOR 8 +#define LUA_OPBXOR 9 +#define LUA_OPSHL 10 +#define LUA_OPSHR 11 +#define LUA_OPUNM 12 +#define LUA_OPBNOT 13 + +LUA_API void (lua_arith) (lua_State *L, int op); + +#define LUA_OPEQ 0 +#define LUA_OPLT 1 +#define LUA_OPLE 2 + +LUA_API int (lua_rawequal) (lua_State *L, int idx1, int idx2); +LUA_API int (lua_compare) (lua_State *L, int idx1, int idx2, int op); + + +/* +** push functions (C -> stack) +*/ +LUA_API void (lua_pushnil) (lua_State *L); +LUA_API void (lua_pushnumber) (lua_State *L, lua_Number n); +LUA_API void (lua_pushinteger) (lua_State *L, lua_Integer n); +LUA_API const char *(lua_pushlstring) (lua_State *L, const char *s, size_t len); +LUA_API const char *(lua_pushstring) (lua_State *L, const char *s); +LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt, + va_list argp); +LUA_API const char *(lua_pushfstring) (lua_State *L, const char *fmt, ...); +LUA_API void (lua_pushcclosure) (lua_State *L, lua_CFunction fn, int n); +LUA_API void (lua_pushboolean) (lua_State *L, int b); +LUA_API void (lua_pushlightuserdata) (lua_State *L, void *p); +LUA_API int (lua_pushthread) (lua_State *L); + + +/* +** get functions (Lua -> stack) +*/ +LUA_API int (lua_getglobal) (lua_State *L, const char *name); +LUA_API int (lua_gettable) (lua_State *L, int idx); +LUA_API int (lua_getfield) (lua_State *L, int idx, const char *k); +LUA_API int (lua_geti) (lua_State *L, int idx, lua_Integer n); +LUA_API int (lua_rawget) (lua_State *L, int idx); +LUA_API int (lua_rawgeti) (lua_State *L, int idx, lua_Integer n); +LUA_API int (lua_rawgetp) (lua_State *L, int idx, const void *p); + +LUA_API void (lua_createtable) (lua_State *L, int narr, int nrec); +LUA_API void *(lua_newuserdatauv) (lua_State *L, size_t sz, int nuvalue); +LUA_API int (lua_getmetatable) (lua_State *L, int objindex); +LUA_API int (lua_getiuservalue) (lua_State *L, int idx, int n); + + +/* +** set functions (stack -> Lua) +*/ +LUA_API void (lua_setglobal) (lua_State *L, const char *name); +LUA_API void (lua_settable) (lua_State *L, int idx); +LUA_API void (lua_setfield) (lua_State *L, int idx, const char *k); +LUA_API void (lua_seti) (lua_State *L, int idx, lua_Integer n); +LUA_API void (lua_rawset) (lua_State *L, int idx); +LUA_API void (lua_rawseti) (lua_State *L, int idx, lua_Integer n); +LUA_API void (lua_rawsetp) (lua_State *L, int idx, const void *p); +LUA_API int (lua_setmetatable) (lua_State *L, int objindex); +LUA_API int (lua_setiuservalue) (lua_State *L, int idx, int n); + + +/* +** 'load' and 'call' functions (load and run Lua code) +*/ +LUA_API void (lua_callk) (lua_State *L, int nargs, int nresults, + lua_KContext ctx, lua_KFunction k); +#define lua_call(L,n,r) lua_callk(L, (n), (r), 0, NULL) + +LUA_API int (lua_pcallk) (lua_State *L, int nargs, int nresults, int errfunc, + lua_KContext ctx, lua_KFunction k); +#define lua_pcall(L,n,r,f) lua_pcallk(L, (n), (r), (f), 0, NULL) + +LUA_API int (lua_load) (lua_State *L, lua_Reader reader, void *dt, + const char *chunkname, const char *mode); + +LUA_API int (lua_dump) (lua_State *L, lua_Writer writer, void *data, int strip); + + +/* +** coroutine functions +*/ +LUA_API int (lua_yieldk) (lua_State *L, int nresults, lua_KContext ctx, + lua_KFunction k); +LUA_API int (lua_resume) (lua_State *L, lua_State *from, int narg, + int *nres); +LUA_API int (lua_status) (lua_State *L); +LUA_API int (lua_isyieldable) (lua_State *L); + +#define lua_yield(L,n) lua_yieldk(L, (n), 0, NULL) + + +/* +** Warning-related functions +*/ +LUA_API void (lua_setwarnf) (lua_State *L, lua_WarnFunction f, void *ud); +LUA_API void (lua_warning) (lua_State *L, const char *msg, int tocont); + + +/* +** garbage-collection function and options +*/ + +#define LUA_GCSTOP 0 +#define LUA_GCRESTART 1 +#define LUA_GCCOLLECT 2 +#define LUA_GCCOUNT 3 +#define LUA_GCCOUNTB 4 +#define LUA_GCSTEP 5 +#define LUA_GCSETPAUSE 6 +#define LUA_GCSETSTEPMUL 7 +#define LUA_GCISRUNNING 9 +#define LUA_GCGEN 10 +#define LUA_GCINC 11 + +LUA_API int (lua_gc) (lua_State *L, int what, ...); + + +/* +** miscellaneous functions +*/ + +LUA_API int (lua_error) (lua_State *L); + +LUA_API int (lua_next) (lua_State *L, int idx); + +LUA_API void (lua_concat) (lua_State *L, int n); +LUA_API void (lua_len) (lua_State *L, int idx); + +LUA_API size_t (lua_stringtonumber) (lua_State *L, const char *s); + +LUA_API lua_Alloc (lua_getallocf) (lua_State *L, void **ud); +LUA_API void (lua_setallocf) (lua_State *L, lua_Alloc f, void *ud); + +LUA_API void (lua_toclose) (lua_State *L, int idx); +LUA_API void (lua_closeslot) (lua_State *L, int idx); + + +/* +** {============================================================== +** some useful macros +** =============================================================== +*/ + +#define lua_getextraspace(L) ((void *)((char *)(L) - LUA_EXTRASPACE)) + +#define lua_tonumber(L,i) lua_tonumberx(L,(i),NULL) +#define lua_tointeger(L,i) lua_tointegerx(L,(i),NULL) + +#define lua_pop(L,n) lua_settop(L, -(n)-1) + +#define lua_newtable(L) lua_createtable(L, 0, 0) + +#define lua_register(L,n,f) (lua_pushcfunction(L, (f)), lua_setglobal(L, (n))) + +#define lua_pushcfunction(L,f) lua_pushcclosure(L, (f), 0) + +#define lua_isfunction(L,n) (lua_type(L, (n)) == LUA_TFUNCTION) +#define lua_istable(L,n) (lua_type(L, (n)) == LUA_TTABLE) +#define lua_islightuserdata(L,n) (lua_type(L, (n)) == LUA_TLIGHTUSERDATA) +#define lua_isnil(L,n) (lua_type(L, (n)) == LUA_TNIL) +#define lua_isboolean(L,n) (lua_type(L, (n)) == LUA_TBOOLEAN) +#define lua_isthread(L,n) (lua_type(L, (n)) == LUA_TTHREAD) +#define lua_isnone(L,n) (lua_type(L, (n)) == LUA_TNONE) +#define lua_isnoneornil(L, n) (lua_type(L, (n)) <= 0) + +#define lua_pushliteral(L, s) lua_pushstring(L, "" s) + +#define lua_pushglobaltable(L) \ + ((void)lua_rawgeti(L, LUA_REGISTRYINDEX, LUA_RIDX_GLOBALS)) + +#define lua_tostring(L,i) lua_tolstring(L, (i), NULL) + + +#define lua_insert(L,idx) lua_rotate(L, (idx), 1) + +#define lua_remove(L,idx) (lua_rotate(L, (idx), -1), lua_pop(L, 1)) + +#define lua_replace(L,idx) (lua_copy(L, -1, (idx)), lua_pop(L, 1)) + +/* }============================================================== */ + + +/* +** {============================================================== +** compatibility macros +** =============================================================== +*/ +#if defined(LUA_COMPAT_APIINTCASTS) + +#define lua_pushunsigned(L,n) lua_pushinteger(L, (lua_Integer)(n)) +#define lua_tounsignedx(L,i,is) ((lua_Unsigned)lua_tointegerx(L,i,is)) +#define lua_tounsigned(L,i) lua_tounsignedx(L,(i),NULL) + +#endif + +#define lua_newuserdata(L,s) lua_newuserdatauv(L,s,1) +#define lua_getuservalue(L,idx) lua_getiuservalue(L,idx,1) +#define lua_setuservalue(L,idx) lua_setiuservalue(L,idx,1) + +#define LUA_NUMTAGS LUA_NUMTYPES + +/* }============================================================== */ + +/* +** {====================================================================== +** Debug API +** ======================================================================= +*/ + + +/* +** Event codes +*/ +#define LUA_HOOKCALL 0 +#define LUA_HOOKRET 1 +#define LUA_HOOKLINE 2 +#define LUA_HOOKCOUNT 3 +#define LUA_HOOKTAILCALL 4 + + +/* +** Event masks +*/ +#define LUA_MASKCALL (1 << LUA_HOOKCALL) +#define LUA_MASKRET (1 << LUA_HOOKRET) +#define LUA_MASKLINE (1 << LUA_HOOKLINE) +#define LUA_MASKCOUNT (1 << LUA_HOOKCOUNT) + +typedef struct lua_Debug lua_Debug; /* activation record */ + + +/* Functions to be called by the debugger in specific events */ +typedef void (*lua_Hook) (lua_State *L, lua_Debug *ar); + + +LUA_API int (lua_getstack) (lua_State *L, int level, lua_Debug *ar); +LUA_API int (lua_getinfo) (lua_State *L, const char *what, lua_Debug *ar); +LUA_API const char *(lua_getlocal) (lua_State *L, const lua_Debug *ar, int n); +LUA_API const char *(lua_setlocal) (lua_State *L, const lua_Debug *ar, int n); +LUA_API const char *(lua_getupvalue) (lua_State *L, int funcindex, int n); +LUA_API const char *(lua_setupvalue) (lua_State *L, int funcindex, int n); + +LUA_API void *(lua_upvalueid) (lua_State *L, int fidx, int n); +LUA_API void (lua_upvaluejoin) (lua_State *L, int fidx1, int n1, + int fidx2, int n2); + +LUA_API void (lua_sethook) (lua_State *L, lua_Hook func, int mask, int count); +LUA_API lua_Hook (lua_gethook) (lua_State *L); +LUA_API int (lua_gethookmask) (lua_State *L); +LUA_API int (lua_gethookcount) (lua_State *L); + +LUA_API int (lua_setcstacklimit) (lua_State *L, unsigned int limit); + +struct lua_Debug { + int event; + const char *name; /* (n) */ + const char *namewhat; /* (n) 'global', 'local', 'field', 'method' */ + const char *what; /* (S) 'Lua', 'C', 'main', 'tail' */ + const char *source; /* (S) */ + size_t srclen; /* (S) */ + int currentline; /* (l) */ + int linedefined; /* (S) */ + int lastlinedefined; /* (S) */ + unsigned char nups; /* (u) number of upvalues */ + unsigned char nparams;/* (u) number of parameters */ + char isvararg; /* (u) */ + char istailcall; /* (t) */ + unsigned short ftransfer; /* (r) index of first value transferred */ + unsigned short ntransfer; /* (r) number of transferred values */ + char short_src[LUA_IDSIZE]; /* (S) */ + /* private part */ + struct CallInfo *i_ci; /* active function */ +}; + +/* }====================================================================== */ + + +/****************************************************************************** +* Copyright (C) 1994-2022 Lua.org, PUC-Rio. +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ + + +#endif diff --git a/modules/lua/lua54/luaconf.h b/modules/lua/lua54/luaconf.h new file mode 100644 index 0000000..d42d14b --- /dev/null +++ b/modules/lua/lua54/luaconf.h @@ -0,0 +1,786 @@ +/* +** $Id: luaconf.h $ +** Configuration file for Lua +** See Copyright Notice in lua.h +*/ + + +#ifndef luaconf_h +#define luaconf_h + +#include +#include + + +/* +** =================================================================== +** General Configuration File for Lua +** +** Some definitions here can be changed externally, through the compiler +** (e.g., with '-D' options): They are commented out or protected +** by '#if !defined' guards. However, several other definitions +** should be changed directly here, either because they affect the +** Lua ABI (by making the changes here, you ensure that all software +** connected to Lua, such as C libraries, will be compiled with the same +** configuration); or because they are seldom changed. +** +** Search for "@@" to find all configurable definitions. +** =================================================================== +*/ + + +/* +** {==================================================================== +** System Configuration: macros to adapt (if needed) Lua to some +** particular platform, for instance restricting it to C89. +** ===================================================================== +*/ + +/* +@@ LUA_USE_C89 controls the use of non-ISO-C89 features. +** Define it if you want Lua to avoid the use of a few C99 features +** or Windows-specific features on Windows. +*/ +/* #define LUA_USE_C89 */ + + +/* +** By default, Lua on Windows use (some) specific Windows features +*/ +#if !defined(LUA_USE_C89) && defined(_WIN32) && !defined(_WIN32_WCE) +#define LUA_USE_WINDOWS /* enable goodies for regular Windows */ +#endif + + +#if defined(LUA_USE_WINDOWS) +#define LUA_DL_DLL /* enable support for DLL */ +#define LUA_USE_C89 /* broadly, Windows is C89 */ +#endif + + +#if defined(LUA_USE_LINUX) +#define LUA_USE_POSIX +#define LUA_USE_DLOPEN /* needs an extra library: -ldl */ +#endif + + +#if defined(LUA_USE_MACOSX) +#define LUA_USE_POSIX +#define LUA_USE_DLOPEN /* MacOS does not need -ldl */ +#endif + + +/* +@@ LUAI_IS32INT is true iff 'int' has (at least) 32 bits. +*/ +#define LUAI_IS32INT ((UINT_MAX >> 30) >= 3) + +/* }================================================================== */ + + + +/* +** {================================================================== +** Configuration for Number types. These options should not be +** set externally, because any other code connected to Lua must +** use the same configuration. +** =================================================================== +*/ + +/* +@@ LUA_INT_TYPE defines the type for Lua integers. +@@ LUA_FLOAT_TYPE defines the type for Lua floats. +** Lua should work fine with any mix of these options supported +** by your C compiler. The usual configurations are 64-bit integers +** and 'double' (the default), 32-bit integers and 'float' (for +** restricted platforms), and 'long'/'double' (for C compilers not +** compliant with C99, which may not have support for 'long long'). +*/ + +/* predefined options for LUA_INT_TYPE */ +#define LUA_INT_INT 1 +#define LUA_INT_LONG 2 +#define LUA_INT_LONGLONG 3 + +/* predefined options for LUA_FLOAT_TYPE */ +#define LUA_FLOAT_FLOAT 1 +#define LUA_FLOAT_DOUBLE 2 +#define LUA_FLOAT_LONGDOUBLE 3 + + +/* Default configuration ('long long' and 'double', for 64-bit Lua) */ +#define LUA_INT_DEFAULT LUA_INT_LONGLONG +#define LUA_FLOAT_DEFAULT LUA_FLOAT_DOUBLE + + +/* +@@ LUA_32BITS enables Lua with 32-bit integers and 32-bit floats. +*/ +#define LUA_32BITS 0 + + +/* +@@ LUA_C89_NUMBERS ensures that Lua uses the largest types available for +** C89 ('long' and 'double'); Windows always has '__int64', so it does +** not need to use this case. +*/ +#if defined(LUA_USE_C89) && !defined(LUA_USE_WINDOWS) +#define LUA_C89_NUMBERS 1 +#else +#define LUA_C89_NUMBERS 0 +#endif + + +#if LUA_32BITS /* { */ +/* +** 32-bit integers and 'float' +*/ +#if LUAI_IS32INT /* use 'int' if big enough */ +#define LUA_INT_TYPE LUA_INT_INT +#else /* otherwise use 'long' */ +#define LUA_INT_TYPE LUA_INT_LONG +#endif +#define LUA_FLOAT_TYPE LUA_FLOAT_FLOAT + +#elif LUA_C89_NUMBERS /* }{ */ +/* +** largest types available for C89 ('long' and 'double') +*/ +#define LUA_INT_TYPE LUA_INT_LONG +#define LUA_FLOAT_TYPE LUA_FLOAT_DOUBLE + +#else /* }{ */ +/* use defaults */ + +#define LUA_INT_TYPE LUA_INT_DEFAULT +#define LUA_FLOAT_TYPE LUA_FLOAT_DEFAULT + +#endif /* } */ + + +/* }================================================================== */ + + + +/* +** {================================================================== +** Configuration for Paths. +** =================================================================== +*/ + +/* +** LUA_PATH_SEP is the character that separates templates in a path. +** LUA_PATH_MARK is the string that marks the substitution points in a +** template. +** LUA_EXEC_DIR in a Windows path is replaced by the executable's +** directory. +*/ +#define LUA_PATH_SEP ";" +#define LUA_PATH_MARK "?" +#define LUA_EXEC_DIR "!" + + +/* +@@ LUA_PATH_DEFAULT is the default path that Lua uses to look for +** Lua libraries. +@@ LUA_CPATH_DEFAULT is the default path that Lua uses to look for +** C libraries. +** CHANGE them if your machine has a non-conventional directory +** hierarchy or if you want to install your libraries in +** non-conventional directories. +*/ + +#define LUA_VDIR LUA_VERSION_MAJOR "." LUA_VERSION_MINOR +#if defined(_WIN32) /* { */ +/* +** In Windows, any exclamation mark ('!') in the path is replaced by the +** path of the directory of the executable file of the current process. +*/ +#define LUA_LDIR "!\\lua\\" +#define LUA_CDIR "!\\" +#define LUA_SHRDIR "!\\..\\share\\lua\\" LUA_VDIR "\\" + +#if !defined(LUA_PATH_DEFAULT) +#define LUA_PATH_DEFAULT \ + LUA_LDIR"?.lua;" LUA_LDIR"?\\init.lua;" \ + LUA_CDIR"?.lua;" LUA_CDIR"?\\init.lua;" \ + LUA_SHRDIR"?.lua;" LUA_SHRDIR"?\\init.lua;" \ + ".\\?.lua;" ".\\?\\init.lua" +#endif + +#if !defined(LUA_CPATH_DEFAULT) +#define LUA_CPATH_DEFAULT \ + LUA_CDIR"?.dll;" \ + LUA_CDIR"..\\lib\\lua\\" LUA_VDIR "\\?.dll;" \ + LUA_CDIR"loadall.dll;" ".\\?.dll" +#endif + +#else /* }{ */ + +#define LUA_ROOT "/usr/local/" +#define LUA_LDIR LUA_ROOT "share/lua/" LUA_VDIR "/" +#define LUA_CDIR LUA_ROOT "lib/lua/" LUA_VDIR "/" + +#if !defined(LUA_PATH_DEFAULT) +#define LUA_PATH_DEFAULT \ + LUA_LDIR"?.lua;" LUA_LDIR"?/init.lua;" \ + LUA_CDIR"?.lua;" LUA_CDIR"?/init.lua;" \ + "./?.lua;" "./?/init.lua" +#endif + +#if !defined(LUA_CPATH_DEFAULT) +#define LUA_CPATH_DEFAULT \ + LUA_CDIR"?.so;" LUA_CDIR"loadall.so;" "./?.so" +#endif + +#endif /* } */ + + +/* +@@ LUA_DIRSEP is the directory separator (for submodules). +** CHANGE it if your machine does not use "/" as the directory separator +** and is not Windows. (On Windows Lua automatically uses "\".) +*/ +#if !defined(LUA_DIRSEP) + +#if defined(_WIN32) +#define LUA_DIRSEP "\\" +#else +#define LUA_DIRSEP "/" +#endif + +#endif + +/* }================================================================== */ + + +/* +** {================================================================== +** Marks for exported symbols in the C code +** =================================================================== +*/ + +/* +@@ LUA_API is a mark for all core API functions. +@@ LUALIB_API is a mark for all auxiliary library functions. +@@ LUAMOD_API is a mark for all standard library opening functions. +** CHANGE them if you need to define those functions in some special way. +** For instance, if you want to create one Windows DLL with the core and +** the libraries, you may want to use the following definition (define +** LUA_BUILD_AS_DLL to get it). +*/ +#if defined(LUA_BUILD_AS_DLL) /* { */ + +#if defined(LUA_CORE) || defined(LUA_LIB) /* { */ +#define LUA_API __declspec(dllexport) +#else /* }{ */ +#define LUA_API __declspec(dllimport) +#endif /* } */ + +#else /* }{ */ + +#define LUA_API extern + +#endif /* } */ + + +/* +** More often than not the libs go together with the core. +*/ +#define LUALIB_API LUA_API +#define LUAMOD_API LUA_API + + +/* +@@ LUAI_FUNC is a mark for all extern functions that are not to be +** exported to outside modules. +@@ LUAI_DDEF and LUAI_DDEC are marks for all extern (const) variables, +** none of which to be exported to outside modules (LUAI_DDEF for +** definitions and LUAI_DDEC for declarations). +** CHANGE them if you need to mark them in some special way. Elf/gcc +** (versions 3.2 and later) mark them as "hidden" to optimize access +** when Lua is compiled as a shared library. Not all elf targets support +** this attribute. Unfortunately, gcc does not offer a way to check +** whether the target offers that support, and those without support +** give a warning about it. To avoid these warnings, change to the +** default definition. +*/ +#if defined(__GNUC__) && ((__GNUC__*100 + __GNUC_MINOR__) >= 302) && \ + defined(__ELF__) /* { */ +#define LUAI_FUNC __attribute__((visibility("internal"))) extern +#else /* }{ */ +#define LUAI_FUNC extern +#endif /* } */ + +#define LUAI_DDEC(dec) LUAI_FUNC dec +#define LUAI_DDEF /* empty */ + +/* }================================================================== */ + + +/* +** {================================================================== +** Compatibility with previous versions +** =================================================================== +*/ + +/* +@@ LUA_COMPAT_5_3 controls other macros for compatibility with Lua 5.3. +** You can define it to get all options, or change specific options +** to fit your specific needs. +*/ +#if defined(LUA_COMPAT_5_3) /* { */ + +/* +@@ LUA_COMPAT_MATHLIB controls the presence of several deprecated +** functions in the mathematical library. +** (These functions were already officially removed in 5.3; +** nevertheless they are still available here.) +*/ +#define LUA_COMPAT_MATHLIB + +/* +@@ LUA_COMPAT_APIINTCASTS controls the presence of macros for +** manipulating other integer types (lua_pushunsigned, lua_tounsigned, +** luaL_checkint, luaL_checklong, etc.) +** (These macros were also officially removed in 5.3, but they are still +** available here.) +*/ +#define LUA_COMPAT_APIINTCASTS + + +/* +@@ LUA_COMPAT_LT_LE controls the emulation of the '__le' metamethod +** using '__lt'. +*/ +#define LUA_COMPAT_LT_LE + + +/* +@@ The following macros supply trivial compatibility for some +** changes in the API. The macros themselves document how to +** change your code to avoid using them. +** (Once more, these macros were officially removed in 5.3, but they are +** still available here.) +*/ +#define lua_strlen(L,i) lua_rawlen(L, (i)) + +#define lua_objlen(L,i) lua_rawlen(L, (i)) + +#define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) +#define lua_lessthan(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPLT) + +#endif /* } */ + +/* }================================================================== */ + + + +/* +** {================================================================== +** Configuration for Numbers (low-level part). +** Change these definitions if no predefined LUA_FLOAT_* / LUA_INT_* +** satisfy your needs. +** =================================================================== +*/ + +/* +@@ LUAI_UACNUMBER is the result of a 'default argument promotion' +@@ over a floating number. +@@ l_floatatt(x) corrects float attribute 'x' to the proper float type +** by prefixing it with one of FLT/DBL/LDBL. +@@ LUA_NUMBER_FRMLEN is the length modifier for writing floats. +@@ LUA_NUMBER_FMT is the format for writing floats. +@@ lua_number2str converts a float to a string. +@@ l_mathop allows the addition of an 'l' or 'f' to all math operations. +@@ l_floor takes the floor of a float. +@@ lua_str2number converts a decimal numeral to a number. +*/ + + +/* The following definitions are good for most cases here */ + +#define l_floor(x) (l_mathop(floor)(x)) + +#define lua_number2str(s,sz,n) \ + l_sprintf((s), sz, LUA_NUMBER_FMT, (LUAI_UACNUMBER)(n)) + +/* +@@ lua_numbertointeger converts a float number with an integral value +** to an integer, or returns 0 if float is not within the range of +** a lua_Integer. (The range comparisons are tricky because of +** rounding. The tests here assume a two-complement representation, +** where MININTEGER always has an exact representation as a float; +** MAXINTEGER may not have one, and therefore its conversion to float +** may have an ill-defined value.) +*/ +#define lua_numbertointeger(n,p) \ + ((n) >= (LUA_NUMBER)(LUA_MININTEGER) && \ + (n) < -(LUA_NUMBER)(LUA_MININTEGER) && \ + (*(p) = (LUA_INTEGER)(n), 1)) + + +/* now the variable definitions */ + +#if LUA_FLOAT_TYPE == LUA_FLOAT_FLOAT /* { single float */ + +#define LUA_NUMBER float + +#define l_floatatt(n) (FLT_##n) + +#define LUAI_UACNUMBER double + +#define LUA_NUMBER_FRMLEN "" +#define LUA_NUMBER_FMT "%.7g" + +#define l_mathop(op) op##f + +#define lua_str2number(s,p) strtof((s), (p)) + + +#elif LUA_FLOAT_TYPE == LUA_FLOAT_LONGDOUBLE /* }{ long double */ + +#define LUA_NUMBER long double + +#define l_floatatt(n) (LDBL_##n) + +#define LUAI_UACNUMBER long double + +#define LUA_NUMBER_FRMLEN "L" +#define LUA_NUMBER_FMT "%.19Lg" + +#define l_mathop(op) op##l + +#define lua_str2number(s,p) strtold((s), (p)) + +#elif LUA_FLOAT_TYPE == LUA_FLOAT_DOUBLE /* }{ double */ + +#define LUA_NUMBER double + +#define l_floatatt(n) (DBL_##n) + +#define LUAI_UACNUMBER double + +#define LUA_NUMBER_FRMLEN "" +#define LUA_NUMBER_FMT "%.14g" + +#define l_mathop(op) op + +#define lua_str2number(s,p) strtod((s), (p)) + +#else /* }{ */ + +#error "numeric float type not defined" + +#endif /* } */ + + + +/* +@@ LUA_UNSIGNED is the unsigned version of LUA_INTEGER. +@@ LUAI_UACINT is the result of a 'default argument promotion' +@@ over a LUA_INTEGER. +@@ LUA_INTEGER_FRMLEN is the length modifier for reading/writing integers. +@@ LUA_INTEGER_FMT is the format for writing integers. +@@ LUA_MAXINTEGER is the maximum value for a LUA_INTEGER. +@@ LUA_MININTEGER is the minimum value for a LUA_INTEGER. +@@ LUA_MAXUNSIGNED is the maximum value for a LUA_UNSIGNED. +@@ lua_integer2str converts an integer to a string. +*/ + + +/* The following definitions are good for most cases here */ + +#define LUA_INTEGER_FMT "%" LUA_INTEGER_FRMLEN "d" + +#define LUAI_UACINT LUA_INTEGER + +#define lua_integer2str(s,sz,n) \ + l_sprintf((s), sz, LUA_INTEGER_FMT, (LUAI_UACINT)(n)) + +/* +** use LUAI_UACINT here to avoid problems with promotions (which +** can turn a comparison between unsigneds into a signed comparison) +*/ +#define LUA_UNSIGNED unsigned LUAI_UACINT + + +/* now the variable definitions */ + +#if LUA_INT_TYPE == LUA_INT_INT /* { int */ + +#define LUA_INTEGER int +#define LUA_INTEGER_FRMLEN "" + +#define LUA_MAXINTEGER INT_MAX +#define LUA_MININTEGER INT_MIN + +#define LUA_MAXUNSIGNED UINT_MAX + +#elif LUA_INT_TYPE == LUA_INT_LONG /* }{ long */ + +#define LUA_INTEGER long +#define LUA_INTEGER_FRMLEN "l" + +#define LUA_MAXINTEGER LONG_MAX +#define LUA_MININTEGER LONG_MIN + +#define LUA_MAXUNSIGNED ULONG_MAX + +#elif LUA_INT_TYPE == LUA_INT_LONGLONG /* }{ long long */ + +/* use presence of macro LLONG_MAX as proxy for C99 compliance */ +#if defined(LLONG_MAX) /* { */ +/* use ISO C99 stuff */ + +#define LUA_INTEGER long long +#define LUA_INTEGER_FRMLEN "ll" + +#define LUA_MAXINTEGER LLONG_MAX +#define LUA_MININTEGER LLONG_MIN + +#define LUA_MAXUNSIGNED ULLONG_MAX + +#elif defined(LUA_USE_WINDOWS) /* }{ */ +/* in Windows, can use specific Windows types */ + +#define LUA_INTEGER __int64 +#define LUA_INTEGER_FRMLEN "I64" + +#define LUA_MAXINTEGER _I64_MAX +#define LUA_MININTEGER _I64_MIN + +#define LUA_MAXUNSIGNED _UI64_MAX + +#else /* }{ */ + +#error "Compiler does not support 'long long'. Use option '-DLUA_32BITS' \ + or '-DLUA_C89_NUMBERS' (see file 'luaconf.h' for details)" + +#endif /* } */ + +#else /* }{ */ + +#error "numeric integer type not defined" + +#endif /* } */ + +/* }================================================================== */ + + +/* +** {================================================================== +** Dependencies with C99 and other C details +** =================================================================== +*/ + +/* +@@ l_sprintf is equivalent to 'snprintf' or 'sprintf' in C89. +** (All uses in Lua have only one format item.) +*/ +#if !defined(LUA_USE_C89) +#define l_sprintf(s,sz,f,i) snprintf(s,sz,f,i) +#else +#define l_sprintf(s,sz,f,i) ((void)(sz), sprintf(s,f,i)) +#endif + + +/* +@@ lua_strx2number converts a hexadecimal numeral to a number. +** In C99, 'strtod' does that conversion. Otherwise, you can +** leave 'lua_strx2number' undefined and Lua will provide its own +** implementation. +*/ +#if !defined(LUA_USE_C89) +#define lua_strx2number(s,p) lua_str2number(s,p) +#endif + + +/* +@@ lua_pointer2str converts a pointer to a readable string in a +** non-specified way. +*/ +#define lua_pointer2str(buff,sz,p) l_sprintf(buff,sz,"%p",p) + + +/* +@@ lua_number2strx converts a float to a hexadecimal numeral. +** In C99, 'sprintf' (with format specifiers '%a'/'%A') does that. +** Otherwise, you can leave 'lua_number2strx' undefined and Lua will +** provide its own implementation. +*/ +#if !defined(LUA_USE_C89) +#define lua_number2strx(L,b,sz,f,n) \ + ((void)L, l_sprintf(b,sz,f,(LUAI_UACNUMBER)(n))) +#endif + + +/* +** 'strtof' and 'opf' variants for math functions are not valid in +** C89. Otherwise, the macro 'HUGE_VALF' is a good proxy for testing the +** availability of these variants. ('math.h' is already included in +** all files that use these macros.) +*/ +#if defined(LUA_USE_C89) || (defined(HUGE_VAL) && !defined(HUGE_VALF)) +#undef l_mathop /* variants not available */ +#undef lua_str2number +#define l_mathop(op) (lua_Number)op /* no variant */ +#define lua_str2number(s,p) ((lua_Number)strtod((s), (p))) +#endif + + +/* +@@ LUA_KCONTEXT is the type of the context ('ctx') for continuation +** functions. It must be a numerical type; Lua will use 'intptr_t' if +** available, otherwise it will use 'ptrdiff_t' (the nearest thing to +** 'intptr_t' in C89) +*/ +#define LUA_KCONTEXT ptrdiff_t + +#if !defined(LUA_USE_C89) && defined(__STDC_VERSION__) && \ + __STDC_VERSION__ >= 199901L +#include +#if defined(INTPTR_MAX) /* even in C99 this type is optional */ +#undef LUA_KCONTEXT +#define LUA_KCONTEXT intptr_t +#endif +#endif + + +/* +@@ lua_getlocaledecpoint gets the locale "radix character" (decimal point). +** Change that if you do not want to use C locales. (Code using this +** macro must include the header 'locale.h'.) +*/ +#if !defined(lua_getlocaledecpoint) +#define lua_getlocaledecpoint() (localeconv()->decimal_point[0]) +#endif + + +/* +** macros to improve jump prediction, used mostly for error handling +** and debug facilities. (Some macros in the Lua API use these macros. +** Define LUA_NOBUILTIN if you do not want '__builtin_expect' in your +** code.) +*/ +#if !defined(luai_likely) + +#if defined(__GNUC__) && !defined(LUA_NOBUILTIN) +#define luai_likely(x) (__builtin_expect(((x) != 0), 1)) +#define luai_unlikely(x) (__builtin_expect(((x) != 0), 0)) +#else +#define luai_likely(x) (x) +#define luai_unlikely(x) (x) +#endif + +#endif + + +#if defined(LUA_CORE) || defined(LUA_LIB) +/* shorter names for Lua's own use */ +#define l_likely(x) luai_likely(x) +#define l_unlikely(x) luai_unlikely(x) +#endif + + + +/* }================================================================== */ + + +/* +** {================================================================== +** Language Variations +** ===================================================================== +*/ + +/* +@@ LUA_NOCVTN2S/LUA_NOCVTS2N control how Lua performs some +** coercions. Define LUA_NOCVTN2S to turn off automatic coercion from +** numbers to strings. Define LUA_NOCVTS2N to turn off automatic +** coercion from strings to numbers. +*/ +/* #define LUA_NOCVTN2S */ +/* #define LUA_NOCVTS2N */ + + +/* +@@ LUA_USE_APICHECK turns on several consistency checks on the C API. +** Define it as a help when debugging C code. +*/ +#if defined(LUA_USE_APICHECK) +#include +#define luai_apicheck(l,e) assert(e) +#endif + +/* }================================================================== */ + + +/* +** {================================================================== +** Macros that affect the API and must be stable (that is, must be the +** same when you compile Lua and when you compile code that links to +** Lua). +** ===================================================================== +*/ + +/* +@@ LUAI_MAXSTACK limits the size of the Lua stack. +** CHANGE it if you need a different limit. This limit is arbitrary; +** its only purpose is to stop Lua from consuming unlimited stack +** space (and to reserve some numbers for pseudo-indices). +** (It must fit into max(size_t)/32.) +*/ +#if LUAI_IS32INT +#define LUAI_MAXSTACK 1000000 +#else +#define LUAI_MAXSTACK 15000 +#endif + + +/* +@@ LUA_EXTRASPACE defines the size of a raw memory area associated with +** a Lua state with very fast access. +** CHANGE it if you need a different size. +*/ +#define LUA_EXTRASPACE (sizeof(void *)) + + +/* +@@ LUA_IDSIZE gives the maximum size for the description of the source +@@ of a function in debug information. +** CHANGE it if you want a different size. +*/ +#define LUA_IDSIZE 60 + + +/* +@@ LUAL_BUFFERSIZE is the buffer size used by the lauxlib buffer system. +*/ +#define LUAL_BUFFERSIZE ((int)(16 * sizeof(void*) * sizeof(lua_Number))) + + +/* +@@ LUAI_MAXALIGN defines fields that, when used in a union, ensure +** maximum alignment for the other items in that union. +*/ +#define LUAI_MAXALIGN lua_Number n; double u; void *s; lua_Integer i; long l + +/* }================================================================== */ + + + + + +/* =================================================================== */ + +/* +** Local configuration. You can use this space to add your redefinitions +** without modifying the main part of the file. +*/ + + + + + +#endif + diff --git a/modules/lua/lua54/lualib.h b/modules/lua/lua54/lualib.h new file mode 100644 index 0000000..2625529 --- /dev/null +++ b/modules/lua/lua54/lualib.h @@ -0,0 +1,52 @@ +/* +** $Id: lualib.h $ +** Lua standard libraries +** See Copyright Notice in lua.h +*/ + + +#ifndef lualib_h +#define lualib_h + +#include "lua.h" + + +/* version suffix for environment variable names */ +#define LUA_VERSUFFIX "_" LUA_VERSION_MAJOR "_" LUA_VERSION_MINOR + + +LUAMOD_API int (luaopen_base) (lua_State *L); + +#define LUA_COLIBNAME "coroutine" +LUAMOD_API int (luaopen_coroutine) (lua_State *L); + +#define LUA_TABLIBNAME "table" +LUAMOD_API int (luaopen_table) (lua_State *L); + +#define LUA_IOLIBNAME "io" +LUAMOD_API int (luaopen_io) (lua_State *L); + +#define LUA_OSLIBNAME "os" +LUAMOD_API int (luaopen_os) (lua_State *L); + +#define LUA_STRLIBNAME "string" +LUAMOD_API int (luaopen_string) (lua_State *L); + +#define LUA_UTF8LIBNAME "utf8" +LUAMOD_API int (luaopen_utf8) (lua_State *L); + +#define LUA_MATHLIBNAME "math" +LUAMOD_API int (luaopen_math) (lua_State *L); + +#define LUA_DBLIBNAME "debug" +LUAMOD_API int (luaopen_debug) (lua_State *L); + +#define LUA_LOADLIBNAME "package" +LUAMOD_API int (luaopen_package) (lua_State *L); + + +/* open all previous libraries */ +LUALIB_API void (luaL_openlibs) (lua_State *L); + + +#endif diff --git a/modules/lua/lualib.h b/modules/lua/lualib.h new file mode 100644 index 0000000..a1e9529 --- /dev/null +++ b/modules/lua/lualib.h @@ -0,0 +1,38 @@ +#ifndef LUALIB_H +#define LUALIB_H +#include +#include +#include +#include +#include +#include + +#include "lua54/lua.h" +#include "lua54/lauxlib.h" +#include "lua54/lualib.h" + +#define SLICE "slice" + +typedef struct { + size_t len; + uint8_t* data; +} slice_t; + +void lua_new_slice(lua_State*L, int n) +{ + size_t nbytes = sizeof(slice_t) + n * 1U; + slice_t *a = (slice_t *)lua_newuserdata(L, nbytes); + a->data = &((char *)a)[sizeof(slice_t)]; + luaL_getmetatable(L, SLICE); + lua_setmetatable(L, -2); + + a->len = n; +} +slice_t * lua_check_slice(lua_State *L, int idx) +{ + void *ud = luaL_checkudata(L, idx, SLICE); + luaL_argcheck(L, ud != NULL, idx, "`slice' expected"); + return (slice_t *)ud; +} + +#endif \ No newline at end of file diff --git a/modules/md.c b/modules/md.c new file mode 100644 index 0000000..fde4ae8 --- /dev/null +++ b/modules/md.c @@ -0,0 +1,63 @@ +#include "lua/lualib.h" +#include "3rd/md4c/md4c-html.h" + +static void md_process_output(const MD_CHAR *buf, MD_SIZE len, void *udata) +{ + lua_State *L = (lua_State *)udata; + lua_pushlstring(L, buf, len); + lua_call(L, 1, 0); + lua_pushvalue(L, -1); +} + +static int l_md_to_html(lua_State *L) +{ + const char *input = luaL_checkstring(L, 1); + if (input == NULL) + { + //lua_pushstring(L,"NULL markdown input string"); + return 0; + } + if (!lua_isfunction(L, -1)) + { + //lua_pushstring(L,"Invalid callback function"); + return 0; + } + // duplicate top of the stack + lua_pushvalue(L, -1); + reset_hd_cnt(); + if (md_html(input, + strlen(input), + md_process_output, + L, + MD_DIALECT_GITHUB | + MD_HTML_FLAG_VERBATIM_ENTITIES | + MD_FLAG_PERMISSIVEATXHEADERS | + MD_FLAG_NOINDENTEDCODEBLOCKS | + MD_FLAG_NOHTMLBLOCKS | + MD_FLAG_NOHTMLSPANS | + MD_FLAG_NOHTML | + MD_FLAG_COLLAPSEWHITESPACE | + MD_FLAG_PERMISSIVEURLAUTOLINKS | + MD_FLAG_PERMISSIVEWWWAUTOLINKS | + MD_FLAG_PERMISSIVEEMAILAUTOLINKS | + MD_FLAG_PERMISSIVEAUTOLINKS | + MD_FLAG_UNDERLINE, + MD_HTML_FLAG_XHTML) == -1) + { + //lua_pushstring(L,"Unable to parse markdown: md_parse() fails"); + lua_pop(L,1); + return 0; + } + lua_pop(L,1); + return 1; +} + +static const struct luaL_Reg _lib[] = { + {"to_html", l_md_to_html}, + {NULL, NULL}}; + +int luaopen_md(lua_State *L) +{ + luaL_newlib(L, _lib); + return 1; +} diff --git a/modules/sha1.c b/modules/sha1.c new file mode 100644 index 0000000..5271d84 --- /dev/null +++ b/modules/sha1.c @@ -0,0 +1,265 @@ +/* +SHA-1 in C +By Steve Reid +100% Public Domain + +----------------- +Modified 7/98 +By James H. Brown +Still 100% Public Domain + +Corrected a problem which generated improper hash values on 16 bit machines +Routine SHA1Update changed from + void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int +len) +to + void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned +long len) + +The 'len' parameter was declared an int which works fine on 32 bit machines. +However, on 16 bit machines an int is too small for the shifts being done +against +it. This caused the hash function to generate incorrect values if len was +greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update(). + +Since the file IO in main() reads 16K at a time, any file 8K or larger would +be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million +"a"s). + +I also changed the declaration of variables i & j in SHA1Update to +unsigned long from unsigned int for the same reason. + +These changes should make no difference to any 32 bit implementations since +an +int and a long are the same size in those environments. + +-- +I also corrected a few compiler warnings generated by Borland C. +1. Added #include for exit() prototype +2. Removed unused variable 'j' in SHA1Final +3. Changed exit(0) to return(0) at end of main. + +ALL changes I made can be located by searching for comments containing 'JHB' +----------------- +Modified 8/98 +By Steve Reid +Still 100% public domain + +1- Removed #include and used return() instead of exit() +2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall) +3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net + +----------------- +Modified 4/01 +By Saul Kravitz +Still 100% PD +Modified to run on Compaq Alpha hardware. + +----------------- +Modified 07/2002 +By Ralph Giles +Still 100% public domain +modified for use with stdint types, autoconf +code cleanup, removed attribution comments +switched SHA1Final() argument order for consistency +use SHA1_ prefix for public api +move public api to sha1.h +*/ + +/* +Test Vectors (from FIPS PUB 180-1) +"abc" + A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D +"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 +A million repetitions of "a" + 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F +*/ + +/* #define SHA1HANDSOFF */ +#include "sha1.h" + +void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]); + +#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) + +/* blk0() and blk() perform the initial expand. */ +/* I got the idea of expanding during the round function from SSLeay */ +/* FIXME: can we do this in an endian-proof way? */ +#ifdef WORDS_BIGENDIAN +#define blk0(i) block->l[i] +#else +#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \ + |(rol(block->l[i],8)&0x00FF00FF)) +#endif +#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \ + ^block->l[(i+2)&15]^block->l[i&15],1)) + +/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ +#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); +#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); +#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); + + +#ifdef VERBOSE /* SAK */ +void SHAPrintContext(SHA1_CTX *context, char *msg){ + printf("%s (%d,%d) %x %x %x %x %x\n", + msg, + context->count[0], context->count[1], + context->state[0], + context->state[1], + context->state[2], + context->state[3], + context->state[4]); +} +#endif /* VERBOSE */ + +/* Hash a single 512-bit block. This is the core of the algorithm. */ +void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]) +{ + uint32_t a, b, c, d, e; + typedef union { + uint8_t c[64]; + uint32_t l[16]; + } CHAR64LONG16; + CHAR64LONG16* block; + +#ifdef SHA1HANDSOFF + static uint8_t workspace[64]; + block = (CHAR64LONG16*)workspace; + memcpy(block, buffer, 64); +#else + block = (CHAR64LONG16*)buffer; +#endif + + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + + /* 4 rounds of 20 operations each. Loop unrolled. */ + R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); + R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); + R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); + R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); + R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); + R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); + R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); + R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); + R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); + R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); + R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); + R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); + R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); + R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); + R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); + R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); + R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); + R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); + R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); + R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); + + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + + /* Wipe variables */ + a = b = c = d = e = 0; +} + + +/* SHA1Init - Initialize new context */ +void SHA1_Init(SHA1_CTX* context) +{ + /* SHA1 initialization constants */ + context->state[0] = 0x67452301; + context->state[1] = 0xEFCDAB89; + context->state[2] = 0x98BADCFE; + context->state[3] = 0x10325476; + context->state[4] = 0xC3D2E1F0; + context->count[0] = context->count[1] = 0; +} + + +/* Run your data through this. */ +void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len) +{ + size_t i, j; + +#ifdef VERBOSE + SHAPrintContext(context, "before"); +#endif + + j = (context->count[0] >> 3) & 63; + if ((context->count[0] += len << 3) < (len << 3)) context->count[1]++; + context->count[1] += (len >> 29); + if ((j + len) > 63) { + memcpy(&context->buffer[j], data, (i = 64-j)); + SHA1_Transform(context->state, context->buffer); + for ( ; i + 63 < len; i += 64) { + SHA1_Transform(context->state, data + i); + } + j = 0; + } + else i = 0; + memcpy(&context->buffer[j], &data[i], len - i); + +#ifdef VERBOSE + SHAPrintContext(context, "after "); +#endif +} + + +/* Add padding and return the message digest. */ +void SHA1_Final(uint8_t digest[SHA1_DIGEST_SIZE], SHA1_CTX* context) +{ + uint32_t i; + uint8_t finalcount[8]; + + for (i = 0; i < 8; i++) { + finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)] + >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ + } + SHA1_Update(context, (uint8_t *)"\200", 1); + while ((context->count[0] & 504) != 448) { + SHA1_Update(context, (uint8_t *)"\0", 1); + } + SHA1_Update(context, finalcount, 8); /* Should cause a SHA1_Transform() */ + for (i = 0; i < SHA1_DIGEST_SIZE; i++) { + digest[i] = (uint8_t) + ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); + } + + /* Wipe variables */ + i = 0; + memset(context->buffer, 0, 64); + memset(context->state, 0, 20); + memset(context->count, 0, 8); + memset(finalcount, 0, 8); /* SWR */ + +#ifdef SHA1HANDSOFF /* make SHA1Transform overwrite its own static vars */ + SHA1_Transform(context->state, context->buffer); +#endif +} +void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output) +{ + int i,j; + char *c = output; + + for (i = 0; i < SHA1_DIGEST_SIZE/4; i++) { + for (j = 0; j < 4; j++) { + sprintf(c,"%02x", digest[i*4+j]); + c += 2; + } + //sprintf(c, " "); + //c += 1; + } + *c = '\0'; +} \ No newline at end of file diff --git a/modules/sha1.h b/modules/sha1.h new file mode 100644 index 0000000..88550ed --- /dev/null +++ b/modules/sha1.h @@ -0,0 +1,23 @@ +/* public api for steve reid's public domain SHA-1 implementation */ +/* this file is in the public domain */ + +#ifndef __SHA1_H +#define __SHA1_H +#include +#include + +#include + +typedef struct { + uint32_t state[5]; + uint32_t count[2]; + uint8_t buffer[64]; +} SHA1_CTX; + +#define SHA1_DIGEST_SIZE 20 + +void SHA1_Init(SHA1_CTX* context); +void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len); +void SHA1_Final(uint8_t digest[SHA1_DIGEST_SIZE], SHA1_CTX* context); +void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output); +#endif /* __SHA1_H */ \ No newline at end of file diff --git a/modules/slice.c b/modules/slice.c new file mode 100644 index 0000000..93a67be --- /dev/null +++ b/modules/slice.c @@ -0,0 +1,141 @@ +#include "lua/lualib.h" + +void lua_new_light_slice(lua_State *L, int n, char *ptr) +{ + size_t nbytes = sizeof(slice_t); + slice_t *a = (slice_t *)lua_newuserdata(L, nbytes); + a->len = n; + a->data = ptr; + luaL_getmetatable(L, SLICE); + lua_setmetatable(L, -2); +} + +static int l_new_slice(lua_State *L) +{ + int n = luaL_checknumber(L, 1); + lua_new_slice(L, n); + return 1; /* new userdatum is already on the stack */ +} + + +static int l_new_lightslice(lua_State *L) +{ + uint8_t* ptr = NULL; + if(lua_isnumber(L,1)) + { + size_t addr = luaL_checknumber(L, 1); + ptr = (uint8_t*) addr; + } + else + { + ptr = lua_touserdata(L, 1); + } + int n = luaL_checknumber(L, 2); + lua_new_light_slice(L, n, ptr); + return 1; /* new userdatum is already on the stack */ +} + +static unsigned char *get_sel(lua_State *L) +{ + slice_t *a = lua_check_slice(L, 1); + int index = luaL_checknumber(L, 2); + luaL_argcheck(L, 1 <= index && index <= a->len, 2, + "index out of range"); + + /* return element address */ + return &a->data[index - 1]; +} + +static int l_set_slice(lua_State *L) +{ + unsigned char value = luaL_checknumber(L, 3); + *get_sel(L) = value; + return 0; +} + +static int l_get_slice_size(lua_State *L) +{ + slice_t *a = lua_check_slice(L, 1); + lua_pushnumber(L, a->len); + return 1; +} + +static int l_slice_write(lua_State *L) +{ + slice_t *a = lua_check_slice(L, 1); + const char *f = luaL_checkstring(L, 2); + FILE *fp; + fp = fopen(f, "wb"); + + if (!fp) + lua_pushboolean(L, 0); + else + { + fwrite(a->data, 1, a->len, fp); + lua_pushboolean(L, 1); + fclose(fp); + } + return 1; +} + +static int l_slice_index(lua_State *L) +{ + if(lua_isnumber(L,2)) + { + lua_pushnumber(L, *get_sel(L)); + } + else if(lua_isstring(L,2)) + { + const char* string = luaL_checkstring(L,2); + if(strcmp(string,"size") == 0) + { + lua_pushcfunction(L, l_get_slice_size); + } + else if(strcmp(string, "write") == 0) + { + lua_pushcfunction(L, l_slice_write); + } + else + { + lua_pushnil(L); + } + return 1; + } + else + { + lua_pushnil(L); + } + return 1; +} + +static int l_slice_to_string(lua_State *L) +{ + slice_t *a = lua_check_slice(L, 1); + char *d = (char *)malloc(a->len + 1); + memcpy(d, a->data, a->len); + d[a->len] = '\0'; + lua_pushstring(L, d); + if (d) + free(d); + return 1; +} + +static const struct luaL_Reg slicemetalib[] = { + {"unew", l_new_lightslice}, + {"new", l_new_slice}, + {NULL, NULL}}; + +static const struct luaL_Reg slicelib[] = { + {"__index", l_slice_index}, + {"__newindex", l_set_slice}, + {"__tostring", l_slice_to_string}, + {NULL, NULL}}; + +int luaopen_slice(lua_State *L) +{ + luaL_newmetatable(L, SLICE); + luaL_setfuncs(L, slicelib, 0); + luaL_newlib(L, slicemetalib); + + return 1; +} \ No newline at end of file diff --git a/modules/sqlitedb.c b/modules/sqlitedb.c new file mode 100644 index 0000000..d89e4d8 --- /dev/null +++ b/modules/sqlitedb.c @@ -0,0 +1,125 @@ +#include +#include "lua/lualib.h" + +typedef sqlite3 *sqldb; + +static int l_getdb(lua_State *L) +{ + const char *file = luaL_checkstring(L, 1); + sqlite3 *db; + int rc = sqlite3_open(file, &db); + if (rc != SQLITE_OK) + { + lua_pushnil(L); + lua_pushstring(L,sqlite3_errmsg(db)); + sqlite3_close(db); + return 2; + } + lua_pushlightuserdata(L, db); + return 1; +} + +static int l_db_close(lua_State *L) +{ + sqldb db = (sqldb)lua_touserdata(L, 1); + if (db) + { + sqlite3_close(db); + } + db = NULL; + return 0; +} +static int l_db_exec(lua_State *L) +{ + sqldb db = (sqldb)lua_touserdata(L, 1); + const char *sql = luaL_checkstring(L, 2); + int r = 0; + if(!db) + { + lua_pushboolean(L, 0); + lua_pushstring(L,"Invalid database handle"); + return 2; + } + + char *err_msg = 0; + sqlite3_mutex_enter(sqlite3_db_mutex(db)); + int rc = sqlite3_exec(db, sql, NULL, 0, &err_msg); + sqlite3_mutex_leave(sqlite3_db_mutex(db)); + if (rc != SQLITE_OK) + { + lua_pushboolean(L, 0); + lua_pushstring(L,err_msg); + sqlite3_free(err_msg); + return 2; + } + + lua_pushboolean(L, 1); + return 1; +} +static int l_db_lastid(lua_State *L) +{ + sqldb db = (sqldb)lua_touserdata(L, 1); + + int idx = -1; + if (db) + idx = sqlite3_last_insert_rowid(db); + lua_pushnumber(L, idx); + return 1; +} +static int l_db_query(lua_State *L) +{ + sqldb db = (sqldb)lua_touserdata(L, 1); + const char *query = luaL_checkstring(L, 2); + if (!db) + { + lua_pushnil(L); + return 1; + } + + sqlite3_stmt *statement; + + if (sqlite3_prepare_v2(db, query, -1, &statement, 0) == SQLITE_OK) + { + int cols = sqlite3_column_count(statement); + int result = 0; + int cnt = 1; + // new table for data + lua_newtable(L); + while ((result = sqlite3_step(statement)) == SQLITE_ROW) + { + lua_pushnumber(L, cnt); + lua_newtable(L); + for (int col = 0; col < cols; col++) + { + const char *value = (const char *)sqlite3_column_text(statement, col); + const char *name = sqlite3_column_name(statement, col); + lua_pushstring(L, name); + lua_pushstring(L, value); + lua_settable(L, -3); + } + lua_settable(L, -3); + cnt++; + } + sqlite3_finalize(statement); + } + else + { + lua_pushnil(L); + lua_pushstring(L, sqlite3_errmsg(db)); + return 2; + } + return 1; +} +static const struct luaL_Reg sqlite[] = { + {"db", l_getdb}, + {"dbclose", l_db_close}, + {"query", l_db_query}, + {"last_insert_id", l_db_lastid}, + {"exec", l_db_exec}, + {NULL, NULL}}; + +int luaopen_sqlitedb(lua_State *L) +{ + luaL_newlib(L, sqlite); + return 1; +} \ No newline at end of file diff --git a/modules/stmr.c b/modules/stmr.c new file mode 100644 index 0000000..b49cc38 --- /dev/null +++ b/modules/stmr.c @@ -0,0 +1,704 @@ +/* This is the Porter stemming algorithm, coded up in ANSI C by the + * author. It may be be regarded as canonical, in that it follows the + * algorithm presented in + * + * Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, + * no. 3, pp 130-137, + * + * only differing from it at the points marked --DEPARTURE-- below. + * + * See also http://www.tartarus.org/~martin/PorterStemmer + * + * The algorithm as described in the paper could be exactly replicated + * by adjusting the points of DEPARTURE, but this is barely necessary, + * because (a) the points of DEPARTURE are definitely improvements, and + * (b) no encoding of the Porter stemmer I have seen is anything like + * as exact as this version, even with the points of DEPARTURE! + * + * You can compile it on Unix with 'gcc -O3 -o stem stem.c' after which + * 'stem' takes a list of inputs and sends the stemmed equivalent to + * stdout. + * + * The algorithm as encoded here is particularly fast. + * + * Release 1: was many years ago + * Release 2: 11 Apr 2013 + * fixes a bug noted by Matt Patenaude , + * + * case 'o': if (ends("\03" "ion") && (b[j] == 's' || b[j] == 't')) break; + * ==> + * case 'o': if (ends("\03" "ion") && j >= k0 && (b[j] == 's' || b[j] == 't')) break; + * + * to avoid accessing b[k0-1] when the word in b is "ion". + * Release 3: 25 Mar 2014 + * fixes a similar bug noted by Klemens Baum , + * that if step1ab leaves a one letter result (ied -> i, aing -> a etc), + * step2 and step4 access the byte before the first letter. So we skip + * steps after step1ab unless k > k0. */ +#include +#include "lua/lualib.h" +#define TRUE 1 +#define FALSE 0 + +/* The main part of the stemming algorithm starts here. b is a buffer + * holding a word to be stemmed. The letters are in b[k0], b[k0+1] ... + * ending at b[k]. In fact k0 = 0 in this demo program. k is readjusted + * downwards as the stemming progresses. Zero termination is not in fact + * used in the algorithm. + * + * Note that only lower case sequences are stemmed. Forcing to lower case + * should be done before stem(...) is called. */ + +/* buffer for word to be stemmed */ +static char *b; + +static int k; +static int k0; + +/* j is a general offset into the string */ +static int j; + +/** + * TRUE when `b[i]` is a consonant. + */ + +static int +isConsonant(int index) { + switch (b[index]) { + case 'a': + case 'e': + case 'i': + case 'o': + case 'u': + return FALSE; + case 'y': + return (index == k0) ? TRUE : !isConsonant(index - 1); + default: + return TRUE; + } +} + +/* Measure the number of consonant sequences between + * `k0` and `j`. If C is a consonant sequence and V + * a vowel sequence, and <..> indicates arbitrary + * presence: + * + * gives 0 + * VC gives 1 + * VCVC gives 2 + * VCVCVC gives 3 + * .... + */ +static int +getMeasure() { + int position; + int index; + + position = 0; + index = k0; + + while (TRUE) { + if (index > j) { + return position; + } + + if (!isConsonant(index)) { + break; + } + + index++; + } + + index++; + + while (TRUE) { + while (TRUE) { + if (index > j) { + return position; + } + + if (isConsonant(index)) { + break; + } + + index++; + } + + index++; + position++; + + while (TRUE) { + if (index > j) { + return position; + } + + if (!isConsonant(index)) { + break; + } + + index++; + } + + index++; + } +} + +/* `TRUE` when `k0, ... j` contains a vowel. */ +static int +vowelInStem() { + int index; + + index = k0 - 1; + + while (++index <= j) { + if (!isConsonant(index)) { + return TRUE; + } + } + + return FALSE; +} + +/* `TRUE` when `j` and `(j-1)` are the same consonant. */ +static int +isDoubleConsonant(int index) { + if (b[index] != b[index - 1]) { + return FALSE; + } + + return isConsonant(index); +} + +/* `TRUE` when `i - 2, i - 1, i` has the form + * `consonant - vowel - consonant` and also if the second + * C is not `"w"`, `"x"`, or `"y"`. this is used when + * trying to restore an `e` at the end of a short word. + * + * Such as: + * + * `cav(e)`, `lov(e)`, `hop(e)`, `crim(e)`, but `snow`, + * `box`, `tray`. + */ +static int +cvc(int index) { + int character; + + if (index < k0 + 2 || !isConsonant(index) || isConsonant(index - 1) || !isConsonant(index - 2)) { + return FALSE; + } + + character = b[index]; + + if (character == 'w' || character == 'x' || character == 'y') { + return FALSE; + } + + return TRUE; +} + +/* `ends(s)` is `TRUE` when `k0, ...k` ends with `value`. */ +static int +ends(const char *value) { + int length = value[0]; + + /* Tiny speed-up. */ + if (value[length] != b[k]) { + return FALSE; + } + + if (length > k - k0 + 1) { + return FALSE; + } + + if (memcmp(b + k - length + 1, value + 1, length) != 0) { + return FALSE; + } + + j = k - length; + + return TRUE; +} + +/* `setTo(value)` sets `(j + 1), ...k` to the characters in + * `value`, readjusting `k`. */ +static void +setTo(const char *value) { + int length = value[0]; + + memmove(b + j + 1, value + 1, length); + + k = j + length; +} + +/* Set string. */ +static void +replace(const char *value) { + if (getMeasure() > 0) { + setTo(value); + } +} + +/* `step1ab()` gets rid of plurals, `-ed`, `-ing`. + * + * Such as: + * + * caresses -> caress + * ponies -> poni + * ties -> ti + * caress -> caress + * cats -> cat + * + * feed -> feed + * agreed -> agree + * disabled -> disable + * + * matting -> mat + * mating -> mate + * meeting -> meet + * milling -> mill + * messing -> mess + * + * meetings -> meet + */ +static void +step1ab() { + int character; + + if (b[k] == 's') { + if (ends("\04" "sses")) { + k -= 2; + } else if (ends("\03" "ies")) { + setTo("\01" "i"); + } else if (b[k - 1] != 's') { + k--; + } + } + + if (ends("\03" "eed")) { + if (getMeasure() > 0) { + k--; + } + } else if ((ends("\02" "ed") || ends("\03" "ing")) && vowelInStem()) { + k = j; + + if (ends("\02" "at")) { + setTo("\03" "ate"); + } else if (ends("\02" "bl")) { + setTo("\03" "ble"); + } else if (ends("\02" "iz")) { + setTo("\03" "ize"); + } else if (isDoubleConsonant(k)) { + k--; + + character = b[k]; + + if (character == 'l' || character == 's' || character == 'z') { + k++; + } + } else if (getMeasure() == 1 && cvc(k)) { + setTo("\01" "e"); + } + } +} + +/* `step1c()` turns terminal `"y"` to `"i"` when there + * is another vowel in the stem. */ +static void +step1c() { + if (ends("\01" "y") && vowelInStem()) { + b[k] = 'i'; + } +} + +/* `step2()` maps double suffices to single ones. + * so -ization ( = -ize plus -ation) maps to -ize etc. + * note that the string before the suffix must give + * getMeasure() > 0. */ +static void +step2() { + switch (b[k - 1]) { + case 'a': + if (ends("\07" "ational")) { + replace("\03" "ate"); + break; + } + + if (ends("\06" "tional")) { + replace("\04" "tion"); + break; + } + + break; + case 'c': + if (ends("\04" "enci")) { + replace("\04" "ence"); + break; + } + + if (ends("\04" "anci")) { + replace("\04" "ance"); + break; + } + + break; + case 'e': + if (ends("\04" "izer")) { + replace("\03" "ize"); + break; + } + + break; + case 'l': + /* --DEPARTURE--: To match the published algorithm, + * replace this line with: + * + * ``` + * if (ends("\04" "abli")) { + * replace("\04" "able"); + * + * break; + * } + * ``` + */ + if (ends("\03" "bli")) { + replace("\03" "ble"); + break; + } + + if (ends("\04" "alli")) { + replace("\02" "al"); + break; + } + + if (ends("\05" "entli")) { + replace("\03" "ent"); + break; + } + + if (ends("\03" "eli")) { + replace("\01" "e"); + break; + } + + if (ends("\05" "ousli")) { + replace("\03" "ous"); + break; + } + + break; + case 'o': + if (ends("\07" "ization")) { + replace("\03" "ize"); + break; + } + + if (ends("\05" "ation")) { + replace("\03" "ate"); + break; + } + + if (ends("\04" "ator")) { + replace("\03" "ate"); + break; + } + + break; + case 's': + if (ends("\05" "alism")) { + replace("\02" "al"); + break; + } + + if (ends("\07" "iveness")) { + replace("\03" "ive"); + break; + } + + if (ends("\07" "fulness")) { + replace("\03" "ful"); + break; + } + + if (ends("\07" "ousness")) { + replace("\03" "ous"); + break; + } + + break; + case 't': + if (ends("\05" "aliti")) { + replace("\02" "al"); + break; + } + + if (ends("\05" "iviti")) { + replace("\03" "ive"); + break; + } + + if (ends("\06" "biliti")) { + replace("\03" "ble"); + break; + } + + break; + /* --DEPARTURE--: To match the published algorithm, delete this line. */ + case 'g': + if (ends("\04" "logi")) { + replace("\03" "log"); + break; + } + } +} + +/* `step3()` deals with -ic-, -full, -ness etc. + * similar strategy to step2. */ +static void +step3() { + switch (b[k]) { + case 'e': + if (ends("\05" "icate")) { + replace("\02" "ic"); + break; + } + + if (ends("\05" "ative")) { + replace("\00" ""); + break; + } + + if (ends("\05" "alize")) { + replace("\02" "al"); + break; + } + + break; + case 'i': + if (ends("\05" "iciti")) { + replace("\02" "ic"); + break; + } + + break; + case 'l': + if (ends("\04" "ical")) { + replace("\02" "ic"); + break; + } + + if (ends("\03" "ful")) { + replace("\00" ""); + break; + } + + break; + case 's': + if (ends("\04" "ness")) { + replace("\00" ""); + break; + } + + break; + } +} + +/* `step4()` takes off -ant, -ence etc., in + * context vcvc. */ +static void +step4() { + switch (b[k - 1]) { + case 'a': + if (ends("\02" "al")) { + break; + } + + return; + case 'c': + if (ends("\04" "ance")) { + break; + } + + if (ends("\04" "ence")) { + break; + } + + return; + case 'e': + if (ends("\02" "er")) { + break; + } + + return; + case 'i': + if (ends("\02" "ic")) { + break; + } + + return; + case 'l': + if (ends("\04" "able")) { + break; + } + + if (ends("\04" "ible")) { + break; + } + + return; + case 'n': + if (ends("\03" "ant")) { + break; + } + + if (ends("\05" "ement")) { + break; + } + + if (ends("\04" "ment")) { + break; + } + + if (ends("\03" "ent")) { + break; + } + + return; + case 'o': + if (ends("\03" "ion") && j >= k0 && (b[j] == 's' || b[j] == 't')) { + break; + } + + /* takes care of -ous */ + if (ends("\02" "ou")) { + break; + } + + return; + case 's': + if (ends("\03" "ism")) { + break; + } + + return; + case 't': + if (ends("\03" "ate")) { + break; + } + + if (ends("\03" "iti")) { + break; + } + + return; + case 'u': + if (ends("\03" "ous")) { + break; + } + + return; + case 'v': + if (ends("\03" "ive")) { + break; + } + + return; + case 'z': + if (ends("\03" "ize")) { + break; + } + + return; + default: + return; + } + + if (getMeasure() > 1) { + k = j; + } +} + +/* `step5()` removes a final `-e` if `getMeasure()` is + * greater than `1`, and changes `-ll` to `-l` if + * `getMeasure()` is greater than `1`. */ +static void +step5() { + int a; + + j = k; + + if (b[k] == 'e') { + a = getMeasure(); + + if (a > 1 || (a == 1 && !cvc(k - 1))) { + k--; + } + } + + if (b[k] == 'l' && isDoubleConsonant(k) && getMeasure() > 1) { + k--; + } +} + +/* In `stem(p, i, j)`, `p` is a `char` pointer, and the + * string to be stemmed is from `p[i]` to + * `p[j]` (inclusive). + * + * Typically, `i` is zero and `j` is the offset to the + * last character of a string, `(p[j + 1] == '\0')`. + * The stemmer adjusts the characters `p[i]` ... `p[j]` + * and returns the new end-point of the string, `k`. + * + * Stemming never increases word length, so `i <= k <= j`. + * + * To turn the stemmer into a module, declare 'stem' as + * extern, and delete the remainder of this file. */ +int +stem(char *p, int index, int position) { + /* Copy the parameters into statics. */ + b = p; + k = position; + k0 = index; + + if (k <= k0 + 1) { + return k; /* --DEPARTURE-- */ + } + + /* With this line, strings of length 1 or 2 don't + * go through the stemming process, although no + * mention is made of this in the published + * algorithm. Remove the line to match the published + * algorithm. */ + step1ab(); + + if (k > k0) { + step1c(); + step2(); + step3(); + step4(); + step5(); + } + + return k; +} + + +static int l_stmr(lua_State* L) +{ + char* word = strdup(luaL_checkstring(L,1)); + int end = stem(word, 0, strlen(word) - 1); + word[end + 1] = 0; + lua_pushstring(L, word); + free(word); + return 1; +} + +static const struct luaL_Reg _lib [] = { + {"stmr", l_stmr}, + {NULL,NULL} +}; + +int luaopen_stmr(lua_State *L) +{ + luaL_newlib(L, _lib); + return 1; +} \ No newline at end of file diff --git a/modules/ulib.c b/modules/ulib.c new file mode 100644 index 0000000..b8ce484 --- /dev/null +++ b/modules/ulib.c @@ -0,0 +1,866 @@ +#ifdef LINUX + #include + #include + #include + #include + #include + #include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lua/lualib.h" +// zip library +#include "3rd/zip/zip.h" + + +#define MAX_PATH_LEN 1024 + +/** + * Trim a string by a character on both ends + * @param str The target string + * @param delim the delim + */ +static void trim(char* str, const char delim) +{ + if(!str || strlen(str) == 0) return; + char * p = str; + int l = strlen(p); + while(l > 0 && p[l - 1] == delim) + p[--l] = 0; + while(* p && (* p) == delim ) ++p, --l; + memmove(str, p, l + 1); +} + +static int l_check_login (lua_State *L) { +#ifdef LINUX + const char* username = luaL_checkstring(L,1); + const char* password = luaL_checkstring(L,2); + + char *encrypted; + struct passwd *pwd; + struct spwd *spwd; + /* Look up password and shadow password records for username */ + pwd = getpwnam(username); + if (pwd == NULL) + { + lua_pushboolean(L,0); + lua_pushstring(L, strerror(errno)); + return 2; + } + spwd = getspnam(username); + /*if (spwd == NULL) + { + lua_pushboolean(L,0); + printf("no permission to read shadow password file\n"); + return 1; + }*/ + + if (spwd != NULL) /* If there is a shadow password record */ + { + pwd->pw_passwd = spwd->sp_pwdp; /* Use the shadow password */ + } + /*else + { + lua_pushboolean(L,0); + printf("shadow record is null \n" ); + return 1; + }*/ + /* Encrypt password and erase cleartext version immediately */ + encrypted = crypt(password, pwd->pw_passwd); + if (encrypted == NULL) + { + lua_pushboolean(L,0); + lua_pushstring(L, strerror(errno)); + return 2; + } + if(strcmp(encrypted, pwd->pw_passwd) == 0) + { + lua_pushboolean(L,1); + return 1; + } else + { + lua_pushboolean(L,0); + return 1; + } +#else + // macos + // just pass the check, for test only + lua_pushboolean(L,0); + lua_pushstring(L, "Login by shadow passwd is not supported on this system"); + return 2; +#endif +} +static void get_userId(const char* name, uid_t* uid,gid_t* gid ) +{ + struct passwd *pwd; + uid_t u; + char *endptr; + if (name == NULL || *name == '\0') + { + *uid = -1; *gid=-1; + return; + } + u = strtol(name, &endptr, 10); + if (*endptr == '\0') + { + *uid = u; + *gid = -1; + return; + } + pwd = getpwnam(name); + if (pwd == NULL) + { + *uid = -1; *gid=-1; + return; + } + *uid = pwd->pw_uid; + *gid = pwd->pw_gid; +} +static int l_fork(lua_State* L) +{ + int pid = fork(); + lua_pushnumber(L, pid); + return 1; +} + +static int l_waitpid(lua_State* L) +{ + int pid = luaL_checknumber(L,1); + int nohang = luaL_checknumber(L,2); + pid_t st; + int status; + if(nohang) + { + st = waitpid(pid, &status, WNOHANG); + } + else + { + st = waitpid(pid, &status, 0); + } + lua_pushnumber(L, st); + return 1; +} +static int l_kill(lua_State* L) +{ + int pid = luaL_checknumber(L,1); + if(pid == -1) pid = getpid(); + int status = kill(pid, SIGHUP); + lua_pushnumber(L, status); + return 1; +} +static int l_setuid(lua_State* L) +{ + uid_t uid = (uid_t) luaL_checknumber(L,1); + if((int)uid != -1) + { + if(setuid(uid) < 0) + { + lua_pushboolean(L,0); + lua_pushstring(L, strerror(errno)); + return 2; + } + else + { + //printf("UID set\n"); + lua_pushboolean(L,1); + return 1; + } + } + else + lua_pushboolean(L,0); + return 1; +} +static int l_setgid(lua_State* L) +{ + uid_t gid = (uid_t) luaL_checknumber(L,1); + if((int)gid != -1) + { + if(setgid(gid) < 0) + { + lua_pushboolean(L,0); + lua_pushstring(L, strerror(errno)); + return 2; + } + else + { + //printf("GID set\n"); + lua_pushboolean(L,1); + return 1; + } + } + else + lua_pushboolean(L,0); + return 1; +} + +static int l_getuid(lua_State* L) +{ + const char* name = luaL_checkstring(L,1); + uid_t uid = -1; + uid_t gid = -1; + get_userId(name,&uid,&gid); + lua_newtable(L); + + lua_pushstring(L,"id"); + lua_pushnumber(L,uid); + lua_settable(L,-3); + + lua_pushstring(L,"gid"); + lua_pushnumber(L,gid); + lua_settable(L,-3); + + int j, ngroups = 30; //only first 10 group + gid_t *groups; + struct group *gr; + // find all the groups + if((int)uid != -1 && (int)gid != -1) + { + /* Retrieve group list */ + groups = malloc(ngroups * sizeof (gid_t)); + if (groups == NULL) { + return 1; + } + if (getgrouplist(name, gid, groups, &ngroups) == -1) { + free(groups); + return 1; + } + /* retrieved groups, along with group names */ + lua_pushstring(L,"groups"); + lua_newtable(L); + + for (j = 0; j < ngroups; j++) { + gr = getgrgid(groups[j]); + if (gr != NULL) + { + //printf("%d: (%s)\n", groups[j],gr->gr_name); + lua_pushnumber(L, groups[j]); + lua_pushstring(L, gr->gr_name); + lua_settable(L,-3); + } + } + lua_settable(L, -3); + free(groups); + } + return 1; +} + +static void timestr(time_t time, char* buf,int len,char* format, int gmt) +{ + struct tm t; + if(gmt) + { + gmtime_r(&time, &t); + } + else + { + localtime_r(&time, &t); + } + strftime(buf, len, format, &t); +} + + +static int l_file_stat(lua_State* L, const char* path) +{ + //const char* path = luaL_checkstring(L,-1); + //printf("PATH %s\n", path); + //lua_pop(L,1); + char date[64]; + struct stat st; + if( stat(path, &st) == 0 ) + { + // recore for file + lua_newtable(L); + + //type + lua_pushstring(L,"type"); + if(S_ISDIR(st.st_mode)) + lua_pushstring(L,"dir"); + else + lua_pushstring(L,"file"); + lua_settable(L,-3); + + //ctime + lua_pushstring(L,"ctime"); + timestr(st.st_ctime,date,sizeof(date),"%a, %d %b %Y %H:%M:%S GMT",1); + lua_pushstring(L,date); + lua_settable(L,-3); + //mtime + + lua_pushstring(L,"mtime"); + timestr(st.st_mtime,date,sizeof(date),"%a, %d %b %Y %H:%M:%S GMT",1); + lua_pushstring(L,date); + lua_settable(L,-3); + + //size + lua_pushstring(L,"size"); + lua_pushnumber(L,st.st_size); + lua_settable(L,-3); + + // uid + lua_pushstring(L,"uid"); + lua_pushnumber(L,st.st_uid); + lua_settable(L,-3); + // gid + lua_pushstring(L,"gid"); + lua_pushnumber(L,st.st_gid); + lua_settable(L,-3); + + lua_pushstring(L,"permissions"); + sprintf(date," (%3o)", st.st_mode&0777); + lua_pushstring(L,date); + lua_settable(L,-3); + + // permission + lua_pushstring(L,"perm"); + lua_newtable(L); + + lua_pushstring(L,"owner"); + lua_newtable(L); + lua_pushstring(L,"read"); + lua_pushboolean(L, (st.st_mode & S_IRUSR)==0?0:1); + lua_settable(L,-3); + lua_pushstring(L,"write"); + lua_pushboolean(L, (st.st_mode & S_IWUSR)==0?0:1); + lua_settable(L,-3); + lua_pushstring(L,"exec"); + lua_pushboolean(L, (st.st_mode & S_IXUSR)==0?0:1); + lua_settable(L,-3); + //end owner + lua_settable(L,-3); + + lua_pushstring(L,"group"); + lua_newtable(L); + lua_pushstring(L,"read"); + lua_pushboolean(L, (st.st_mode & S_IRGRP)==0?0:1); + lua_settable(L,-3); + lua_pushstring(L,"write"); + lua_pushboolean(L, (st.st_mode & S_IWGRP)==0?0:1); + lua_settable(L,-3); + lua_pushstring(L,"exec"); + lua_pushboolean(L, (st.st_mode & S_IXGRP)==0?0:1); + lua_settable(L,-3); + //end owner + lua_settable(L,-3); + + lua_pushstring(L,"other"); + lua_newtable(L); + lua_pushstring(L,"read"); + lua_pushboolean(L, (st.st_mode & S_IROTH)==0?0:1); + lua_settable(L,-3); + lua_pushstring(L,"write"); + lua_pushboolean(L, (st.st_mode & S_IWOTH)==0?0:1); + lua_settable(L,-3); + lua_pushstring(L,"exec"); + lua_pushboolean(L, (st.st_mode & S_IXOTH)==0?0:1); + lua_settable(L,-3); + //end owner + lua_settable(L,-3); + + // end permission + lua_settable(L,-3); + } + else + { + lua_newtable(L); + } + return 1; +} + +static int l_file_info(lua_State* L) +{ + const char* path = luaL_checkstring(L,1); + l_file_stat(L,path); + return 1; +} + +static int l_file_move(lua_State* L) +{ + const char* old = luaL_checkstring(L,1); + const char* new = luaL_checkstring(L,2); + //printf("MOVE %s to %s\n",old,new); + lua_pushboolean(L, rename(old,new)==0); + return 1; +} + +static int l_send_file(lua_State* L) +{ + int fromfd, tofd, ret; + size_t sz = 0; + char* old = NULL; + char* new = NULL; + if(lua_isnumber(L,1)) + { + fromfd = (int)luaL_checknumber(L,1); + } + else + { + old = (char*)luaL_checkstring(L,1); + if((fromfd = open(old, O_RDONLY)) < 0) + { + lua_pushboolean(L,0); + goto end_send_file; + } + struct stat st; + if(stat(old, &st)!=0) + { + lua_pushboolean(L,0); + goto end_send_file; + } + sz = st.st_size; + } + if(lua_isnumber(L,2)) + { + tofd = (int) luaL_checknumber(L,2); + } + else + { + new = (char*)luaL_checkstring(L,2); + if (unlink(new) < 0 && errno != ENOENT) { + lua_pushboolean(L,0); + goto end_send_file; + } + if((tofd = open(new, O_WRONLY | O_CREAT, 0600)) < 0) + { + lua_pushboolean(L,0); + goto end_send_file; + } + } + + if(lua_isnumber(L,3)) + { + sz = (size_t) luaL_checknumber(L,3); + } + + off_t off = 0; + int read = 0; + while ( + sz > 0 && + read != sz && + ( + ((ret = sendfile(tofd, fromfd, &off, sz - read)) >= 0) || + (errno == EAGAIN) + ) + ) + { + if(ret < 0) ret = 0; + read += ret; + } + if(read != sz) + { + lua_pushboolean(L,0); + goto end_send_file; + } + lua_pushboolean(L,1); +end_send_file: + if(fromfd >= 0 && old) + { + (void) close(fromfd); + } + if(tofd >= 0 && new) + { + (void) close(tofd); + } + return 1; +} + +static int l_read_dir(lua_State* L) +{ + const char* path = luaL_checkstring(L,1); + const char* prefix = luaL_checkstring(L,2); + DIR *d; + struct dirent *dir; + d = opendir(path); + char buff[1024]; + lua_newtable(L); + if (d) + { + int id=0; + while ((dir = readdir(d)) != NULL) + { + //ignore curent directory, parent directory + if(strcmp(dir->d_name,".") == 0 || + strcmp(dir->d_name,"..")==0/*|| *(dir->d_name)=='.'*/) continue; + sprintf(buff,"%s/%s",path,dir->d_name); + lua_pushnumber(L,id); + //lua_pushstring(L,buff); + l_file_stat(L,buff); + + lua_pushstring(L,"filename"); + lua_pushstring(L,dir->d_name); + lua_settable(L,-3); + + sprintf(buff,"%s/%s",prefix,dir->d_name); + //printf("FILE %s\n",buff ); + lua_pushstring(L,"path"); + lua_pushstring(L,buff); + lua_settable(L,-3); + + lua_settable(L,-3); + + id++; + } + closedir(d); + } + else + { + lua_pushstring(L,"error"); + lua_pushstring(L,"Resource not found"); + lua_settable(L,-3); + } + return 1; +} + +static int l_chown(lua_State* L) +{ + const char* path = luaL_checkstring(L,1); + int id = luaL_checknumber(L,2); + int gid = luaL_checknumber(L,3); + lua_pushboolean(L, chown(path,id,gid) == 0); + return 1; +} + +static int l_mkdir(lua_State* L) +{ + const char* path = luaL_checkstring(L,1); + lua_pushboolean(L, mkdir(path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) == 0); + return 1; +} +static int l_exist(lua_State* L) +{ + const char* path = luaL_checkstring(L,1); + lua_pushboolean(L,access( path, F_OK ) != -1 ); + return 1; +} + +static int is_dir(const char* f) +{ + struct stat st; + if(stat(f, &st) == -1) + return -1; // unknow + else if(st.st_mode & S_IFDIR) + return 1; + else + return 0; +} + +static int l_is_dir(lua_State *L) +{ + const char *file = (char *)luaL_checkstring(L, 1); + lua_pushboolean(L, is_dir(file) == 1); + return 1; +} + +static int _recursive_delete(const char* path) +{ + if(is_dir(path) == 1) + { + DIR *d; + struct dirent *dir; + d = opendir(path); + char buff[1024]; + if (d) + { + while ((dir = readdir(d)) != NULL) + { + //ignore current & parent dir + if(strcmp(dir->d_name,".") == 0 || strcmp(dir->d_name,"..")==0) continue; + // get the file + sprintf(buff,"%s/%s",path,dir->d_name); + if(_recursive_delete(buff) == -1) return -1; + } + closedir(d); + // remove dir + if(rmdir(path) != 0) return -1; + } else return -1; + } + else + { + // file remove + if(remove(path) != 0) return -1; + } + return 0; +} +static int l_delete(lua_State* L) +{ + const char* f = luaL_checkstring(L,1); + lua_pushboolean(L,_recursive_delete(f) == 0); + return 1; +} +/* +* Only use this for simple command +* Be careful to expose this function +* to user. This can cause a serious +* security problem +*/ +static int l_cmd_open(lua_State* L) +{ + FILE *fp; + const char* cmd = luaL_checkstring(L,1); + + /* Open the command for reading. */ + fp = popen(cmd, "r"); + if (fp == NULL) { + lua_pushnil(L); + return 1; + } + /*return the fd to lua + * user can use this to + read the output data*/ + intptr_t pt = (intptr_t)fp; + lua_pushnumber(L, pt); + return 1; +} +static int l_cmd_read(lua_State* L) +{ + /* Read the output a line at a time - output it. + read user defined data of std + */ + FILE * fd; + intptr_t pt = (intptr_t)luaL_checknumber(L, 1); + fd = (FILE*)pt; + if(fd == NULL) + { + lua_pushnil(L); + return 1; + } + char buff[1024]; + if(fgets(buff, sizeof(buff)-1, fd) != NULL) { + lua_pushstring(L,buff); + } else + { + lua_pushnil(L); + } + /* close */ + //pclose(fp); + + return 1; +} +static int l_cmd_close(lua_State* L) +{ + /* Read the output a line at a time - output it. + read user defined data of std + */ + FILE * fd; + intptr_t pt = (intptr_t)luaL_checknumber(L, 1); + fd = (FILE*)pt; + if(fd == NULL) + { + pclose(fd); + } + return 0; +} +/*zip file handler +* using miniz to compress and uncompress zip file +*/ + +static int l_unzip(lua_State* L) +{ + const char* src = luaL_checkstring(L,1); + const char* dest = luaL_checkstring(L,2); + + if(zip_extract(src, dest, NULL, NULL) == -1) + { + lua_pushboolean(L,0); + return 1; + } + lua_pushboolean(L,1); + return 1; +} + +static int _add_to_zip(struct zip_t * zip, const char* path, const char* root) +{ + int st = is_dir(path); + if(st == -1) return -1; + char p1[MAX_PATH_LEN]; + char p2[MAX_PATH_LEN]; + if(st) + { + // read directory + DIR *d; + struct dirent *dir; + //struct stat st; + d = opendir(path); + if (d) + { + while ((dir = readdir(d)) != NULL) + { + //ignore curent directory, parent directory + if(strcmp(dir->d_name,".") == 0 || strcmp(dir->d_name,"..")==0) continue; + // add file to zip + snprintf(p1,MAX_PATH_LEN,"%s/%s", path, dir->d_name); + snprintf(p2,MAX_PATH_LEN,"%s/%s",root,dir->d_name); + if(_add_to_zip(zip,p1,p2) == -1) + { + return -1; + } + } + closedir(d); + } + } + else + { + // if it is a file + // add it to zip + if(zip_entry_open(zip, root) == -1) return -1; + if(zip_entry_fwrite(zip, path) == -1) return -1; + zip_entry_close(zip); + } + return 0; +} + +static int l_zip(lua_State* L) +{ + const char* src = luaL_checkstring(L,1); + const char* dest = luaL_checkstring(L,2); + + // create a zip handler + struct zip_t *zip = zip_open(dest, ZIP_DEFAULT_COMPRESSION_LEVEL, 0); + + if(zip == NULL) goto pfalse; + + if(_add_to_zip(zip,src,"") == -1) goto pfalse; + + zip_close(zip); + + lua_pushboolean(L,1); + return 1; + + // return false + pfalse: + // TODO remove if filed is created + if(zip) zip_close(zip); + lua_pushboolean(L,0); + return 1; +} +static int l_getenv(lua_State* L) +{ + const char* name = luaL_checkstring(L,1); + if(!name) + { + lua_pushnil(L); + return 1; + } + char * value = getenv(name); + lua_pushstring(L, value); + return 1; +} + +static int l_setenv(lua_State* L) +{ + const char* name = luaL_checkstring(L,1); + const char* value = luaL_checkstring(L,2); + const int force = luaL_checknumber(L,3); + if(!name) + { + lua_pushboolean(L, 0); + return 1; + } + if(!value) + { + lua_pushboolean(L, 0); + return 1; + } + int ret = setenv(name, value, force); + if(ret != 0) + { + lua_pushboolean(L, 0); + return 1; + } + lua_pushboolean(L,1); + return 1; +} + +static int l_unsetenv(lua_State* L) +{ + const char* name = luaL_checkstring(L,1); + if(!name) + { + lua_pushboolean(L, 0); + return 1; + } + int ret = unsetenv(name); + if(ret != 0) + { + lua_pushboolean(L, 0); + return 1; + } + lua_pushboolean(L,1); + return 1; +} + +static int l_gethomedir(lua_State* L) +{ + uid_t uid = (uid_t) luaL_checknumber(L,1); + if(uid < 0) + { + lua_pushnil(L); + return 1; + } + struct passwd *pw = getpwuid(uid); + + if (pw == NULL) { + lua_pushnil(L); + return 1; + } + + lua_pushstring(L, pw->pw_dir); + return 1; +} + +static int l_trim(lua_State *L) +{ + char *str = strdup((char *)luaL_checkstring(L, 1)); + const char *tok = luaL_checkstring(L, 2); + + trim(str, tok[0]); + lua_pushstring(L, (const char *)str); + free(str); + return 1; +} + +static const struct luaL_Reg _lib [] = { + {"auth", l_check_login}, + {"read_dir",l_read_dir}, + {"file_stat",l_file_info}, + {"uid",l_getuid}, + {"setuid", l_setuid}, + {"setgid", l_setgid}, + {"fork", l_fork}, + {"kill", l_kill}, + {"waitpid", l_waitpid}, + {"trim", l_trim}, + {"chown",l_chown}, + {"delete", l_delete}, + {"exists",l_exist}, + {"mkdir",l_mkdir}, + {"cmdopen",l_cmd_open}, + {"cmdread",l_cmd_read}, + {"cmdclose",l_cmd_close}, + {"move",l_file_move}, + {"unzip",l_unzip}, + {"zip",l_zip}, + {"getenv",l_getenv}, + {"setenv",l_setenv}, + {"unsetenv",l_unsetenv}, + {"home_dir",l_gethomedir}, + {"send_file", l_send_file}, + {"is_dir", l_is_dir}, + {NULL,NULL} +}; + +int luaopen_ulib(lua_State *L) +{ + luaL_newlib(L, _lib); + return 1; +} diff --git a/silkmvc/BaseController.lua b/silkmvc/BaseController.lua new file mode 100644 index 0000000..29644ad --- /dev/null +++ b/silkmvc/BaseController.lua @@ -0,0 +1,108 @@ +-- create class +BaseObject:subclass("BaseController", + { + registry = {}, + models = {}, + main = false + }) + +-- set the name here in each subclasses +function BaseController:initialize() + for k, v in pairs(self.models) do + --- infer the class here + local modelname = firstToUpper(v).."Model" + local path = MODEL_ROOT.."."..modelname + -- require it + pcall(require, path) + --require(controller_path) + if not _G[modelname] then + self:modelnotfound(v) + else + -- create new model object + self[v] = _G[modelname]:new{registry = self.registry} + end + end + -- create template + self.template = Template:new{registry = self.registry} +end + +function BaseController:print(...) + return self:actionnotfound("print") +end + +function BaseController:redirect(url) + std.status(301, "Moved Permanently") + std.custom_header("Content-Type","text/html") + std.custom_header("Location", url) + std.header_flush() +end + +function BaseController:switchLayout(name) + if self.main then + self.registry.layout = name + else + self:log("Cannot switch layout since the controller "..self.class.." is not the main controller") + end +end + +function BaseController:setSession(key, value) + SESSION[key] = value +end +function BaseController:getSession(key) + return SESSION[key] +end +function BaseController:removeSession(key) + self:setSession(key, nil) +end +function BaseController:index(...) + self:error("#index: subclasses responsibility") +end + +-- not found action +function BaseController:actionnotfound(...) + local args = {...} + self:error("#action "..args[1].." is not found in controller "..self.class) +end +-- not found model +function BaseController:modelnotfound(...) + local args = {...} + self:error("Model "..firstToUpper(args[1]).."Model is not found in controller "..self.class) +end +-- The not found controller + +BaseController:subclass("NotfoundController",{ registry = {}, models = {} }) + +function NotfoundController:index(...) + local args = {...} + local error = args[2] or "" + if self.template:path() then + self.template:set("error", error) + self.template:set("title", "404 not found") + return true + end + self:error("404: Controller "..args[1].." not found : "..error) + return false +end + +-- The asset controller for the static file +BaseController:subclass("AssetController", {registry={}, models={}}) +function AssetController:index(...) + local args = {...} + return self:get(table.unpack(args)) +end + +function AssetController:get(...) + local path = WWW_ROOT..DIR_SEP..implode({...}, DIR_SEP) + + if self.registry.fileaccess and ulib.exists(path) then + local mime = std.mimeOf(path) + if POLICY.mimes[mime] then + std.sendFile(path) + else + self:error("Access forbidden: "..path) + end + else + self:error("Asset file not found or access forbidden: "..path) + end + return false +end \ No newline at end of file diff --git a/silkmvc/BaseModel.lua b/silkmvc/BaseModel.lua new file mode 100644 index 0000000..f26b304 --- /dev/null +++ b/silkmvc/BaseModel.lua @@ -0,0 +1,51 @@ +-- create class +BaseObject:subclass("BaseModel", {registry = {}}) + +function BaseModel:initialize() + self.db = self.registry.db + if self.db and self.name and self.name ~= "" and self.fields and + not self.db:available(self.name) then + self.db:createTable(self.name, self.fields) + end +end + +function BaseModel:create(m) + if self.db and m then return self.db:insert(self.name, m) end + return false +end + +function BaseModel:update(m) + if self.db and m then return self.db:update(self.name, m) end + return false +end + +function BaseModel:delete(cond) + if self.db and cond then return self.db:delete(self.name, cond) end + return false +end + +function BaseModel:find(cond) + if self.db and cond then return self.db:find(self.name, cond) end + return false +end + +function BaseModel:get(id) + local data, order = self:find({exp = {["="] = {id = id}}}) + if not data or #order == 0 then return false end + return data[1] +end + +function BaseModel:findAll() + if self.db then return self.db:getAll(self.name) end + return false +end + +function BaseModel:query(sql) + if self.db then return self.db:query(sql) end + return false +end + +function BaseModel:select(sel, sql_cnd) + if self.db then return self.db:select(self.name, sel, sql_cnd) end + return nil +end diff --git a/silkmvc/BaseObject.lua b/silkmvc/BaseObject.lua new file mode 100644 index 0000000..efc5935 --- /dev/null +++ b/silkmvc/BaseObject.lua @@ -0,0 +1,34 @@ +BaseObject = Object:extends{registry = {}, class="BaseObject"} +function BaseObject:subclass(name, args) + _G[name] = self:extends(args) + _G[name].class = name +end + +function BaseObject:log(msg, level) + level = level or "INFO" + if self.registry.logger then + self.registry.logger:log(msg,level) + end +end + +function BaseObject:debug(msg) + self:log(msg, "DEBUG") +end + +function BaseObject:print() + print(self.class) +end + +function BaseObject:error(msg, trace) + html() + --local line = debug.getinfo(1).currentline + echo(msg) + self:log(msg,"ERROR") + if trace then + debug.traceback=nil + error(msg) + else + error(msg) + end + return false +end \ No newline at end of file diff --git a/silkmvc/DBHelper.lua b/silkmvc/DBHelper.lua new file mode 100644 index 0000000..5ee9c3f --- /dev/null +++ b/silkmvc/DBHelper.lua @@ -0,0 +1,144 @@ +sqlite = modules.sqlite() + +if sqlite == nil then return 0 end +-- create class +BaseObject:subclass("DBHelper", {db = {}}) + +function DBHelper:createTable(tbl, m) + if self:available(tbl) then return true end + local sql = "CREATE TABLE " .. tbl .. "(id INTEGER PRIMARY KEY" + for k, v in pairs(m) do + if k ~= "id" then sql = sql .. "," .. k .. " " .. v end + end + sql = sql .. ");" + return sqlite.query(self.db, sql) == 1 +end + +function DBHelper:insert(tbl, m) + local keys = {} + local values = {} + for k, v in pairs(m) do + if k ~= "id" then + table.insert(keys, k) + if type(v) == "number" then + table.insert(values, v) + else + local t = "\"" .. v:gsub('"', '""') .. "\"" + table.insert(values, t) + end + end + end + local sql = "INSERT INTO " .. tbl .. " (" .. table.concat(keys, ',') .. + ') VALUES (' + sql = sql .. table.concat(values, ',') .. ');' + return sqlite.query(self.db, sql) == 1 +end + +function DBHelper:get(tbl, id) + return sqlite.select(self.db, tbl, "*", "id=" .. id)[1] +end + +function DBHelper:getAll(tbl) + local data = sqlite.select(self.db, tbl, "*", "1=1") + if data == nil then return nil end + local a = {} + for n in pairs(data) do table.insert(a, n) end + table.sort(a) + return data, a +end + +function DBHelper:find(tbl, cond) + local cnd = "1=1" + local sel = "*" + if cond.exp then cnd = self:gencond(cond.exp) end + if cond.order then + cnd = cnd .. " ORDER BY " + local l = {} + local i = 1 + for k, v in pairs(cond.order) do + l[i] = k .. " " .. v + i = i + 1 + end + cnd = cnd .. table.concat(l, ",") + end + if cond.limit then cnd = cnd .. " LIMIT " .. cond.limit end + if cond.fields then + sel = table.concat(cond.fields, ",") + -- print(sel) + end + local data = sqlite.select(self.db, tbl, sel, cnd) + if data == nil then return nil end + local a = {} + for n in pairs(data) do table.insert(a, n) end + table.sort(a) + return data, a +end + +function DBHelper:select(tbl, sel, cnd) + local data = sqlite.select(self.db, tbl, sel, cnd) + if data == nil then return nil end + local a = {} + for n in pairs(data) do table.insert(a, n) end + table.sort(a) + return data, a +end + +function DBHelper:query(sql) return sqlite.query(self.db, sql) == 1 end + +function DBHelper:update(tbl, m) + local id = m['id'] + if id ~= nil then + local lst = {} + for k, v in pairs(m) do + if (type(v) == "number") then + table.insert(lst, k .. "=" .. v) + else + table.insert(lst, k .. "=\"" .. v:gsub('"', '""') .. "\"") + end + end + local sql = "UPDATE " .. tbl .. " SET " .. table.concat(lst, ",") .. + " WHERE id=" .. id .. ";" + return sqlite.query(self.db, sql) == 1 + end + return false +end + +function DBHelper:available(tbl) return sqlite.hasTable(self.db, tbl) == 1 end +function DBHelper:deleteByID(tbl, id) + local sql = "DELETE FROM " .. tbl .. " WHERE id=" .. id .. ";" + return sqlite.query(self.db, sql) == 1 +end +function DBHelper:gencond(o) + for k, v in pairs(o) do + if k == "and" or k == "or" then + local cnd = {} + local i = 1 + for k1, v1 in pairs(v) do + cnd[i] = self:gencond(v1) + i = i + 1 + end + return " (" .. table.concat(cnd, " " .. k .. " ") .. ") " + else + for k1, v1 in pairs(v) do + local t = type(v1) + if (t == "string") then + return + " (" .. k1 .. " " .. k .. ' "' .. v1:gsub('"', '""') .. + '") ' + end + return " (" .. k1 .. " " .. k .. " " .. v1 .. ") " + end + end + end +end +function DBHelper:delete(tbl, cond) + local sql = "DELETE FROM " .. tbl .. " WHERE " .. self:gencond(cond) .. ";" + return sqlite.query(self.db, sql) == 1 +end + +function DBHelper:lastInsertID() return sqlite.lastInsertID(self.db) end + +function DBHelper:close() if self.db then sqlite.dbclose(self.db) end end +function DBHelper:open() + if self.db ~= nil then self.db = sqlite.getdb(self.db) end +end diff --git a/silkmvc/Logger.lua b/silkmvc/Logger.lua new file mode 100644 index 0000000..4cdcc5b --- /dev/null +++ b/silkmvc/Logger.lua @@ -0,0 +1,30 @@ +Logger = Object:extends{levels = {}} + +function Logger:initialize() +end + +function Logger:log(msg,level) + if self.levels[level] and ulib.exists(LOG_ROOT) then + local path = LOG_ROOT..DIR_SEP..level..'.txt' + local f = io.open(path, 'a') + local text = '['..level.."]: "..msg + if f then + f:write(text..'\n') + f:close() + end + print(text) + end +end + +function Logger:info(msg) + self:log(msg, "INFO") +end + +function Logger:debug(msg) + self:log(msg, "DEBUG") +end + + +function Logger:error(msg) + self:log(msg, "ERROR") +end \ No newline at end of file diff --git a/silkmvc/Router.lua b/silkmvc/Router.lua new file mode 100644 index 0000000..855bbe0 --- /dev/null +++ b/silkmvc/Router.lua @@ -0,0 +1,164 @@ +--define the class +BaseObject:subclass("Router", {registry = {}}) +function Router:setPath(path) + self.path = path +end + +function Router:initialize() + self.routes = {} + self.remaps = {} +end + +--function Router:setArgs(args) +-- self.args = args +--end + +--function Router:arg(name) +-- return self.args[name] +--end + +function Router:infer(url) + -- a controller is like this /a/b/c/d/e + -- a is controller name + -- b is action + -- c,d,e is parameters + -- if user dont provide the url, try to infer it + -- from the REQUEST + url = url or REQUEST.r or "" + url = std.trim(url, "/") + local args = explode(url, "/") + local data = { + name = "index", + action = "index", + args = {} + } + if args and #args > 0 and args[1] ~= "" then + data.name = args[1]:gsub("%.", "") + if args[2] then + data.action = args[2]:gsub("%.", "") + end + for i = 3, #args do + table.insert(data.args, args[i]) + end + end + + -- remap if needed + if self.remaps[data.name] ~= nil then + data.name = self.remaps[data.name] + end + -- find the controller class and init it + local controller_name = firstToUpper(data.name) .. "Controller" + local controller_path = self.path .. "." .. controller_name + -- require the controller module + -- ignore the error + local r, e = pcall(require, controller_path) + --require(controller_path) + if not _G[controller_name] then + -- verify if it is an asset + url = url:gsub("/", DIR_SEP) + local filepath = WWW_ROOT..DIR_SEP..url + if ulib.exists(filepath) then -- and not std.is_dir(filepath) + data.controller = AssetController:new {registry = self.registry} + data.action = "get" + data.name = "asset" + data.args ={url} + else + -- let the notfound controller handle the error + data.controller = NotfoundController:new {registry = self.registry} + data.args = {controller_name, e} + data.action = "index" + data.name = "notfound" + end + else + -- create the coresponding controller + data.controller = _G[controller_name]:new {registry = self.registry} + if not data.controller[data.action] then + --data.args = {data.action} + table.insert(data.args, 1, data.action) + data.action = "actionnotfound" + end + end + + self:log("Controller: " .. data.controller.class .. ", action: "..data.action..", args: ".. JSON.encode(data.args)) + return data +end + +function Router:delegate() + local views = {} + local data = self:infer() + -- set the controller to the main controller + data.controller.main = true + views.__main__ = self:call(data) + if not views.__main__ then + --self:error("No view available for this action") + return + end + -- get all visible routes + local routes = self:dependencies(data.name .. "/" .. data.action) + for k, v in pairs(routes) do + data = self:infer(v) + views[k] = self:call(data) + end + -- now require the main page to put the view + local view_args = {} + local view_argv = {} + for k,v in pairs(views) do + table.insert( view_args, k ) + table.insert( view_argv, v ) + end + + local fn, e = loadscript(VIEW_ROOT .. DIR_SEP .. self.registry.layout .. DIR_SEP .. "layout.ls", view_args) + html() + if fn then + local r, o = pcall(fn, table.unpack(view_argv)) + if not r then + self:error(o) + end + else + e = e or "" + self:error("The index page is not found for layout: " .. self.registry.layout..": "..e) + end +end + +function Router:dependencies(url) + if not self.routes[self.registry.layout] then + return {} + end + local list = {} + --self:log("comparing "..url) + for k, v in pairs(self.routes[self.registry.layout]) do + v.url = std.trim(v.url, "/") + if v.visibility == "ALL" then + list[k] = v.url + elseif v.visibility.routes then + if v.visibility.shown == true or v.visibility.shown == nil then + if v.visibility.routes[url] then + list[k] = v.url + end + else + if not v.visibility.routes[url] then + list[k] = v.url + end + end + end + end + return list +end + +function Router:call(data) + data.controller.template:setView(data.action, data.name) + local obj = data.controller[data.action](data.controller, table.unpack(data.args)) + if obj then + return data.controller.template + else + return false + end +end + +function Router:remap(from, to) + self.remaps[from] = to +end + +function Router:route(layout, dependencies) + self.routes[layout] = dependencies +end diff --git a/silkmvc/Template.lua b/silkmvc/Template.lua new file mode 100644 index 0000000..38eeb7c --- /dev/null +++ b/silkmvc/Template.lua @@ -0,0 +1,58 @@ +-- create class + BaseObject:subclass("Template",{registry = {}}) + +function Template:initialize() + self.vars = {} +end + +function Template:set(k, v, ow) + if not self.vars[k] or (self.vars[k] and ow) then + self.vars[k] = v + end +end + +function Template:get(k) + return self.vars[k] +end + +function Template:remove(k) + self.vars[k] = nil +end + +-- infer view path +function Template:setView(name, controller) + self.name = name + if controller then + self.controller = controller + end +end +function Template:path() + local path = VIEW_ROOT..DIR_SEP..self.registry.layout..DIR_SEP..self.controller..DIR_SEP..self.name..".ls" + if ulib.exists(path) then + return path + else + return false, path + end +end +-- render the page +function Template:render() + local path, err = self:path() + if not path then + return self:error("View not found: "..err) + end + local args = {} + local argv = {} + for k, v in pairs(self.vars) do + table.insert( args, k ) + table.insert( argv,v ) + end + local fn, e = loadscript(self:path(), args) + if fn then + local r,o = pcall(fn, table.unpack(argv)) + if not r then + self:error(o) + end + else + self:error(e) + end +end \ No newline at end of file diff --git a/silkmvc/api.lua b/silkmvc/api.lua new file mode 100644 index 0000000..6dc685d --- /dev/null +++ b/silkmvc/api.lua @@ -0,0 +1,57 @@ +require("OOP") +ulib = require("ulib") +require(BASE_FRW.."silk.BaseObject") +require(BASE_FRW.."silk.DBHelper") +require(BASE_FRW.."silk.Router") +require(BASE_FRW.."silk.BaseController") +require(BASE_FRW.."silk.BaseModel") +require(BASE_FRW.."silk.Logger") +require(BASE_FRW.."silk.Template") + +-- mime type allows +-- this will bypass the default server security +-- the default list is from the server setting +POLICY = {} +POLICY.mimes = { + ["application/javascript"] = true, + ["image/bmp"] = true, + ["image/jpeg"] = true, + ["image/png"] = true, + ["text/css"] = true, + ["text/markdown"] = true, + ["text/csv"] = true, + ["application/pdf"] = true, + ["image/gif"] = true, + ["text/html"] = true, + ["application/json"] = true, + ["application/javascript"] = true, + ["image/x-portable-pixmap"] = true, + ["application/x-rar-compressed"] = true, + ["image/tiff"] = true, + ["application/x-tar"] = true, + ["text/plain"] = true, + ["application/x-font-ttf"] = true, + ["application/xhtml+xml"] = true, + ["application/xml"] = true, + ["application/zip"] = true, + ["image/svg+xml"] = true, + ["application/vnd.ms-fontobject"] = true, + ["application/x-font-woff"] = true, + ["application/x-font-otf"] = true, + ["audio/mpeg"] = true, + +} + + +HEADER_FLAG = false + +function html() + if not HEADER_FLAG then + std.chtml(SESSION) + HEADER_FLAG = true + end +end + +function import(module) + return require(BASE_FRW.."silk.api."..module) +end \ No newline at end of file diff --git a/silkmvc/core/OOP.lua b/silkmvc/core/OOP.lua new file mode 100644 index 0000000..182dacc --- /dev/null +++ b/silkmvc/core/OOP.lua @@ -0,0 +1,40 @@ +Object = {} + +function Object:prototype(o) + o = o or {} -- create table if user does not provide one + setmetatable(o, self) + self.__index = self + return o +end + +function Object:new(o) + local obj = self:prototype(o) + obj:initialize() + return obj +end + +function Object:print() + print('an Object') +end + +function Object:initialize() +end + +function Object:asJSON() + return '{}' +end + +function Object:inherit(o) + return self:prototype(o) +end + + +function Object:extends(o) + return self:inherit(o) +end + +Test = Object:inherit{dummy = 0} + +function Test:toWeb() + wio.t(self.dummy) +end \ No newline at end of file diff --git a/silkmvc/core/api.lua b/silkmvc/core/api.lua new file mode 100644 index 0000000..d5e36d6 --- /dev/null +++ b/silkmvc/core/api.lua @@ -0,0 +1,260 @@ +math.randomseed(os.clock()) +package.cpath = __api__.apiroot..'/?.so' +require("antd") +std = modules.std() +local read_header =function() + local l + repeat + l = std.antd_recv(HTTP_REQUEST.id) + if l and l ~= '\r' then + if l == "HTTP_REQUEST" or l == "request" or l == "COOKIE" or l == "REQUEST_HEADER" or l == "REQUEST_DATA" then + coroutine.yield(l, "LUA_TABLE") + else + local l1 = std.antd_recv(HTTP_REQUEST.id) + if l1 ~= '\r' then + coroutine.yield(l, l1) + end + l = l1 + end + end + until not l or l == '\r' +end + + +local read_headers = function() + local co = coroutine.create(function () read_header() end) + return function () -- iterator + local code, k, v = coroutine.resume(co) + return k,v + end +end + +local parse_headers =function() + local lut = { + HTTP_REQUEST = HTTP_REQUEST + } + local curr_tbl = "HTTP_REQUEST" + for k,v in read_headers() do + if v == "LUA_TABLE" then + if not lut[k] then + lut[k] = {} + end + curr_tbl = k + else + lut[curr_tbl][k] = v + end + end + HTTP_REQUEST.request = lut.request + HTTP_REQUEST.request.COOKIE = lut.COOKIE + HTTP_REQUEST.request.REQUEST_HEADER = lut.REQUEST_HEADER + HTTP_REQUEST.request.REQUEST_DATA = lut.REQUEST_DATA +end + +-- parsing the header +parse_headers() +-- root dir +__ROOT__ = HTTP_REQUEST.request.SERVER_WWW_ROOT +-- set require path +package.path = __ROOT__ .. '/?.lua;'..__api__.apiroot..'/?.lua' +require("std") +require("utils") +require("extra_mime") +ulib = require("ulib") +-- set session +SESSION = {} + +REQUEST = HTTP_REQUEST.request.REQUEST_DATA +REQUEST.method = HTTP_REQUEST.request.METHOD +if HTTP_REQUEST.request.COOKIE then + SESSION = HTTP_REQUEST.request.COOKIE +end +HEADER = HTTP_REQUEST.request.REQUEST_HEADER +HEADER.mobile = false + +if HEADER["User-Agent"] and HEADER["User-Agent"]:match("Mobi") then + HEADER.mobile = true +end + +function LOG_INFO(fmt,...) + ulib.syslog(5,string.format(fmt or "LOG",...)) +end + +function LOG_ERROR(fmt,...) + ulib.syslog(3,string.format(fmt or "ERROR",...)) +end + +function has_module(m) + if utils.file_exists(__ROOT__..'/'..m) then + if m:find("%.ls$") then + return true, true, __ROOT__..'/'..m + else + return true, false, m:gsub(".lua$","") + end + elseif utils.file_exists(__ROOT__..'/'..string.gsub(m,'%.','/')..'.lua') then + return true, false, m + elseif utils.file_exists(__ROOT__..'/'..string.gsub(m,'%.','/')..'.ls') then + return true, true, __ROOT__..'/'..string.gsub(m,'%.','/')..'.ls' + end + return false, false, nil +end + +function echo(m) + if m then std.t(m) else std.t("Undefined value") end +end + +function loadscript(file, args) + local f = io.open(file, "rb") + local content = "" + if f then + local html = "" + local pro = "local fn = function(...)" + local s,e, mt + local mtbegin = true -- find begin of scrit, 0 end of scrit + local i = 1 + if args then + pro = "local fn = function("..table.concat( args, ",")..")" + end + for line in io.lines(file) do + line = std.trim(line, " ") + if(line ~= "") then + if(mtbegin) then + mt = "^%s*<%?lua" + else + mt = "%?>%s*$" + end + s,e = line:find(mt) + if(s) then + if mtbegin then + if html ~= "" then + pro= pro.."echo(\""..utils.escape(html).."\")\n" + html = "" + end + local b,f = line:find("%?>%s*$") + if b then + pro = pro..line:sub(e+1,b-1).."\n" + else + pro = pro..line:sub(e+1).."\n" + mtbegin = not mtbegin + end + else + pro = pro..line:sub(0,s-1).."\n" + mtbegin = not mtbegin + end + else -- no match + if mtbegin then + -- detect if we have inline lua with format + local b,f = line:find("<%?=") + if b then + local tmp = line + pro= pro.."echo(" + while(b) do + -- find the close + local x,y = tmp:find("%?>") + if x then + pro = pro.."\""..utils.escape(html..tmp:sub(0,b-1):gsub("%%","%%%%")).."\".." + pro = pro..tmp:sub(f+1,x-1)..".." + html = "" + tmp = tmp:sub(y+1) + b,f = tmp:find("<%?=") + else + error("Syntax error near line "..i) + end + end + pro = pro.."\""..utils.escape(tmp:gsub("%%","%%%%")).."\")\n" + else + html = html..std.trim(line," "):gsub("%%","%%%%").."\n" + end + else + if line ~= "" then pro = pro..line.."\n" end + end + end + end + i = i+ 1 + end + f:close() + if(html ~= "") then + pro = pro.."echo(\""..utils.escape(html).."\")\n" + end + pro = pro.."\nend \n return fn" + local r,e = load(pro) + if r then return r(), e else return nil,e end + end +end + +-- decode post data if any +local decode_request_data = function() + if (not REQUEST.method) + or (REQUEST.method ~= "POST" + and REQUEST.method ~= "PUT" + and REQUEST.method ~= "PATCH") + or (not REQUEST.HAS_RAW_BODY) then + return 0 + end + local ctype = HEADER['Content-Type'] + local clen = HEADER['Content-Length'] or -1 + if clen then + clen = tonumber(clen) + end + if not ctype or clen == -1 then + LOG_ERROR("Invalid content type %s or content length %d", ctype, clen) + return 400, "Bad Request, missing content description" + end + local raw_data, len = std.antd_recv(HTTP_REQUEST.id, clen) + if len ~= clen then + LOG_ERROR("Unable to read all data: read %d expected %d", len, clen) + return 400, "Bad Request, missing content data" + end + if ctype:find("application/json") then + REQUEST.json = bytes.__tostring(raw_data) + else + REQUEST[ctype] = raw_data + end + REQUEST.HAS_RAW_BODY = nil + return 0 +end + +-- set compression level +local accept_encoding = HEADER["Accept-Encoding"] +if accept_encoding then + if accept_encoding:find("gzip") then + std.antd_set_zlevel(HTTP_REQUEST.id, "gzip") + elseif accept_encoding:find("deflate") then + std.antd_set_zlevel(HTTP_REQUEST.id, "deflate") + end +end + +local code, error = decode_request_data() + +if code ~= 0 then + LOG_ERROR(error) + std.error(code, error) + return +end + +-- LOG_INFO(JSON.encode(REQUEST)) + +-- OOP support +--require("OOP") +-- load sqlite helper +--require("sqlite") +-- enable extra mime + +-- run the file + + +local m, s, p = has_module(HTTP_REQUEST.request.RESOURCE_PATH) +if m then + -- run the correct module + if s then + local r,e = loadscript(p) + if r then r() else unknow(e) end + else + LOG_INFO("RUNNING MODULE %s", p) + require(p) + end +else + unknow("Resource not found for request "..HTTP_REQUEST.request.RESOURCE_PATH) +end + + +--require('router') diff --git a/silkmvc/core/cif.lua b/silkmvc/core/cif.lua new file mode 100644 index 0000000..7e0ebf3 --- /dev/null +++ b/silkmvc/core/cif.lua @@ -0,0 +1,63 @@ +FFI = require("ffi") +FFI.type = {} +FFI.type.VOID = 0 +FFI.type.UINT8 = 1 +FFI.type.SINT8 = 2 +FFI.type.UINT16 = 3 +FFI.type.SINT16 = 4 +FFI.type.UINT32 = 5 +FFI.type.SINT32 = 6 +FFI.type.UINT64 = 7 +FFI.type.SINT64 = 8 +FFI.type.FLOAT = 9 +FFI.type.DOUBLE = 10 +FFI.type.UCHAR = 11 +FFI.type.SCHAR = 12 +FFI.type.USHORT = 13 +FFI.type.SSHORT = 14 +FFI.type.UINT = 15 +FFI.type.SINT = 16 +FFI.type.ULONG = 17 +FFI.type.SLONG = 18 +FFI.type.LONGDOUBLE = 19 +FFI.type.POINTER = 20 +FFI.cache = {} + +FFI.load = function(path) + if FFI.cache[path] then + return FFI.cache[path] + else + print("Loading: "..path) + local lib = FFI.dlopen(path) + if lib then + FFI.cache[path] = {ref = lib, fn= {}} + end + return FFI.cache[path] + end +end + +FFI.unload = function(path) + local lib = FFI.cache[path] + if lib then + FFI.dlclose(lib.ref) + FFI.cache[path] = false + end +end + +FFI.unloadAll = function() + for k,v in pairs(FFI.cache) do + FFI.dlclose(v.ref) + end + FFI.cache = {} +end + +FFI.lookup = function(lib, name) + local fn = lib.fn[name] + if fn then return fn end + fn = FFI.dlsym(lib.ref, name) + if fn then + lib.fn[name] = fn + return fn + end + return nil +end \ No newline at end of file diff --git a/silkmvc/core/extra_mime.lua b/silkmvc/core/extra_mime.lua new file mode 100644 index 0000000..da97300 --- /dev/null +++ b/silkmvc/core/extra_mime.lua @@ -0,0 +1,79 @@ +function std.extra_mime(name) + local ext = std.ext(name) + local mpath = __ROOT__.."/".."mimes.json" + local xmimes = {} + if utils.file_exists(mpath) then + local f = io.open(mpath, "r") + if f then + xmimes = JSON.decodeString(f:read("*all")) + f:close() + end + end + if(name:find("Makefile$")) then return "text/makefile",false + elseif ext == "php" then return "text/php",false + elseif ext == "c" or ext == "h" then return "text/c",false + elseif ext == "cpp" or ext == "hpp" then return "text/cpp",false + elseif ext == "md" then return "text/markdown",false + elseif ext == "lua" then return "text/lua",false + elseif ext == "yml" then return "application/x-yaml", false + elseif xmimes[ext] then return xmimes[ext].mime, xmimes[ext].binary + --elseif ext == "pgm" then return "image/x-portable-graymap", true + else + return "application/octet-stream",true + end +end + +function std.mimeOf(name) + local mime = std.mime(name) + if mime ~= "application/octet-stream" then + return mime + else + return std.extra_mime(name) + end +end + +--[[ function std.isBinary(name) + local mime = std.mime(name) + if mime ~= "application/octet-stream" then + return std.is_bin(name) + else + local xmime,bin = std.extra_mime(name) + return bin + end +end ]] + +function std.sendFile(m) + local mime = std.mimeOf(m) + local finfo = ulib.file_stat(m) + local len = tostring(math.floor(finfo.size)) + local len1 = tostring(math.floor(finfo.size - 1)) + if mime == "audio/mpeg" then + std.status(200) + std.header("Pragma", "public") + std.header("Expires", "0") + std.header("Content-Type", mime) + std.header("Content-Length", len) + std.header("Content-Disposition", "inline; filename=" .. std.basename(m)) + std.header("Content-Range:", "bytes 0-" .. len1 .. "/" .. len) + std.header("Accept-Ranges", "bytes") + std.header("X-Pad", "avoid browser bug") + std.header("Content-Transfer-Encoding", "binary") + std.header("Cache-Control", "no-cache, no-store") + std.header("Connection", "Keep-Alive") + std.header_flush() + std.f(m) + else + if HEADER['If-Modified-Since'] and HEADER['If-Modified-Since'] == finfo.ctime then + std.status(304) + std.header_flush() + else + std.status(200) + std.header("Content-Type", mime) + --std.header("Content-Length", len) + std.header("Cache-Control", "no-cache") + std.header("Last-Modified", finfo.ctime) + std.header_flush() + std.f(m) + end + end +end diff --git a/silkmvc/core/sqlite.lua b/silkmvc/core/sqlite.lua new file mode 100644 index 0000000..7d2d38e --- /dev/null +++ b/silkmvc/core/sqlite.lua @@ -0,0 +1,157 @@ +sqlite = modules.sqlite() + +if sqlite == nil then return 0 end +require("OOP") +-- create class +DBModel = Object:inherit{db=nil, name=''} + +function DBModel:createTable(m) + if self:available() then return true end + local sql = "CREATE TABLE "..self.name.."(id INTEGER PRIMARY KEY" + for k, v in pairs(m) do + if k ~= "id" then + sql = sql..","..k.." "..v + end + end + sql = sql..");" + return sqlite.query(self.db,sql) == 1 +end + +function DBModel:insert(m) + local keys = {} + local values = {} + for k,v in pairs(m) do + if k ~= "id" then + table.insert(keys,k) + if type(v) == "number" then + table.insert(values, v) + elseif type(v) == "boolean" then + table.insert( values, v and 1 or 0 ) + else + local t = "\""..v:gsub('"', '""').."\"" + table.insert(values,t) + end + end + end + local sql = "INSERT INTO "..self.name.." ("..table.concat(keys,',')..') VALUES (' + sql = sql..table.concat(values,',')..');' + return sqlite.query(self.db, sql) == 1 +end + +function DBModel:get(id) + return sqlite.select(self.db, self.name, "*","id="..id)[1] +end + +function DBModel:getAll() + --local sql = "SELECT * FROM "..self.name + --return sqlite.select(self.db, self.name, "1=1") + local data = sqlite.select(self.db, self.name, "*", "1=1") + if data == nil then return nil end + local a = {} + for n in pairs(data) do table.insert(a, n) end + table.sort(a) + return data, a +end + +function DBModel:find(cond) + local cnd = "1=1" + local sel = "*" + if cond.exp then + cnd = self:gencond(cond.exp) + end + if cond.order then + cnd = cnd.." ORDER BY " + local l = {} + local i = 1 + for k,v in pairs(cond.order) do + l[i] = k.." "..v + i = i+1 + end + cnd = cnd..table.concat(l, ",") + end + if cond.limit then + cnd = cnd.." LIMIT "..cond.limit + end + if cond.fields then + sel = table.concat(cond.fields, ",") + --print(sel) + end + --print(cnd) + local data = sqlite.select(self.db, self.name, sel, cnd) + if data == nil then return nil end + local a = {} + for n in pairs(data) do table.insert(a, n) end + table.sort(a) + return data, a +end + +function DBModel:query(sql) + return sqlite.query(self.db, sql) == 1 +end + +function DBModel:update(m) + local id = m['id'] + if id ~= nil then + local lst = {} + for k,v in pairs(m) do + if(type(v)== "number") then + table.insert(lst,k.."="..v) + elseif type(v) == "boolean" then + table.insert( lst, k.."="..(v and 1 or 0) ) + else + table.insert(lst,k.."=\""..v:gsub('"', '""').."\"") + end + end + local sql = "UPDATE "..self.name.." SET "..table.concat(lst,",").." WHERE id="..id..";" + return sqlite.query(self.db, sql) == 1 + end + return false +end + +function DBModel:available() + return sqlite.hasTable(self.db, self.name) == 1 +end +function DBModel:deleteByID(id) + local sql = "DELETE FROM "..self.name.." WHERE id="..id..";" + return sqlite.query(self.db, sql) == 1 +end +function DBModel:gencond(o) + for k,v in pairs(o) do + if k == "and" or k == "or" then + local cnd = {} + local i = 1 + for k1,v1 in pairs(v) do + cnd[i] = self:gencond(v1) + i = i + 1 + end + return " ("..table.concat(cnd, " "..k.." ")..") " + else + for k1,v1 in pairs(v) do + local t = type(v1) + if(t == "string") then + return " ("..k1.." "..k..' "'..v1:gsub('"','""')..'") ' + end + return " ("..k1.." "..k.." "..v1..") " + end + end + end +end +function DBModel:delete(cond) + local sql = "DELETE FROM "..self.name.." WHERE "..self:gencond(cond)..";" + return sqlite.query(self.db, sql) == 1 +end + +function DBModel:lastInsertID() + return sqlite.lastInsertID(self.db) +end + +function DBModel:close() + if self.db then + sqlite.dbclose(self.db) + end +end +function DBModel:open() + if self.db ~= nil then + self.db = sqlite.getdb(self.db) + end +end \ No newline at end of file diff --git a/silkmvc/core/std.lua b/silkmvc/core/std.lua new file mode 100644 index 0000000..746d7a3 --- /dev/null +++ b/silkmvc/core/std.lua @@ -0,0 +1,171 @@ +bytes = modules.bytes() +array = modules.array() + +modules.sqlite = function() + if not sqlite then + sqlite = require("sqlitedb") + sqlite.getdb = function(name) + if name:find("%.db$") then + return sqlite._getdb(name) + elseif name:find("/") then + LOG_ERROR("Invalid database name %s", name) + return nil + else + return sqlite._getdb(__api__.dbpath.."/"..name..".db") + end + end + end + return sqlite +end + +RESPONSE_HEADER = { + status = 200, + header = {}, + cookie = {}, + sent = false +} + +function std.status(code) + RESPONSE_HEADER.status=code +end +function std.custom_header(k,v) + std.header(k,v) +end +function std.header_flush() + std._send_header(HTTP_REQUEST.id,RESPONSE_HEADER.status, RESPONSE_HEADER.header, RESPONSE_HEADER.cookie) + RESPONSE_HEADER.sent = true +end + +function std.header(k,v) + RESPONSE_HEADER.header[k] = v +end + +function std.cjson(ck) + for k,v in pairs(ck) do + std.setCookie(k.."="..v.."; Path=/") + end + std.header("Content-Type","application/json; charset=utf-8") + std.header_flush() +end +function std.chtml(ck) + for k,v in pairs(ck) do + std.setCookie(k.."="..v.."; Path=/") + end + std.header("Content-Type","text/html; charset=utf-8") + std.header_flush() +end +function std.t(s) + if RESPONSE_HEADER.sent == false then + std.header_flush() + end + std._t(HTTP_REQUEST.id,s) +end +function std.b(s) + if RESPONSE_HEADER.sent == false then + std.header_flush() + end + std._b(HTTP_REQUEST.id,s) +end +function std.f(v) + std._f(HTTP_REQUEST.id,v) + --ulib.send_file(v, HTTP_REQUEST.socket) +end + +function std.setCookie(v) + RESPONSE_HEADER.cookie[#RESPONSE_HEADER.cookie] = v +end + +function std.error(status, msg) + std._error(HTTP_REQUEST.id, status, msg) +end +--_upload +--_route +function std.unknow(s) + std.error(404, "Unknown request") +end + +--_redirect +--[[ function std.redirect(s) + std._redirect(HTTP_REQUEST.id,s) +end ]] + +function std.html() + std.header("Content-Type","text/html; charset=utf-8") + std.header_flush() +end +function std.text() + std.header("Content-Type","text/plain; charset=utf-8") + std.header_flush() +end + +function std.json() + std.header("Content-Type","application/json; charset=utf-8") + std.header_flush() +end +function std.jpeg() + std.header("Content-Type","image/jpeg") + std.header_flush() +end +function std.octstream(s) + std.header("Content-Type","application/octet-stream") + std.header("Content-Disposition",'attachment; filename="'..s..'"') + std.header_flush() +end +--[[ function std.textstream() + std._textstream(HTTP_REQUEST.id) +end ]] + + +function std.readOnly(t) -- bugging + local proxy = {} + local mt = { -- create metatable + __index = t, + __newindex = function (t,k,v) + error("attempt to update a read-only table", 2) + end + } + setmetatable(proxy, mt) + return proxy + end + + +-- web socket +std.ws = {} +function std.ws.header() + local h = std.ws_header(HTTP_REQUEST.id) + if(h) then + return h --std.readOnly(h) + else + return nil + end +end + +function std.ws.read(h) + return std.ws_read(HTTP_REQUEST.id,h) +end +function std.ws.swrite(s) + std.ws_t(HTTP_REQUEST.id,s) +end +function std.ws.fwrite(s) + std.ws_f(HTTP_REQUEST.id,s) +end +function std.ws.write_bytes(arr) + std.ws_b(HTTP_REQUEST.id,arr) +end +function std.ws.enable() + return HTTP_REQUEST ~= nil and HTTP_REQUEST.request["__web_socket__"] == "1" +end +function std.ws.close(code) + std.ws_close(HTTP_REQUEST.id,code) +end +function std.basename(str) + local name = string.gsub(std.trim(str,"/"), "(.*/)(.*)", "%2") + return name +end +function std.is_file(f) + return std.is_dir(f) == false +end + +std.ws.TEXT = 1 +std.ws.BIN = 2 +std.ws.CLOSE = 8 diff --git a/silkmvc/core/utils.lua b/silkmvc/core/utils.lua new file mode 100644 index 0000000..8f933c8 --- /dev/null +++ b/silkmvc/core/utils.lua @@ -0,0 +1,161 @@ +utils = {} + +function utils.is_array(table) + local max = 0 + local count = 0 + for k, v in pairs(table) do + if type(k) == "number" then + if k > max then max = k end + count = count + 1 + else + return false + end + end + if max > count * 2 then + return false + end + + return true +end + +function utils.escape(s) + local replacements = { + ["\\"] = "\\\\" , + ['"'] = '\\"', + ["\n"] = "\\n", + ["\t"] = "\\t", + ["\b"] = "\\b", + ["\f"] = "\\f", + ["\r"] = "\\r", + ["%"] = "%%" + } + return (s:gsub( "[\\'\"\n\t\b\f\r%%]", replacements )) +end + +function utils.escape_pattern(s) + return s:gsub("[%(%)%.%+%-%*%?%[%]%^%$%%]", "%%%1") +end + +function utils.unescape_pattern(s) + return s:gsub( "[%%]", "%%%%") +end + +function utils.hex_to_char(x) + return string.char(tonumber(x, 16)) +end + +function utils.decodeURI(url) + return url:gsub("%%(%x%x)", utils.hex_to_char) +end + +function utils.unescape(s) + local str = "" + local escape = false + local esc_map = {b = '\b', f = '\f', n = '\n', r = '\r', t = '\t'} + for c in s:gmatch"." do + if c ~= '\\' then + if escape then + if esc_map[c] then + str = str..esc_map[c] + else + str = str..c + end + else + str = str..c + end + escape = false + else + if escape then + str = str..c + escape = false + else + escape = true + end + end + end + return str +end + +function utils.file_exists(name) + local f=io.open(name,"r") + if f~=nil then io.close(f) return true else return false end +end + +function utils.url_parser(uri) + local pattern = "^(https?)://([%.%w]+):?(%d*)(/?[^#]*)#?.*$" + local obj = {} + obj.protocol = uri:gsub(pattern, "%1") + obj.hostname = uri:gsub(pattern, "%2") + obj.port = uri:gsub(pattern, "%3") + obj.query = uri:gsub(pattern, "%4") + + if obj.port == "" then obj.port = 80 else obj.port = tonumber(obj.port) end + if obj.query == "" then obj.query="/" end + return obj +end + +JSON = require("json") + +function JSON.encode(obj) + local t = type(obj) + if t == 'table' then + -- encode object + if utils.is_array(obj) == false then + local lst = {} + for k,v in pairs(obj) do + table.insert(lst,'"'..k..'":'..JSON.encode(v)) + end + return "{"..table.concat(lst,",").."}" + else + local lst = {} + local a = {} + for n in pairs(obj) do table.insert(a, n) end + table.sort(a) + for i,v in pairs(a) do + table.insert(lst,JSON.encode(obj[v])) + end + return "["..table.concat(lst,",").."]" + end + elseif t == 'string' then + --print('"'..utils.escape(obj)..'"') + return '"'..utils.escape(obj)..'"' + elseif t == 'boolean' or t == 'number' then + return tostring(obj) + elseif obj == nil then + return "null" + else + return '"'..tostring(obj)..'"' + end +end + +function explode(str, div) -- credit: http://richard.warburton.it + if (div=='') then return false end + local pos,arr = 0,{} + -- for each divider found + for st,sp in function() return string.find(str,div,pos,true) end do + table.insert(arr,string.sub(str,pos,st-1)) -- Attach chars left of current divider + pos = sp + 1 -- Jump past current divider + end + table.insert(arr,string.sub(str,pos)) -- Attach chars right of last divider + return arr + end +function implode(arr, div) + return table.concat(arr,div) +end + +function firstToUpper(str) + return (str:gsub("^%l", string.upper)) +end + + +local charset = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM1234567890" + +function utils.generate_salt(length) + local ret = {} + local r + for i = 1, length do + r = math.random(1, #charset) + table.insert(ret, charset:sub(r, r)) + end + return table.concat(ret) +end \ No newline at end of file diff --git a/silkmvc/router.lua.tpl b/silkmvc/router.lua.tpl new file mode 100644 index 0000000..5eb7f2f --- /dev/null +++ b/silkmvc/router.lua.tpl @@ -0,0 +1,54 @@ + +-- the rewrite rule for the framework +-- should be something like this +-- ^\/apps\/+(.*)$ = /apps/router.lua?r=<1>& +-- some global variables +DIR_SEP = "/" +WWW_ROOT = "/opt/www/htdocs/apps" +HTTP_ROOT = "https://apps.localhost:9195/" +-- class path: path.to.class +BASE_FRW = "" +-- class path: path.to.class +CONTROLLER_ROOT = BASE_FRW.."apps.controllers" +MODEL_ROOT = BASE_FRW.."apps.models" +-- file path: path/to/file +VIEW_ROOT = WWW_ROOT..DIR_SEP.."views" +LOG_ROOT = WWW_ROOT..DIR_SEP.."logs" + +-- require needed library +require(BASE_FRW.."silk.api") + +-- registry object store global variables +local REGISTRY = {} +-- set logging level +REGISTRY.logger = Logger:new{ levels = {INFO = true, ERROR = true, DEBUG = true}} +REGISTRY.db = DBHelper:new{db="iosapps"} +REGISTRY.layout = 'default' + +REGISTRY.db:open() +local router = Router:new{registry = REGISTRY} +REGISTRY.router = router +router:setPath(CONTROLLER_ROOT) +--router:route('edit', 'post/edit', "ALL" ) + +-- example of depedencies to the current main route +-- each layout may have different dependencies +local default_routes_dependencies = { + edit = { + url = "post/edit", + visibility = { + shown = true, + routes = { + ["post/index"] = true + } + } + }, + --category = { + -- url = "cat/index", + -- visibility = "ALL" + --} +} +router:route('default', default_routes_dependencies ) +router:delegate() +if REGISTRY.db then REGISTRY.db:close() end +