summaryrefslogtreecommitdiff
path: root/references
diff options
context:
space:
mode:
authorLinnnus <[email protected]>2024-02-01 22:59:38 +0100
committerLinnnus <[email protected]>2024-02-04 09:58:06 +0100
commitd38f82f6462af4e5aad6a2c776f5c00ce5b13c87 (patch)
tree01a222792dfb10473ae4370b4fc90f3a48e1a499 /references
feat: initial commit
Here is a small overview of the state of the project at this first commit. I have basic Git Repo -> HTML working, and a plan for how setting up an actual server would work (mainly, NGINX + a git hook to rebuild). The main thing I'm working on right now is parsing WikiCreole, though I am starting to wonder if this is the right language. WikiCreole is pretty irregular and has a lot of edge cases (e.g. around emphasis).
Diffstat (limited to 'references')
-rw-r--r--references/regular-recursive.html632
-rw-r--r--references/smu.c805
2 files changed, 1437 insertions, 0 deletions
diff --git a/references/regular-recursive.html b/references/regular-recursive.html
new file mode 100644
index 0000000..69584ab
--- /dev/null
+++ b/references/regular-recursive.html
@@ -0,0 +1,632 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+ <head>
+ <title>Creole v0.4 Live Preview</title>
+ <meta http-equiv="Content-Type" content="text/xhtml; charset=utf-8" />
+ <style type="text/css">
+ .WikiText { display: block; width: 100%; height: 33%; }
+ table, td { border: solid 1px; }
+ tr { vertical-align: middle; text-align: center; }
+ div#UnitTest { position: absolute; right: 1em;
+ width: 15em; padding: 0; }
+ div#UnitTest h1 { font-size: large; text-align: center;
+ text-transform: uppercase; }
+ div#Main { padding: 0 1em 0 1em; margin: 0px;
+ position: absolute; top: 0px; left: 0px; right: 16em; }
+ table.unit-test { border: thin solid black; border-collapse: collapse; }
+ table.unit-test th { border: thin black; border-style: solid dashed; }
+ table.unit-test td { border: thin dashed black; }
+ table.unit-test strong { color: red; }
+ </style>
+ <script type="text/javascript">
+// <![CDATA[
+// Copyright (c) 2007 Chris Purcell.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+function $(element) {
+ if (document.getElementById)
+ return document.getElementById(element);
+ else if (document.all)
+ return document.all[element];
+ else
+ return null;
+}
+
+// Replace every match of `regex` in this string with the value returned by
+// fn(match), scanning left to right. Each search resumes after the previous
+// match, so text produced by fn is never rescanned.
+//   regex: pattern to search for. m.index is used, so a non-global RegExp is
+//          expected. It should not be able to match the empty string: an
+//          empty match at the start of the remaining text consumes nothing
+//          and the loop would not advance.
+//   fn:    called with the match array (m[0], capture groups, m.index).
+// Returns the rewritten string; the receiver itself is not modified.
+String.prototype.replaceEvalGl = function(regex, fn) {
+  var head = "";
+  var tail = "" + this;
+  var m;  // kept local so the loop does not create or clobber a global `m`
+  while ((m = tail.match(regex)) !== null) {
+    head += tail.substring(0, m.index) + fn(m);
+    tail = tail.substring(m.index + m[0].length);
+  }
+  return head + tail;
+}
+
+//// The markup rules ////////////////////////////////////////////////////////
+MarkupRule = function(regex, rule) {
+ this.regex = regex;
+ this.rule = rule;
+ this.children = [ ];
+}
+MarkupRule.prototype.clone = function() {
+ var objectClone = new this.constructor();
+ for (var property in this)
+ objectClone[property] = this[property];
+ return objectClone;
+}
+MarkupRule.prototype.setChildren = function(children) {
+ this.children = children;
+}
+ElementRule = function(params) {
+ return new MarkupRule(params["regex"], function (r) {
+ var text = "";
+ if ("capture" in params)
+ text = r[params["capture"]];
+ if (text) {
+ if ("replaceRegex" in params)
+ text = text.replace(params["replaceRegex"], params["replaceString"]);
+ var tag = "<" + params["tag"] + ">";
+ var endtag = "</" + params["tag"] + ">";
+ if (!("tag" in params))
+ tag = endtag = "";
+ return tag + this.markUp(text) + endtag;
+ } else if ("tag" in params)
+ return "<" + params["tag"] + " />";
+ else
+ return "";
+ });
+}
+
+function toXHTML(wikiText) {
+ wikiText = wikiText.replace(/&/g, "&amp;");
+ wikiText = wikiText.replace(/</g, "&lt;");
+ wikiText = wikiText.replace(/>/g, "&gt;");
+ wikiText = wikiText.replace(/"/g, "&quot;");
+ return toXHTML.root.markUp(wikiText);
+}
+
+ // A header is text within equals signs (=)
+toXHTML.h1 = new ElementRule({ tag: "h1", capture: 2,
+ regex: /(^|\n)[ \t]*={1}[ \t](.+?)[ \t]*=*\s*(\n|$)/ });
+toXHTML.h2 = new ElementRule({ tag: "h2", capture: 2,
+ regex: /(^|\n)[ \t]*={2}[ \t](.+?)[ \t]*=*\s*(\n|$)/ });
+toXHTML.h3 = new ElementRule({ tag: "h3", capture: 2,
+ regex: /(^|\n)[ \t]*={3}[ \t](.+?)[ \t]*=*\s*(\n|$)/ });
+toXHTML.h4 = new ElementRule({ tag: "h4", capture: 2,
+ regex: /(^|\n)[ \t]*={4}[ \t](.+?)[ \t]*=*\s*(\n|$)/ });
+toXHTML.h5 = new ElementRule({ tag: "h5", capture: 2,
+ regex: /(^|\n)[ \t]*={5}[ \t](.+?)[ \t]*=*\s*(\n|$)/ });
+toXHTML.h6 = new ElementRule({ tag: "h6", capture: 2,
+ regex: /(^|\n)[ \t]*={6}[ \t](.+?)[ \t]*=*\s*(\n|$)/ });
+
+ // hr is a line of 4 dashes (-)
+toXHTML.hr = new ElementRule({ tag: "hr", regex: /(^|\n)\s*----\s*(\n|$)/ });
+
+ // br is two backslashes (\)
+toXHTML.br = new ElementRule({ tag: "br", regex: /\\\\/ });
+
+ // Preformatted blocks are wrapped in {{{...}}}
+toXHTML.preBlock = new ElementRule({ tag: "pre", capture: 2,
+ regex: /(^|\n){{{\n?(.*?(\n.*?)*?)}}}(\n|$)/ });
+
+ // tt inlines are also wrapped in {{{...}}}
+toXHTML.tt = new ElementRule({ tag: "tt",
+ regex: /{{{(.*?(?:\n.*?)*?)}}}/, capture: 1 });
+
+ // Unordered and ordered lists start with * or #
+toXHTML.ulist = new ElementRule({ tag: "ul",
+ regex: /(^|\n)(\*[^*#].*(\n|$)([*#]{2}.*(\n|$))*)+/, capture: 0,
+ replaceRegex: /(^|\n)[*#]/g, replaceString: "$1" });
+toXHTML.olist = new ElementRule({ tag: "ol",
+ regex: /(^|\n)(#[^*#].*(\n|$)([*#]{2}.*(\n|$))*)+/, capture: 0,
+ replaceRegex: /(^|\n)[*#]/g, replaceString: "$1" });
+toXHTML.li = new ElementRule({tag:"li",regex:/.+(\n[*#].+)*/,capture:0});
+
+ // Tables
+toXHTML.table = new ElementRule({ tag: "table",
+ regex: /(^|\n)(\|.*\|[ \t]*(\n|$))+/, capture: 0 });
+toXHTML.tr = new ElementRule({ tag: "tr",
+ regex: /(^|\n)(\|.*)\|[ \t]*(\n|$)/, capture: 2 });
+toXHTML.td = new ElementRule({ tag: "td",
+ regex: /[|]+([^|]*)/, capture: 1 });
+
+ // Kinds of text block:
+ // - paragraph is the fallback for the root rule
+ // and consists of blocks of text separated by blank lines
+ // - singleLine is used within lists
+toXHTML.singleLine = new ElementRule({ regex: /.+/, capture: 0 });
+toXHTML.paragraph = new ElementRule({ tag: "p",
+ regex: /(^|\n)([ \t]*[^\s].*(\n|$))+/, capture: 0 });
+
+ // Strongly emphasised text is surrounded by double-* characters
+toXHTML.strong = new ElementRule({ tag: "strong", capture: 1,
+ regex:/\*\*([^*]*(?:\*[^*]+)*)\*\*/ });
+
+ // Emphasised text is surrounded by double-/ characters
+ // It must skip http:// or ftp:// internally
+ // (This would be a lot easier to write with negative lookbehind!)
+toXHTML.em = new ElementRule({ tag: "em", capture: 1,
+ regex:"\\/\\/(" + // Starts with a double-/
+ "[^\\/hf]*(?:" +
+ "\\/?(?:http:\\/?|ftp:\\/?)*(?:" +
+ "h(?:t(?:tp?)?)?" + "|" +
+ "f(?:tp?)?" + "|" +
+ "(?:" +
+ "h[^t\\/hf]" + "|" +
+ "ht[^t\\/hf]" + "|" +
+ "htt[^p\\/hf]" + "|" +
+ "http[^:\\/hf]" + "|" +
+ "http:[^\\/hf]" + "|" +
+ "http:\\/[^\\/hf]" + "|" +
+ "http:\\/\\/" + "|" +
+ "f[^t\\/hf]" + "|" +
+ "ft[^p\\/hf]" + "|" +
+ "ftp[^:\\/hf]" + "|" +
+ "ftp:[^\\/hf]" + "|" +
+ "ftp:\\/[^\\/hf]" + "|" +
+ "ftp:\\/\\/" +
+ ")" +
+ "[^\\/hf]*" +
+ ")" + "|" +
+ "\\/[^\\/hf][^\\/hf]*" +
+ ")*" +
+ ")" +
+ "\\/\\/" // Ends with a double-/
+});
+
+ // Links
+toXHTML.linkPattern = "[^\\]|\\n]*(?:\\][^\\]|\\n]+)*";
+toXHTML.urlProtocols = "(?:http|https|ftp|afs|news|nntp|mid|cid|mailto|" +
+ "wais|prospero|telnet|gopher)";
+toXHTML.urlPattern = toXHTML.urlProtocols + ":" +
+ "[^\\]|\\n]*(?:\\][^\\]|\\n]+)*";
+toXHTML.loneURLPattern = "(?:" + toXHTML.urlProtocols +
+ ":[\\$-:=\\?-Z_a-z~]+[\\$-+\\/-Z_a-z~-])";
+
+toXHTML.rawURL = new MarkupRule( "(" + toXHTML.loneURLPattern + ")",
+ function(r) {
+ return "<a href=\"" + r[1] + "\">" + r[1] + "</a>";
+ }
+);
+toXHTML.unnamedURL = new MarkupRule(
+ "\\[\\[(" + toXHTML.urlPattern + ")\\]\\]",
+ function(r) {
+ return "<a href=\"" + r[1] + "\">" + r[1] + "</a>";
+ }
+);
+toXHTML.unnamedLink = new MarkupRule(
+ "\\[\\[(" + toXHTML.linkPattern + ")\\]\\]",
+ function(r) {
+ return "<a href=\"" + r[1] + "\">" + r[1] + "</a>";
+ }
+);
+toXHTML.namedURL = new MarkupRule(
+ "\\[\\[(" + toXHTML.urlPattern + ")\\|(.*?)\\]\\]",
+ function(r) {
+ return "<a href=\"" + r[1] + "\">" + r[2] + "</a>";
+ }
+);
+toXHTML.namedLink = new MarkupRule(
+ "\\[\\[(" + toXHTML.linkPattern + ")\\|(.*?)\\]\\]",
+ function(r) {
+ return "<a href=\"" + r[1] + "\">" + r[2] + "</a>";
+ }
+);
+
+ // Images
+toXHTML.img = new MarkupRule(
+ "{{([^|\\n{}][^|\\n}]*(?:}[^|\\n}]+)*)\\|([^|\\n}]*(?:}[^|\\n}]+)*)}}",
+ function(r) {
+ return "<img src=\"" + r[1] + "\" alt=\"" + r[2] + "\"/>";
+ }
+);
+
+ // Children of lists
+toXHTML.ulist.children = toXHTML.olist.children = [ toXHTML.li ];
+toXHTML.li.children = [ toXHTML.olist, toXHTML.ulist, toXHTML.singleLine ];
+
+ // Children of table items
+toXHTML.table.children = [ toXHTML.tr ];
+toXHTML.tr.children = [ toXHTML.td ];
+toXHTML.td.children = [ toXHTML.singleLine ];
+
+ // Children within blocks
+toXHTML.singleLine.children = toXHTML.paragraph.children =
+ toXHTML.strong.children = toXHTML.em.children = toXHTML.tt.children =
+ [ toXHTML.strong, toXHTML.em, toXHTML.br, toXHTML.rawURL,
+ toXHTML.unnamedURL, toXHTML.unnamedLink, toXHTML.namedURL,
+ toXHTML.namedLink, toXHTML.tt, toXHTML.img ];
+
+
+ // The root rule used to start the parser
+toXHTML.root = new MarkupRule();
+toXHTML.root.children = [ toXHTML.h1, toXHTML.h2, toXHTML.h3,
+ toXHTML.h4, toXHTML.h5, toXHTML.h6,
+ toXHTML.hr, toXHTML.olist,
+ toXHTML.ulist, toXHTML.preBlock,
+ toXHTML.table ];
+toXHTML.root.fallback = new MarkupRule();
+toXHTML.root.fallback.children = [ toXHTML.paragraph ];
+
+
+//// Do the rendering ////////////////////////////////////////////////////////
+// Apply each rule, and use whichever matches first in the text
+// If there is a tie, use whichever is first in the list of rules
+// Render `text` by repeatedly applying whichever child rule matches earliest.
+//   text: the wiki text to convert (coerced to a string).
+// Returns the converted markup. Text in front of a match, and any text left
+// after the last match, is passed to this.fallback.markUp when a fallback
+// rule is set; otherwise it is copied through unchanged.
+MarkupRule.prototype.markUp = function(text) {
+ var head = "";
+ var tail = "" + text;
+ var matches = [ ];
+ // Initial search: first match of every child rule in the whole text.
+ for (var i = 0; i < this.children.length; i++) {
+ matches[i] = tail.match(this.children[i].regex);
+ }
+ var best = false;
+ var b_i = false;
+ // Pick the earliest match; the strict '>' keeps the earlier-listed rule
+ // when two rules match at the same index.
+ for (var i = 0; i < this.children.length; i++)
+ if (matches[i] && (!best || best.index > matches[i].index)) {
+ best = matches[i];
+ b_i = i;
+ }
+ while (best) {
+ if ((best.index > 0) && (this.fallback))
+ head += this.fallback.markUp(tail.substring(0,best.index));
+ else
+ head += tail.substring(0,best.index);
+ // Let the winning rule produce the output for its own match.
+ head += this.children[b_i].rule(best);
+ var chopped = best.index + best[0].length;
+ tail = tail.substring(chopped);
+ // Cached matches that start at or after the cut only need their index
+ // shifted; matches that began before the cut are searched for again in
+ // the shortened tail. The `else` pairs with the inner `if`, so a rule
+ // whose cached match is null is not retried.
+ for (var i = 0; i < this.children.length; i++)
+ if (matches[i])
+ if (matches[i].index >= chopped)
+ matches[i].index -= chopped;
+ else
+ matches[i] = tail.match(this.children[i].regex);
+ best = false;
+ // Same earliest-match selection as before the loop.
+ for (var i = 0; i < this.children.length; i++)
+ if (matches[i] && (!best || best.index > matches[i].index)) {
+ best = matches[i];
+ b_i = i;
+ }
+ }
+ if (tail.length > 0 && this.fallback)
+ tail = this.fallback.markUp(tail);
+ return head + tail;
+}
+
+//// Test the renderer ///////////////////////////////////////////////////////
+toXHTML.UnitTest = function() {
+ var results = "<table class=\"unit-test\"><tr><th>Name</th><th>Status</th>"+
+ "</tr>";
+ for (var i = 0; i < toXHTML.UnitTest.tests.length; i++) {
+ var test = toXHTML.UnitTest.tests[i];
+ var input = test.input;
+ var expected = test.output;
+ var actual = toXHTML(input);
+ results += "<tr><td>" + test.name + "</td><td>";
+ if (expected == actual)
+ results += "Success";
+ else {
+ results += "<strong>Failure</strong>" + "</td><td>";
+ results += actual.replace(/&/g, "&amp;").replace(/</g, "&lt;").
+ replace(/\n/g, "\\n");
+ }
+ results += "</td></tr>";
+ }
+ results += "</table>";
+ return results;
+}
+toXHTML.UnitTest.tests = [
+ {
+ name: "Basic paragraph markup",
+ input: "Basic paragraph test with <, >, & and \"",
+ output: "<p>Basic paragraph test with &lt;, &gt;, &amp; and &quot;</p>"
+ },
+ {
+ name: "Simple unordered list",
+ input: "* list item\n*list item 2",
+ output: "<ul><li> list item</li>\n<li>list item 2</li></ul>"
+ },
+ {
+ name: "Simple ordered list",
+ input: "# list item\n#list item 2",
+ output: "<ol><li> list item</li>\n<li>list item 2</li></ol>"
+ },
+ { // Test an ul item with a sublist
+ name: "Unordered item with unordered sublist",
+ input: "* Item\n** Subitem",
+ output: "<ul><li> Item<ul>\n<li> Subitem</li></ul></li></ul>"
+ },
+ { // Test a sublist without an initial tag (should not make a list)
+ name: "Unordered sublist without initial tag",
+ input: "** Sublist item",
+ output: "<p>** Sublist item</p>"
+ },
+ { // Test an ol item with a sublist
+ name: "Ordered item with ordered sublist",
+ input: "# Item\n## Subitem",
+ output: "<ol><li> Item<ol>\n<li> Subitem</li></ol></li></ol>"
+ },
+ { // Test a sublist without an initial tag (should not make a list)
+ name: "Ordered sublist without initial tag",
+ input: "## Sublist item",
+ output: "<p>## Sublist item</p>"
+ },
+ { // Test an unordered list with an ordered sublist
+ name: "Unordered item with ordered sublist",
+ input: "* Item\n*# Subitem",
+ output: "<ul><li> Item<ol>\n<li> Subitem</li></ol></li></ul>"
+ },
+ { // Test hr
+ name: "Horizontal rule",
+ input: "Some text\n----\nSome more text",
+ output: "<p>Some text</p><hr /><p>Some more text</p>"
+ },
+ { // Test pre block
+ name: "Preformatted block",
+ input: "{{{\nPreformatted block\n}}}",
+ output: "<pre>Preformatted block\n</pre>"
+ },
+ { // Test two pre blocks
+ name: "Two preformatted blocks",
+ input: "{{{\nPreformatted block\n}}}\n{{{Block 2}}}",
+ output: "<pre>Preformatted block\n</pre><pre>Block 2</pre>"
+ },
+ { // Test h1
+ name: "h1",
+ input: "= Header =",
+ output: "<h1>Header</h1>"
+ },
+ { // Test h2
+ name: "h2",
+ input: "== Header =",
+ output: "<h2>Header</h2>"
+ },
+ { // Test h3
+ name: "h3",
+ input: "=== Header =",
+ output: "<h3>Header</h3>"
+ },
+ { // Test h4
+ name: "h4",
+ input: "==== Header =",
+ output: "<h4>Header</h4>"
+ },
+ { // Test h5
+ name: "h5",
+ input: "===== Header",
+ output: "<h5>Header</h5>"
+ },
+ { // Test h6
+ name: "h6",
+ input: "====== Header =",
+ output: "<h6>Header</h6>"
+ },
+ { // Test above h6 (should be ignored)
+ name: ">h6",
+ input: "======= Header =",
+ output: "<p>======= Header =</p>"
+ },
+ { // Test tables
+ name: "Tables",
+ input: "| A | B |\n| //C// | **D** \\\\ E |",
+ output: "<table><tr><td> A </td><td> B </td></tr>" +
+ "<tr><td> <em>C</em> </td>" +
+ "<td> <strong>D</strong> <br /> E </td></tr></table>"
+ },
+ { // Test raw URL
+ name: "Raw URL",
+ input: "http://example.com/examplepage",
+ output: "<p><a href=\"http://example.com/examplepage\">" +
+ "http://example.com/examplepage</a></p>"
+ },
+ { // Test unnamed URL
+ name: "Unnamed URL",
+ input: "[[http://example.com/examplepage]]",
+ output: "<p><a href=\"http://example.com/examplepage\">" +
+ "http://example.com/examplepage</a></p>"
+ },
+ { // Test named URL
+ name: "Named URL",
+ input: "[[http://example.com/examplepage|Example Page]]",
+ output: "<p>" +
+ "<a href=\"http://example.com/examplepage\">Example Page</a></p>"
+ },
+ { // Test unnamed link
+ name: "Unnamed link",
+ input: "[[MyPage]]",
+ output: "<p><a href=\"MyPage\">MyPage</a></p>"
+ },
+ { // Test named link
+ name: "Named link",
+ input: "[[MyPage|My page]]",
+ output: "<p><a href=\"MyPage\">My page</a></p>"
+ },
+ { // Test images
+ name: "Image",
+ input: "{{image.gif|my image}}",
+ output: "<p><img src=\"image.gif\" alt=\"my image\"/></p>"
+ },
+ { // Test inline tt
+ name: "Inline tt",
+ input: "Inline {{{tt}}} example {{{here}}}!",
+ output: "<p>Inline <tt>tt</tt> example <tt>here</tt>!</p>"
+ },
+ { // Test **strong**
+ name: "Strong",
+ input: "**Strong**",
+ output: "<p><strong>Strong</strong></p>"
+ },
+ { // Test //emphasis//
+ name: "Emphasis",
+ input: "//Emphasis//",
+ output: "<p><em>Emphasis</em></p>"
+ },
+
+ //// WikiCreole tests
+ { // Tests multi-line emphasis behaviour
+ name: "Multi-line emphasis",
+ input: "Bold and italics should //be\nable// to cross lines.\n\n" +
+ "But, should //not be...\n\n...able// to cross paragraphs.",
+ output: "<p>Bold and italics should <em>be\nable</em> to cross lines." +
+ "\n</p>" + "<p>\nBut, should //not be...\n</p>" +
+ "<p>\n...able// to cross paragraphs.</p>"
+ },
+ { // Tests URL/emphasis ambiguity handling
+ name: "URL/emphasis ambiguity",
+ input: "This is an //italic// text. This is a url: " +
+ "http://www.wikicreole.org. This is what can go wrong://this " +
+ "should be an italic text//.",
+ output: "<p>This is an <em>italic</em> text. This is a url: " +
+ "<a href=\"http://www.wikicreole.org\">" +
+ "http://www.wikicreole.org</a>. This is what can go wrong:" +
+ "<em>this should be an italic text</em>.</p>"
+ },
+
+ //// Awkward emphasis edge cases
+ {
+ name: "Difficult emphasis #1",
+ input: "// http://www.link.org //",
+ output: "<p><em> <a href=\"http://www.link.org\">" +
+ "http://www.link.org</a> </em></p>"
+ },
+ {
+ name: "Difficult emphasis #2",
+ input: "// http //",
+ output: "<p><em> http </em></p>"
+ },
+ {
+ name: "Difficult emphasis #3",
+ input: "// httphpthtpht //",
+ output: "<p><em> httphpthtpht </em></p>"
+ },
+ {
+ name: "Difficult emphasis #4",
+ input: "// http: //",
+ output: "<p><em> http: </em></p>"
+ },
+ {
+ name: "Difficult emphasis #5",
+ input: "// http://",
+ output: "<p>// <a href=\"http://\">http://</a></p>"
+ },
+ {
+ name: "Difficult emphasis #6",
+ input: "// http:////",
+ output: "<p><em> <a href=\"http://\">http://</a></em></p>"
+ },
+ {
+ name: "Difficult emphasis #7",
+ input: "//httphpthtphtt//",
+ output: "<p><em>httphpthtphtt</em></p>"
+ },
+ {
+ name: "Difficult emphasis #8",
+ input: "//http://link.org//",
+ output: "<p><em><a href=\"http://link.org\">" +
+ "http://link.org</a></em></p>"
+ },
+ {
+ name: "Difficult emphasis #9",
+ input: "// ftp://www.link.org //",
+ output: "<p><em> <a href=\"ftp://www.link.org\">" +
+ "ftp://www.link.org</a> </em></p>"
+ },
+ {
+ name: "Difficult emphasis #10",
+ input: "// ftp //",
+ output: "<p><em> ftp </em></p>"
+ },
+ {
+ name: "Difficult emphasis #11",
+ input: "// fttpfptftpft //",
+ output: "<p><em> fttpfptftpft </em></p>"
+ },
+ {
+ name: "Difficult emphasis #12",
+ input: "// ftp: //",
+ output: "<p><em> ftp: </em></p>"
+ },
+ {
+ name: "Difficult emphasis #13",
+ input: "// ftp://",
+ output: "<p>// <a href=\"ftp://\">ftp://</a></p>"
+ },
+ {
+ name: "Difficult emphasis #14",
+ input: "// ftp:////",
+ output: "<p><em> <a href=\"ftp://\">ftp://</a></em></p>"
+ },
+ {
+ name: "Difficult emphasis #15",
+ input: "//fttpfptftpftt//",
+ output: "<p><em>fttpfptftpftt</em></p>"
+ },
+ {
+ name: "Difficult emphasis #16",
+ input: "//ftp://link.org//",
+ output: "<p><em><a href=\"ftp://link.org\">" +
+ "ftp://link.org</a></em></p>"
+ }
+];
+
+ //// Install the renderer //////////////////////////////////////////////
+ function updateRender() {
+ $("Html").innerHTML = toXHTML($("Text").value);
+ }
+ // Hook the live preview up to the "Text" textarea, render once, and fill
+ // the "UnitTest" pane with the unit-test results table.
+ function installRenderer() {
+   var element = $("Text");  // local, so no global named `element` is created
+   element.onkeyup = element.onkeypress = element.ondrop =
+     element.onchange = updateRender;
+   updateRender();
+   $("UnitTest").innerHTML = "<h1>Unit Testing</h1>" +
+     toXHTML.UnitTest();
+ }
+ window.onload = installRenderer;
+// ]]>
+ </script>
+ </head>
+ <body>
+ <div id="Main">
+ <form action="">
+ <p><textarea name="Text" id="Text" class="WikiText" rows="20" cols="50">
+= Creole
+
+{{http://www.wikicreole.org/attach/LeftMenu/viki.png|Creole}}\\
+Creole is a common wiki markup language intended to be used across many different wikis. Its aim is not to replace existing markup, but instead to enable wiki users to transfer basic content seamlessly across wikis, and lower the barrier to entry for novice users.
+
+= Regular Language–Recursive Descent Parser
+
+This text has been formatted using a //regular language–recursive descent// (RLRD) parser design. That is, rules are applied in a recursive descent that matches the ultimate XML output; at each level of the descent, a set of regular expressions define the text that each child rule can "consume". Rules are applied greedily (i.e. earliest-match first). Unlike standard markup designs, this makes edge-cases between rules explicit, and allows a parser to be certified XHTML-compliant.
+
+The parser is written in Javascript, allowing greater flexibility in the deployment of the parser. The underlying RLRD design can be implemented in any language.
+
+= Live Preview
+
+This document demonstrates a live Javascript preview, using this RLRD renderer. Editing the above text area will change this text.
+
+The markup follows the basic rules of [[http://www.wikicreole.org/wiki/Creole0.4|Creole v0.4]].</textarea></p>
+ </form>
+ <div id="Html" class="RenderedText">Javascript disabled</div>
+ </div>
+ <div id="UnitTest"></div>
+ </body>
+</html>
diff --git a/references/smu.c b/references/smu.c
new file mode 100644
index 0000000..1174167
--- /dev/null
+++ b/references/smu.c
@@ -0,0 +1,805 @@
+/* smu - simple markup
+ * Copyright (C) <2007, 2008> Enno Boland <g s01 de>
+ * 2019-2022 Karl Bartel <[email protected]>
+ * 2022 bzt
+ *
+ * See LICENSE for further informations
+ */
+#include <ctype.h>
+#include <regex.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Number of elements in array x (x must be an actual array, not a pointer).
+ * Fully parenthesized so it can be used inside larger expressions. */
+#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))
+/* Yields the lvalue b[i]. Whenever i is a multiple of BUFSIZ, b is first
+ * reallocated to i + BUFSIZ bytes; eprint() is called (and exits) if that
+ * fails. Expands to an if-statement followed by an expression, so use it
+ * only as a full statement inside braces, e.g. `ADDC(buf, n) = c;`. */
+#define ADDC(b,i) if ((i) % BUFSIZ == 0) { (b) = realloc((b), ((i) + BUFSIZ) * sizeof(char)); if (!(b)) eprint("Malloc failed."); } (b)[(i)]
+
+typedef int (*Parser)(const char *, const char *, int);
+typedef struct {
+ char *search;
+ int process;
+ char *before, *after;
+} Tag;
+
+static int docomment(const char *begin, const char *end, int newblock); /* Parser for html-comments */
+static int docodefence(const char *begin, const char *end, int newblock); /* Parser for code fences */
+static int dohtml(const char *begin, const char *end, int newblock); /* Parser for html */
+static int dolineprefix(const char *begin, const char *end, int newblock);/* Parser for line prefix tags */
+static int dolink(const char *begin, const char *end, int newblock); /* Parser for links and images */
+static int dolist(const char *begin, const char *end, int newblock); /* Parser for lists */
+static int dotable(const char *begin, const char *end, int newblock); /* Parser for tables */
+static int doparagraph(const char *begin, const char *end, int newblock); /* Parser for paragraphs */
+static int doreplace(const char *begin, const char *end, int newblock); /* Parser for simple replaces */
+static int doshortlink(const char *begin, const char *end, int newblock); /* Parser for links and images */
+static int dosurround(const char *begin, const char *end, int newblock); /* Parser for surrounding tags */
+static int dounderline(const char *begin, const char *end, int newblock); /* Parser for underline tags */
+static void *ereallocz(void *p, size_t size);
+static void hprint(const char *begin, const char *end); /* escapes HTML and prints it to output */
+static void process(const char *begin, const char *end, int isblock); /* Processes range between begin and end. */
+
+/* list of parsers */
+static Parser parsers[] = { dounderline, docomment, docodefence, dolineprefix,
+ dolist, dotable, doparagraph, dosurround, dolink,
+ doshortlink, dohtml, doreplace };
+static int nohtml = 0;
+static int in_paragraph = 0;
+
+regex_t p_end_regex; /* End of paragraph */
+
+static Tag lineprefix[] = {
+ { " ", 0, "<pre><code>", "\n</code></pre>" },
+ { "\t", 0, "<pre><code>", "\n</code></pre>" },
+ { ">", 2, "<blockquote>", "</blockquote>" },
+ { "###### ", 1, "<h6>", "</h6>" },
+ { "##### ", 1, "<h5>", "</h5>" },
+ { "#### ", 1, "<h4>", "</h4>" },
+ { "### ", 1, "<h3>", "</h3>" },
+ { "## ", 1, "<h2>", "</h2>" },
+ { "# ", 1, "<h1>", "</h1>" },
+ { "- - -\n", 1, "<hr />", ""},
+ { "---\n", 1, "<hr />", ""},
+};
+
+static Tag underline[] = {
+ { "=", 1, "<h1>", "</h1>\n" },
+ { "-", 1, "<h2>", "</h2>\n" },
+};
+
+static Tag surround[] = {
+ { "```", 0, "<code>", "</code>" },
+ { "``", 0, "<code>", "</code>" },
+ { "`", 0, "<code>", "</code>" },
+ { "___", 1, "<strong><em>", "</em></strong>" },
+ { "***", 1, "<strong><em>", "</em></strong>" },
+ { "__", 1, "<strong>", "</strong>" },
+ { "**", 1, "<strong>", "</strong>" },
+ { "_", 1, "<em>", "</em>" },
+ { "*", 1, "<em>", "</em>" },
+};
+
+static const char *replace[][2] = {
+ /* Backslash escapes */
+ { "\\\\", "\\" },
+ { "\\`", "`" },
+ { "\\*", "*" },
+ { "\\_", "_" },
+ { "\\{", "{" },
+ { "\\}", "}" },
+ { "\\[", "[" },
+ { "\\]", "]" },
+ { "\\(", "(" },
+ { "\\)", ")" },
+ { "\\#", "#" },
+ { "\\+", "+" },
+ { "\\-", "-" },
+ { "\\.", "." },
+ { "\\!", "!" },
+ { "\\\"", "&quot;" },
+ { "\\$", "$" },
+ { "\\%", "%" },
+ { "\\&", "&amp;" },
+ { "\\'", "'" },
+ { "\\,", "," },
+ { "\\-", "-" },
+ { "\\.", "." },
+ { "\\/", "/" },
+ { "\\:", ":" },
+ { "\\;", ";" },
+ { "\\<", "&lt;" },
+ { "\\>", "&gt;" },
+ { "\\=", "=" },
+ { "\\?", "?" },
+ { "\\@", "@" },
+ { "\\^", "^" },
+ { "\\|", "|" },
+ { "\\~", "~" },
+ /* HTML syntax symbols that need to be turned into entities */
+ { "<", "&lt;" },
+ { ">", "&gt;" },
+ { "&amp;", "&amp;" }, /* Avoid replacing the & in &amp; */
+ { "&", "&amp;" },
+ /* Preserve newlines with two spaces before linebreak */
+ { " \n", "<br />\n" },
+};
+
+static const char *code_fence = "```";
+
+void
+eprint(const char *format, ...) {
+ va_list ap;
+
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+void end_paragraph(void) {
+ if (in_paragraph) {
+ fputs("</p>\n", stdout);
+ in_paragraph = 0;
+ }
+}
+
+int
+docomment(const char *begin, const char *end, int newblock) {
+ char *p;
+
+ if (nohtml || strncmp("<!--", begin, 4))
+ return 0;
+ p = strstr(begin, "-->");
+ if (!p || p + 3 >= end)
+ return 0;
+ fprintf(stdout, "%.*s\n", (int)(p + 3 - begin), begin);
+ return (p + 3 - begin) * (newblock ? -1 : 1);
+}
+
+/* Parser for fenced code blocks delimited by `code_fence`.
+ *
+ * Only applies when `newblock` is set and `begin` starts with the fence.
+ * Text between the opening fence and the end of that line, if any, is
+ * emitted as a "language-..." class on the <code> element. A fence preceded
+ * by a backslash does not close the block; when no closing fence is found,
+ * everything up to `end` is treated as code.
+ *
+ * Returns 0 when nothing was parsed, otherwise the negated sum of the
+ * distance from `begin` to the closing position and the fence length.
+ */
+int
+docodefence(const char *begin, const char *end, int newblock) {
+    const char *p, *start, *stop, *lang_start, *lang_stop;
+    unsigned int l = strlen(code_fence);
+
+    if (!newblock)
+        return 0;
+
+    if (strncmp(begin, code_fence, l) != 0)
+        return 0;
+
+    /* Find start of content and read language string. The scan is bounded
+     * by `end` so an opening fence that is not followed by a newline cannot
+     * run off the end of the input. */
+    start = begin + l;
+    lang_start = start;
+    while (start < end && start[0] != '\n')
+        start++;
+    lang_stop = start;
+    if (start < end)
+        start++; /* step over the newline that ends the fence line */
+
+    /* Find end of fence, skipping fences escaped with a backslash */
+    p = start - 1;
+    do {
+        stop = p;
+        p = strstr(p + 1, code_fence);
+    } while (p && p[-1] == '\\');
+    if (p && p[-1] != '\\')
+        stop = p;
+
+    /* No closing code fence means the rest of file is code (CommonMark) */
+    if (!p)
+        stop = end;
+
+    /* Print output */
+    if (lang_start == lang_stop) {
+        fputs("<pre><code>", stdout);
+    } else {
+        fputs("<pre><code class=\"language-", stdout);
+        hprint(lang_start, lang_stop);
+        fputs("\">", stdout);
+    }
+    hprint(start, stop);
+    fputs("</code></pre>\n", stdout);
+    return -(stop - begin + l);
+}
+
+/* Parser for raw HTML elements.
+ *
+ * Disabled when `nohtml` is set. `begin` must point at '<' followed by a
+ * letter. If a closing tag with the same name is found before `end`, the
+ * text from `begin` through that closing tag is written to stdout verbatim.
+ * Otherwise the text from `begin` up to the first '>' after the tag name is
+ * written, together with the single byte that follows the '>'.
+ *
+ * Returns the number of bytes written, or 0 if nothing was parsed.
+ * `newblock` is not used.
+ */
+int
+dohtml(const char *begin, const char *end, int newblock) {
+    const char *p, *tag, *tagend;
+
+    if (nohtml || begin + 2 >= end)
+        return 0;
+    p = begin;
+    /* <ctype.h> classifiers require a value representable as unsigned char;
+     * passing a negative plain char (e.g. a UTF-8 byte) is undefined. */
+    if (p[0] != '<' || !isalpha((unsigned char)p[1]))
+        return 0;
+    p++;
+    tag = p;
+    /* Test the bound before looking at *p */
+    for (; p < end && isalnum((unsigned char)*p); p++);
+    tagend = p;
+    if (p > end || tag == tagend)
+        return 0;
+    while ((p = strstr(p, "</")) && p < end) {
+        p += 2;
+        if (strncmp(p, tag, tagend - tag) == 0 && p[tagend - tag] == '>') {
+            p++;
+            fwrite(begin, sizeof(char), p - begin + tagend - tag, stdout);
+            return p - begin + tagend - tag;
+        }
+    }
+    p = strchr(tagend, '>');
+    if (p) {
+        fwrite(begin, sizeof(char), p - begin + 2, stdout);
+        return p - begin + 2;
+    }
+    else
+        return 0;
+}
+
+/* Parser for the line-prefix blocks listed in `lineprefix`.
+ *
+ * Matches only when `newblock` is set, or when `begin` points at a newline
+ * that is directly followed by a prefix. Any open paragraph is closed before
+ * the block is emitted. Consecutive lines carrying the same prefix are
+ * collected with the prefix removed, then either run through process() or
+ * escaped with hprint(), depending on the entry's `process` field.
+ *
+ * Returns 0 if no prefix matches. For prefixes whose search string ends in
+ * a newline the return value is positive (prefix length minus one, plus one
+ * if a leading newline was skipped); for all others it is the negated
+ * distance from `begin` to the end of the collected lines.
+ */
+int
+dolineprefix(const char *begin, const char *end, int newblock) {
+    unsigned int i, j, l;
+    char *buffer;
+    const char *p;
+    int consumed_input = 0;
+
+    if (newblock)
+        p = begin;
+    else if (*begin == '\n') {
+        p = begin + 1;
+        consumed_input += 1;
+    } else
+        return 0;
+    for (i = 0; i < LENGTH(lineprefix); i++) {
+        l = strlen(lineprefix[i].search);
+        if (end - p + 1 < l)
+            continue;
+        if (strncmp(lineprefix[i].search, p, l))
+            continue;
+        if (*begin == '\n')
+            fputc('\n', stdout);
+
+        /* All line prefixes add a block element. These are not allowed
+         * inside paragraphs, so we must end the paragraph first. */
+        end_paragraph();
+
+        fputs(lineprefix[i].before, stdout);
+        if (lineprefix[i].search[l-1] == '\n') {
+            fputc('\n', stdout);
+            return l - 1 + consumed_input;
+        }
+        if (!(buffer = malloc(BUFSIZ)))
+            eprint("Malloc failed.");
+        buffer[0] = '\0';
+
+        /* Collect lines into buffer while they start with the prefix */
+        j = 0;
+        while ((strncmp(lineprefix[i].search, p, l) == 0) && p + l < end) {
+            p += l;
+
+            /* Special case for blockquotes: optional space after > */
+            if (lineprefix[i].search[0] == '>' && *p == ' ') {
+                p++;
+            }
+
+            while (p < end) {
+                ADDC(buffer, j) = *p;
+                j++;
+                if (*(p++) == '\n')
+                    break;
+            }
+        }
+
+        /* Skip empty lines in block. The j > 0 guard keeps the scan inside
+         * the buffer when nothing was collected or every byte was '\n'. */
+        while (j > 0 && *(buffer + j - 1) == '\n') {
+            j--;
+        }
+
+        ADDC(buffer, j) = '\0';
+        if (lineprefix[i].process)
+            /* a process value of 2 or more handles the content as a block */
+            process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
+        else
+            hprint(buffer, buffer + strlen(buffer));
+        puts(lineprefix[i].after);
+        free(buffer);
+        return -(p - begin);
+    }
+    return 0;
+}
+
+/* Parser for links `[desc](target "title")` and images
+ * `![alt](src "title")`.
+ *
+ * The description may itself contain images; each "![" found before the
+ * closing "](" moves the search on to the next "](". Parentheses inside the
+ * target are balanced. An optional title is delimited by " or ' and must end
+ * with the same character; the target may be wrapped in <...>.
+ *
+ * Output goes to stdout: the target, title and image alt text are escaped
+ * with hprint(), the link description is run through process().
+ *
+ * Returns the number of bytes from `begin` through the closing ')', or 0 if
+ * the text is not a well-formed link. `newblock` is not used.
+ */
+int
+dolink(const char *begin, const char *end, int newblock) {
+    int img, len, sep, parens_depth = 1;
+    const char *desc, *link, *p, *q, *descend, *linkend;
+    const char *title = NULL, *titleend = NULL;
+
+    if (*begin == '[')
+        img = 0;
+    else if (strncmp(begin, "![", 2) == 0)
+        img = 1;
+    else
+        return 0;
+    p = desc = begin + 1 + img;
+    if (!(p = strstr(desc, "](")) || p > end)
+        return 0;
+    for (q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "!["))
+        if (!(p = strstr(p + 1, "](")) || p > end)
+            return 0;
+    descend = p;
+    link = p + 2;
+
+    /* find end of link while handling nested parens */
+    q = link;
+    while (parens_depth) {
+        if (!(q = strpbrk(q, "()")) || q > end)
+            return 0;
+        if (*q == '(')
+            parens_depth++;
+        else
+            parens_depth--;
+        if (parens_depth && q < end)
+            q++;
+    }
+
+    if ((p = strpbrk(link, "\"'")) && p < end && q > p) {
+        sep = p[0]; /* separator: can be " or ' */
+        title = p + 1;
+        /* strip trailing whitespace; isspace() needs an unsigned char value,
+         * a negative plain char (non-ASCII byte) would be undefined */
+        for (linkend = p; linkend > link && isspace((unsigned char)*(linkend - 1)); linkend--);
+        for (titleend = q - 1; titleend > link && isspace((unsigned char)*(titleend)); titleend--);
+        if (titleend < title || *titleend != sep) {
+            return 0;
+        }
+    }
+    else {
+        linkend = q;
+    }
+
+    /* Links can be given in angular brackets */
+    if (*link == '<' && *(linkend - 1) == '>') {
+        link++;
+        linkend--;
+    }
+
+    len = q + 1 - begin;
+    if (img) {
+        fputs("<img src=\"", stdout);
+        hprint(link, linkend);
+        fputs("\" alt=\"", stdout);
+        hprint(desc, descend);
+        fputs("\" ", stdout);
+        if (title && titleend) {
+            fputs("title=\"", stdout);
+            hprint(title, titleend);
+            fputs("\" ", stdout);
+        }
+        fputs("/>", stdout);
+    }
+    else {
+        fputs("<a href=\"", stdout);
+        hprint(link, linkend);
+        fputs("\"", stdout);
+        if (title && titleend) {
+            fputs(" title=\"", stdout);
+            hprint(title, titleend);
+            fputs("\"", stdout);
+        }
+        fputs(">", stdout);
+        process(desc, descend, 0);
+        fputs("</a>", stdout);
+    }
+    return len;
+}
+
+/*
+ * Block parser for bulleted lists (marker `-`, `*` or `+`) and numbered
+ * lists (`N.`).
+ *
+ * A list may start at begin when newblock is set, or right after a
+ * newline at begin otherwise. Each item is collected into a temporary
+ * buffer and handed to process(); the surrounding <ul>/<ol>/<li> tags are
+ * written to stdout.
+ *
+ * Returns 0 when the text is not a list, otherwise the NEGATIVE number of
+ * characters consumed (process() advances by the absolute value and
+ * treats a negative result as "the next text starts a new block").
+ */
+int
+dolist(const char *begin, const char *end, int newblock) {
+	unsigned int i, j, indent, run, isblock, start_number = 1;
+	const char *p, *q, *num_start;
+	char *buffer = NULL;
+	char marker = '\0';  /* Bullet symbol or \0 for unordered lists */
+
+	isblock = 0;
+	if (newblock)
+		p = begin;
+	else if (*begin == '\n')
+		p = begin + 1;
+	else
+		return 0;
+	q = p;
+	if (*p == '-' || *p == '*' || *p == '+') {
+		marker = *p;
+	} else {
+		num_start = p;
+		for (; p < end && *p >= '0' && *p <= '9'; p++);
+		/*
+		 * A numbered item needs at least one digit before the dot, the
+		 * same rule the item-continuation check below applies.
+		 */
+		if (p == num_start || p >= end || *p != '.')
+			return 0;
+		/* strtoul has defined behaviour on overflow, unlike atoi */
+		start_number = (unsigned int)strtoul(num_start, NULL, 10);
+	}
+	p++;
+	if (p >= end || !(*p == ' ' || *p == '\t'))
+		return 0;
+
+	end_paragraph();
+
+	for (p++; p != end && (*p == ' ' || *p == '\t'); p++);
+	/* width of marker plus following whitespace; continuation lines and
+	 * further items are recognised by this same width */
+	indent = p - q;
+	buffer = ereallocz(buffer, BUFSIZ);
+	if (!newblock)
+		fputc('\n', stdout);
+
+	if (marker) {
+		fputs("<ul>\n", stdout);
+	} else if (start_number == 1) {
+		fputs("<ol>\n", stdout);
+	} else {
+		printf("<ol start=\"%u\">\n", start_number);
+	}
+	run = 1;
+	/* outer loop: one iteration per list item */
+	for (; p < end && run; p++) {
+		/* inner loop: copy the text of the current item into buffer */
+		for (i = 0; p < end && run; p++, i++) {
+			if (*p == '\n') {
+				if (p + 1 == end)
+					break;
+				else {
+					/* Handle empty lines */
+					for (q = p + 1; q < end && (*q == ' ' || *q == '\t'); q++);
+					if (*q == '\n') {
+						ADDC(buffer, i) = '\n';
+						i++;
+						run = 0;
+						isblock++;
+						p = q;
+					}
+				}
+				/* q: start of the following line; j: how much of the
+				 * expected marker/indent prefix that line provides */
+				q = p + 1;
+				j = 0;
+				if (marker && *q == marker)
+					j = 1;
+				else {
+					for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++);
+					if (q + j == end)
+						break;
+					if (j > 0 && q[j] == '.')
+						j++;
+					else
+						j = 0;
+				}
+				if (q + indent < end)
+					for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++);
+				if (j == indent) {
+					/* full prefix present: the list goes on */
+					ADDC(buffer, i) = '\n';
+					i++;
+					p += indent;
+					run = 1;
+					if (*q == ' ' || *q == '\t')
+						p++;    /* indented continuation of this item */
+					else
+						break;  /* a marker: the next item begins */
+				}
+				else if (j < indent)
+					run = 0;    /* prefix missing: last item of the list */
+			}
+			ADDC(buffer, i) = *p;
+		}
+		ADDC(buffer, i) = '\0';
+		fputs("<li>", stdout);
+		process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
+		fputs("</li>\n", stdout);
+	}
+	fputs(marker ? "</ul>\n" : "</ol>\n", stdout);
+	free(buffer);
+	/* step back over the trailing newlines so they are not consumed */
+	p--;
+	while (*(--p) == '\n');
+	return -(p - begin + 1);
+}
+
+/*
+ * Inline parser for pipe tables. It is invoked on a `|` character and
+ * keeps its progress in static variables between calls, so each call
+ * handles exactly one cell boundary.
+ *
+ * Returns the number of characters consumed, or 0 when begin does not
+ * point at a `|`. The table markup is written to stdout.
+ */
+int
+dotable(const char *begin, const char *end, int newblock) {
+	/*
+	 * table state
+	 *   intable: 0 = not in a table, 1 = table opened (header row),
+	 *            2 = header row closed, alignment row still to be skipped,
+	 *            3 = alignment row skipped
+	 *   inrow:   0 = no row open, -1 = header row, 1 = body row
+	 *   incell:  number of cells opened so far in the current row
+	 *   calign:  two bits per column, used as index into align_table
+	 */
+	static signed char intable, inrow, incell;
+	static unsigned long int calign;
+	static const char *align_table[] = {
+		"",
+		" style=\"text-align: left\"",
+		" style=\"text-align: right\"",
+		" style=\"text-align: center\"",
+	};
+
+	const char *p;
+	/* l: number of columns whose alignment fits into calign */
+	int i, l = (int)sizeof(calign) * 4;
+
+	if(*begin != '|')
+		return 0;
+	if (intable == 2) { /* in alignment row, skip it. */
+		++intable;
+		for (p = begin; p < end && *p != '\n'; ++p);
+		return p - begin + 1;
+	}
+	if(inrow && (begin + 1 >= end || begin[1] == '\n')) { /* close cell and row and if ends, table too */
+		fprintf(stdout, "</t%c></tr>", inrow == -1 ? 'h' : 'd');
+		if (inrow == -1)
+			intable = 2;
+		inrow = 0;
+		if(end - begin <= 2 || begin[2] == '\n') {
+			intable = 0;
+			fputs("\n</table>\n", stdout);
+		}
+		return 1;
+	}
+
+	if(!intable) { /* open table */
+		intable = 1; inrow = -1; incell = 0; calign = 0;
+		for (p = begin; p < end && *p != '\n'; ++p);
+		if(*p == '\n') { /* load alignment from 2nd line */
+			for(i = -1, ++p; p < end && *p != '\n'; p++) {
+				if(*p == '|') {
+					i++;
+					do { p++; } while(p < end && (*p == ' ' || *p == '\t'));
+					/* ':' right after the bar: low bit of the column */
+					if(i < l && *p == ':')
+						calign |= 1ul << (i * 2);
+					if (*p == '\n')
+						break;
+				} else if(i >= 0 && i < l && *p == ':') {
+					/*
+					 * ':' elsewhere in the cell: high bit of the column.
+					 * i is still -1 until the first '|' has been seen;
+					 * without the i >= 0 test a ':' before it would shift
+					 * by a negative amount, which is undefined.
+					 */
+					calign |= 1ul << (i * 2 + 1);
+				}
+			}
+		}
+		fputs("<table>\n<tr>", stdout);
+	}
+	if(!inrow) { /* open row */
+		inrow = 1; incell = 0;
+		fputs("<tr>", stdout);
+	}
+	if(incell) /* close cell */
+		fprintf(stdout, "</t%c>", inrow == -1 ? 'h' : 'd');
+	l = incell < l ? (calign >> (incell * 2)) & 3 : 0; /* open cell */
+	fprintf(stdout, "<t%c%s>", inrow == -1 ? 'h' : 'd', align_table[l]);
+	incell++;
+	/* swallow the spaces that pad the cell content on the left */
+	for(p = begin + 1; p < end && *p == ' '; p++);
+	return p - begin;
+}
+
+/*
+ * Block parser of last resort: wraps the text from begin up to the next
+ * match of p_end_regex in <p>...</p>, running the content through
+ * process().
+ *
+ * Only acts when newblock is set. Returns 0 otherwise, else the NEGATIVE
+ * number of characters consumed.
+ */
+int
+doparagraph(const char *begin, const char *end, int newblock) {
+	const char *p;
+	regmatch_t match;
+
+	if (!newblock)
+		return 0;
+	if (regexec(&p_end_regex, begin + 1, 1, &match, 0)) {
+		p = end;
+	} else {
+		p = begin + 1 + match.rm_so;
+		/*
+		 * regexec() scans up to the terminating NUL, not up to end, so
+		 * the match may lie beyond the range we were asked to parse.
+		 * Never let the paragraph reach past end.
+		 */
+		if (p > end)
+			p = end;
+	}
+
+	fputs("<p>", stdout);
+	in_paragraph = 1;
+	process(begin, p, 0);
+	end_paragraph();
+
+	return -(p - begin);
+}
+
+/*
+ * Inline parser for fixed substitutions: when the text at begin starts
+ * with the first string of an entry of the replace table, the second
+ * string of that entry is written to stdout.
+ *
+ * Returns the length of the matched source text, or 0 if no entry matches.
+ */
+int
+doreplace(const char *begin, const char *end, int newblock) {
+	unsigned int idx, patlen;
+
+	for (idx = 0; idx < LENGTH(replace); idx++) {
+		patlen = strlen(replace[idx][0]);
+		/* the pattern must fit into the remaining text and match it */
+		if (!(end - begin < patlen)
+		    && strncmp(replace[idx][0], begin, patlen) == 0) {
+			fputs(replace[idx][1], stdout);
+			return patlen;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Print [begin, end) for an obfuscated mail link: ASCII bytes are written
+ * as decimal character references, all other bytes are passed through
+ * unchanged so that multi-byte sequences stay intact.
+ */
+static void
+mailprint(const char *begin, const char *end) {
+	const char *c;
+
+	for (c = begin; c != end; c++) {
+		if ((unsigned char)*c < 0x80)
+			fprintf(stdout, "&#%u;", (unsigned int)(unsigned char)*c);
+		else
+			fputc(*c, stdout);
+	}
+}
+
+/*
+ * Inline parser for automatic links written as `<target>`.
+ *
+ * The text between the angle brackets must not contain blanks, tabs or
+ * newlines. It is treated as a link when it contains `:` or `#`, and as a
+ * mail address when it contains `@` before any `:` or `#`; anything else
+ * is rejected. Mail addresses are emitted as an obfuscated `mailto:` link.
+ *
+ * Returns the number of characters consumed including both brackets, or
+ * 0 when the text at begin is not such a link.
+ */
+int
+doshortlink(const char *begin, const char *end, int newblock) {
+	const char *p;
+	int ismail = 0;  /* 0 = undecided, 1 = mail address, -1 = other link */
+
+	if (*begin != '<')
+		return 0;
+	for (p = begin + 1; p != end; p++) {
+		switch (*p) {
+		case ' ':
+		case '\t':
+		case '\n':
+			return 0;
+		case '#':
+		case ':':
+			ismail = -1;
+			break;
+		case '@':
+			if (ismail == 0)
+				ismail = 1;
+			break;
+		case '>':
+			if (ismail == 0)
+				return 0;
+			fputs("<a href=\"", stdout);
+			if (ismail == 1) {
+				/* mailto: */
+				fputs("&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:", stdout);
+				mailprint(begin + 1, p);
+				fputs("\">", stdout);
+				mailprint(begin + 1, p);
+			}
+			else {
+				hprint(begin + 1, p);
+				fputs("\">", stdout);
+				hprint(begin + 1, p);
+			}
+			fputs("</a>", stdout);
+			return p - begin + 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Inline parser for text enclosed in a pair of identical delimiters taken
+ * from the surround table.
+ *
+ * For the first table entry whose delimiter opens the text at begin and
+ * has an unescaped closing delimiter inside [begin, end), the enclosed
+ * text is written between the entry's before/after strings, either
+ * processed recursively or just escaped, depending on the entry.
+ *
+ * Returns the number of characters consumed including both delimiters,
+ * or 0 when nothing matched.
+ */
+int
+dosurround(const char *begin, const char *end, int newblock) {
+	unsigned int i, l;
+	const char *p, *start, *stop;
+
+	for (i = 0; i < LENGTH(surround); i++) {
+		l = strlen(surround[i].search);
+		if (end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
+			continue;
+		start = begin + l;
+		/* find the first closing delimiter not preceded by a backslash */
+		p = start;
+		do {
+			p = strstr(p + 1, surround[i].search);
+		} while (p && p[-1] == '\\');
+		/*
+		 * No unescaped closing delimiter inside the range means this is
+		 * not a surround at all; do not emit an empty element for an
+		 * unmatched opening delimiter.
+		 */
+		if (!p || p >= end)
+			continue;
+		stop = p;
+		fputs(surround[i].before, stdout);
+
+		/* Single space at start and end are ignored */
+		if (start[0] == ' ' && stop[-1] == ' ' && start < stop - 1) {
+			start++;
+			stop--;
+			l++;    /* keeps the returned length correct */
+		}
+
+		if (surround[i].process)
+			process(start, stop, 0);
+		else
+			hprint(start, stop);
+		fputs(surround[i].after, stdout);
+		return stop - start + 2 * l;
+	}
+	return 0;
+}
+
+/*
+ * Block parser for a line of text followed by an "underline": a line that
+ * starts with at least three repetitions of the first character of an
+ * underline table entry's search string.
+ *
+ * Only acts when newblock is set. The first line is written between the
+ * entry's before/after strings. Returns the NEGATIVE number of characters
+ * consumed, or 0 when there is no match.
+ */
+int
+dounderline(const char *begin, const char *end, int newblock) {
+	unsigned int entry, run, textlen;
+	const char *rule;
+
+	if (!newblock)
+		return 0;
+
+	/* length of the first line */
+	textlen = 0;
+	while (begin + textlen != end && begin[textlen] != '\n')
+		textlen++;
+	if (textlen == 0)
+		return 0;
+
+	/* the candidate underline starts right after the newline */
+	rule = begin + textlen + 1;
+	for (entry = 0; entry < LENGTH(underline); entry++) {
+		run = 0;
+		while (rule + run < end && rule[run] != '\n'
+		       && rule[run] == underline[entry].search[0])
+			run++;
+		if (run < 3)
+			continue;
+
+		fputs(underline[entry].before, stdout);
+		if (underline[entry].process)
+			process(begin, begin + textlen, 0);
+		else
+			hprint(begin, begin + textlen);
+		fputs(underline[entry].after, stdout);
+		return -(run + rule - begin);
+	}
+	return 0;
+}
+
+/*
+ * realloc() wrapper that reports a failed allocation through eprint().
+ * Returns whatever realloc() returned.
+ */
+void *
+ereallocz(void *p, size_t size) {
+	void *grown = realloc(p, size);
+
+	if (grown == NULL)
+		eprint("realloc: %zu bytes\n", size);
+	return grown;
+}
+
+/*
+ * Write the bytes in [begin, end) to stdout, replacing the characters
+ * `&`, `"`, `>` and `<` by their HTML entities.
+ */
+void
+hprint(const char *begin, const char *end) {
+	const char *cur = begin;
+
+	while (cur != end) {
+		switch (*cur) {
+		case '&':
+			fputs("&amp;", stdout);
+			break;
+		case '"':
+			fputs("&quot;", stdout);
+			break;
+		case '>':
+			fputs("&gt;", stdout);
+			break;
+		case '<':
+			fputs("&lt;", stdout);
+			break;
+		default:
+			fputc(*cur, stdout);
+			break;
+		}
+		cur++;
+	}
+}
+
+/*
+ * Core loop: walk over [begin, end) and offer every position to the
+ * functions in the parsers table, in table order. The first one that
+ * returns non-zero wins and the position advances by the absolute value
+ * of its result; when none accepts, the character is copied to stdout.
+ *
+ * newblock tells the parsers whether the current position starts a new
+ * block. It is set again after an empty line and after a parser returned
+ * a negative value.
+ */
+void
+process(const char *begin, const char *end, int newblock) {
+	const char *cur = begin;
+	int consumed;
+	unsigned int k;
+
+	while (cur < end) {
+		/* a new block starts after all leading newlines */
+		if (newblock) {
+			while (*cur == '\n') {
+				cur++;
+				if (cur == end)
+					return;
+			}
+		}
+
+		consumed = 0;
+		for (k = 0; k < LENGTH(parsers) && !consumed; k++)
+			consumed = parsers[k](cur, end, newblock);
+
+		if (consumed != 0) {
+			cur += abs(consumed);
+		} else {
+			fputc(*cur, stdout);
+			cur++;
+		}
+
+		/* Don't print single newline at end */
+		if (cur + 1 == end && *cur == '\n')
+			return;
+
+		/* an empty line or a negative parser result opens a new block */
+		newblock = (cur[0] == '\n' && cur + 1 != end && cur[1] == '\n')
+		           || consumed < 0;
+	}
+}
+
+/*
+ * Entry point: parse the command line, read the whole input (the named
+ * file, or stdin when no file is given) into memory and convert it with
+ * process(), which writes the HTML to stdout.
+ *
+ * Options: -v prints the version, -n sets nohtml, -- ends the options.
+ *
+ * NOTE(review): eprint() is defined outside this part of the file; the
+ * code below relies on it not returning after reporting an error.
+ */
+int
+main(int argc, char *argv[]) {
+	char *buffer = NULL;
+	int i;
+	size_t s;
+	unsigned long len, bsize;
+	FILE *source = stdin;
+
+	/* paragraphs end at an empty line or at a code fence */
+	if (regcomp(&p_end_regex, "(\n\n|(^|\n)```)", REG_EXTENDED))
+		eprint("Cannot compile paragraph regex\n");
+
+	for (i = 1; i < argc; i++) {
+		if (!strcmp("-v", argv[i]))
+			eprint("simple markup %s (C) Enno Boland\n",VERSION);
+		else if (!strcmp("-n", argv[i]))
+			nohtml = 1;
+		else if (argv[i][0] != '-')
+			break;
+		else if (!strcmp("--", argv[i])) {
+			i++;
+			break;
+		}
+		else
+			eprint("Usage %s [-n] [file]\n -n escape html strictly\n", argv[0]);
+	}
+	if (i < argc && !(source = fopen(argv[i], "r")))
+		eprint("Cannot open file `%s`\n",argv[i]);
+	bsize = 2 * BUFSIZ;
+	buffer = ereallocz(buffer, bsize);
+	len = 0;
+	while ((s = fread(buffer + len, 1, BUFSIZ, source))) {
+		len += s;
+		/* keep room for the next chunk plus the terminating NUL */
+		if (BUFSIZ + len + 1 > bsize) {
+			bsize += BUFSIZ;
+			buffer = ereallocz(buffer, bsize);
+		}
+	}
+	if (ferror(source))
+		eprint("Cannot read input\n");
+	buffer[len] = '\0';
+	process(buffer, buffer + len, 1);
+	fclose(source);
+	free(buffer);
+	regfree(&p_end_regex);
+	return EXIT_SUCCESS;
+}