diff options
Diffstat (limited to 'references/regular-recursive.html')
-rw-r--r-- | references/regular-recursive.html | 632 |
1 files changed, 632 insertions, 0 deletions
diff --git a/references/regular-recursive.html b/references/regular-recursive.html new file mode 100644 index 0000000..69584ab --- /dev/null +++ b/references/regular-recursive.html @@ -0,0 +1,632 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <title>Creole v0.4 Live Preview</title> + <meta http-equiv="Content-Type" content="text/xhtml; charset=utf-8" /> + <style type="text/css"> + .WikiText { display: block; width: 100%; height: 33%; } + table, td { border: solid 1px; } + tr { vertical-align: middle; text-align: center; } + div#UnitTest { position: absolute; right: 1em; + width: 15em; padding: 0; } + div#UnitTest h1 { font-size: large; text-align: center; + text-transform: uppercase; } + div#Main { padding: 0 1em 0 1em; margin: 0px; + position: absolute; top: 0px; left: 0px; right: 16em; } + table.unit-test { border: thin solid black; border-collapse: collapse; } + table.unit-test th { border: thin black; border-style: solid dashed; } + table.unit-test td { border: thin dashed black; } + table.unit-test strong { color: red; } + </style> + <script type="text/javascript"> +// <![CDATA[ +// Copyright (c) 2007 Chris Purcell. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// + +function $(element) { + if (document.getElementById) + return document.getElementById(element); + else if (document.all) + return document.all[element]; + else + return null; +} + +String.prototype.replaceEvalGl = function(regex, fn) { + var head = ""; + var tail = "" + this; + while (m = tail.match(regex)) { + head += tail.substring(0,m.index) + fn(m); + tail = tail.substring(m.index + m[0].length); + } + return head + tail; +} + +//// The markup rules //////////////////////////////////////////////////////// +MarkupRule = function(regex, rule) { + this.regex = regex; + this.rule = rule; + this.children = [ ]; +} +MarkupRule.prototype.clone = function() { + var objectClone = new this.constructor(); + for (var property in this) + objectClone[property] = this[property]; + return objectClone; +} +MarkupRule.prototype.setChildren = function(children) { + this.children = children; +} +ElementRule = function(params) { + return new MarkupRule(params["regex"], function (r) { + var text = ""; + if ("capture" in params) + text = r[params["capture"]]; + if (text) { + if ("replaceRegex" in params) + text = text.replace(params["replaceRegex"], params["replaceString"]); + var tag = "<" + params["tag"] + ">"; + var endtag = "</" + params["tag"] + ">"; + if (!("tag" in params)) + tag = endtag = ""; + return tag + this.markUp(text) + endtag; + } else if ("tag" in params) + return "<" + params["tag"] + " />"; + else + return ""; + }); +} + +function toXHTML(wikiText) { + wikiText = wikiText.replace(/&/g, "&"); + wikiText = wikiText.replace(/</g, "<"); + wikiText = wikiText.replace(/>/g, ">"); + wikiText = wikiText.replace(/"/g, """); + return toXHTML.root.markUp(wikiText); +} + + // A header is text within equals signs (=) +toXHTML.h1 = new ElementRule({ tag: "h1", capture: 2, + regex: /(^|\n)[ \t]*={1}[ \t](.+?)[ \t]*=*\s*(\n|$)/ }); +toXHTML.h2 = new ElementRule({ tag: "h2", capture: 2, + regex: /(^|\n)[ \t]*={2}[ \t](.+?)[ \t]*=*\s*(\n|$)/ }); +toXHTML.h3 = new ElementRule({ tag: "h3", capture: 2, + regex: /(^|\n)[ \t]*={3}[ \t](.+?)[ \t]*=*\s*(\n|$)/ }); +toXHTML.h4 = new ElementRule({ tag: "h4", capture: 2, + regex: /(^|\n)[ \t]*={4}[ \t](.+?)[ \t]*=*\s*(\n|$)/ }); +toXHTML.h5 = new ElementRule({ tag: "h5", capture: 2, + regex: /(^|\n)[ \t]*={5}[ \t](.+?)[ \t]*=*\s*(\n|$)/ }); +toXHTML.h6 = new ElementRule({ tag: "h6", capture: 2, + regex: /(^|\n)[ \t]*={6}[ \t](.+?)[ \t]*=*\s*(\n|$)/ }); + + // hr is a line of 4 dashes (-) +toXHTML.hr = new ElementRule({ tag: "hr", regex: /(^|\n)\s*----\s*(\n|$)/ }); + + // br is two backslashes (\) +toXHTML.br = new ElementRule({ tag: "br", regex: /\\\\/ }); + + // Preformatted blocks are wrapped in {{{...}}} +toXHTML.preBlock = new ElementRule({ tag: "pre", capture: 2, + regex: /(^|\n){{{\n?(.*?(\n.*?)*?)}}}(\n|$)/ }); + + // tt inlines are also wrapped in {{{...}}} +toXHTML.tt = new ElementRule({ tag: "tt", + regex: /{{{(.*?(?:\n.*?)*?)}}}/, capture: 1 }); + + // Unordered and ordered lists start with * or # +toXHTML.ulist = new ElementRule({ tag: "ul", + regex: /(^|\n)(\*[^*#].*(\n|$)([*#]{2}.*(\n|$))*)+/, capture: 0, + replaceRegex: /(^|\n)[*#]/g, replaceString: "$1" }); +toXHTML.olist = new ElementRule({ tag: "ol", + regex: /(^|\n)(#[^*#].*(\n|$)([*#]{2}.*(\n|$))*)+/, capture: 0, + replaceRegex: /(^|\n)[*#]/g, replaceString: "$1" }); +toXHTML.li = new ElementRule({tag:"li",regex:/.+(\n[*#].+)*/,capture:0}); + + // Tables +toXHTML.table = new ElementRule({ tag: "table", + regex: /(^|\n)(\|.*\|[ \t]*(\n|$))+/, capture: 0 }); +toXHTML.tr = new ElementRule({ tag: "tr", + regex: /(^|\n)(\|.*)\|[ \t]*(\n|$)/, capture: 2 }); +toXHTML.td = new ElementRule({ tag: "td", + regex: /[|]+([^|]*)/, capture: 1 }); + + // Kinds of text block: + // - paragraph is the fallback for the root rule + // and consists of blocks of text separated by blank lines + // - singleLine is used within lists +toXHTML.singleLine = new ElementRule({ regex: /.+/, capture: 0 }); +toXHTML.paragraph = new ElementRule({ tag: "p", + regex: /(^|\n)([ \t]*[^\s].*(\n|$))+/, capture: 0 }); + + // Strongly emphasised text is surrounded by double-* characters +toXHTML.strong = new ElementRule({ tag: "strong", capture: 1, + regex:/\*\*([^*]*(?:\*[^*]+)*)\*\*/ }); + + // Emphasised text is surrounded by double-/ characters + // It must skip http:// or ftp:// internally + // (This would be a lot easier to write with negative lookbehind!) +toXHTML.em = new ElementRule({ tag: "em", capture: 1, + regex:"\\/\\/(" + // Starts with a double-/ + "[^\\/hf]*(?:" + + "\\/?(?:http:\\/?|ftp:\\/?)*(?:" + + "h(?:t(?:tp?)?)?" + "|" + + "f(?:tp?)?" + "|" + + "(?:" + + "h[^t\\/hf]" + "|" + + "ht[^t\\/hf]" + "|" + + "htt[^p\\/hf]" + "|" + + "http[^:\\/hf]" + "|" + + "http:[^\\/hf]" + "|" + + "http:\\/[^\\/hf]" + "|" + + "http:\\/\\/" + "|" + + "f[^t\\/hf]" + "|" + + "ft[^p\\/hf]" + "|" + + "ftp[^:\\/hf]" + "|" + + "ftp:[^\\/hf]" + "|" + + "ftp:\\/[^\\/hf]" + "|" + + "ftp:\\/\\/" + + ")" + + "[^\\/hf]*" + + ")" + "|" + + "\\/[^\\/hf][^\\/hf]*" + + ")*" + + ")" + + "\\/\\/" // Ends with a double-/ +}); + + // Links +toXHTML.linkPattern = "[^\\]|\\n]*(?:\\][^\\]|\\n]+)*"; +toXHTML.urlProtocols = "(?:http|https|ftp|afs|news|nntp|mid|cid|mailto|" + + "wais|prospero|telnet|gopher)"; +toXHTML.urlPattern = toXHTML.urlProtocols + ":" + + "[^\\]|\\n]*(?:\\][^\\]|\\n]+)*"; +toXHTML.loneURLPattern = "(?:" + toXHTML.urlProtocols + + ":[\\$-:=\\?-Z_a-z~]+[\\$-+\\/-Z_a-z~-])"; + +toXHTML.rawURL = new MarkupRule( "(" + toXHTML.loneURLPattern + ")", + function(r) { + return "<a href=\"" + r[1] + "\">" + r[1] + "</a>"; + } +); +toXHTML.unnamedURL = new MarkupRule( + "\\[\\[(" + toXHTML.urlPattern + ")\\]\\]", + function(r) { + return "<a href=\"" + r[1] + "\">" + r[1] + "</a>"; + } +); +toXHTML.unnamedLink = new MarkupRule( + "\\[\\[(" + toXHTML.linkPattern + ")\\]\\]", + function(r) { + return "<a href=\"" + r[1] + "\">" + r[1] + "</a>"; + } +); +toXHTML.namedURL = new MarkupRule( + "\\[\\[(" + toXHTML.urlPattern + ")\\|(.*?)\\]\\]", + function(r) { + return "<a href=\"" + r[1] + "\">" + r[2] + "</a>"; + } +); +toXHTML.namedLink = new MarkupRule( + "\\[\\[(" + toXHTML.linkPattern + ")\\|(.*?)\\]\\]", + function(r) { + return "<a href=\"" + r[1] + "\">" + r[2] + "</a>"; + } +); + + // Images +toXHTML.img = new MarkupRule( + "{{([^|\\n{}][^|\\n}]*(?:}[^|\\n}]+)*)\\|([^|\\n}]*(?:}[^|\\n}]+)*)}}", + function(r) { + return "<img src=\"" + r[1] + "\" alt=\"" + r[2] + "\"/>"; + } +); + + // Children of lists +toXHTML.ulist.children = toXHTML.olist.children = [ toXHTML.li ]; +toXHTML.li.children = [ toXHTML.olist, toXHTML.ulist, toXHTML.singleLine ]; + + // Children of table items +toXHTML.table.children = [ toXHTML.tr ]; +toXHTML.tr.children = [ toXHTML.td ]; +toXHTML.td.children = [ toXHTML.singleLine ]; + + // Children within blocks +toXHTML.singleLine.children = toXHTML.paragraph.children = + toXHTML.strong.children = toXHTML.em.children = toXHTML.tt.children = + [ toXHTML.strong, toXHTML.em, toXHTML.br, toXHTML.rawURL, + toXHTML.unnamedURL, toXHTML.unnamedLink, toXHTML.namedURL, + toXHTML.namedLink, toXHTML.tt, toXHTML.img ]; + + + // The root rule used to start the parser +toXHTML.root = new MarkupRule(); +toXHTML.root.children = [ toXHTML.h1, toXHTML.h2, toXHTML.h3, + toXHTML.h4, toXHTML.h5, toXHTML.h6, + toXHTML.hr, toXHTML.olist, + toXHTML.ulist, toXHTML.preBlock, + toXHTML.table ]; +toXHTML.root.fallback = new MarkupRule(); +toXHTML.root.fallback.children = [ toXHTML.paragraph ]; + + +//// Do the rendering //////////////////////////////////////////////////////// +// Apply each rule, and use whichever matches first in the text +// If there is a tie, use whichever is first in the list of rules +MarkupRule.prototype.markUp = function(text) { + var head = ""; + var tail = "" + text; + var matches = [ ]; + for (var i = 0; i < this.children.length; i++) { + matches[i] = tail.match(this.children[i].regex); + } + var best = false; + var b_i = false; + for (var i = 0; i < this.children.length; i++) + if (matches[i] && (!best || best.index > matches[i].index)) { + best = matches[i]; + b_i = i; + } + while (best) { + if ((best.index > 0) && (this.fallback)) + head += this.fallback.markUp(tail.substring(0,best.index)); + else + head += tail.substring(0,best.index); + head += this.children[b_i].rule(best); + var chopped = best.index + best[0].length; + tail = tail.substring(chopped); + for (var i = 0; i < this.children.length; i++) + if (matches[i]) + if (matches[i].index >= chopped) + matches[i].index -= chopped; + else + matches[i] = tail.match(this.children[i].regex); + best = false; + for (var i = 0; i < this.children.length; i++) + if (matches[i] && (!best || best.index > matches[i].index)) { + best = matches[i]; + b_i = i; + } + } + if (tail.length > 0 && this.fallback) + tail = this.fallback.markUp(tail); + return head + tail; +} + +//// Test the renderer /////////////////////////////////////////////////////// +toXHTML.UnitTest = function() { + var results = "<table class=\"unit-test\"><tr><th>Name</th><th>Status</th>"+ + "</tr>"; + for (var i = 0; i < toXHTML.UnitTest.tests.length; i++) { + var test = toXHTML.UnitTest.tests[i]; + var input = test.input; + var expected = test.output; + var actual = toXHTML(input); + results += "<tr><td>" + test.name + "</td><td>"; + if (expected == actual) + results += "Success"; + else { + results += "<strong>Failure</strong>" + "</td><td>"; + results += actual.replace(/&/g, "&").replace(/</g, "<"). + replace(/\n/g, "\\n"); + } + results += "</td></tr>"; + } + results += "</table>"; + return results; +} +toXHTML.UnitTest.tests = [ + { + name: "Basic paragraph markup", + input: "Basic paragraph test with <, >, & and \"", + output: "<p>Basic paragraph test with <, >, & and "</p>" + }, + { + name: "Simple unordered list", + input: "* list item\n*list item 2", + output: "<ul><li> list item</li>\n<li>list item 2</li></ul>" + }, + { + name: "Simple ordered list", + input: "# list item\n#list item 2", + output: "<ol><li> list item</li>\n<li>list item 2</li></ol>" + }, + { // Test an ul item with a sublist + name: "Unordered item with unordered sublist", + input: "* Item\n** Subitem", + output: "<ul><li> Item<ul>\n<li> Subitem</li></ul></li></ul>" + }, + { // Test a sublist without an initial tag (should not make a list) + name: "Unordered sublist without initial tag", + input: "** Sublist item", + output: "<p>** Sublist item</p>" + }, + { // Test an ol item with a sublist + name: "Ordered item with ordered sublist", + input: "# Item\n## Subitem", + output: "<ol><li> Item<ol>\n<li> Subitem</li></ol></li></ol>" + }, + { // Test a sublist without an initial tag (should not make a list) + name: "Ordered sublist without initial tag", + input: "## Sublist item", + output: "<p>## Sublist item</p>" + }, + { // Test an unordered list with an ordered sublist + name: "Unordered item with ordered sublist", + input: "* Item\n*# Subitem", + output: "<ul><li> Item<ol>\n<li> Subitem</li></ol></li></ul>" + }, + { // Test hr + name: "Horizontal rule", + input: "Some text\n----\nSome more text", + output: "<p>Some text</p><hr /><p>Some more text</p>" + }, + { // Test pre block + name: "Preformatted block", + input: "{{{\nPreformatted block\n}}}", + output: "<pre>Preformatted block\n</pre>" + }, + { // Test two pre blocks + name: "Two preformatted blocks", + input: "{{{\nPreformatted block\n}}}\n{{{Block 2}}}", + output: "<pre>Preformatted block\n</pre><pre>Block 2</pre>" + }, + { // Test h1 + name: "h1", + input: "= Header =", + output: "<h1>Header</h1>" + }, + { // Test h2 + name: "h2", + input: "== Header =", + output: "<h2>Header</h2>" + }, + { // Test h3 + name: "h3", + input: "=== Header =", + output: "<h3>Header</h3>" + }, + { // Test h4 + name: "h4", + input: "==== Header =", + output: "<h4>Header</h4>" + }, + { // Test h5 + name: "h5", + input: "===== Header", + output: "<h5>Header</h5>" + }, + { // Test h6 + name: "h6", + input: "====== Header =", + output: "<h6>Header</h6>" + }, + { // Test above h6 (should be ignored) + name: ">h6", + input: "======= Header =", + output: "<p>======= Header =</p>" + }, + { // Test tables + name: "Tables", + input: "| A | B |\n| //C// | **D** \\\\ E |", + output: "<table><tr><td> A </td><td> B </td></tr>" + + "<tr><td> <em>C</em> </td>" + + "<td> <strong>D</strong> <br /> E </td></tr></table>" + }, + { // Test raw URL + name: "Raw URL", + input: "http://example.com/examplepage", + output: "<p><a href=\"http://example.com/examplepage\">" + + "http://example.com/examplepage</a></p>" + }, + { // Test unnamed URL + name: "Unnamed URL", + input: "[[http://example.com/examplepage]]", + output: "<p><a href=\"http://example.com/examplepage\">" + + "http://example.com/examplepage</a></p>" + }, + { // Test named URL + name: "Named URL", + input: "[[http://example.com/examplepage|Example Page]]", + output: "<p>" + + "<a href=\"http://example.com/examplepage\">Example Page</a></p>" + }, + { // Test unnamed link + name: "Unnamed link", + input: "[[MyPage]]", + output: "<p><a href=\"MyPage\">MyPage</a></p>" + }, + { // Test named link + name: "Named link", + input: "[[MyPage|My page]]", + output: "<p><a href=\"MyPage\">My page</a></p>" + }, + { // Test images + name: "Image", + input: "{{image.gif|my image}}", + output: "<p><img src=\"image.gif\" alt=\"my image\"/></p>" + }, + { // Test inline tt + name: "Inline tt", + input: "Inline {{{tt}}} example {{{here}}}!", + output: "<p>Inline <tt>tt</tt> example <tt>here</tt>!</p>" + }, + { // Test **strong** + name: "Strong", + input: "**Strong**", + output: "<p><strong>Strong</strong></p>" + }, + { // Test //emphasis// + name: "Emphasis", + input: "//Emphasis//", + output: "<p><em>Emphasis</em></p>" + }, + + //// WikiCreole tests + { // Tests multi-line emphasis behaviour + name: "Multi-line emphasis", + input: "Bold and italics should //be\nable// to cross lines.\n\n" + + "But, should //not be...\n\n...able// to cross paragraphs.", + output: "<p>Bold and italics should <em>be\nable</em> to cross lines." + + "\n</p>" + "<p>\nBut, should //not be...\n</p>" + + "<p>\n...able// to cross paragraphs.</p>" + }, + { // Tests URL/emphasis ambiguity handling + name: "URL/emphasis ambiguity", + input: "This is an //italic// text. This is a url: " + + "http://www.wikicreole.org. This is what can go wrong://this " + + "should be an italic text//.", + output: "<p>This is an <em>italic</em> text. This is a url: " + + "<a href=\"http://www.wikicreole.org\">" + + "http://www.wikicreole.org</a>. This is what can go wrong:" + + "<em>this should be an italic text</em>.</p>" + }, + + //// Awkward emphasis edge cases + { + name: "Difficult emphasis #1", + input: "// http://www.link.org //", + output: "<p><em> <a href=\"http://www.link.org\">" + + "http://www.link.org</a> </em></p>" + }, + { + name: "Difficult emphasis #2", + input: "// http //", + output: "<p><em> http </em></p>" + }, + { + name: "Difficult emphasis #3", + input: "// httphpthtpht //", + output: "<p><em> httphpthtpht </em></p>" + }, + { + name: "Difficult emphasis #4", + input: "// http: //", + output: "<p><em> http: </em></p>" + }, + { + name: "Difficult emphasis #5", + input: "// http://", + output: "<p>// <a href=\"http://\">http://</a></p>" + }, + { + name: "Difficult emphasis #6", + input: "// http:////", + output: "<p><em> <a href=\"http://\">http://</a></em></p>" + }, + { + name: "Difficult emphasis #7", + input: "//httphpthtphtt//", + output: "<p><em>httphpthtphtt</em></p>" + }, + { + name: "Difficult emphasis #8", + input: "//http://link.org//", + output: "<p><em><a href=\"http://link.org\">" + + "http://link.org</a></em></p>" + }, + { + name: "Difficult emphasis #9", + input: "// ftp://www.link.org //", + output: "<p><em> <a href=\"ftp://www.link.org\">" + + "ftp://www.link.org</a> </em></p>" + }, + { + name: "Difficult emphasis #10", + input: "// ftp //", + output: "<p><em> ftp </em></p>" + }, + { + name: "Difficult emphasis #11", + input: "// fttpfptftpft //", + output: "<p><em> fttpfptftpft </em></p>" + }, + { + name: "Difficult emphasis #12", + input: "// ftp: //", + output: "<p><em> ftp: </em></p>" + }, + { + name: "Difficult emphasis #13", + input: "// ftp://", + output: "<p>// <a href=\"ftp://\">ftp://</a></p>" + }, + { + name: "Difficult emphasis #14", + input: "// ftp:////", + output: "<p><em> <a href=\"ftp://\">ftp://</a></em></p>" + }, + { + name: "Difficult emphasis #15", + input: "//fttpfptftpftt//", + output: "<p><em>fttpfptftpftt</em></p>" + }, + { + name: "Difficult emphasis #16", + input: "//ftp://link.org//", + output: "<p><em><a href=\"ftp://link.org\">" + + "ftp://link.org</a></em></p>" + } +]; + + //// Install the renderer ////////////////////////////////////////////// + function updateRender() { + $("Html").innerHTML = toXHTML($("Text").value); + } + function installRenderer() { + element = $("Text"); + element.onkeyup = element.onkeypress = element.ondrop = + element.onchange = updateRender; + updateRender(); + $("UnitTest").innerHTML = "<h1>Unit Testing</h1>" + + toXHTML.UnitTest(); + } + window.onload = installRenderer; +// ]]> + </script> + </head> + <body> + <div id="Main"> + <form action=""> + <p><textarea name="Text" id="Text" class="WikiText" rows="20" cols="50"> += Creole + +{{http://www.wikicreole.org/attach/LeftMenu/viki.png|Creole}}\\ +Creole is a common wiki markup language intended to be used across many different wikis. Its aim is not to replace existing markup, but instead to enable wiki users to transfer basic content seamlessly across wikis, and lower the barrier to entry for novice users. + += Regular Language–Recursive Descent Parser + +This text has been formatted using a //regular language–recursive descent// (RLRD) parser design. That is, rules are applied in a recursive descent that matches the ultimate XML output; at each level of the descent, a set of regular expressions define the text that each child rule can "consume". Rules are applied greedily (i.e. earliest-match first). Unlike standard markup designs, this makes edge-cases between rules explicit, and allows a parser to be certified XHTML-compliant. + +The parser is written in Javascript, allowing greater flexibility in the deployment of the parser. The underlying RLRD design can be implemented in any language. + += Live Preview + +This document demonstrates a live Javascript preview, using this RLRD renderer. Editing the above text area will change this text. + +The markup follows the basic rules of [[http://www.wikicreole.org/wiki/Creole0.4|Creole v0.4]].</textarea></p> + </form> + <div id="Html" class="RenderedText">Javascript disabled</div> + </div> + <div id="UnitTest"></div> + </body> +</html> |