diff options
-rw-r--r-- | references/creole1.0test.txt | 128 | ||||
-rw-r--r-- | src/creole.c | 119 | ||||
-rw-r--r-- | src/creole_test_main.c | 30 |
3 files changed, 267 insertions, 10 deletions
diff --git a/references/creole1.0test.txt b/references/creole1.0test.txt new file mode 100644 index 0000000..732256b --- /dev/null +++ b/references/creole1.0test.txt @@ -0,0 +1,128 @@ += Top-level heading (1) +== This a test for creole 0.1 (2) +=== This is a Subheading (3) +==== Subsub (4) +===== Subsubsub (5) + +The ending equal signs should not be displayed: + += Top-level heading (1) = +== This a test for creole 0.1 (2) == +=== This is a Subheading (3) === +==== Subsub (4) ==== +===== Subsubsub (5) ===== + + +You can make things **bold** or //italic// or **//both//** or //**both**//. + +Character formatting extends across line breaks: **bold, +this is still bold. This line deliberately does not end in star-star. + +Not bold. Character formatting does not cross paragraph boundaries. + +You can use [[internal links]] or [[http://www.wikicreole.org|external links]], +give the link a [[internal links|different]] name. + +Here's another sentence: This wisdom is taken from [[Ward Cunningham's]] +[[http://www.c2.com/doc/wikisym/WikiSym2006.pdf|Presentation at the Wikisym 06]]. + +Here's a external link without a description: [[http://www.wikicreole.org]] + +Be careful that italic links are rendered properly: //[[http://my.book.example/|My Book Title]]// + +Free links without braces should be rendered as well, like http://www.wikicreole.org/ and http://www.wikicreole.org/users/~example. + +Creole1.0 specifies that http://bar and ftp://bar should not render italic, +something like foo://bar should render as italic. + +You can use this to draw a line to separate the page: +---- + +You can use lists, start it at the first column for now, please... 
+ +unnumbered lists are like +* item a +* item b +* **bold item c** + +blank space is also permitted before lists like: + * item a + * item b +* item c + ** item c.a + +or you can number them +# [[item 1]] +# item 2 +# // italic item 3 // + ## item 3.1 + ## item 3.2 + +up to five levels +* 1 +** 2 +*** 3 +**** 4 +***** 5 + +* You can have +multiline list items +* this is a second multiline +list item + +You can use nowiki syntax if you would like do stuff like this: + +{{{ +Guitar Chord C: + +||---|---|---| +||-0-|---|---| +||---|---|---| +||---|-0-|---| +||---|---|-0-| +||---|---|---| +}}} + +You can also use it inline nowiki {{{ in a sentence }}} like this. + += Escapes = +Normal Link: http://wikicreole.org/ - now same link, but escaped: ~http://wikicreole.org/ + +Normal asterisks: ~**not bold~** + +a tilde alone: ~ + +a tilde escapes itself: ~~xxx + +=== Creole 0.2 === + +This should be a flower with the ALT text "this is a flower" if your wiki supports ALT text on images: + +{{Red-Flower.jpg|here is a red flower}} + +=== Creole 0.4 === + +Tables are done like this: + +|=header col1|=header col2| +|col1|col2| +|you |can | +|also |align\\ it. | + +You can format an address by simply forcing linebreaks: + +My contact dates:\\ +Pone: xyz\\ +Fax: +45\\ +Mobile: abc + +=== Creole 0.5 === + +|= Header title |= Another header title | +| {{{ //not italic text// }}} | {{{ **not bold text** }}} | +| //italic text// | ** bold text ** | + +=== Creole 1.0 === + +If interwiki links are setup in your wiki, this links to the WikiCreole page about Creole 1.0 test cases: [[WikiCreole:Creole1.0TestCases]]. 
+ diff --git a/src/creole.c b/src/creole.c index 2a16205..c8847bb 100644 --- a/src/creole.c +++ b/src/creole.c @@ -2,6 +2,7 @@ #include <assert.h> #include <ctype.h> +#include <regex.h> #include <stdarg.h> #include <stdbool.h> #include <stdio.h> @@ -22,6 +23,7 @@ long do_emphasis(const char *begin, const char *end, bool new_block, FILE *out); long do_bold(const char *begin, const char *end, bool new_block, FILE *out); long do_nowiki_inline(const char *begin, const char *end, bool new_block, FILE *out); long do_nowiki_block(const char *begin, const char *end, bool new_block, FILE *out); +long do_list(const char *begin, const char *end, bool new_block, FILE *out); // Prints string with special HTML characters escaped. // @@ -54,6 +56,27 @@ bool starts_with(const char *haystack_begin, const char *haystack_end, const cha } } +const char *find_char(const char *haystack_begin, const char *haystack_end, char needle) { + for (const char *p = haystack_begin; p < haystack_end; ++p) { + if (*p == needle) { + return p; + } + } + + return haystack_end; +} + +bool contains_only_spaces(const char *begin, const char *end) { + assert(begin <= end); + + for (const char *p = begin; p < end; ++p) { + if (!isspace(*p)) { + return false; + } + } + + return true; +} // A parser takes a (sub)string and returns the number of characters consumed, if any. // @@ -65,6 +88,7 @@ static parser_t parsers[] = { // Block-level elements do_headers, do_nowiki_block, + do_list, do_paragraph, // <p> should be last as it eats anything // Inline-level elements @@ -391,9 +415,104 @@ long do_nowiki_block(const char *begin, const char *end, bool new_block, FILE *o return -(stop - start + 8); } +// TODO: We still do not handle mixing ol/ul in nested lists. 
+// See: http://www.wikicreole.org/wiki/Lists#section-Lists-Mixing +long do_list(const char *begin, const char *end, bool new_block, FILE *out) { + // FIXME: Some sample documents allow a list to start without being + // separated from the above text by \n\n. In order to allow that, we + // would need to know if the current * is at the start of a line. + if (!new_block) { + return 0; + } + + const char *begin_stripped = begin; + while (*begin_stripped == ' ' || *begin_stripped == '\t') { + begin_stripped++; + } + + char marker; + if (starts_with(begin_stripped, end, "* ")) { + fputs("<ul>", out); + marker = '*'; + } else if (starts_with(begin_stripped, end, "# ")) { + fputs("<ol>", out); + marker = '#'; + } else { + return 0; + } + + bool more_items = true; + unsigned current_level = 1; + const char *item_begin = begin_stripped, *item_end; + while (more_items) { + // At this point in the code, item_begin should point to the + // first marker character that marks the start of a new list item. We will start by reading the depth. + unsigned level = 0; + while (*item_begin == marker && item_begin + 1 < end) { + item_begin++; + level++; + } + + if (level > current_level) { + while (level > current_level) { + fputs((marker == '*') ? "<ul>" : "<ol>", out); + current_level += 1; + } + } else if (level < current_level){ + while (level < current_level) { + fputs((marker == '*') ? "</ul>" : "</ol>", out); + current_level -= 1; + } + } + + // This part essentially emulates the regular expression /\n\n|\n[ \t]*\*|$/. + item_end = item_begin; + while (true) { + if (starts_with(item_end, end, "\n\n")) { + more_items = false; + break; + } else if (item_end == end) { + more_items = false; + break; + } else if (item_end < end && *item_end == '\n') { + const char *q = item_end + 1; + while (q < end && (*q == ' ' || *q == '\t')) + q += 1; + + if (q < end && *q == marker) { + // Include the final newline in the output; will be eaten by special case in process(). 
+ item_end = q; + break; + } + } + + item_end++; + } + + // Note how we don't close the <li> tag! We can avoid some + // tricky logic by using the fact that the </li> end tag may be omitted. + // + // See: https://html.spec.whatwg.org/#syntax-tag-omission + // See: https://html.spec.whatwg.org/#the-li-element + fputs("<li>", out); + process(item_begin, item_end, false, out); + + item_begin = item_end; + } + + while (current_level > 0) { + fputs((marker == '*') ? "</ul>" : "</ol>", out); + current_level -= 1; + } + + return -(item_end - begin); +} + void process(const char *begin, const char *end, bool new_block, FILE *out) { assert(begin <= end); + // DEBUG("Processing: %.*s\n", (int)(end - begin), begin); + const char *p = begin; while (p < end) { // Eat all newlines if we're starting a block. diff --git a/src/creole_test_main.c b/src/creole_test_main.c index 5be4499..7bb7816 100644 --- a/src/creole_test_main.c +++ b/src/creole_test_main.c @@ -252,25 +252,34 @@ struct { }, { // Spec: In preformatted blocks, since markers must not be preceded by leading spaces, lines with three closing braces // which belong to the preformatted block must follow at least one space. In the rendered output, one leading space is removed. 
- .name = "", + .name = "Whitespace before }}} stripped", .input = "{{{\nif (x != NULL) {\n for (i = 0; i < size; i++) {\n if (x[i] > 0) {\n x[i]--;\n }}}\n}}}\n", .output = "<pre><code>if (x != NULL) {\n for (i = 0; i < size; i++) {\n if (x[i] > 0) {\n x[i]--;\n }}}</code></pre>", }, -#if 0 { .name = "Simple unordered list", .input = "* list item\n*list item 2", - .output = "<ul><li> list item</li>\n<li>list item 2</li></ul>" + .output = "<ul><li> list item<li>list item 2</ul>" }, { .name = "Simple ordered list", .input = "# list item\n#list item 2", - .output = "<ol><li> list item</li>\n<li>list item 2</li></ol>" + .output = "<ol><li> list item<li>list item 2</ol>" }, { .name = "Unordered item with unordered sublist", .input = "* Item\n** Subitem", - .output = "<ul><li> Item<ul>\n<li> Subitem</li></ul></li></ul>" + .output = "<ul><li> Item<ul><li> Subitem</ul></ul>" + }, + { + .name = "Unwindling deeply nested list", + .input = "* A\n** B\n*** C\n**** D\n***** E", + .output = "<ul><li> A<ul><li> B<ul><li> C<ul><li> D<ul><li> E</ul></ul></ul></ul></ul>" + }, + { + .name = "Leading spaces ignored in lists", + .input = " * Item 1\n * Item 2\n ** Item 2.1\n ** Item 2.2\n", + .output = "<ul><li> Item 1\n <li> Item 2\n <ul><li> Item 2.1\n <li> Item 2.2</ul></ul>" }, { .name = "Unordered sublist without initial tag", @@ -278,15 +287,16 @@ struct { .output = "<p>** Sublist item</p>" }, { - .name = "Ordered item with ordered sublist", - .input = "# Item\n## Subitem", - .output = "<ol><li> Item<ol>\n<li> Subitem</li></ol></li></ol>" - }, - { .name = "Ordered sublist without initial tag", .input = "## Sublist item", .output = "<p>## Sublist item</p>" }, +#if 0 + { + .name = "Ordered item with ordered sublist", + .input = "# Item\n## Subitem", + .output = "<ol><li> Item<ol>\n<li> Subitem</li></ol></li></ol>" + }, { .name = "Unordered item with ordered sublist", .input = "* Item\n*# Subitem", |