summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/creole.c 119
-rw-r--r-- src/creole_test_main.c 30
2 files changed, 139 insertions, 10 deletions
diff --git a/src/creole.c b/src/creole.c
index 2a16205..c8847bb 100644
--- a/src/creole.c
+++ b/src/creole.c
@@ -2,6 +2,7 @@
#include <assert.h>
#include <ctype.h>
+#include <regex.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
@@ -22,6 +23,7 @@ long do_emphasis(const char *begin, const char *end, bool new_block, FILE *out);
long do_bold(const char *begin, const char *end, bool new_block, FILE *out);
long do_nowiki_inline(const char *begin, const char *end, bool new_block, FILE *out);
long do_nowiki_block(const char *begin, const char *end, bool new_block, FILE *out);
+long do_list(const char *begin, const char *end, bool new_block, FILE *out);
// Prints string with special HTML characters escaped.
//
@@ -54,6 +56,27 @@ bool starts_with(const char *haystack_begin, const char *haystack_end, const cha
}
}
+const char *find_char(const char *haystack_begin, const char *haystack_end, char needle) {
+    const char *cursor = haystack_begin; // walks [haystack_begin, haystack_end) left to right
+    while (cursor < haystack_end && *cursor != needle) {
+        ++cursor;
+    }
+
+    // A cursor still inside the range is the first match; otherwise report "not found" as haystack_end.
+    return (cursor < haystack_end) ? cursor : haystack_end;
+}
+
+bool contains_only_spaces(const char *begin, const char *end) { // true iff every char in [begin, end) is whitespace; true for an empty range
+    assert(begin <= end);
+    // <ctype.h> classifiers need an unsigned char value (or EOF); passing a negative plain char is undefined behavior.
+    for (const char *p = begin; p < end; ++p) {
+        if (!isspace((unsigned char)*p)) {
+            return false;
+        }
+    }
+
+    return true;
+}
// A parser takes a (sub)string and returns the number of characters consumed, if any.
//
@@ -65,6 +88,7 @@ static parser_t parsers[] = {
// Block-level elements
do_headers,
do_nowiki_block,
+ do_list,
do_paragraph, // <p> should be last as it eats anything
// Inline-level elements
@@ -391,9 +415,104 @@ long do_nowiki_block(const char *begin, const char *end, bool new_block, FILE *o
return -(stop - start + 8);
}
+// Parses a bulleted ("* ") or numbered ("# ") list at the start of a block and writes it to out as nested <ul>/<ol> with one <li> per item. Returns 0 if no list starts here, otherwise -(item_end - begin), the negated length of the text consumed.
+// TODO: We still do not handle mixing ol/ul in nested lists. See: http://www.wikicreole.org/wiki/Lists#section-Lists-Mixing
+long do_list(const char *begin, const char *end, bool new_block, FILE *out) {
+    // FIXME: Some sample documents allow a list to start without being
+    // separated from the above text by \n\n. In order to allow that, we
+    // would need to know if the current marker is at the start of a line.
+    if (!new_block) {
+        return 0;
+    }
+
+    const char *begin_stripped = begin;
+    while (begin_stripped < end && (*begin_stripped == ' ' || *begin_stripped == '\t')) { // bounded: never read at or past end
+        begin_stripped++;
+    }
+
+    char marker;
+    if (starts_with(begin_stripped, end, "* ")) {
+        fputs("<ul>", out);
+        marker = '*';
+    } else if (starts_with(begin_stripped, end, "# ")) {
+        fputs("<ol>", out);
+        marker = '#';
+    } else {
+        return 0;
+    }
+
+    bool more_items = true;
+    unsigned current_level = 1;
+    const char *item_begin = begin_stripped, *item_end;
+    while (more_items) {
+        // At this point in the code, item_begin should point to the
+        // first marker that starts a new list item. We will start by reading the depth (number of consecutive markers).
+        unsigned level = 0;
+        while (item_begin + 1 < end && *item_begin == marker) { // check the bound before dereferencing
+            item_begin++;
+            level++;
+        }
+
+        if (level > current_level) {
+            while (level > current_level) {
+                fputs((marker == '*') ? "<ul>" : "<ol>", out);
+                current_level += 1;
+            }
+        } else if (level < current_level) {
+            while (level < current_level) {
+                fputs((marker == '*') ? "</ul>" : "</ol>", out);
+                current_level -= 1;
+            }
+        }
+
+        // This part essentially emulates the regular expression /\n\n|\n[ \t]*<marker>|$/, where <marker> is '*' or '#'.
+        item_end = item_begin;
+        while (true) {
+            if (starts_with(item_end, end, "\n\n")) {
+                more_items = false;
+                break;
+            } else if (item_end == end) {
+                more_items = false;
+                break;
+            } else if (item_end < end && *item_end == '\n') {
+                const char *q = item_end + 1;
+                while (q < end && (*q == ' ' || *q == '\t'))
+                    q += 1;
+
+                if (q < end && *q == marker) {
+                    // Include the final newline in the output; will be eaten by special case in process().
+                    item_end = q;
+                    break;
+                }
+            }
+
+            item_end++;
+        }
+
+        // Note how we don't close the <li> tag! We can avoid some
+        // tricky logic by using the fact that the </li> end tag may be omitted.
+        //
+        // See: https://html.spec.whatwg.org/#syntax-tag-omission
+        // See: https://html.spec.whatwg.org/#the-li-element
+        fputs("<li>", out);
+        process(item_begin, item_end, false, out);
+
+        item_begin = item_end;
+    }
+
+    while (current_level > 0) {
+        fputs((marker == '*') ? "</ul>" : "</ol>", out);
+        current_level -= 1;
+    }
+
+    return -(item_end - begin);
+}
+
void process(const char *begin, const char *end, bool new_block, FILE *out) {
assert(begin <= end);
+ // DEBUG("Processing: %.*s\n", (int)(end - begin), begin);
+
const char *p = begin;
while (p < end) {
// Eat all newlines if we're starting a block.
diff --git a/src/creole_test_main.c b/src/creole_test_main.c
index 5be4499..7bb7816 100644
--- a/src/creole_test_main.c
+++ b/src/creole_test_main.c
@@ -252,25 +252,34 @@ struct {
},
{ // Spec: In preformatted blocks, since markers must not be preceded by leading spaces, lines with three closing braces
// which belong to the preformatted block must follow at least one space. In the rendered output, one leading space is removed.
- .name = "",
+ .name = "Whitespace before }}} stripped",
.input = "{{{\nif (x != NULL) {\n for (i = 0; i < size; i++) {\n if (x[i] > 0) {\n x[i]--;\n }}}\n}}}\n",
.output = "<pre><code>if (x != NULL) {\n for (i = 0; i &lt; size; i++) {\n if (x[i] &gt; 0) {\n x[i]--;\n }}}</code></pre>",
},
-#if 0
{
.name = "Simple unordered list",
.input = "* list item\n*list item 2",
- .output = "<ul><li> list item</li>\n<li>list item 2</li></ul>"
+ .output = "<ul><li> list item<li>list item 2</ul>"
},
{
.name = "Simple ordered list",
.input = "# list item\n#list item 2",
- .output = "<ol><li> list item</li>\n<li>list item 2</li></ol>"
+ .output = "<ol><li> list item<li>list item 2</ol>"
},
{
.name = "Unordered item with unordered sublist",
.input = "* Item\n** Subitem",
- .output = "<ul><li> Item<ul>\n<li> Subitem</li></ul></li></ul>"
+ .output = "<ul><li> Item<ul><li> Subitem</ul></ul>"
+ },
+ {
+ .name = "Unwindling deeply nested list",
+ .input = "* A\n** B\n*** C\n**** D\n***** E",
+ .output = "<ul><li> A<ul><li> B<ul><li> C<ul><li> D<ul><li> E</ul></ul></ul></ul></ul>"
+ },
+ {
+ .name = "Leading spaces ignored in lists",
+ .input = " * Item 1\n * Item 2\n ** Item 2.1\n ** Item 2.2\n",
+ .output = "<ul><li> Item 1\n <li> Item 2\n <ul><li> Item 2.1\n <li> Item 2.2</ul></ul>"
},
{
.name = "Unordered sublist without initial tag",
@@ -278,15 +287,16 @@ struct {
.output = "<p>** Sublist item</p>"
},
{
- .name = "Ordered item with ordered sublist",
- .input = "# Item\n## Subitem",
- .output = "<ol><li> Item<ol>\n<li> Subitem</li></ol></li></ol>"
- },
- {
.name = "Ordered sublist without initial tag",
.input = "## Sublist item",
.output = "<p>## Sublist item</p>"
},
+#if 0
+ {
+ .name = "Ordered item with ordered sublist",
+ .input = "# Item\n## Subitem",
+ .output = "<ol><li> Item<ol>\n<li> Subitem</li></ol></li></ol>"
+ },
{
.name = "Unordered item with ordered sublist",
.input = "* Item\n*# Subitem",