2 files changed, 139 insertions, 10 deletions
diff --git a/src/creole.c b/src/creole.c
index 2a16205..c8847bb 100644
--- a/src/creole.c
+++ b/src/creole.c
@@ -2,6 +2,7 @@
 
 #include <assert.h>
 #include <ctype.h>
+#include <regex.h>
 #include <stdarg.h>
 #include <stdbool.h>
 #include <stdio.h>
@@ -22,6 +23,7 @@ long do_emphasis(const char *begin, const char *end, bool new_block, FILE *out);
 long do_bold(const char *begin, const char *end, bool new_block, FILE *out);
 long do_nowiki_inline(const char *begin, const char *end, bool new_block, FILE *out);
 long do_nowiki_block(const char *begin, const char *end, bool new_block, FILE *out);
+long do_list(const char *begin, const char *end, bool new_block, FILE *out);
 
 // Prints string with special HTML characters escaped.
 //
@@ -54,6 +56,27 @@ bool starts_with(const char *haystack_begin, const char *haystack_end, const cha
 	}
 }
 
+const char *find_char(const char *haystack_begin, const char *haystack_end, char needle) { // Scans [haystack_begin, haystack_end) for the first char equal to needle.
+	for (const char *p = haystack_begin; p < haystack_end; ++p) {
+		if (*p == needle) {
+			return p; // pointer to the first match
+		}
+	}
+
+	return haystack_end; // no match (or empty range): the end pointer doubles as the not-found result
+}
+
+bool contains_only_spaces(const char *begin, const char *end) { // True when every char in [begin, end) satisfies isspace(); an empty range yields true.
+	assert(begin <= end); // the range must not be inverted
+
+	for (const char *p = begin; p < end; ++p) {
+		if (!isspace(*p)) { // NOTE(review): *p is a plain char; ctype functions expect an unsigned char value or EOF, so a cast may be needed for bytes >= 0x80
+			return false;
+		}
+	}
+
+	return true;
+}
 
 // A parser takes a (sub)string and returns the number of characters consumed, if any.
 //
@@ -65,6 +88,7 @@ static parser_t parsers[] = {
 	// Block-level elements
 	do_headers,
 	do_nowiki_block,
+	do_list,
 	do_paragraph, // <p> should be last as it eats anything
 
 	// Inline-level elements
@@ -391,9 +415,104 @@ long do_nowiki_block(const char *begin, const char *end, bool new_block, FILE *o
 	return -(stop - start + 8);
 }
 
+// Parses a Creole list (marker * for unordered, # for ordered) and writes it to out as nested <ul>/<ol> elements with <li> items.
+// TODO: We still do not handle mixing ol/ul in nested lists. See: http://www.wikicreole.org/wiki/Lists#section-Lists-Mixing
+long do_list(const char *begin, const char *end, bool new_block, FILE *out) { // Returns 0 when no list starts here, otherwise the negated number of chars consumed.
+	// FIXME: Some sample documents allow a list to start without being
+	// separated from the above text by \n\n. In order to allow that, we
+	// would need to know if the current * is at the start of a line.
+	if (!new_block) {
+		return 0;
+	}
+
+	const char *begin_stripped = begin;
+	while (*begin_stripped == ' ' || *begin_stripped == '\t') { // NOTE(review): not bounded by end; relies on a non-blank char (e.g. a terminator) stopping the scan
+		begin_stripped++;
+	}
+
+	char marker;
+	if (starts_with(begin_stripped, end, "* ")) {
+		fputs("<ul>", out);
+		marker = '*';
+	} else if (starts_with(begin_stripped, end, "# ")) {
+		fputs("<ol>", out);
+		marker = '#';
+	} else {
+		return 0;
+	}
+
+	bool more_items = true;
+	unsigned current_level = 1; // the outermost list element was already opened above
+	const char *item_begin = begin_stripped, *item_end;
+	while (more_items) {
+		// At this point in the code, item_begin should point to the first marker
+		// character (* or #) that starts a new list item. We start by reading the depth.
+		unsigned level = 0;
+		while (*item_begin == marker && item_begin + 1 < end) { // NOTE(review): *item_begin is read before the bound is checked
+			item_begin++;
+			level++;
+		}
+
+		if (level > current_level) { // deeper than before: open one nested list per extra level
+			while (level > current_level) {
+				fputs((marker == '*') ? "<ul>" : "<ol>", out);
+				current_level += 1;
+			}
+		} else if (level < current_level){ // shallower than before: close lists until the levels match
+			while (level < current_level) {
+				fputs((marker == '*') ? "</ul>" : "</ol>", out);
+				current_level -= 1;
+			}
+		}
+
+		// This part essentially emulates the regular expression /\n\n|\n[ \t]*MARKER|$/, where MARKER is the list's own marker char.
+		item_end = item_begin;
+		while (true) {
+			if (starts_with(item_end, end, "\n\n")) { // blank line: the list ends here
+				more_items = false;
+				break;
+			} else if (item_end == end) { // end of input: the list ends here
+				more_items = false;
+				break;
+			} else if (item_end < end && *item_end == '\n') {
+				const char *q = item_end + 1;
+				while (q < end && (*q == ' ' || *q == '\t'))
+					q += 1;
+
+				if (q < end && *q == marker) {
+					// The item runs up to the next marker, so it includes the newline and any indentation; the final newline is said to be eaten by a special case in process().
+					item_end = q;
+					break;
+				}
+			}
+
+			item_end++;
+		}
+
+		// Note how we don't close the <li> tag! We can avoid some
+		// tricky logic by using the fact that the </li> end tag is optional in HTML.
+		//
+		// See: https://html.spec.whatwg.org/#syntax-tag-omission
+		// See: https://html.spec.whatwg.org/#the-li-element
+		fputs("<li>", out);
+		process(item_begin, item_end, false, out);
+
+		item_begin = item_end;
+	}
+
+	while (current_level > 0) { // close every list element that is still open
+		fputs((marker == '*') ? "</ul>" : "</ol>", out);
+		current_level -= 1;
+	}
+
+	return -(item_end - begin); // negated like do_nowiki_block's result; presumably the sign tells process() something - confirm against process()
+}
+
 void process(const char *begin, const char *end, bool new_block, FILE *out) {
 	assert(begin <= end);
 
+	// DEBUG("Processing: %.*s\n", (int)(end - begin), begin);
+
 	const char *p = begin;
 	while (p < end) {
 		// Eat all newlines if we're starting a block.
diff --git a/src/creole_test_main.c b/src/creole_test_main.c
index 5be4499..7bb7816 100644
--- a/src/creole_test_main.c
+++ b/src/creole_test_main.c
@@ -252,25 +252,34 @@ struct {
 	},
 	{ // Spec: In preformatted blocks, since markers must not be preceded by leading spaces, lines with three closing braces
 	  // which belong to the preformatted block must follow at least one space. In the rendered output, one leading space is removed.
-		.name    =  "",
+		.name    =  "Whitespace before }}} stripped",
 		.input   =  "{{{\nif (x != NULL) {\n  for (i = 0; i < size; i++) {\n    if (x[i] > 0) {\n      x[i]--;\n  }}}\n}}}\n",
 		.output  =  "<pre><code>if (x != NULL) {\n  for (i = 0; i &lt; size; i++) {\n    if (x[i] &gt; 0) {\n      x[i]--;\n  }}}</code></pre>",
 	},
-#if 0
 	{
 		.name    =  "Simple unordered list",
 		.input   =  "* list item\n*list item 2",
-		.output  =  "<ul><li> list item</li>\n<li>list item 2</li></ul>"
+		.output  =  "<ul><li> list item<li>list item 2</ul>"
 	},
 	{
 		.name    =  "Simple ordered list",
 		.input   =  "# list item\n#list item 2",
-		.output  =  "<ol><li> list item</li>\n<li>list item 2</li></ol>"
+		.output  =  "<ol><li> list item<li>list item 2</ol>"
 	},
 	{
 		.name    =  "Unordered item with unordered sublist",
 		.input   =  "* Item\n** Subitem",
-		.output  =  "<ul><li> Item<ul>\n<li> Subitem</li></ul></li></ul>"
+		.output  =  "<ul><li> Item<ul><li> Subitem</ul></ul>"
+	},
+	{
+		.name    =  "Unwindling deeply nested list",
+		.input   =  "* A\n** B\n*** C\n**** D\n***** E",
+		.output  =  "<ul><li> A<ul><li> B<ul><li> C<ul><li> D<ul><li> E</ul></ul></ul></ul></ul>"
+	},
+	{
+		.name    =  "Leading spaces ignored in lists",
+		.input   =  "  * Item 1\n  * Item 2\n    **  Item 2.1\n     ** Item 2.2\n",
+		.output  =  "<ul><li> Item 1\n  <li> Item 2\n    <ul><li>  Item 2.1\n     <li> Item 2.2</ul></ul>"
 	},
 	{
 		.name    =  "Unordered sublist without initial tag",
@@ -278,15 +287,16 @@ struct {
 		.output  =  "<p>** Sublist item</p>"
 	},
 	{
-		.name    =  "Ordered item with ordered sublist",
-		.input   =  "# Item\n## Subitem",
-		.output  =  "<ol><li> Item<ol>\n<li> Subitem</li></ol></li></ol>"
-	},
-	{
 		.name    =  "Ordered sublist without initial tag",
 		.input   =  "## Sublist item",
 		.output  =  "<p>## Sublist item</p>"
 	},
+#if 0
+	{
+		.name    =  "Ordered item with ordered sublist",
+		.input   =  "# Item\n## Subitem",
+		.output  =  "<ol><li> Item<ol>\n<li> Subitem</li></ol></li></ol>"
+	},
 	{
 		.name    =  "Unordered item with ordered sublist",
 		.input   =  "* Item\n*# Subitem",