1 files changed, 119 insertions, 0 deletions
diff --git a/src/creole.c b/src/creole.c
index 2a16205..c8847bb 100644
--- a/src/creole.c
+++ b/src/creole.c
@@ -2,6 +2,7 @@
 
 #include <assert.h>
 #include <ctype.h>
+#include <regex.h>
 #include <stdarg.h>
 #include <stdbool.h>
 #include <stdio.h>
@@ -22,6 +23,7 @@ long do_emphasis(const char *begin, const char *end, bool new_block, FILE *out);
 long do_bold(const char *begin, const char *end, bool new_block, FILE *out);
 long do_nowiki_inline(const char *begin, const char *end, bool new_block, FILE *out);
 long do_nowiki_block(const char *begin, const char *end, bool new_block, FILE *out);
+long do_list(const char *begin, const char *end, bool new_block, FILE *out);
 
 // Prints string with special HTML characters escaped.
 //
@@ -54,6 +56,27 @@ bool starts_with(const char *haystack_begin, const char *haystack_end, const cha
 	}
 }
 
+// Returns a pointer to the first needle in [haystack_begin, haystack_end), or haystack_end if none.
+const char *find_char(const char *haystack_begin, const char *haystack_end, char needle) {
+	const char *cursor = haystack_begin;
+	while (cursor < haystack_end && *cursor != needle) {
+		cursor++;
+	}
+	// Fall back to haystack_end when nothing matched (also covers an inverted range).
+	return (cursor < haystack_end) ? cursor : haystack_end;
+}
+
+// Returns true iff every byte in [begin, end) is whitespace per isspace(); true for an empty range.
+bool contains_only_spaces(const char *begin, const char *end) {
+	assert(begin <= end);
+	for (const char *p = begin; p < end; ++p) {
+		// Cast first: isspace() on a negative plain char (byte >= 0x80) is undefined.
+		if (!isspace((unsigned char)*p)) {
+			return false;
+		}
+	}
+	return true;
+}
 
 // A parser takes a (sub)string and returns the number of characters consumed, if any.
 //
@@ -65,6 +88,7 @@ static parser_t parsers[] = {
 	// Block-level elements
 	do_headers,
 	do_nowiki_block,
+	do_list,
 	do_paragraph, // <p> should be last as it eats anything
 
 	// Inline-level elements
@@ -391,9 +415,104 @@ long do_nowiki_block(const char *begin, const char *end, bool new_block, FILE *o
 	return -(stop - start + 8);
 }
 
+// Parses a Creole list: items start with '*' (<ul>) or '#' (<ol>); the number of
+// repeated markers is the item's nesting depth. Matches only when new_block is set and
+// [begin, end) starts, after optional spaces/tabs, with "* " or "# ".
+// Returns 0 (printing nothing) on no match, else the negated count of characters consumed.
+//
+// TODO: We still do not handle mixing ol/ul in nested lists.
+//       See: http://www.wikicreole.org/wiki/Lists#section-Lists-Mixing
+long do_list(const char *begin, const char *end, bool new_block, FILE *out) {
+	// FIXME: Some sample documents allow a list to start without being
+	// separated from the above text by \n\n. In order to allow that, we
+	// would need to know if the current * is at the start of a line.
+	if (!new_block) {
+		return 0;
+	}
+
+	// Stay inside [begin, end): the input is a substring and need not be terminated at end.
+	const char *begin_stripped = begin;
+	while (begin_stripped < end && (*begin_stripped == ' ' || *begin_stripped == '\t')) {
+		begin_stripped++;
+	}
+
+	char marker;
+	if (starts_with(begin_stripped, end, "* ")) {
+		marker = '*';
+	} else if (starts_with(begin_stripped, end, "# ")) {
+		marker = '#';
+	} else {
+		return 0;
+	}
+	const char *open_tag = (marker == '*') ? "<ul>" : "<ol>";
+	const char *close_tag = (marker == '*') ? "</ul>" : "</ol>";
+	fputs(open_tag, out);
+
+	bool more_items = true;
+	unsigned current_level = 1;
+	const char *item_begin = begin_stripped, *item_end;
+	while (more_items) {
+		// item_begin is at an item's first marker; count the markers to get its depth (always >= 1).
+		unsigned level = 0;
+		while (item_begin + 1 < end && *item_begin == marker) {
+			item_begin++;
+			level++;
+		}
+
+		// Open or close nested lists until we are at this item's depth.
+		while (level > current_level) {
+			fputs(open_tag, out);
+			current_level += 1;
+		}
+		while (level < current_level) {
+			fputs(close_tag, out);
+			current_level -= 1;
+		}
+
+		// This part essentially emulates the regular expression /\n\n|\n[ \t]*<marker>|$/.
+		item_end = item_begin;
+		while (true) {
+			if (item_end == end || starts_with(item_end, end, "\n\n")) {
+				more_items = false;
+				break;
+			}
+			if (*item_end == '\n') {
+				const char *q = item_end + 1;
+				while (q < end && (*q == ' ' || *q == '\t')) {
+					q += 1;
+				}
+				// Need a character after the marker, or level would be 0 and close the list early.
+				if (q + 1 < end && *q == marker) {
+					// Include the final newline in the output; will be eaten by special case in process().
+					item_end = q;
+					break;
+				}
+			}
+			item_end++;
+		}
+
+		// Note how we don't close the <li> tag! We can avoid some tricky
+		// logic because HTML allows the </li> end tag to be omitted.
+		// See: https://html.spec.whatwg.org/#syntax-tag-omission
+		// See: https://html.spec.whatwg.org/#the-li-element
+		fputs("<li>", out);
+		process(item_begin, item_end, false, out);
+		item_begin = item_end;
+	}
+
+	while (current_level > 0) {
+		fputs(close_tag, out);
+		current_level -= 1;
+	}
+
+	return -(item_end - begin);
+}
+
 void process(const char *begin, const char *end, bool new_block, FILE *out) {
 	assert(begin <= end);
 
+	// DEBUG("Processing: %.*s\n", (int)(end - begin), begin);
+
 	const char *p = begin;
 	while (p < end) {
 		// Eat all newlines if we're starting a block.