From 1ad63a50dcf01171a3e7b9a04b37d36933a769ef Mon Sep 17 00:00:00 2001 From: Linnnus Date: Fri, 16 Feb 2024 12:23:27 +0100 Subject: feat(creole): Support raw URLs --- src/creole-test.c | 42 ++++++++++++++++++++++++------------------ src/creole.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/creole-test.c b/src/creole-test.c index c5b6590..8844a88 100644 --- a/src/creole-test.c +++ b/src/creole-test.c @@ -90,6 +90,30 @@ struct { .input = "[[]]", .output = "

" }, + { + .name = "Raw HTTP URL", + .input = "Here is a http://example.com/examplepage link.", + .output = "

Here is a " + "http://example.com/examplepage link.

" + }, + { // This is interesting because it doesn't contain a "://". + .name = "Raw mailto URL", + .input = "mailto:quandale@dingle.com", + .output = "

" + "mailto:quandale@dingle.com

" + }, + { + .name = "Unnamed URL", + .input = "[[http //example.com/examplepage]]", + .output = "

" + "http //example.com/examplepage

" + }, + { + .name = "Named URL", + .input = "[[http //example.com/examplepage|Example Page]]", + .output = "

" + "Example Page

" + }, #if 0 { .name = "Simple unordered list", @@ -148,24 +172,6 @@ struct { " C " " D
E " }, - { - .name = "Raw URL", - .input = "http //example.com/examplepage", - .output = "

" - "http //example.com/examplepage

" - }, - { - .name = "Unnamed URL", - .input = "[[http //example.com/examplepage]]", - .output = "

" - "http //example.com/examplepage

" - }, - { - .name = "Named URL", - .input = "[[http //example.com/examplepage|Example Page]]", - .output = "

" - "Example Page

" - }, { .name = "Image", .input = "{{image.gif|my image}}", diff --git a/src/creole.c b/src/creole.c index 79a767d..107b50b 100644 --- a/src/creole.c +++ b/src/creole.c @@ -17,6 +17,7 @@ int do_headers(const char *begin, const char *end, bool new_block, FILE *out); int do_paragraph(const char *begin, const char *end, bool new_block, FILE *out); int do_replacements(const char *begin, const char *end, bool new_block, FILE *out); int do_link(const char *begin, const char *end, bool new_block, FILE *out); +int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out); // Prints string escaped. void hprint(FILE *out, const char *begin, const char *end) { @@ -41,7 +42,7 @@ void hprint(FILE *out, const char *begin, const char *end) { // The sign of the return value determines whether a new block should begin, after the consumed text. typedef int (* parser_t)(const char *begin, const char *end, bool new_block, FILE *out); -static parser_t parsers[] = { do_headers, do_paragraph, do_link, do_replacements }; +static parser_t parsers[] = { do_headers, do_paragraph, do_link, do_raw_url, do_replacements }; int do_headers(const char *begin, const char *end, bool new_block, FILE *out) { if (!new_block) { // Headers are block-level elements. @@ -180,6 +181,47 @@ int do_link(const char *begin, const char *end, bool new_block, FILE *out) return stop - start + 4 /* [[]] */; } +int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out) +{ + // Eat a scheme followed by a ":". Here are the relevant rules from RFC 3986. + // - URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + // - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + // See: + const char *p = begin; + if (!isalpha(*p)) { + return 0; + } + while (p < end && (isalnum(*p) || *p == '+' || *p == '-' || *p == '.')) { + p += 1; + } + if (p >= end || p[0] != ':') { + return 0; + } + p += 1; + + // Eat the remainder of the URI. + // This is not technically correct, but it's a good enough heuristic. + const char *q = p; + while (q < end && !isspace(*q)) { + q += 1; + } + + // If there is nothing following the colon, don't accept it as a raw + // url. Otherwise we'd incorrectly find a link with the "said" protocol + // here: "And he said: blah blah". + if (q == p) { + return 0; + } + + fputs("", out); + hprint(out, begin, q); + fputs("", out); + + return q - begin; +} + void process(const char *begin, const char *end, bool new_block, FILE *out) { const char *p = begin; while (p < end) { -- cgit v1.2.3