diff options
author | Linnnus <[email protected]> | 2024-02-16 12:23:27 +0100 |
---|---|---|
committer | Linnnus <[email protected]> | 2024-02-16 17:59:01 +0100 |
commit | 1ad63a50dcf01171a3e7b9a04b37d36933a769ef (patch) | |
tree | 32b4ce03a9cff06f1d6e3e1d79ede4dd77882f3f | |
parent | cfb648daef74f695e42e3314c05296a72168860c (diff) |
feat(creole): Support raw URLs
-rw-r--r-- | src/creole-test.c | 42 | ||||
-rw-r--r-- | src/creole.c | 44 |
2 files changed, 67 insertions, 19 deletions
diff --git a/src/creole-test.c b/src/creole-test.c
index c5b6590..8844a88 100644
--- a/src/creole-test.c
+++ b/src/creole-test.c
@@ -90,6 +90,30 @@ struct {
 		.input = "[[]]",
 		.output = "<p><a href=\"\"></a></p>"
 	},
+	{
+		.name = "Raw HTTP URL",
+		.input = "Here is a http://example.com/examplepage link.",
+		.output = "<p>Here is a <a href=\"http://example.com/examplepage\">"
+			"http://example.com/examplepage</a> link.</p>"
+	},
+	{ // This is interesting because it doesn't contain a "://".
+		.name = "Raw mailto URL",
+		.input = "mailto:[email protected]",
+		.output = "<p><a href=\"mailto:[email protected]\">"
+			"mailto:[email protected]</a></p>"
+	},
+	{
+		.name = "Unnamed URL",
+		.input = "[[http //example.com/examplepage]]",
+		.output = "<p><a href=\"http //example.com/examplepage\">"
+			"http //example.com/examplepage</a></p>"
+	},
+	{
+		.name = "Named URL",
+		.input = "[[http //example.com/examplepage|Example Page]]",
+		.output = "<p>"
+			"<a href=\"http //example.com/examplepage\">Example Page</a></p>"
+	},
 #if 0
 	{
 		.name = "Simple unordered list",
@@ -149,24 +173,6 @@ struct {
 			"<td> <strong>D</strong> <br /> E </td></tr></table>"
 	},
 	{
-		.name = "Raw URL",
-		.input = "http //example.com/examplepage",
-		.output = "<p><a href=\"http //example.com/examplepage\">"
-			"http //example.com/examplepage</a></p>"
-	},
-	{
-		.name = "Unnamed URL",
-		.input = "[[http //example.com/examplepage]]",
-		.output = "<p><a href=\"http //example.com/examplepage\">"
-			"http //example.com/examplepage</a></p>"
-	},
-	{
-		.name = "Named URL",
-		.input = "[[http //example.com/examplepage|Example Page]]",
-		.output = "<p>"
-			"<a href=\"http //example.com/examplepage\">Example Page</a></p>"
-	},
-	{
 		.name = "Image",
 		.input = "{{image.gif|my image}}",
 		.output = "<p><img src=\"image.gif\" alt=\"my image\"/></p>"
diff --git a/src/creole.c b/src/creole.c
index 79a767d..107b50b 100644
--- a/src/creole.c
+++ b/src/creole.c
@@ -17,6 +17,7 @@ int do_headers(const char *begin, const char *end, bool new_block, FILE *out);
 int do_paragraph(const char *begin, const char *end, bool new_block, FILE *out);
 int do_replacements(const char *begin, const char *end, bool new_block, FILE *out);
 int do_link(const char *begin, const char *end, bool new_block, FILE *out);
+int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out);
 
 // Prints string escaped.
 void hprint(FILE *out, const char *begin, const char *end) {
@@ -41,7 +42,7 @@ void hprint(FILE *out, const char *begin, const char *end) {
 // The sign of the return value determines whether a new block should begin, after the consumed text.
 typedef int (* parser_t)(const char *begin, const char *end, bool new_block, FILE *out);
 
-static parser_t parsers[] = { do_headers, do_paragraph, do_link, do_replacements };
+static parser_t parsers[] = { do_headers, do_paragraph, do_link, do_raw_url, do_replacements };
 
 int do_headers(const char *begin, const char *end, bool new_block, FILE *out) {
 	if (!new_block) { // Headers are block-level elements.
@@ -180,6 +181,47 @@ int do_link(const char *begin, const char *end, bool new_block, FILE *out)
 	return stop - start + 4 /* [[]] */;
 }
 
+int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out)
+{
+	// Eat a scheme followed by a ":". Here are the relevant rules from RFC 3986.
+	// - URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+	// - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+	// See: <https://www.rfc-editor.org/rfc/rfc3986#section-3.1>
+	const char *p = begin;
+	if (!isalpha(*p)) {
+		return 0;
+	}
+	while (p < end && (isalnum(*p) || *p == '+' || *p == '-' || *p == '.')) {
+		p += 1;
+	}
+	if (p >= end || p[0] != ':') {
+		return 0;
+	}
+	p += 1;
+
+	// Eat the remainder of the URI.
+	// This is not technically correct, but it's a good enough heuristic.
+	const char *q = p;
+	while (q < end && !isspace(*q)) {
+		q += 1;
+	}
+
+	// If there is nothing following the colon, don't accept it as a raw
+	// url. Otherwise we'd incorrectly find a link with the "said" protocol
+	// here: "And he said: blah blah".
+	if (q == p) {
+		return 0;
+	}
+
+	fputs("<a href=\"", out);
+	hprint(out, begin, q);
+	fputs("\">", out);
+	hprint(out, begin, q);
+	fputs("</a>", out);
+
+	return q - begin;
+}
+
 void process(const char *begin, const char *end, bool new_block, FILE *out) {
 	const char *p = begin;
 	while (p < end) {