summary refs log tree commit diff
diff options
context:
space:
mode:
author Linnnus <[email protected]> 2024-02-16 12:23:27 +0100
committer Linnnus <[email protected]> 2024-02-16 17:59:01 +0100
commit 1ad63a50dcf01171a3e7b9a04b37d36933a769ef (patch)
tree 32b4ce03a9cff06f1d6e3e1d79ede4dd77882f3f
parent cfb648daef74f695e42e3314c05296a72168860c (diff)
feat(creole): Support raw URLs
-rw-r--r-- src/creole-test.c 42
-rw-r--r-- src/creole.c 44
2 files changed, 67 insertions, 19 deletions
diff --git a/src/creole-test.c b/src/creole-test.c
index c5b6590..8844a88 100644
--- a/src/creole-test.c
+++ b/src/creole-test.c
@@ -90,6 +90,30 @@ struct {
.input = "[[]]",
.output = "<p><a href=\"\"></a></p>"
},
+ {
+ .name = "Raw HTTP URL",
+ .input = "Here is a http://example.com/examplepage link.",
+ .output = "<p>Here is a <a href=\"http://example.com/examplepage\">"
+ "http://example.com/examplepage</a> link.</p>"
+ },
+ { // This is interesting because it doesn't contain a "://".
+ .name = "Raw mailto URL",
+ .input = "mailto:[email protected]",
+ .output = "<p><a href=\"mailto:[email protected]\">"
+ "mailto:[email protected]</a></p>"
+ },
+ {
+ .name = "Unnamed URL",
+ .input = "[[http //example.com/examplepage]]",
+ .output = "<p><a href=\"http //example.com/examplepage\">"
+ "http //example.com/examplepage</a></p>"
+ },
+ {
+ .name = "Named URL",
+ .input = "[[http //example.com/examplepage|Example Page]]",
+ .output = "<p>"
+ "<a href=\"http //example.com/examplepage\">Example Page</a></p>"
+ },
#if 0
{
.name = "Simple unordered list",
@@ -149,24 +173,6 @@ struct {
"<td> <strong>D</strong> <br /> E </td></tr></table>"
},
{
- .name = "Raw URL",
- .input = "http //example.com/examplepage",
- .output = "<p><a href=\"http //example.com/examplepage\">"
- "http //example.com/examplepage</a></p>"
- },
- {
- .name = "Unnamed URL",
- .input = "[[http //example.com/examplepage]]",
- .output = "<p><a href=\"http //example.com/examplepage\">"
- "http //example.com/examplepage</a></p>"
- },
- {
- .name = "Named URL",
- .input = "[[http //example.com/examplepage|Example Page]]",
- .output = "<p>"
- "<a href=\"http //example.com/examplepage\">Example Page</a></p>"
- },
- {
.name = "Image",
.input = "{{image.gif|my image}}",
.output = "<p><img src=\"image.gif\" alt=\"my image\"/></p>"
diff --git a/src/creole.c b/src/creole.c
index 79a767d..107b50b 100644
--- a/src/creole.c
+++ b/src/creole.c
@@ -17,6 +17,7 @@ int do_headers(const char *begin, const char *end, bool new_block, FILE *out);
int do_paragraph(const char *begin, const char *end, bool new_block, FILE *out);
int do_replacements(const char *begin, const char *end, bool new_block, FILE *out);
int do_link(const char *begin, const char *end, bool new_block, FILE *out);
+int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out);
// Prints string escaped.
void hprint(FILE *out, const char *begin, const char *end) {
@@ -41,7 +42,7 @@ void hprint(FILE *out, const char *begin, const char *end) {
// The sign of the return value determines whether a new block should begin, after the consumed text.
typedef int (* parser_t)(const char *begin, const char *end, bool new_block, FILE *out);
-static parser_t parsers[] = { do_headers, do_paragraph, do_link, do_replacements };
+static parser_t parsers[] = { do_headers, do_paragraph, do_link, do_raw_url, do_replacements };
int do_headers(const char *begin, const char *end, bool new_block, FILE *out) {
if (!new_block) { // Headers are block-level elements.
@@ -180,6 +181,47 @@ int do_link(const char *begin, const char *end, bool new_block, FILE *out)
return stop - start + 4 /* [[]] */;
}
+// Inline parser for bare URLs such as "http://example.com/page" or
+// "mailto:someone". If the text at `begin` starts with a URI scheme
+// followed by ":" and at least one non-whitespace character, the whole
+// run up to the next whitespace (or `end`) is emitted as
+// <a href="URL">URL</a>, with the URL escaped via hprint() in both places.
+//
+// begin, end: half-open range [begin, end) of input text to examine.
+// new_block:  unused by this parser; present to match parser_t.
+// out:        stream the generated HTML is written to.
+//
+// Returns the number of bytes consumed, or 0 if no raw URL starts at `begin`
+// (in which case nothing is written).
+int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out)
+{
+    (void)new_block;
+
+    // Eat a scheme followed by a ":". Here are the relevant rules from RFC 3986.
+    // - URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+    // - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+    // See: <https://www.rfc-editor.org/rfc/rfc3986#section-3.1>
+    //
+    // The range may be empty, so check the bound before looking at *p.
+    // The <ctype.h> functions require a value representable as unsigned char
+    // (or EOF); plain char may be negative for non-ASCII bytes, hence the casts.
+    const char *p = begin;
+    if (p >= end || !isalpha((unsigned char)*p)) {
+        return 0;
+    }
+    while (p < end && (isalnum((unsigned char)*p) || *p == '+' || *p == '-' || *p == '.')) {
+        p += 1;
+    }
+    if (p >= end || p[0] != ':') {
+        return 0;
+    }
+    p += 1;
+
+    // Eat the remainder of the URI.
+    // This is not technically correct, but it's a good enough heuristic.
+    const char *q = p;
+    while (q < end && !isspace((unsigned char)*q)) {
+        q += 1;
+    }
+
+    // If there is nothing following the colon, don't accept it as a raw
+    // url. Otherwise we'd incorrectly find a link with the "said" protocol
+    // here: "And he said: blah blah".
+    if (q == p) {
+        return 0;
+    }
+
+    fputs("<a href=\"", out);
+    hprint(out, begin, q);
+    fputs("\">", out);
+    hprint(out, begin, q);
+    fputs("</a>", out);
+
+    // Length of the consumed URL, scheme included.
+    return (int)(q - begin);
+}
+
void process(const char *begin, const char *end, bool new_block, FILE *out) {
const char *p = begin;
while (p < end) {