From d7a869ffeb2f313df06d4505911cefdf8108ff7e Mon Sep 17 00:00:00 2001 From: Linnnus Date: Sat, 17 Feb 2024 04:00:31 +0100 Subject: feat(creole): handle escaped raw URLs --- src/creole.c | 26 ++++++++++++++++++++------ src/creole_test_main.c | 5 +++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/creole.c b/src/creole.c index 94e8020..c83ffd1 100644 --- a/src/creole.c +++ b/src/creole.c @@ -187,11 +187,21 @@ long do_link(const char *begin, const char *end, bool new_block, FILE *out) long do_raw_url(const char *begin, const char *end, bool new_block, FILE *out) { + const char *p = begin; + + // This piece of spaghetti is necessary to handle escaped urls. + // These should not actually be turned into anchor tags. + // See: + bool escaped = false; + if (*begin == '~') { + escaped = true; + p += 1; + } + // Eat a scheme followed by a ":". Here are the relevant rules from RFC 3986. // - URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] // - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) // See: - const char *p = begin; if (!isalpha(*p)) { return 0; } @@ -240,11 +250,15 @@ end_url: q -= 1; } - fputs("", out); - hprint(out, begin, q); - fputs("", out); + if (escaped) { + hprint(out, begin + 1 /* ~ */, q); + } else { + fputs("", out); + hprint(out, begin, q); + fputs("", out); + } return q - begin; } diff --git a/src/creole_test_main.c b/src/creole_test_main.c index a284811..6b9e95b 100644 --- a/src/creole_test_main.c +++ b/src/creole_test_main.c @@ -123,6 +123,11 @@ struct { .output = "

My favorite website is " "https://wiki.c2.com/.

" }, + { + .name = "Escaped raw URL", + .input = "Please don't register ~https://cohost.org/!", + .output = "

Please don't register https://cohost.org/!

" + }, { .name = "Unnamed URL", .input = "[[http //example.com/examplepage]]", -- cgit v1.2.3