From 90f4eb831fd6f1e8a293851f1b8f2ed5238f5e58 Mon Sep 17 00:00:00 2001 From: Linnnus Date: Fri, 16 Feb 2024 22:56:49 +0100 Subject: fix(creole): Add special case for "." at end of raw URL --- src/creole-test.c | 6 ++++++ src/creole.c | 33 ++++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/creole-test.c b/src/creole-test.c index 76d62af..ee29e54 100644 --- a/src/creole-test.c +++ b/src/creole-test.c @@ -107,6 +107,12 @@ struct { .output = "

" "mailto:quandale@dingle.com

" }, + { // This test captures a non-standard (?) special case in the parser. + .name = "Raw URL followed by full stop", + .input = "My favorite website is https://wiki.c2.com/.", + .output = "

My favorite website is " + "https://wiki.c2.com/.

" }, { .name = "Unnamed URL", .input = "[[http //example.com/examplepage]]", diff --git a/src/creole.c b/src/creole.c index 107b50b..462dc50 100644 --- a/src/creole.c +++ b/src/creole.c @@ -199,12 +199,28 @@ int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out) } p += 1; - // Eat the remainder of the URI. - // This is not technically correct, but it's a good enough heuristic. - const char *q = p; - while (q < end && !isspace(*q)) { - q += 1; + // Eat the remainder of the URI, purely going by what "legal" URI + // characters it contains. + // See: RFC 3986, section 2 ("Characters"). + const char *q = p; + while (q < end) { + switch (*q) { + case '0' ... '9': + case 'a' ... 'z': + case 'A' ... 'Z': + case '-': case '.': case '_': case '~': + case ':': case '/': case '?': case '#': + case '[': case ']': case '@': case '!': + case '$': case '&': case '\'': case '(': + case ')': case '*': case '+': case ',': + case ';': case '%': case '=': + q += 1; + break; + default: + goto end_url; + } } +end_url: // If there is nothing following the colon, don't accept it as a raw // url. Otherwise we'd incorrectly find a link with the "said" protocol @@ -213,6 +229,13 @@ int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out) return 0; } + // Special case: If we end on a ".", assume it's a full stop at the end + // of a sentence. Here's an example: + // My favorite website is https://cohost.org/. + if (q[-1] == '.') { + q -= 1; + } + fputs("", out); -- cgit v1.2.3