diff options
author | Linnnus <[email protected]> | 2024-02-16 22:56:49 +0100 |
---|---|---|
committer | Linnnus <[email protected]> | 2024-02-16 22:58:00 +0100 |
commit | 90f4eb831fd6f1e8a293851f1b8f2ed5238f5e58 (patch) | |
tree | baff26980eb274b7d988460131e83ed15dc845f3 | |
parent | 17919e38efb4c454edf48957bbc42f91c109e89e (diff) |
fix(creole): Add special case for "." at end of raw URL
-rw-r--r-- | src/creole-test.c | 6 | ||||
-rw-r--r-- | src/creole.c | 33 |
2 files changed, 34 insertions, 5 deletions
diff --git a/src/creole-test.c b/src/creole-test.c index 76d62af..ee29e54 100644 --- a/src/creole-test.c +++ b/src/creole-test.c @@ -107,6 +107,12 @@ struct { .output = "<p><a href=\"mailto:[email protected]\">" "mailto:[email protected]</a></p>" }, + { // This test captures a non-standard (?) special case in the parser. + .name = "Raw URL followed by full stop", + .input = "My favorite website is https://wiki.c2.com/.", + .output = "<p>My favorite website is <a href=\"https://wiki.c2.com/\">" + "https://wiki.c2.com/</a>.</p>" + }, { .name = "Unnamed URL", .input = "[[http //example.com/examplepage]]", diff --git a/src/creole.c b/src/creole.c index 107b50b..462dc50 100644 --- a/src/creole.c +++ b/src/creole.c @@ -199,12 +199,28 @@ int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out) } p += 1; - // Eat the remainder of the URI. - // This is not technically correct, but it's a good enough heuristic. - const char *q = p; - while (q < end && !isspace(*q)) { - q += 1; + // Eat the remainder of the URI, purely going by what "legal" URI + // characters it contains. + // See: <https://stackoverflow.com/a/7109208> + const char *q = p; + while (q < end) { + switch (*q) { + case '0' ... '9': + case 'a' ... 'z': + case 'A' ... 'Z': + case '-': case '.': case '_': case '~': + case ':': case '/': case '?': case '#': + case '[': case ']': case '@': case '!': + case '$': case '&': case '\'': case '(': + case ')': case '*': case '+': case ',': + case ';': case '%': case '=': + q += 1; + break; + default: + goto end_url; + } } +end_url: // If there is nothing following the colon, don't accept it as a raw // url. Otherwise we'd incorrectly find a link with the "said" protocol @@ -213,6 +229,13 @@ int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out) return 0; } + // Special case: If we end on a ".", assume it's a full stop at the end + // of a sentence. Here's an example: + // My favorite website is https://cohost.org/. 
+ if (q[-1] == '.') { + q -= 1; + } + fputs("<a href=\"", out); hprint(out, begin, q); fputs("\">", out); |