summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Linnnus <[email protected]> 2024-02-16 22:56:49 +0100
committer Linnnus <[email protected]> 2024-02-16 22:58:00 +0100
commit 90f4eb831fd6f1e8a293851f1b8f2ed5238f5e58 (patch)
tree baff26980eb274b7d988460131e83ed15dc845f3
parent 17919e38efb4c454edf48957bbc42f91c109e89e (diff)
fix(creole): Add special case for "." at end of raw URL
-rw-r--r-- src/creole-test.c 6
-rw-r--r-- src/creole.c 33
2 files changed, 34 insertions, 5 deletions
diff --git a/src/creole-test.c b/src/creole-test.c
index 76d62af..ee29e54 100644
--- a/src/creole-test.c
+++ b/src/creole-test.c
@@ -107,6 +107,12 @@ struct {
.output = "<p><a href=\"mailto:[email protected]\">"
"mailto:[email protected]</a></p>"
},
+ { // This test captures a non-standard (?) special case in the parser.
+ .name = "Raw URL followed by full stop",
+ .input = "My favorite website is https://wiki.c2.com/.",
+ .output = "<p>My favorite website is <a href=\"https://wiki.c2.com/\">"
+ "https://wiki.c2.com/</a>.</p>"
+ },
{
.name = "Unnamed URL",
.input = "[[http //example.com/examplepage]]",
diff --git a/src/creole.c b/src/creole.c
index 107b50b..462dc50 100644
--- a/src/creole.c
+++ b/src/creole.c
@@ -199,12 +199,28 @@ int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out)
}
p += 1;
- // Eat the remainder of the URI.
- // This is not technically correct, but it's a good enough heuristic.
- const char *q = p;
- while (q < end && !isspace(*q)) {
- q += 1;
+ // Eat the remainder of the URI, going purely by which characters are
+ // legal in a URI (RFC 3986 unreserved and reserved sets, plus '%').
+ // See: <https://stackoverflow.com/a/7109208>
+ const char *q = p;
+ while (q < end) {
+ switch (*q) {
+ case '0' ... '9':
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '-': case '.': case '_': case '~':
+ case ':': case '/': case '?': case '#':
+ case '[': case ']': case '@': case '!':
+ case '$': case '&': case '\'': case '(':
+ case ')': case '*': case '+': case ',':
+ case ';': case '%': case '=':
+ q += 1;
+ break;
+ default:
+ goto end_url;
+ }
}
+end_url:
// If there is nothing following the colon, don't accept it as a raw
// url. Otherwise we'd incorrectly find a link with the "said" protocol
@@ -213,6 +229,13 @@ int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out)
return 0;
}
+ // Special case: If we end on a ".", assume it's a full stop at the end
+ // of a sentence. Here's an example:
+ // My favorite website is https://cohost.org/.
+ if (q[-1] == '.') {
+ q -= 1;
+ }
+
fputs("<a href=\"", out);
hprint(out, begin, q);
fputs("\">", out);