summaryrefslogtreecommitdiff
path: root/src/creole.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/creole.c')
-rw-r--r--src/creole.c44
1 files changed, 43 insertions, 1 deletions
diff --git a/src/creole.c b/src/creole.c
index 79a767d..107b50b 100644
--- a/src/creole.c
+++ b/src/creole.c
@@ -17,6 +17,7 @@ int do_headers(const char *begin, const char *end, bool new_block, FILE *out);
int do_paragraph(const char *begin, const char *end, bool new_block, FILE *out);
int do_replacements(const char *begin, const char *end, bool new_block, FILE *out);
int do_link(const char *begin, const char *end, bool new_block, FILE *out);
+int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out);
// Prints string escaped.
void hprint(FILE *out, const char *begin, const char *end) {
@@ -41,7 +42,7 @@ void hprint(FILE *out, const char *begin, const char *end) {
// The sign of the return value determines whether a new block should begin, after the consumed text.
typedef int (* parser_t)(const char *begin, const char *end, bool new_block, FILE *out);
-static parser_t parsers[] = { do_headers, do_paragraph, do_link, do_replacements };
+static parser_t parsers[] = { do_headers, do_paragraph, do_link, do_raw_url, do_replacements };
int do_headers(const char *begin, const char *end, bool new_block, FILE *out) {
if (!new_block) { // Headers are block-level elements.
@@ -180,6 +181,47 @@ int do_link(const char *begin, const char *end, bool new_block, FILE *out)
return stop - start + 4 /* [[]] */;
}
+int do_raw_url(const char *begin, const char *end, bool new_block, FILE *out)
+{
+ // Eat a scheme followed by a ":". Here are the relevant rules from RFC 3986.
+ // - URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ // - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ // See: <https://www.rfc-editor.org/rfc/rfc3986#section-3.1>
+ const char *p = begin;
+ if (!isalpha(*p)) {
+ return 0;
+ }
+ while (p < end && (isalnum(*p) || *p == '+' || *p == '-' || *p == '.')) {
+ p += 1;
+ }
+ if (p >= end || p[0] != ':') {
+ return 0;
+ }
+ p += 1;
+
+ // Eat the remainder of the URI.
+ // This is not technically correct, but it's a good enough heuristic.
+ const char *q = p;
+ while (q < end && !isspace(*q)) {
+ q += 1;
+ }
+
+ // If there is nothing following the colon, don't accept it as a raw
+ // url. Otherwise we'd incorrectly find a link with the "said" protocol
+ // here: "And he said: blah blah".
+ if (q == p) {
+ return 0;
+ }
+
+ fputs("<a href=\"", out);
+ hprint(out, begin, q);
+ fputs("\">", out);
+ hprint(out, begin, q);
+ fputs("</a>", out);
+
+ return q - begin;
+}
+
void process(const char *begin, const char *end, bool new_block, FILE *out) {
const char *p = begin;
while (p < end) {