From d38f82f6462af4e5aad6a2c776f5c00ce5b13c87 Mon Sep 17 00:00:00 2001
From: Linnnus
" },
+ { "\t", 0, "", "\n
" },
+ { ">", 2, "", "\n
", "
" },
+ { "###### ", 1, "", "
" },
+ { "##### ", 1, "", "
" },
+ { "#### ", 1, "", "
" },
+ { "### ", 1, "", "
" },
+ { "## ", 1, "", "
" },
+ { "# ", 1, "", "
" },
+ { "- - -\n", 1, "
", ""},
+ { "---\n", 1, "
", ""},
+};
+
+/*
+ * Headings written as a text line followed by a line of repeated
+ * characters. dounderline() looks for a run of search[0] on the second
+ * line and wraps the first line in before/after; a non-zero process flag
+ * sends the heading text through process() instead of hprint().
+ */
+static Tag underline[] = {
+	{ "=",		1,	"<h1>",		"</h1>\n" },
+	{ "-",		1,	"<h2>",		"</h2>\n" },
+};
+
+/*
+ * Inline spans delimited by the same token on both sides. dosurround()
+ * tries the entries in order and takes the first whose search string
+ * matches, so longer delimiters must stay ahead of their prefixes.
+ * Entries with a zero process flag have their content escaped with
+ * hprint() only; the others are parsed recursively with process().
+ */
+static Tag surround[] = {
+	{ "```",	0,	"<code>",		"</code>" },
+	{ "``",		0,	"<code>",		"</code>" },
+	{ "`",		0,	"<code>",		"</code>" },
+	{ "___",	1,	"<strong><em>",		"</em></strong>" },
+	{ "***",	1,	"<strong><em>",		"</em></strong>" },
+	{ "__",		1,	"<strong>",		"</strong>" },
+	{ "**",		1,	"<strong>",		"</strong>" },
+	{ "_",		1,	"<em>",			"</em>" },
+	{ "*",		1,	"<em>",			"</em>" },
+};
+
+/*
+ * Literal substitutions applied by doreplace(): { text to match, text to
+ * emit }. The table is scanned top to bottom and the first match wins, so
+ * every pattern must appear only once and "&amp;" must stay ahead of "&".
+ */
+static const char *replace[][2] = {
+	/* Backslash escapes */
+	{ "\\\\",	"\\" },
+	{ "\\`",	"`" },
+	{ "\\*",	"*" },
+	{ "\\_",	"_" },
+	{ "\\{",	"{" },
+	{ "\\}",	"}" },
+	{ "\\[",	"[" },
+	{ "\\]",	"]" },
+	{ "\\(",	"(" },
+	{ "\\)",	")" },
+	{ "\\#",	"#" },
+	{ "\\+",	"+" },
+	{ "\\-",	"-" },
+	{ "\\.",	"." },
+	{ "\\!",	"!" },
+	{ "\\\"",	"&quot;" },
+	{ "\\$",	"$" },
+	{ "\\%",	"%" },
+	{ "\\&",	"&amp;" },
+	{ "\\'",	"'" },
+	{ "\\,",	"," },
+	{ "\\/",	"/" },
+	{ "\\:",	":" },
+	{ "\\;",	";" },
+	{ "\\<",	"&lt;" },
+	{ "\\>",	"&gt;" },
+	{ "\\=",	"=" },
+	{ "\\?",	"?" },
+	{ "\\@",	"@" },
+	{ "\\^",	"^" },
+	{ "\\|",	"|" },
+	{ "\\~",	"~" },
+	/* HTML syntax symbols that need to be turned into entities */
+	{ "<",		"&lt;" },
+	{ ">",		"&gt;" },
+	{ "&amp;",	"&amp;" },	/* Avoid replacing the & in &amp; */
+	{ "&",		"&amp;" },
+	/* Preserve newlines with two spaces before linebreak */
+	{ "  \n",	"<br />\n" },
+};
+
+static const char *code_fence = "```"; /* three-backtick marker; the code that uses it is not visible in this excerpt */
+
+/*
+ * Report a fatal condition: format the message printf-style onto stderr,
+ * then leave the program with EXIT_FAILURE. Control never comes back to
+ * the caller.
+ */
+void
+eprint(const char *format, ...) {
+	va_list args;
+
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+
+	exit(EXIT_FAILURE);
+}
+
+void end_paragraph(void) { /* NOTE(review): this body looks like two functions fused together - a void function cannot `return -(stop - begin + l)`, and start/stop/begin/l are not declared here. Lines were probably lost between the in_paragraph test and the hprint() call; restore from the upstream file before relying on it. */
+ if (in_paragraph) {
+ fputs("
", stdout);
+ } else {
+ fputs("", stdout);
+ }
+ hprint(start, stop);
+ fputs("
\n", stdout);
+ return -(stop - begin + l);
+}
+
+/*
+ * Copy a raw HTML element from the input to stdout unchanged.
+ *
+ * begin/end delimit the text being parsed; newblock is not used here.
+ * Nothing is emitted when the global nohtml flag is set, when fewer than
+ * three bytes are available, or when the text does not start with '<'
+ * followed by a letter. If a matching "</tag>" lies inside the range,
+ * everything up to and including it is copied; otherwise only the
+ * opening tag is copied.
+ *
+ * Returns the number of bytes consumed, or 0 when nothing was recognised.
+ */
+int
+dohtml(const char *begin, const char *end, int newblock) {
+	const char *p, *tag, *tagend;
+	size_t taglen, len;
+
+	if (nohtml || begin + 2 >= end)
+		return 0;
+	p = begin;
+	if (p[0] != '<' || !isalpha((unsigned char)p[1]))
+		return 0;
+	p++;
+	tag = p;
+	/* Test the bound first so *p is never read at or beyond end. */
+	for (; p < end && isalnum((unsigned char)*p); p++);
+	tagend = p;
+	if (p > end || tag == tagend)
+		return 0;
+	taglen = tagend - tag;
+
+	/* Search for "</tag>" lying completely inside [begin, end). */
+	while ((p = strstr(p, "</")) && p < end) {
+		p += 2;
+		if (p + taglen < end && strncmp(p, tag, taglen) == 0 && p[taglen] == '>') {
+			len = (p + taglen + 1) - begin;
+			fwrite(begin, sizeof(char), len, stdout);
+			return (int)len;
+		}
+	}
+
+	/* No closing tag: emit just the opening tag, if it ends in range. */
+	p = strchr(tagend, '>');
+	if (!p || p >= end)
+		return 0;
+	/* As before, the byte following '>' is copied too, but never past end. */
+	len = p - begin + 2;
+	if (len > (size_t)(end - begin))
+		len = end - begin;
+	fwrite(begin, sizeof(char), len, stdout);
+	return (int)len;
+}
+
+/*
+ * Handle block constructs introduced by a line prefix (the entries of the
+ * lineprefix table).
+ *
+ * A prefix is only looked for at the start of a block (newblock != 0) or
+ * right after a newline at *begin. For the first entry whose search
+ * string matches, any open paragraph is closed, the entry's before text
+ * is written, and every consecutive line carrying the same prefix is
+ * collected with the prefix removed. The collected text is run through
+ * process() (as a new block when the entry's process field is >= 2) or
+ * escaped with hprint(), followed by the entry's after text and a newline.
+ *
+ * Entries whose search string ends in '\n' have no content: only the
+ * before text is written and a positive count is returned.
+ *
+ * Returns 0 if no prefix matched, a positive byte count for content-less
+ * entries, and the negated byte count otherwise.
+ */
+int
+dolineprefix(const char *begin, const char *end, int newblock) {
+	unsigned int i, j, l;
+	char *buffer;
+	const char *p;
+	int consumed_input = 0;
+
+	if (newblock)
+		p = begin;
+	else if (*begin == '\n') {
+		p = begin + 1;
+		consumed_input += 1;
+	} else
+		return 0;
+	for (i = 0; i < LENGTH(lineprefix); i++) {
+		l = strlen(lineprefix[i].search);
+		if (end - p + 1 < l)
+			continue;
+		if (strncmp(lineprefix[i].search, p, l))
+			continue;
+		if (*begin == '\n')
+			fputc('\n', stdout);
+
+		/* All line prefixes add a block element. These are not allowed
+		 * inside paragraphs, so we must end the paragraph first. */
+		end_paragraph();
+
+		fputs(lineprefix[i].before, stdout);
+		if (lineprefix[i].search[l-1] == '\n') {
+			fputc('\n', stdout);
+			return l - 1 + consumed_input;
+		}
+		if (!(buffer = malloc(BUFSIZ)))
+			eprint("Malloc failed.");
+		buffer[0] = '\0';
+
+		/* Collect lines into buffer while they start with the prefix */
+		j = 0;
+		while (p + l < end && strncmp(lineprefix[i].search, p, l) == 0) {
+			p += l;
+
+			/* Special case for blockquotes: optional space after > */
+			if (lineprefix[i].search[0] == '>' && *p == ' ') {
+				p++;
+			}
+
+			while (p < end) {
+				ADDC(buffer, j) = *p;
+				j++;
+				if (*(p++) == '\n')
+					break;
+			}
+		}
+
+		/* Drop trailing newlines. j is 0 when the prefix sits at the very
+		 * end of the input, so guard against looking at buffer[-1]. */
+		while (j > 0 && buffer[j - 1] == '\n') {
+			j--;
+		}
+
+		ADDC(buffer, j) = '\0';
+		if (lineprefix[i].process)
+			process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
+		else
+			hprint(buffer, buffer + strlen(buffer));
+		puts(lineprefix[i].after);
+		free(buffer);
+		return -(p - begin);
+	}
+	return 0;
+}
+
+int /* dolink: parse an inline link or image starting at begin.
+ * img is 0 when the text starts with '['; a second form sets it otherwise.
+ * The target runs up to the ')' that balances the opening parenthesis
+ * (nested parentheses are counted), may carry a title in " or ' quotes,
+ * and may itself be wrapped in angle brackets. Returns q + 1 - begin, the
+ * byte count through the closing ')', or 0 when no link is recognised.
+ * NOTE(review): the lines that detect the second form and find the end of
+ * the description are damaged in this copy (unterminated string literals),
+ * and the strings handed to fputs() at the end look stripped of their
+ * markup. Restore them from the upstream file before compiling.
+ */
+dolink(const char *begin, const char *end, int newblock) {
+ int img, len, sep, parens_depth = 1;
+ const char *desc, *link, *p, *q, *descend, *linkend;
+ const char *title = NULL, *titleend = NULL;
+
+ if (*begin == '[')
+ img = 0;
+ else if (strncmp(begin, ") || p > end)
+ return 0;
+ for (q = strstr(desc, ") || p > end)
+ return 0;
+ descend = p;
+ link = p + 2;
+
+ /* find end of link while handling nested parens */
+ q = link;
+ while (parens_depth) {
+ if (!(q = strpbrk(q, "()")) || q > end)
+ return 0;
+ if (*q == '(')
+ parens_depth++;
+ else
+ parens_depth--;
+ if (parens_depth && q < end)
+ q++;
+ }
+
+ if ((p = strpbrk(link, "\"'")) && p < end && q > p) {
+ sep = p[0]; /* separator: can be " or ' */
+ title = p + 1;
+ /* strip trailing whitespace */
+ for (linkend = p; linkend > link && isspace(*(linkend - 1)); linkend--);
+ for (titleend = q - 1; titleend > link && isspace(*(titleend)); titleend--);
+ if (titleend < title || *titleend != sep) {
+ return 0;
+ }
+ }
+ else {
+ linkend = q;
+ }
+
+ /* Links can be given in angular brackets */
+ if (*link == '<' && *(linkend - 1) == '>') {
+ link++;
+ linkend--;
+ }
+
+ len = q + 1 - begin;
+ if (img) {
+ fputs("
", stdout);
+ }
+ else {
+ fputs("", stdout);
+ process(desc, descend, 0);
+ fputs("", stdout);
+ }
+ return len;
+}
+
+/*
+ * Parse a bullet or numbered list and write it as an HTML list.
+ *
+ * A list is recognised at the start of a block (newblock != 0) or right
+ * after a newline at *begin. The first item starts with '-', '*' or '+',
+ * or with digits followed by '.', and then a space or tab. Any open
+ * paragraph is closed first. The width of the first item's marker plus
+ * its following blanks (indent) decides which later lines belong to an
+ * item. Each item's text is gathered into a scratch buffer and passed to
+ * process(); items separated by blank lines are processed as blocks.
+ * A numbered list that does not begin at 1 carries a start attribute.
+ *
+ * Returns 0 when no list starts here, otherwise the negated number of
+ * bytes consumed.
+ */
+int
+dolist(const char *begin, const char *end, int newblock) {
+	unsigned int i, j, indent, run, isblock, start_number = 1;
+	const char *p, *q, *num_start;
+	char *buffer = NULL;
+	char marker = '\0';  /* Bullet symbol, or \0 for numbered lists */
+
+	isblock = 0;
+	if (newblock)
+		p = begin;
+	else if (*begin == '\n')
+		p = begin + 1;
+	else
+		return 0;
+	q = p;
+	if (*p == '-' || *p == '*' || *p == '+') {
+		marker = *p;
+	} else {
+		num_start = p;
+		for (; p < end && *p >= '0' && *p <= '9'; p++);
+		if (p >= end || *p != '.')
+			return 0;
+		start_number = atoi(num_start);
+	}
+	p++;
+	if (p >= end || !(*p == ' ' || *p == '\t'))
+		return 0;
+
+	end_paragraph();
+
+	for (p++; p != end && (*p == ' ' || *p == '\t'); p++);
+	indent = p - q;
+	buffer = ereallocz(buffer, BUFSIZ);
+	if (!newblock)
+		fputc('\n', stdout);
+
+	if (marker) {
+		fputs("<ul>\n", stdout);
+	} else if (start_number == 1) {
+		fputs("<ol>\n", stdout);
+	} else {
+		printf("<ol start=\"%u\">\n", start_number);
+	}
+	run = 1;
+	/* Outer loop: one pass per list item. Inner loop: copy that item. */
+	for (; p < end && run; p++) {
+		for (i = 0; p < end && run; p++, i++) {
+			if (*p == '\n') {
+				if (p + 1 == end)
+					break;
+				else {
+					/* Handle empty lines */
+					for (q = p + 1; (*q == ' ' || *q == '\t') && q < end; q++);
+					if (*q == '\n') {
+						ADDC(buffer, i) = '\n';
+						i++;
+						run = 0;
+						isblock++;
+						p = q;
+					}
+				}
+				/* Measure how the next line starts: j counts a marker
+				 * or number plus leading blanks, capped at indent. */
+				q = p + 1;
+				j = 0;
+				if (marker && *q == marker)
+					j = 1;
+				else {
+					for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++);
+					if (q + j == end)
+						break;
+					if (j > 0 && q[j] == '.')
+						j++;
+					else
+						j = 0;
+				}
+				if (q + indent < end)
+					for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++);
+				if (j == indent) {
+					ADDC(buffer, i) = '\n';
+					i++;
+					p += indent;
+					run = 1;
+					/* A blank first column continues this item; a
+					 * marker or number starts the next one. */
+					if (*q == ' ' || *q == '\t')
+						p++;
+					else
+						break;
+				}
+				else if (j < indent)
+					run = 0;
+			}
+			ADDC(buffer, i) = *p;
+		}
+		ADDC(buffer, i) = '\0';
+		fputs("<li>", stdout);
+		process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
+		fputs("</li>\n", stdout);
+	}
+	fputs(marker ? "</ul>\n" : "</ol>\n", stdout);
+	free(buffer);
+	p--;
+	while (*(--p) == '\n');
+	return -(p - begin + 1);
+}
+
+/*
+ * Turn pipe-delimited table rows into an HTML table.
+ *
+ * The function is invoked once per '|' and keeps its position in static
+ * variables: intable (0 outside a table, 1 in the header row, 2 while the
+ * alignment row is pending, 3 afterwards), inrow (-1 header row, 1 body
+ * row, 0 between rows) and incell (cells opened so far in this row).
+ * calign stores two bits per column taken from the second line of the
+ * table: bit 0 when ':' directly follows the '|' (and blanks), bit 1 when
+ * ':' appears later in the cell; the pair indexes align_table.
+ *
+ * Returns the number of bytes consumed, or 0 when *begin is not '|'.
+ */
+int
+dotable(const char *begin, const char *end, int newblock) {
+	/* table state */
+	static signed char intable, inrow, incell;
+	static unsigned long int calign;
+	static const char *align_table[] = {
+		"",
+		" style=\"text-align: left\"",
+		" style=\"text-align: right\"",
+		" style=\"text-align: center\"",
+	};
+
+	const char *p;
+	/* l: number of columns whose two alignment bits fit in calign */
+	int i, l = (int)sizeof(calign) * 4;
+
+	if(*begin != '|')
+		return 0;
+	if (intable == 2) { /* in alignment row, skip it. */
+		++intable;
+		for (p = begin; p < end && *p != '\n'; ++p);
+		return p - begin + 1;
+	}
+	if(inrow && (begin + 1 >= end || begin[1] == '\n')) { /* close cell and row and if ends, table too */
+		fprintf(stdout, "</t%c></tr>", inrow == -1 ? 'h' : 'd');
+		if (inrow == -1)
+			intable = 2;
+		inrow = 0;
+		if(end - begin <= 2 || begin[2] == '\n') {
+			intable = 0;
+			fputs("\n</table>\n", stdout);
+		}
+		return 1;
+	}
+
+	if(!intable) { /* open table */
+		intable = 1; inrow = -1; incell = 0; calign = 0;
+		for (p = begin; p < end && *p != '\n'; ++p);
+		if(*p == '\n') { /* load alignment from 2nd line */
+			for(i = -1, ++p; p < end && *p != '\n'; p++) {
+				if(*p == '|') {
+					i++;
+					do { p++; } while(p < end && (*p == ' ' || *p == '\t'));
+					if (p >= end)
+						break;
+					if(i < l && *p == ':')
+						calign |= 1ul << (i * 2);
+					if (*p == '\n')
+						break;
+				} else if(i >= 0 && i < l && *p == ':') {
+					/* i is -1 until the first '|'; a negative shift
+					 * count would be undefined. */
+					calign |= 1ul << (i * 2 + 1);
+				}
+			}
+		}
+		fputs("<table>\n", stdout);
+	}
+	if(!inrow) { /* open row */
+		inrow = 1; incell = 0;
+		fputs("  <tr>", stdout);
+	}
+	if(incell) /* close cell */
+		fprintf(stdout, "</t%c>", inrow == -1 ? 'h' : 'd');
+	l = incell < l ? (calign >> (incell * 2)) & 3 : 0; /* open cell */
+	fprintf(stdout, "<t%c%s>", inrow == -1 ? 'h' : 'd', align_table[l]);
+	incell++;
+	for(p = begin + 1; p < end && *p == ' '; p++);
+	return p - begin;
+}
+
+/*
+ * Wrap a run of text in a paragraph.
+ *
+ * Only acts at the start of a block (newblock != 0). The paragraph runs
+ * from begin to the first match of p_end_regex found from begin + 1 on,
+ * or to end when there is no match. The opening tag is written, the
+ * in_paragraph flag is set so that nested block handlers can close the
+ * paragraph early through end_paragraph(), the text is processed inline,
+ * and the paragraph is closed.
+ *
+ * Returns 0 when not at a block start, otherwise the negated number of
+ * bytes consumed.
+ */
+int
+doparagraph(const char *begin, const char *end, int newblock) {
+	const char *p;
+	regmatch_t match;
+
+	if (!newblock)
+		return 0;
+	if (regexec(&p_end_regex, begin + 1, 1, &match, 0)) {
+		p = end;
+	} else {
+		p = begin + 1 + match.rm_so;
+	}
+
+	fputs("<p>", stdout);
+	in_paragraph = 1;
+	process(begin, p, 0);
+	end_paragraph();
+
+	return -(p - begin);
+}
+
+/*
+ * Apply the first entry of the replace table whose pattern is found at
+ * begin: write its replacement text and report the pattern length as the
+ * number of bytes consumed. Patterns longer than the remaining input are
+ * skipped. Returns 0 when no entry applies; newblock is not used.
+ */
+int
+doreplace(const char *begin, const char *end, int newblock) {
+	unsigned int idx, patlen;
+	const char *pattern;
+
+	idx = 0;
+	while (idx < LENGTH(replace)) {
+		pattern = replace[idx][0];
+		patlen = strlen(pattern);
+		if (!(end - begin < patlen) && strncmp(pattern, begin, patlen) == 0) {
+			fputs(replace[idx][1], stdout);
+			return patlen;
+		}
+		idx++;
+	}
+	return 0;
+}
+
+int /* doshortlink: handle a target written between '<' and '>'.
+ * Scans from begin + 1; a space, tab or newline before '>' aborts with 0.
+ * ismail becomes -1 once '#' or ':' is seen and 1 when '@' is seen while
+ * it is still 0; reaching '>' with ismail == 0 also returns 0. On success
+ * the return value is p - begin + 1, the byte count through the '>'.
+ * NOTE(review): the start of the '>' case is damaged in this copy (an
+ * unterminated string literal and an `else` without a visible `if`), and
+ * the fputs()/fprintf() strings look stripped of their markup. Restore
+ * them from the upstream file before compiling.
+ */
+doshortlink(const char *begin, const char *end, int newblock) {
+ const char *p, *c;
+ int ismail = 0;
+
+ if (*begin != '<')
+ return 0;
+ for (p = begin + 1; p != end; p++) {
+ switch (*p) {
+ case ' ':
+ case '\t':
+ case '\n':
+ return 0;
+ case '#':
+ case ':':
+ ismail = -1;
+ break;
+ case '@':
+ if (ismail == 0)
+ ismail = 1;
+ break;
+ case '>':
+ if (ismail == 0)
+ return 0;
+ fputs("'; c++)
+ fprintf(stdout, "%u;", *c);
+ fputs("\">", stdout);
+ for (c = begin + 1; *c != '>'; c++)
+ fprintf(stdout, "%u;", *c);
+ }
+ else {
+ hprint(begin + 1, p);
+ fputs("\">", stdout);
+ hprint(begin + 1, p);
+ }
+ fputs("", stdout);
+ return p - begin + 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Handle inline spans enclosed by a delimiter from the surround table
+ * (the same token opens and closes the span).
+ *
+ * For the first table entry whose search string is found at begin, the
+ * closing delimiter is the next occurrence that is at least one byte
+ * after the opening one, is not preceded by a backslash and starts
+ * before end. If there is no such occurrence the entry is skipped, so an
+ * unterminated delimiter is left for the other parsers to print
+ * literally. The enclosed text is written between the entry's before and
+ * after strings, through process() or hprint() depending on the entry's
+ * process flag.
+ *
+ * Returns the number of bytes consumed including both delimiters, or 0
+ * when nothing matched. newblock is not used.
+ */
+int
+dosurround(const char *begin, const char *end, int newblock) {
+	unsigned int i, l;
+	const char *p, *start, *stop;
+
+	for (i = 0; i < LENGTH(surround); i++) {
+		l = strlen(surround[i].search);
+		if (end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
+			continue;
+		start = begin + l;
+
+		/* Skip over backslash-escaped occurrences of the delimiter. */
+		p = start;
+		do {
+			p = strstr(p + 1, surround[i].search);
+		} while (p && p[-1] == '\\');
+		/* No usable closing delimiter inside the range: not a span. */
+		if (!p || p >= end)
+			continue;
+		stop = p;
+		fputs(surround[i].before, stdout);
+
+		/* Single space at start and end are ignored */
+		if (start[0] == ' ' && stop[-1] == ' ' && start < stop - 1) {
+			start++;
+			stop--;
+			l++;	/* the two dropped spaces still count as consumed */
+		}
+
+		if (surround[i].process)
+			process(start, stop, 0);
+		else
+			hprint(start, stop);
+		fputs(surround[i].after, stdout);
+		return stop - start + 2 * l;
+	}
+	return 0;
+}
+
+/*
+ * Recognise a heading written as a line of text followed by a line that
+ * starts with at least three of an underline character (see the underline
+ * table).
+ *
+ * Only acts at the start of a block (newblock != 0). The first line is
+ * written between the matching entry's before and after strings, through
+ * process() or hprint() depending on the entry's process flag.
+ *
+ * Returns 0 when there is no heading here, otherwise the negated number
+ * of bytes consumed (the text line, its newline and the underline run).
+ */
+int
+dounderline(const char *begin, const char *end, int newblock) {
+	unsigned int i, j, l;
+	const char *p;
+
+	if (!newblock)
+		return 0;
+	p = begin;
+	for (l = 0; p + l != end && p[l] != '\n'; l++);
+	/* Empty first line, or no second line at all: nothing to underline.
+	 * Returning here also avoids stepping p past the end of the range. */
+	if (l == 0 || p + l == end)
+		return 0;
+	p += l + 1;
+	for (i = 0; i < LENGTH(underline); i++) {
+		for (j = 0; p + j < end && p[j] != '\n' && p[j] == underline[i].search[0]; j++);
+		if (j >= 3) {
+			fputs(underline[i].before, stdout);
+			if (underline[i].process)
+				process(begin, begin + l, 0);
+			else
+				hprint(begin, begin + l);
+			fputs(underline[i].after, stdout);
+			return -(j + p - begin);
+		}
+	}
+	return 0;
+}
+
+/*
+ * realloc() that does not return on failure: when the allocation cannot
+ * be satisfied the requested size is reported through eprint(), which
+ * terminates the program. Otherwise the resized block is returned.
+ */
+void *
+ereallocz(void *p, size_t size) {
+	void *resized = realloc(p, size);
+
+	if (resized == NULL)
+		eprint("realloc: %zu bytes\n", size);
+
+	return resized;
+}
+
+/*
+ * Write the bytes in [begin, end) to stdout, replacing the characters
+ * that are significant in HTML text and in double-quoted attribute values
+ * ('&', '"', '>' and '<') with the corresponding character references.
+ * All other bytes are copied unchanged.
+ */
+void
+hprint(const char *begin, const char *end) {
+	const char *p;
+
+	for (p = begin; p != end; p++) {
+		switch (*p) {
+		case '&':
+			fputs("&amp;", stdout);
+			break;
+		case '"':
+			fputs("&quot;", stdout);
+			break;
+		case '>':
+			fputs("&gt;", stdout);
+			break;
+		case '<':
+			fputs("&lt;", stdout);
+			break;
+		default:
+			fputc(*p, stdout);
+			break;
+		}
+	}
+}
+
+/*
+ * Convert the text in [begin, end) and write the result to stdout.
+ *
+ * At every position the functions in the parsers table are tried in
+ * order; the first one returning non-zero wins. The absolute value of
+ * its result is the number of input bytes it consumed, and a negative
+ * result marks the following text as the start of a new block. When no
+ * parser applies, the current byte is copied to the output as is.
+ *
+ * newblock says whether begin is at the start of a block; while it is
+ * set, leading newlines are skipped. A blank line (two consecutive
+ * newlines) also starts a new block, and a single newline at the very
+ * end of the range is not printed.
+ */
+void
+process(const char *begin, const char *end, int newblock) {
+	const char *p;
+	int affected;
+	unsigned int i;
+
+	for (p = begin; p < end;) {
+		if (newblock)
+			while (*p == '\n')
+				if (++p == end)
+					return;
+
+		affected = 0;
+		for (i = 0; i < LENGTH(parsers); i++)
+			if ((affected = parsers[i](p, end, newblock)))
+				break;
+		if (affected)
+			p += abs(affected);
+		else
+			fputc(*p++, stdout);
+
+		/* The range is used up (a parser may even report more bytes
+		 * than were left); do not look at *p any more. */
+		if (p >= end)
+			return;
+
+		/* Don't print single newline at end */
+		if (p + 1 == end && *p == '\n')
+			return;
+
+		if (p[0] == '\n' && p + 1 != end && p[1] == '\n')
+			newblock = 1;
+		else
+			newblock = affected < 0;
+	}
+}
+
+/*
+ * Entry point: read a whole document and write its HTML conversion to
+ * stdout.
+ *
+ * Options: -v prints the version line through eprint() (which exits),
+ * -n sets the nohtml flag, -- ends option parsing. The first non-option
+ * argument names the input file; without one, stdin is read. The input
+ * is read completely into a growing buffer, NUL-terminated and handed to
+ * process() as the start of a block.
+ */
+int
+main(int argc, char *argv[]) {
+	char *buffer = NULL;
+	int i;
+	size_t s;
+	unsigned long len, bsize;
+	FILE *source = stdin;
+
+	/* A paragraph ends at a blank line or at a line opening a code fence. */
+	if (regcomp(&p_end_regex, "(\n\n|(^|\n)```)", REG_EXTENDED))
+		eprint("Cannot compile paragraph regex\n");
+
+	for (i = 1; i < argc; i++) {
+		if (!strcmp("-v", argv[i]))
+			eprint("simple markup %s (C) Enno Boland\n",VERSION);
+		else if (!strcmp("-n", argv[i]))
+			nohtml = 1;
+		else if (argv[i][0] != '-')
+			break;
+		else if (!strcmp("--", argv[i])) {
+			i++;
+			break;
+		}
+		else
+			eprint("Usage %s [-n] [file]\n -n escape html strictly\n", argv[0]);
+	}
+	if (i < argc && !(source = fopen(argv[i], "r")))
+		eprint("Cannot open file `%s`\n",argv[i]);
+	bsize = 2 * BUFSIZ;
+	buffer = ereallocz(buffer, bsize);
+	len = 0;
+	/* Keep room for one more full read plus the terminating NUL. */
+	while ((s = fread(buffer + len, 1, BUFSIZ, source))) {
+		len += s;
+		if (BUFSIZ + len + 1 > bsize) {
+			bsize += BUFSIZ;
+			buffer = ereallocz(buffer, bsize);
+		}
+	}
+	if (ferror(source))
+		eprint("Error while reading input\n");
+	buffer[len] = '\0';
+	process(buffer, buffer + len, 1);
+	fclose(source);
+	free(buffer);
+	regfree(&p_end_regex);
+	return EXIT_SUCCESS;
+}
--
cgit v1.2.3