/* smu - simple markup
* Copyright (C) <2007, 2008> Enno Boland
" },
{ "\t", 0, "", "\n
" },
{ ">", 2, "", "\n
", "
" },
{ "###### ", 1, "", "
" },
{ "##### ", 1, "", "
" },
{ "#### ", 1, "", "
" },
{ "### ", 1, "", "
" },
{ "## ", 1, "", "
" },
{ "# ", 1, "", "
" },
{ "- - -\n", 1, "
", ""},
{ "---\n", 1, "
", ""},
};
/*
 * Rules read by dounderline(): when the line after a text line starts with
 * a run of three or more of the first character of the first field, the
 * text line is written between two of the string fields.
 * Fields appear to be { search, process, before, after } -- the member
 * names are used by dounderline(), the positional order is presumed.
 *
 * NOTE(review): in this copy the third field is empty and the fourth
 * string literal is split across a line break, which is not valid C.
 * Text seems to have been lost from these literals; restore them from a
 * known-good copy.
 */
static Tag underline[] = {
{ "=", 1, "", "
\n" },
{ "-", 1, "", "
\n" },
};
/*
 * Rules read by dosurround(): text enclosed between two occurrences of the
 * first field is written between two of the string fields.  A non-zero
 * second field makes dosurround() run process() on the enclosed text;
 * zero makes it use hprint() instead.  dosurround() tries the rows in
 * order, so longer delimiters are listed before their shorter prefixes.
 *
 * NOTE(review): every third/fourth field is empty in this copy, and the
 * first three rows have a string literal split across a line break, which
 * is not valid C.  Text seems to have been lost from these literals;
 * restore them from a known-good copy.
 */
static Tag surround[] = {
{ "```", 0, "", "
" },
{ "``", 0, "", "
" },
{ "`", 0, "", "
" },
{ "___", 1, "", "" },
{ "***", 1, "", "" },
{ "__", 1, "", "" },
{ "**", 1, "", "" },
{ "_", 1, "", "" },
{ "*", 1, "", "" },
};
/*
 * Literal substitutions applied by doreplace().  Each row is
 * { text to look for in the input, text to write instead }.
 * doreplace() walks the rows top to bottom and stops at the first match,
 * so a row shadows any later row that starts with the same text.
 */
static const char *replace[][2] = {
	/* Backslash escapes; characters that are special in HTML are written
	 * as entities so an escaped character cannot open a tag or entity. */
	{ "\\\\", "\\" },
	{ "\\`", "`" },
	{ "\\*", "*" },
	{ "\\_", "_" },
	{ "\\{", "{" },
	{ "\\}", "}" },
	{ "\\[", "[" },
	{ "\\]", "]" },
	{ "\\(", "(" },
	{ "\\)", ")" },
	{ "\\#", "#" },
	{ "\\+", "+" },
	{ "\\-", "-" },
	{ "\\.", "." },
	{ "\\!", "!" },
	{ "\\\"", "&quot;" },
	{ "\\$", "$" },
	{ "\\%", "%" },
	{ "\\&", "&amp;" },
	{ "\\'", "'" },
	{ "\\,", "," },
	/* "\\-" and "\\." were listed a second time here; the duplicates could
	 * never match because the first occurrence wins, so they are dropped. */
	{ "\\/", "/" },
	{ "\\:", ":" },
	{ "\\;", ";" },
	{ "\\<", "&lt;" },
	{ "\\>", "&gt;" },
	{ "\\=", "=" },
	{ "\\?", "?" },
	{ "\\@", "@" },
	{ "\\^", "^" },
	{ "\\|", "|" },
	{ "\\~", "~" },
	/* HTML syntax symbols that need to be turned into entities */
	{ "<", "&lt;" },
	{ ">", "&gt;" },
	{ "&amp;", "&amp;" }, /* Avoid replacing the & in &amp; */
	{ "&", "&amp;" },
	/* Preserve newlines with two spaces before linebreak.
	 * NOTE(review): the replacement text was unreadable in the reviewed
	 * copy and the search text had a single space; "<br />" and two
	 * spaces are assumed from the comment above -- confirm. */
	{ "  \n", "<br />\n" },
};
/* Three-backtick string.  NOTE(review): no use of it is visible in this
 * part of the file; presumably the fenced-code delimiter -- confirm. */
static const char *code_fence = "```";
/*
 * Write a printf-style message to stderr and terminate the program with
 * EXIT_FAILURE.  This function does not return.
 */
void
eprint(const char *format, ...) {
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);

	exit(EXIT_FAILURE);
}
/*
 * Writes to stdout depending on the in_paragraph flag.
 *
 * NOTE(review): this block is damaged in the reviewed copy.  It uses
 * start, stop, begin and l, none of which are declared here, and it
 * returns a value although the function is declared void; two string
 * literals are also split across line breaks.  It looks as if the end of
 * this function and the beginning of a following one were lost and the
 * remains fused together.  Restore from a known-good copy before relying
 * on anything below.
 */
void end_paragraph(void) {
if (in_paragraph) {
fputs("
", stdout);
} else {
fputs("", stdout);
}
hprint(start, stop);
fputs("
\n", stdout);
return -(stop - begin + l);
}
/*
 * Copy an HTML element found at `begin` to stdout without escaping it.
 *
 * begin/end  range of input to look at; `begin` must point at readable,
 *            NUL-terminated text (strstr/strchr are used on it).
 * newblock   unused.
 *
 * Returns the number of characters consumed, or 0 when html passthrough
 * is disabled (nohtml) or the text does not start with '<' followed by a
 * letter.  If a closing tag with the same name is found before `end`,
 * everything up to and including it is copied; otherwise only the opening
 * tag and the one character after its '>' are copied.
 */
int
dohtml(const char *begin, const char *end, int newblock) {
	const char *p, *tag, *tagend, *stop;
	size_t taglen;

	(void)newblock;
	if (nohtml || begin + 2 >= end)
		return 0;
	if (begin[0] != '<' || !isalpha((unsigned char)begin[1]))
		return 0;

	/* Tag name: check the bound before reading the character. */
	tag = begin + 1;
	for (p = tag; p < end && isalnum((unsigned char)*p); p++)
		;
	tagend = p;
	taglen = tagend - tag;
	if (taglen == 0)
		return 0;

	/* Look for the matching closing tag.  The search text was an empty
	 * string in the reviewed copy, which matches at every position; "</"
	 * is restored because the code below skips two characters and then
	 * expects the tag name followed by '>'. */
	while ((p = strstr(p, "</")) && p < end) {
		p += 2;
		if (strncmp(p, tag, taglen) == 0 && p[taglen] == '>') {
			stop = p + taglen + 1;
			if (stop > end)
				break; /* closing tag is cut off by `end` */
			fwrite(begin, sizeof(char), stop - begin, stdout);
			return stop - begin;
		}
	}

	/* No closing tag: pass the opening tag and the character after it. */
	p = strchr(tagend, '>');
	if (!p || p >= end)
		return 0;
	stop = p + 2;
	if (stop > end)
		stop = end; /* never write or consume past the caller's range */
	fwrite(begin, sizeof(char), stop - begin, stdout);
	return stop - begin;
}
/*
 * Handle blocks whose lines start with one of the prefixes in lineprefix[].
 *
 * begin/end  range of input to look at.
 * newblock   non-zero when `begin` is at the start of a block; otherwise
 *            the prefix is only looked for right after a newline at
 *            `begin`.
 *
 * Returns 0 when no prefix matches.  For a prefix whose search text ends
 * in a newline only `before` is written and a positive length is
 * returned.  Otherwise the prefixed lines are collected with the prefix
 * removed, written between `before` and `after`, and the negated number of
 * consumed characters is returned.
 */
int
dolineprefix(const char *begin, const char *end, int newblock) {
	unsigned int i, j, l;
	char *buffer;
	const char *p;
	int consumed_input = 0;

	if (newblock)
		p = begin;
	else if (*begin == '\n') {
		p = begin + 1;
		consumed_input += 1;
	} else
		return 0;

	for (i = 0; i < LENGTH(lineprefix); i++) {
		l = strlen(lineprefix[i].search);
		if (end - p + 1 < l)
			continue;
		if (strncmp(lineprefix[i].search, p, l))
			continue;

		if (*begin == '\n')
			fputc('\n', stdout);

		/* All line prefixes add a block element. These are not allowed
		 * inside paragraphs, so we must end the paragraph first. */
		end_paragraph();

		fputs(lineprefix[i].before, stdout);
		if (lineprefix[i].search[l-1] == '\n') {
			fputc('\n', stdout);
			return l - 1 + consumed_input;
		}
		if (!(buffer = malloc(BUFSIZ)))
			eprint("Malloc failed.");
		buffer[0] = '\0';

		/* Collect lines into buffer while they start with the prefix */
		j = 0;
		while ((strncmp(lineprefix[i].search, p, l) == 0) && p + l < end) {
			p += l;

			/* Special case for blockquotes: optional space after > */
			if (lineprefix[i].search[0] == '>' && *p == ' ') {
				p++;
			}

			while (p < end) {
				ADDC(buffer, j) = *p;
				j++;
				if (*(p++) == '\n')
					break;
			}
		}

		/* Skip empty lines in block.  j is unsigned, so it must be
		 * checked before looking at buffer[j - 1]: nothing may have been
		 * collected, or everything collected may be newlines. */
		while (j > 0 && buffer[j - 1] == '\n') {
			j--;
		}

		ADDC(buffer, j) = '\0';
		if (lineprefix[i].process)
			process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
		else
			hprint(buffer, buffer + strlen(buffer));
		puts(lineprefix[i].after);
		free(buffer);
		return -(p - begin);
	}
	return 0;
}
/*
 * Parser for links and images (see the img flag).  On success it writes
 * to stdout and returns the number of input characters consumed; it
 * returns 0 when the text at `begin` is not handled here.
 *
 * NOTE(review): this block is damaged in the reviewed copy.  The two
 * statements that test for the image form and locate the end of the
 * description have lost most of their text (their string literals are
 * unterminated), and the output calls near the end write empty or broken
 * literals, so link, linkend, title and titleend are computed but never
 * printed.  Restore from a known-good copy.
 */
int
dolink(const char *begin, const char *end, int newblock) {
int img, len, sep, parens_depth = 1;
const char *desc, *link, *p, *q, *descend, *linkend;
const char *title = NULL, *titleend = NULL;
if (*begin == '[')
img = 0;
else if (strncmp(begin, ") || p > end)
return 0;
for (q = strstr(desc, ") || p > end)
return 0;
descend = p;
link = p + 2;
/* find end of link while handling nested parens */
q = link;
while (parens_depth) {
if (!(q = strpbrk(q, "()")) || q > end)
return 0;
if (*q == '(')
parens_depth++;
else
parens_depth--;
if (parens_depth && q < end)
q++;
}
/* q now points at the parenthesis that brought the depth back to zero */
if ((p = strpbrk(link, "\"'")) && p < end && q > p) {
sep = p[0]; /* separator: can be " or ' */
title = p + 1;
/* strip trailing whitespace */
for (linkend = p; linkend > link && isspace(*(linkend - 1)); linkend--);
for (titleend = q - 1; titleend > link && isspace(*(titleend)); titleend--);
/* the title must be closed by the same quote character that opened it */
if (titleend < title || *titleend != sep) {
return 0;
}
}
else {
linkend = q;
}
/* Links can be given in angular brackets */
if (*link == '<' && *(linkend - 1) == '>') {
link++;
linkend--;
}
/* consumed length runs from begin through the parenthesis at q */
len = q + 1 - begin;
if (img) {
fputs("
", stdout);
}
else {
fputs("", stdout);
process(desc, descend, 0);
fputs("", stdout);
}
return len;
}
/*
 * Parser for list blocks.  An item starts with '-', '*' or '+', or with
 * digits followed by '.', and must be followed by a space or tab.  The
 * text of each item is collected into a buffer and handed to process().
 * Returns 0 when the text at `begin` is not a list, otherwise the negated
 * number of consumed characters.
 *
 * NOTE(review): the output literals are damaged in the reviewed copy.
 * The opening calls for the bulleted and numbered cases write the same
 * text, the printf() receives start_number but its format has no
 * conversion, and the literals that close an item and the list are split
 * across line breaks (not valid C).  Restore from a known-good copy.
 */
int
dolist(const char *begin, const char *end, int newblock) {
unsigned int i, j, indent, run, isblock, start_number;
const char *p, *q, *num_start;
char *buffer = NULL;
char marker = '\0'; /* Bullet symbol, or \0 when the item starts with a number */
isblock = 0;
if (newblock)
p = begin;
else if (*begin == '\n')
p = begin + 1;
else
return 0;
q = p;
if (*p == '-' || *p == '*' || *p == '+') {
marker = *p;
} else {
num_start = p;
for (; p < end && *p >= '0' && *p <= '9'; p++);
if (p >= end || *p != '.')
return 0;
start_number = atoi(num_start);
}
p++;
if (p >= end || !(*p == ' ' || *p == '\t'))
return 0;
end_paragraph();
for (p++; p != end && (*p == ' ' || *p == '\t'); p++);
/* indent: width of the marker plus the whitespace after it */
indent = p - q;
buffer = ereallocz(buffer, BUFSIZ);
if (!newblock)
fputc('\n', stdout);
if (marker) {
fputs("\n", stdout);
} else if (start_number == 1) {
fputs("\n", stdout);
} else {
printf("\n", start_number);
}
run = 1;
/* outer loop: one pass per list item; inner loop: copy that item into buffer */
for (; p < end && run; p++) {
for (i = 0; p < end && run; p++, i++) {
if (*p == '\n') {
if (p + 1 == end)
break;
else {
/* Handle empty lines */
for (q = p + 1; (*q == ' ' || *q == '\t') && q < end; q++);
if (*q == '\n') {
ADDC(buffer, i) = '\n';
i++;
run = 0;
isblock++;
p = q;
}
}
q = p + 1;
/* j counts how much of the next line looks like a marker plus indentation */
j = 0;
if (marker && *q == marker)
j = 1;
else {
for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++);
if (q + j == end)
break;
if (j > 0 && q[j] == '.')
j++;
else
j = 0;
}
if (q + indent < end)
for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++);
if (j == indent) {
ADDC(buffer, i) = '\n';
i++;
p += indent;
run = 1;
/* a line that starts with whitespace continues the item; otherwise a new item begins */
if (*q == ' ' || *q == '\t')
p++;
else
break;
}
else if (j < indent)
run = 0;
}
ADDC(buffer, i) = *p;
}
ADDC(buffer, i) = '\0';
fputs("- ", stdout);
process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
fputs("
\n", stdout);
}
fputs(marker ? "
\n" : "\n", stdout);
free(buffer);
p--;
while (*(--p) == '\n');
return -(p - begin + 1);
}
/*
 * Parser for table cells, called at a '|' character.  The table, row and
 * cell state is kept in static variables, so it carries over from one
 * call to the next.  Returns the number of characters consumed, or 0
 * when `begin` does not point at '|'.
 *
 * NOTE(review): the format strings are damaged in the reviewed copy.
 * Several fprintf() calls pass 'h'/'d' and an align_table entry to an
 * empty format, so nothing is written for them; the fputs() literals
 * contain only whitespace.  Restore from a known-good copy.
 */
int
dotable(const char *begin, const char *end, int newblock) {
/* table state */
static signed char intable, inrow, incell;
/* two bits per column; the value is used as an index into align_table */
static unsigned long int calign;
static const char *align_table[] = {
"",
" style=\"text-align: left\"",
" style=\"text-align: right\"",
" style=\"text-align: center\"",
};
const char *p;
/* l: number of two-bit column entries that fit into calign */
int i, l = (int)sizeof(calign) * 4;
if(*begin != '|')
return 0;
if (intable == 2) { /* in alignment row, skip it. */
++intable;
for (p = begin; p < end && *p != '\n'; ++p);
return p - begin + 1;
}
if(inrow && (begin + 1 >= end || begin[1] == '\n')) { /* close cell and row and if ends, table too */
fprintf(stdout, "", inrow == -1 ? 'h' : 'd');
/* inrow == -1 marks the first row; the row after it is the alignment row */
if (inrow == -1)
intable = 2;
inrow = 0;
if(end - begin <= 2 || begin[2] == '\n') {
intable = 0;
fputs("\n\n", stdout);
}
return 1;
}
if(!intable) { /* open table */
intable = 1; inrow = -1; incell = 0; calign = 0;
for (p = begin; p < end && *p != '\n'; ++p);
if(*p == '\n') { /* load alignment from 2nd line */
for(i = -1, ++p; p < end && *p != '\n'; p++) {
if(*p == '|') {
i++;
do { p++; } while(p < end && (*p == ' ' || *p == '\t'));
/* ':' right after the cell start sets the low bit of column i */
if(i < l && *p == ':')
calign |= 1ul << (i * 2);
if (*p == '\n')
break;
} else if(i < l && *p == ':') {
/* any other ':' in the cell sets the high bit of column i */
calign |= 1ul << (i * 2 + 1);
}
}
}
fputs("\n", stdout);
}
if(!inrow) { /* open row */
inrow = 1; incell = 0;
fputs(" ", stdout);
}
if(incell) /* close cell */
fprintf(stdout, "", inrow == -1 ? 'h' : 'd');
l = incell < l ? (calign >> (incell * 2)) & 3 : 0; /* open cell */
fprintf(stdout, "", inrow == -1 ? 'h' : 'd', align_table[l]);
incell++;
/* skip the '|' and the spaces that follow it */
for(p = begin + 1; p < end && *p == ' '; p++);
return p - begin;
}
/*
 * Start a paragraph at `begin` and process its content.
 *
 * begin/end  range of input to look at; the text must be NUL-terminated
 *            because regexec() is run on it.
 * newblock   the function only acts when this is non-zero.
 *
 * The paragraph ends at the first match of p_end_regex after the first
 * character, or at `end` if there is no match before it.  Returns 0 when
 * newblock is zero, otherwise the negated number of consumed characters.
 */
int
doparagraph(const char *begin, const char *end, int newblock) {
	const char *p;
	regmatch_t match;

	if (!newblock)
		return 0;

	if (regexec(&p_end_regex, begin + 1, 1, &match, 0))
		p = end;
	else
		p = begin + 1 + match.rm_so;

	/* regexec() scans up to the terminating NUL and knows nothing about
	 * `end`; a match behind `end` must not extend the paragraph past the
	 * range this call was given. */
	if (p > end)
		p = end;

	/* NOTE(review): this literal was empty in the reviewed copy, which
	 * made the call a no-op.  "<p>" is restored on the assumption that
	 * end_paragraph() writes the matching closing tag -- confirm. */
	fputs("<p>", stdout);
	in_paragraph = 1;
	process(begin, p, 0);
	end_paragraph();

	return -(p - begin);
}
/*
 * Apply the first row of replace[] whose search text is found at `begin`:
 * write that row's replacement to stdout and return the length of the
 * search text.  Returns 0 when no row matches.  newblock is unused.
 */
int
doreplace(const char *begin, const char *end, int newblock) {
	unsigned int row = 0, width;

	while (row < LENGTH(replace)) {
		width = strlen(replace[row][0]);
		/* the search text has to fit into what is left of the input */
		if (!(end - begin < width)
		    && strncmp(replace[row][0], begin, width) == 0) {
			fputs(replace[row][1], stdout);
			return width;
		}
		row++;
	}
	return 0;
}
/*
 * Parser for text of the form <...>.  Scans from the character after '<'
 * up to the closing '>'; gives up (returns 0) on a space, tab or newline,
 * or when the text contains none of '#', ':' and '@'.  ismail ends up 1
 * when an '@' was seen before any '#' or ':', and -1 once a '#' or ':'
 * was seen.  On success returns the number of characters consumed,
 * including both angle brackets.
 *
 * NOTE(review): this block is damaged in the reviewed copy.  The
 * statement that starts the output has lost most of its text (its string
 * literal is unterminated) and the `if` that belongs to the `else` below
 * is missing, so the code does not compile as shown.  Restore from a
 * known-good copy.
 */
int
doshortlink(const char *begin, const char *end, int newblock) {
const char *p, *c;
int ismail = 0;
if (*begin != '<')
return 0;
for (p = begin + 1; p != end; p++) {
switch (*p) {
case ' ':
case '\t':
case '\n':
return 0;
case '#':
case ':':
ismail = -1;
break;
case '@':
if (ismail == 0)
ismail = 1;
break;
case '>':
if (ismail == 0)
return 0;
fputs("'; c++)
fprintf(stdout, "%u;", *c);
fputs("\">", stdout);
for (c = begin + 1; *c != '>'; c++)
fprintf(stdout, "%u;", *c);
}
else {
hprint(begin + 1, p);
fputs("\">", stdout);
hprint(begin + 1, p);
}
fputs("", stdout);
return p - begin + 1;
}
}
return 0;
}
/*
 * Handle text enclosed in one of the delimiters listed in surround[].
 *
 * begin/end  range of input to look at; the text must be NUL-terminated
 *            because strstr() is used on it.
 * newblock   unused.
 *
 * Rows are tried in order.  For the first row whose delimiter opens at
 * `begin` and closes again, unescaped, before `end`, the enclosed text is
 * written between the row's `before` and `after` and the number of
 * consumed characters is returned.  Returns 0 when nothing matches.
 */
int
dosurround(const char *begin, const char *end, int newblock) {
	unsigned int i, l;
	const char *p, *start, *stop;

	for (i = 0; i < LENGTH(surround); i++) {
		l = strlen(surround[i].search);
		if (end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
			continue;
		start = begin + l;

		/* Find the closing delimiter: the first occurrence after at
		 * least one character of content that is not preceded by a
		 * backslash. */
		p = start;
		do {
			p = strstr(p + 1, surround[i].search);
		} while (p && p[-1] == '\\');

		/* Without an unescaped closing delimiter inside the range this
		 * row does not apply.  Previously a failed search left the end
		 * marker at `start` (or at an escaped delimiter), which passed
		 * the range check, produced an empty element and swallowed
		 * input. */
		if (!p || p >= end)
			continue;
		stop = p;

		fputs(surround[i].before, stdout);

		/* Single space at start and end are ignored */
		if (start[0] == ' ' && stop[-1] == ' ' && start < stop - 1) {
			start++;
			stop--;
			l++; /* the dropped spaces still count as consumed */
		}

		if (surround[i].process)
			process(start, stop, 0);
		else
			hprint(start, stop);
		fputs(surround[i].after, stdout);
		return stop - start + 2 * l;
	}
	return 0;
}
/*
 * Handle a text line that is followed by an underline made of a character
 * listed in underline[].
 *
 * Acts only at the start of a block (newblock non-zero) and only when the
 * first line is not empty.  When the following line starts with a run of
 * at least three of a row's search character, the first line is written
 * between that row's `before` and `after` and the negated number of
 * consumed characters is returned.  Returns 0 otherwise.
 */
int
dounderline(const char *begin, const char *end, int newblock) {
	unsigned int row, run, textlen = 0;
	const char *next;

	if (!newblock)
		return 0;

	/* length of the first line, without its newline */
	while (begin + textlen != end && begin[textlen] != '\n')
		textlen++;
	if (textlen == 0)
		return 0;

	next = begin + textlen + 1;
	for (row = 0; row < LENGTH(underline); row++) {
		run = 0;
		while (next + run < end && next[run] != '\n'
		       && next[run] == underline[row].search[0])
			run++;
		if (run < 3)
			continue;

		fputs(underline[row].before, stdout);
		if (underline[row].process)
			process(begin, begin + textlen, 0);
		else
			hprint(begin, begin + textlen);
		fputs(underline[row].after, stdout);
		return -(run + next - begin);
	}
	return 0;
}
/*
 * realloc() wrapper: resize `p` to `size` bytes and return the result.
 * When realloc() fails, eprint() is called with the requested size.
 */
void *
ereallocz(void *p, size_t size) {
	void *grown = realloc(p, size);

	if (grown != NULL)
		return grown;

	eprint("realloc: %zu bytes\n", size);
	return grown;
}
/*
 * Write the characters in [begin, end) to stdout, replacing the characters
 * that are special in HTML ('&', '"', '>' and '<') with their entities so
 * the text cannot be read as markup.  Nothing is written when begin is
 * not before end.
 */
void
hprint(const char *begin, const char *end) {
	const char *p;

	/* `<` instead of `!=`: an inverted range must not run off the buffer */
	for (p = begin; p < end; p++) {
		switch (*p) {
		case '&':
			fputs("&amp;", stdout);
			break;
		case '"':
			fputs("&quot;", stdout);
			break;
		case '>':
			fputs("&gt;", stdout);
			break;
		case '<':
			fputs("&lt;", stdout);
			break;
		default:
			fputc(*p, stdout);
			break;
		}
	}
}
/*
 * Run the parsers over the input in [begin, end).
 *
 * At each position the entries of parsers[] are tried in order until one
 * returns non-zero; the absolute value of that result is the number of
 * characters it consumed.  When none applies, one character is copied to
 * stdout unchanged.  newblock tells the parsers whether the position is at
 * the start of a block; it is set again after every step, either because
 * two newlines follow or because the parser returned a negative value.
 */
void
process(const char *begin, const char *end, int newblock) {
	const char *cur = begin;
	int consumed;
	unsigned int k;

	while (cur < end) {
		/* leading newlines of a block are dropped */
		if (newblock) {
			while (*cur == '\n') {
				if (++cur == end)
					return;
			}
		}

		for (k = 0; k < LENGTH(parsers); k++) {
			consumed = parsers[k](cur, end, newblock);
			if (consumed)
				break;
		}

		if (!consumed)
			fputc(*cur++, stdout);
		else
			cur += abs(consumed);

		/* Don't print single newline at end */
		if (cur + 1 == end && *cur == '\n')
			return;

		newblock = (cur[0] == '\n' && cur + 1 != end && cur[1] == '\n')
		           || consumed < 0;
	}
}
/*
 * Entry point: parse the options, read the whole input (the named file, or
 * stdin when no file is given) into memory and run process() over it.
 *
 * Options:
 *   -v   print the version through eprint()
 *   -n   set nohtml
 *   --   end of options; the next argument is the file name
 * Any other argument that starts with '-' prints the usage text through
 * eprint().
 */
int
main(int argc, char *argv[]) {
	char *buffer = NULL;
	int i;
	size_t s, len, bsize;
	FILE *source = stdin;

	/* A failed compile would leave p_end_regex unusable for regexec(). */
	if (regcomp(&p_end_regex, "(\n\n|(^|\n)```)", REG_EXTENDED))
		eprint("regcomp failed.\n");

	for (i = 1; i < argc; i++) {
		if (!strcmp("-v", argv[i]))
			eprint("simple markup %s (C) Enno Boland\n",VERSION);
		else if (!strcmp("-n", argv[i]))
			nohtml = 1;
		else if (argv[i][0] != '-')
			break;
		else if (!strcmp("--", argv[i])) {
			i++;
			break;
		}
		else
			eprint("Usage %s [-n] [file]\n -n escape html strictly\n", argv[0]);
	}
	if (i < argc && !(source = fopen(argv[i], "r")))
		eprint("Cannot open file `%s`\n",argv[i]);

	/* Before every fread() there is room for BUFSIZ more bytes plus the
	 * terminating NUL. */
	bsize = 2 * BUFSIZ;
	buffer = ereallocz(buffer, bsize);
	len = 0;
	while ((s = fread(buffer + len, 1, BUFSIZ, source))) {
		len += s;
		if (BUFSIZ + len + 1 > bsize) {
			bsize += BUFSIZ;
			buffer = ereallocz(buffer, bsize);
		}
	}
	/* fread() returns 0 for errors as well as for end of file; do not
	 * convert a partially read document and report success. */
	if (ferror(source))
		eprint("Error while reading input\n");
	buffer[len] = '\0';

	process(buffer, buffer + len, 1);

	fclose(source);
	free(buffer);
	regfree(&p_end_regex);
	return EXIT_SUCCESS;
}