sitegen/framework/lib/markdown.tsx

201 lines
6.1 KiB
TypeScript
Raw Normal View History

/* Implementation of the [CommonMark] specification for markdown, with
 * support for custom syntax extensions via the parser options. Instead of
 * returning an AST that needs a second conversion pass to JSX, the value
 * returned by 'parse' is an 'engine.Node', which can be stringified via
 * Clover's SSR engine. This way, generation optimizations, async
 * components, and other features are gained for free here.
 *
 * [CommonMark]: https://spec.commonmark.org/0.31.2/
 */
// NOTE(review): placeholder — the body is empty, so this currently returns
// undefined for every input and ignores both 'src' and 'options'.
function parse(src: string, options: Partial<ParseOpts> = {}) {}
2025-07-08 01:09:55 -07:00
/**
 * Render markdown content as a JSX component.
 *
 * This is the same function as 'parse'; it exists because JSX components
 * only take a single props argument and must start with a capital letter.
 *
 * @param props - `src` is the markdown source; every other property is
 *   forwarded to 'parse' as its options object.
 * @returns Whatever 'parse' returns for the given source and options.
 */
export function Markdown({
  src,
  ...options
}: { src: string } & Partial<ParseOpts>) {
  return parse(src, options);
}
// TODO: This implementation is flawed because it cannot sanely handle emphasis
// and strong emphasis with all of their edge cases. Instead of implementing
// these through the extension (rule) interface, they should be special-cased.
2025-07-08 01:09:55 -07:00
/**
 * Parse inline markdown into a flat list of nodes.
 *
 * On every pass each rule's 'match' pattern is searched in the remaining
 * source and the candidates are tried in order of position (earliest first;
 * ties keep the rule declaration order). The first rule whose 'parse'
 * succeeds wins: the literal text before it and its result are appended,
 * and scanning resumes on the remainder.
 *
 * @param src - Inline markdown source.
 * @param options - Optional rule set and link reference map; defaults to
 *   'inlineRules' and an empty map.
 * @returns Alternating literal strings and rule results, always ending with
 *   the trailing literal text (which may be the empty string).
 */
function parseInline(src: string, options: Partial<InlineOpts> = {}) {
  const { rules = inlineRules, links = new Map() } = options;
  const opts: InlineOpts = { rules, links };
  const parts: render.Node[] = [];
  // Rules without a pattern (unfinished placeholders) can never match.
  const ruleList = Object.values(rules).filter((rule) => rule.match);

  // Literal text that was skipped over because a delimiter failed to parse.
  let pending = "";

  scan: while (src.length > 0) {
    const candidates: { rule: InlineRule; index: number; text: string }[] = [];
    for (const rule of ruleList) {
      const found = src.match(rule.match);
      if (!found) continue;
      candidates.push({ rule, index: UNWRAP(found.index), text: found[0] });
    }

    // Earliest match first, so a construct is never swallowed as the
    // "text before" of a later one. Array sort is stable, so rules that
    // match at the same position keep their declaration order.
    candidates.sort((a, b) => a.index - b.index);
    const earliest = candidates[0];
    if (!earliest) break;

    for (const { rule, index, text } of candidates) {
      const after = src.slice(index + text.length);
      const parsed = rule.parse({ after, match: text, opts });
      if (!parsed) continue;

      parts.push(pending + src.slice(0, index), parsed.result);
      pending = "";
      src = parsed.rest ?? after;
      continue scan;
    }

    // Every candidate refused its first occurrence. Treat the earliest
    // delimiter as literal text and keep scanning behind it, so a later
    // occurrence of the same rule still gets a chance.
    const cut = Math.max(1, earliest.index + earliest.text.length);
    pending += src.slice(0, cut);
    src = src.slice(cut);
  }

  parts.push(pending + src);
  return parts;
}
// -- interfaces --
/* Options accepted by 'parse' and, via its props, by the 'Markdown'
 * component. Both take them as 'Partial<ParseOpts>'. */
interface ParseOpts {
  // NOTE(review): presumably toggles GitHub Flavored Markdown extensions;
  // no visible code reads this yet — confirm once 'parse' is implemented.
  gfm: boolean;
  // Block-level rules, keyed by name.
  blockRules: Record<string, BlockRule>;
  // Inline-level rules, keyed by name.
  inlineRules: Record<string, InlineRule>;
}
/* Options used during inline parsing. 'parseInline' builds one of these
 * and hands it to every rule's 'parse' call. */
interface InlineOpts {
// Inline rules to apply, keyed by name.
rules: Record<string, InlineRule>;
// Link reference definitions, looked up by label when resolving
// reference-style links.
links: Map<string, LinkRef>;
}
/* A single inline syntax rule. */
interface InlineRule {
// Pattern that locates where the construct may start; 'parseInline'
// searches for it in the remaining source.
match: RegExp;
// Attempts to parse the construct at that location. 'match' is the text
// matched by the pattern, 'after' is the source following it, and 'opts'
// are the active inline options. Returns null when the construct is not
// actually present, in which case the caller moves on to another rule.
parse(opts: {
after: string;
match: string;
opts: InlineOpts;
}): InlineParse | null;
}
/* Successful outcome of an inline rule's 'parse'. */
interface InlineParse {
  // Node to emit for the parsed construct. Typed through the 'render'
  // namespace, the only engine module this file imports and the element
  // type of the list 'parseInline' collects results into.
  result: render.Node;
  // Source remaining after the construct. When omitted, 'parseInline'
  // resumes right after the text matched by the rule's pattern.
  rest?: string;
}
/* Target of a link: what the 'links' map stores per label and what the
 * link rule spreads onto the generated <a> element. */
interface LinkRef {
// Link destination.
href: string;
// Optional link title; null when the link has none.
title: string | null;
}
/* A single block-level syntax rule.
 * NOTE(review): looks like a placeholder — 'parse' takes an empty options
 * object and returns 'unknown', and nothing visible in this file calls it. */
interface BlockRule {
// Pattern identifying the block construct.
match: RegExp;
// Parses the block; argument and result shapes are not defined yet.
parse(opts: {}): unknown;
}
/* Built-in inline rules, keyed by construct name. Section numbers refer to
 * the CommonMark specification. 'image', 'emphasis', 'autolink' and 'html'
 * are not implemented yet: their 'parse' always returns null, so the
 * corresponding syntax is left as literal text. */
export const inlineRules: Record<string, InlineRule> = {
  code: {
    match: /`+/,
    // 6.1 - code spans
    parse({ after, match }) {
      // The closing delimiter is a backtick run of exactly the same length
      // as the opener. A plain substring search would also stop inside a
      // longer run (e.g. the "``" inside "` `` `"), so require that the
      // run is not adjacent to any other backtick.
      const closer = new RegExp("(?<!`)`{" + match.length + "}(?!`)");
      const close = closer.exec(after);
      if (!close) return null;
      // Line endings inside a code span are converted to spaces.
      let inner = after.slice(0, close.index).replace(/\n/g, " ");
      const rest = after.slice(close.index + match.length);
      // If the resulting string both begins and ends with a space
      // character, but does not consist entirely of space characters,
      // a single space character is removed from the front and back.
      if (inner.startsWith(" ") && inner.endsWith(" ") && /[^ ]/.test(inner)) {
        inner = inner.slice(1, -1);
      }
      return { result: <code>{inner}</code>, rest };
    },
  },
  link: {
    match: /(?<!!)\[/,
    // 6.3 - links
    parse({ after, opts }) {
      // Match '[' to let the inner-most link win.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      const { first: textSrc, rest: afterText } = splitText;

      let target: LinkRef | null | undefined;
      let rest: string;
      if (afterText[0] === "(") {
        // Inline link
        const splitTarget = splitFirst(afterText.slice(1), /\)/);
        if (!splitTarget) return null;
        target = parseLinkTarget(splitTarget.first);
        rest = splitTarget.rest;
      } else if (afterText[0] === "[") {
        const splitTarget = splitFirst(afterText.slice(1), /]/);
        if (!splitTarget) return null;
        const label = splitTarget.first.trim();
        // An empty second label is a collapsed reference link, which
        // reuses the link text as its label; otherwise it is a full
        // reference link.
        target = opts.links.get(label.length === 0 ? textSrc.trim() : label);
        rest = splitTarget.rest;
      } else {
        // Shortcut reference link. The label is trimmed like the other
        // reference forms so that all three resolve the same way.
        target = opts.links.get(textSrc.trim());
        rest = afterText;
      }
      if (!target) return null;

      const { href, title } = target;
      return {
        result: <a {...{ href, title }}>{parseInline(textSrc, opts)}</a>,
        rest,
      };
    },
  },
  image: {
    match: /!\[/,
    // 6.4 - images
    parse({ after }) {
      // Match '[' to let the inner-most link win.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      // TODO: not implemented. 'splitText.first' is the image description
      // and 'splitText.rest' is the source following the closing ']'.
      return null;
    },
  },
  emphasis: {
    // detect left-flanking delimiter runs, but this expression does not
    // consider preceding escapes. instead, those are programmatically
    // checked inside the parse function.
    match: /(?:\*+|(?<!\p{P})_+)(?!\s|\p{P}|$)/u,
    // 6.2 - emphasis and strong emphasis
    parse() {
      // TODO: not implemented.
      // find out how long the delim sequence is
      // look for 'ends'
      return null;
    },
  },
  // TODO: not implemented. The pattern can never match, so these entries
  // satisfy 'InlineRule' without ever being selected.
  autolink: {
    match: /(?!)/,
    parse() {
      return null;
    },
  },
  html: {
    match: /(?!)/,
    parse() {
      return null;
    },
  },
  br: {
    // Hard line break: trailing spaces or a backslash before a line ending.
    match: / +\n|\\\n/,
    parse() {
      return { result: <br /> };
    },
  },
};
/**
 * Parse the contents of an inline link's parentheses: a link destination
 * optionally followed by a title (CommonMark 6.3).
 *
 * The destination is either wrapped in angle brackets (and may then
 * contain spaces) or a run of non-whitespace characters; it may be empty.
 * The title must be separated from the destination by whitespace and be
 * wrapped in double quotes, single quotes, or parentheses. Backslash
 * escapes of ASCII punctuation are resolved in both parts.
 *
 * @param src - Text between the link's '(' and ')'.
 * @returns The destination and title (null when absent), or null when
 *   the text is not a valid link target.
 */
function parseLinkTarget(
  src: string,
): { href: string; title: string | null } | null {
  // A backslash before ASCII punctuation stands for the literal character.
  const unescape = (text: string) =>
    text.replace(/\\([!-\/:-@\[-`{-~])/g, "$1");

  const target = src.trim();

  let href: string;
  let afterHref: string;
  if (target.startsWith("<")) {
    // Bracketed destination: no line endings or unescaped '<' / '>'.
    const bracketed = /^<((?:\\.|[^<>\\\n])*)>/.exec(target);
    if (!bracketed) return null;
    href = bracketed[1] ?? "";
    afterHref = target.slice(bracketed[0].length);
  } else {
    const bare = /^\S*/.exec(target);
    href = bare ? bare[0] : "";
    afterHref = target.slice(href.length);
  }

  let title: string | null = null;
  if (afterHref !== "") {
    // Anything left must be exactly one whitespace-separated title.
    const quoted =
      /^\s+(?:"((?:\\.|[^"\\])*)"|'((?:\\.|[^'\\])*)'|\(((?:\\.|[^()\\])*)\))$/s
        .exec(afterHref);
    if (!quoted) return null;
    title = unescape(quoted[1] ?? quoted[2] ?? quoted[3] ?? "");
  }

  return { href: unescape(href), title };
}
/**
 * Find the first unescaped delimiter in 'text' while honouring backslash
 * escapes.
 *
 * A delimiter preceded by an odd number of backslashes is escaped: the
 * escaping backslash is removed, the delimiter itself is kept as literal
 * text, and the search continues behind it.
 *
 * @param text - Source to search.
 * @param match - Pattern describing the delimiter (non-global).
 * @returns The text before the delimiter ('first'), the delimiter that was
 *   found ('delim') and everything after it ('rest'); null when there is
 *   no unescaped delimiter.
 */
function splitFirst(text: string, match: RegExp) {
  let first = "";
  while (true) {
    const find = text.match(match);
    if (!find) return null;
    const delim = find[0];
    const index = UNWRAP(find.index);

    // Count the run of backslashes directly in front of the delimiter;
    // only an odd count escapes it.
    let backslashes = 0;
    while (backslashes < index && text[index - 1 - backslashes] === "\\") {
      backslashes += 1;
    }

    const rest = text.slice(index + delim.length);
    if (backslashes % 2 === 0) {
      return { first: first + text.slice(0, index), delim, rest };
    }

    // Escaped: drop the escaping backslash but keep the delimiter, which
    // is ordinary text here.
    first += text.slice(0, index - 1) + delim;
    text = rest;
  }
}
// NOTE(review): ad-hoc smoke test — this statement runs when the module is
// evaluated and prints the rendered result of a code-span sample. It looks
// like a debugging leftover; confirm before shipping.
console.log(render.sync(parseInline("meow `bwaa` `` ` `` `` `z``")));
2025-07-08 01:09:55 -07:00
import * as render from "#engine/render";