sitegen/framework/lib/markdown.tsx
chloe caruso c5ac450f21 feat: dynamic page regeneration (#24)
the asset system is reworked to support "dynamic" entries, where each
entry is a separate file on disk containing the latest generation's
headers+raw+gzip+zstd. when calling view.regenerate, it will look for
pages that had "export const regenerate" during generation, and render
those pages using the view system, but then store the results as assets
instead of sending as a response.

pages configured as regenerable are also bundled as views, using the
non-aliasing key "page:${page.id}". this cannot alias because file
paths may not contain a colon.
2025-08-11 22:43:27 -07:00

200 lines
6.1 KiB
TypeScript

/* Implementation of [CommonMark] specification for markdown with support
* for custom syntax extensions via the parser options. Instead of
* returning an AST that has a second conversion pass to JSX, the
* returned value of 'parse' is 'engine.Node' which can be stringified
* via Clover's SSR engine. This way, generation optimizations, async
* components, and other features are gained for free here.
*
* [CommonMark]: https://spec.commonmark.org/0.31.2/
*
* NOTE(review): the function body below is currently empty, so 'parse'
* ignores both 'src' and 'options' and returns undefined. The behavior
* described above is the intended contract, not what the code does yet.
*/
function parse(src: string, options: Partial<ParseOpts> = {}) {}
/* JSX component form of 'parse'. A component receives a single props
 * object and must have a capitalized name, so this wrapper takes the
 * markdown text from the 'src' prop and forwards every remaining prop to
 * 'parse' as its options. */
export function Markdown(props: { src: string } & Partial<ParseOpts>) {
  const { src, ...options } = props;
  return parse(src, options);
}
// TODO: This implementation is flawed because it is impossible to sanely handle
// emphasis and strong emphasis, and all their edge cases. Instead of implementing
// these through the extensions interface, they should be special cased.
/* Parse inline markdown into a flat list of nodes: plain text segments
 * interleaved with the results produced by the inline rules.
 *
 * - 'src' is the inline source text.
 * - 'options.rules' is the rule table to apply (defaults to 'inlineRules').
 * - 'options.links' holds link reference definitions handed to the rules
 *   (defaults to an empty map).
 *
 * The left-most construct in the source always wins. When several rules
 * match at the same offset they are tried in table order. A rule declines
 * a match by returning a falsy value; the matched text then stays literal
 * and scanning resumes right after it, so later occurrences of the same
 * rule still get a chance. */
function parseInline(src: string, options: Partial<InlineOpts> = {}) {
  const { rules = inlineRules, links = new Map() } = options;
  const opts: InlineOpts = { rules, links };
  const parts: render.Node[] = [];
  // Placeholder rules without a 'match' are skipped. Every pattern is cloned
  // with the 'g' flag so that scanning can resume from any offset through
  // 'lastIndex'. Slicing 'src' instead would hide the preceding characters
  // from lookbehind assertions such as the one used by the link rule.
  const scanners = Object.values(rules)
    .filter((rule) => rule.match)
    .map((rule) => ({
      rule,
      pattern: new RegExp(
        rule.match.source,
        rule.match.flags.replace(/[gy]/g, "") + "g",
      ),
    }));
  // Offset in 'src' from which rules are searched. Text before it that has
  // not been emitted yet is still part of 'src' and is flushed as plain text
  // by the next successful rule (or at the very end).
  let from = 0;
  scan: while (from <= src.length) {
    const offset = from;
    const hits = scanners.flatMap(({ rule, pattern }) => {
      pattern.lastIndex = offset;
      const hit = pattern.exec(src);
      return hit ? [{ rule, at: hit.index, text: hit[0] }] : [];
    });
    if (hits.length === 0) break;
    const earliest = Math.min(...hits.map((hit) => hit.at));
    // Length of the shortest declined match at 'earliest'; used to step over
    // the literal delimiter (e.g. a whole unmatched backtick run).
    let declined = Infinity;
    for (const { rule, at, text } of hits) {
      if (at !== earliest) continue;
      const after = src.slice(at + text.length);
      const parsed = rule.parse({ after, match: text, opts });
      if (parsed) {
        parts.push(src.slice(0, at), parsed.result);
        // A rule may hand back its own remainder; otherwise continue with
        // everything that followed the matched delimiter.
        src = parsed.rest ?? after;
        from = 0;
        continue scan;
      }
      declined = Math.min(declined, text.length);
    }
    // Nothing accepted this position: keep the text literal and move on.
    // Always advance by at least one character so zero-length matches
    // cannot stall the loop.
    from = earliest + Math.max(1, declined);
  }
  parts.push(src);
  return parts;
}
// -- interfaces --
/* Options accepted (as a Partial) by 'parse' and the 'Markdown' component. */
interface ParseOpts {
// presumably toggles GitHub Flavored Markdown extensions; nothing in the
// visible source reads this flag yet -- TODO confirm
gfm: boolean;
// Named block-level rules. Not consumed by any visible code yet.
blockRules: Record<string, BlockRule>;
// Named inline-level rules.
inlineRules: Record<string, InlineRule>;
}
/* Options for 'parseInline'; the same object is handed to every rule. */
interface InlineOpts {
// Rule table; 'parseInline' tries its values in insertion order.
rules: Record<string, InlineRule>;
// Link reference definitions looked up by name by the 'link' rule.
links: Map<string, LinkRef>;
}
/* One inline syntax extension. */
interface InlineRule {
// Pattern locating the opening delimiter of the construct in the source.
match: RegExp;
// Called with the matched text ('match'), the source following it
// ('after'), and the active options. Returns null to decline the match.
parse(opts: {
after: string;
match: string;
opts: InlineOpts;
}): InlineParse | null;
}
/* Successful result of an inline rule. */
interface InlineParse {
// Node emitted for the construct.
// NOTE(review): 'engine' is not imported in the visible source, while
// 'parseInline' stores these results in a 'render.Node[]' -- confirm whether
// this should read 'render.Node'.
result: engine.Node;
// Source text left to parse after the construct. When omitted,
// 'parseInline' continues with the 'after' text it passed to the rule.
rest?: string;
}
/* Destination and optional title of a link. */
interface LinkRef {
href: string;
title: string | null;
}
/* One block syntax extension. The parse signature is still a placeholder
* and no visible code uses this interface beyond 'ParseOpts'. */
interface BlockRule {
match: RegExp;
parse(opts: {}): unknown;
}
/* Built-in inline syntax rules, keyed by name. Each rule pairs a 'match'
 * pattern that locates the opening delimiter with a 'parse' function that
 * receives the matched text and the source following it. 'parse' returns
 * the produced node plus the remaining source, or null to decline so the
 * text stays literal. Section numbers refer to the CommonMark spec. */
export const inlineRules: Record<string, InlineRule> = {
  code: {
    match: /`+/,
    // 6.1 - code spans
    parse({ after, match }) {
      // The closing delimiter is a backtick run of exactly the same length
      // as the opening one; longer or shorter runs are part of the content.
      const closer = new RegExp("(?<!`)`{" + match.length + "}(?!`)");
      const end = after.search(closer);
      if (end === -1) return null;
      // Line endings inside a code span are converted to spaces.
      let inner = after.slice(0, end).replace(/\n/g, " ");
      const rest = after.slice(end + match.length);
      // If the resulting string both begins and ends with a space
      // character, but does not consist entirely of space characters,
      // a single space character is removed from the front and back.
      if (inner.startsWith(" ") && inner.endsWith(" ") && /[^ ]/.test(inner)) {
        inner = inner.slice(1, -1);
      }
      return { result: <code>{inner}</code>, rest };
    },
  },
  link: {
    match: /(?<!!)\[/,
    // 6.3 - links
    parse({ after, opts }) {
      // Match '[' to let the inner-most link win.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      const { first: textSrc, rest: afterText } = splitText;
      let href: string,
        title: string | null = null,
        rest: string;
      if (afterText[0] === "(") {
        // Inline link
        const splitTarget = splitFirst(afterText.slice(1), /\)/);
        if (!splitTarget) return null;
        ({ rest } = splitTarget);
        const target = parseLinkTarget(splitTarget.first);
        if (!target) return null;
        ({ href, title } = target);
      } else if (afterText[0] === "[") {
        const splitTarget = splitFirst(afterText.slice(1), /]/);
        if (!splitTarget) return null;
        const name = splitTarget.first.trim().length === 0
          // Collapsed reference link
          ? textSrc.trim()
          // Full reference link
          : splitTarget.first.trim();
        const target = opts.links.get(name);
        if (!target) return null;
        ({ href, title } = target);
        ({ rest } = splitTarget);
      } else {
        // Shortcut reference link. The label is trimmed like the collapsed
        // and full forms above so all three look up the same key.
        const target = opts.links.get(textSrc.trim());
        if (!target) return null;
        ({ href, title } = target);
        rest = afterText;
      }
      return {
        result: <a {...{ href, title }}>{parseInline(textSrc, opts)}</a>,
        rest,
      };
    },
  },
  image: {
    match: /!\[/,
    // 6.4 - images
    parse({ after }) {
      // Match '[' to let the inner-most link win.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      // TODO: parse the image target and build the node. Until then every
      // match is declined, which leaves the source text untouched.
      return null;
    },
  },
  emphasis: {
    // detect left-flanking delimiter runs, but this expression does not
    // consider preceding escapes. instead, those are programmatically
    // checked inside the parse function.
    match: /(?:\*+|(?<!\p{P})_+)(?!\s|\p{P}|$)/u,
    // 6.2 - emphasis and strong emphasis
    parse() {
      // TODO: find out how long the delim sequence is, then look for 'ends'.
      // The escape check mentioned above needs the text before the match,
      // which the rule interface does not pass in yet. Declines for now.
      return null;
    },
  },
  // Placeholders: without a 'match' pattern these entries are skipped by
  // 'parseInline'.
  autolink: {},
  html: {},
  br: {
    // 6.7 - hard line breaks: two or more trailing spaces, or a backslash,
    // directly before the line ending. A single trailing space does not count.
    match: / {2,}\n|\\\n/,
    parse() {
      return { result: <br /> };
    },
  },
};
/* Parse the text between the parentheses of an inline link,
 * e.g. `/url "title"`, into its destination and optional title.
 *
 * The destination is either enclosed in '<' and '>' (and may then contain
 * spaces) or a run of non-whitespace characters; it may be empty. The
 * title, when present, must be separated from the destination by
 * whitespace and be wrapped in single or double quotes. Backslash escapes
 * of ASCII punctuation are resolved in both parts.
 *
 * Returns null when 'src' is not a valid link target, so the caller can
 * treat the brackets as literal text. */
function parseLinkTarget(
  src: string,
): { href: string; title: string | null } | null {
  // Backslash escapes only apply to ASCII punctuation characters.
  const unescape = (text: string) =>
    text.replace(/\\([!-\/:-@\[-`{-~])/g, "$1");
  const text = src.trim();
  let href: string;
  let remainder: string;
  if (text.startsWith("<")) {
    const close = text.indexOf(">");
    if (close === -1) return null;
    href = text.slice(1, close);
    // A bracketed destination cannot contain '<' or a line ending.
    if (/[<\n]/.test(href)) return null;
    remainder = text.slice(close + 1);
  } else {
    const end = text.search(/\s/);
    href = end === -1 ? text : text.slice(0, end);
    remainder = end === -1 ? "" : text.slice(end);
  }
  if (remainder.length === 0) return { href: unescape(href), title: null };
  // Anything after the destination must be a whitespace-separated title.
  if (!/^\s/.test(remainder)) return null;
  const quoted = /^(?:"((?:[^"\\]|\\[\s\S])*)"|'((?:[^'\\]|\\[\s\S])*)')$/
    .exec(remainder.trim());
  if (!quoted) return null;
  return {
    href: unescape(href),
    title: unescape(quoted[1] ?? quoted[2] ?? ""),
  };
}
/* Find a delimiter while considering backslash escapes.
 *
 * Splits 'text' at the first occurrence of 'match' that is not escaped,
 * i.e. not preceded by an odd number of backslashes. Returns the text
 * before the delimiter ('first'), the matched delimiter ('delim'), and the
 * text after it ('rest'), or null if no unescaped delimiter exists.
 *
 * Escaped delimiters are kept in 'first' as literal characters with their
 * escaping backslash removed. */
function splitFirst(text: string, match: RegExp) {
  let first = "";
  while (true) {
    // 'exec' honors 'lastIndex' on global/sticky patterns; always search
    // from the start of the remaining text.
    match.lastIndex = 0;
    const find = match.exec(text);
    if (!find) return null;
    const delim = find[0];
    const index = find.index;
    // An odd number of backslashes directly before the delimiter escapes it.
    let backslashes = 0;
    while (backslashes < index && text[index - backslashes - 1] === "\\") {
      backslashes += 1;
    }
    const rest = text.slice(index + delim.length);
    if (backslashes % 2 === 0) {
      return { first: first + text.slice(0, index), delim, rest };
    }
    // Drop the escaping backslash but keep the delimiter itself as text,
    // then keep looking in what follows.
    first += text.slice(0, index - 1) + delim;
    text = rest;
  }
}
// NOTE(review): debugging leftover. As a top-level statement this runs every
// time the module is evaluated and prints the rendered result of a code-span
// sample. Remove it or move it into a test before this module is imported
// by real pages.
console.log(render.sync(parseInline("meow `bwaa` `` ` `` `` `z``")));
import * as render from "#engine/render";