sitegen/framework/lib/markdown.tsx

/* Implementation of the [CommonMark] specification for markdown, with
 * support for custom syntax extensions via the parser options. Instead of
 * returning an AST that needs a second conversion pass to JSX, the value
 * returned by 'parse' is meant to be a 'render.Node', which can be
 * stringified by Clover's rendering engine (imported in this file as
 * 'render' from "#engine/render"). This way, generation optimizations,
 * async components, and other engine features are gained for free here.
 *
 * NOTE: 'parse' is not implemented yet. The body is empty, so it currently
 * returns 'undefined' and ignores both 'src' and 'options'.
 *
 * [CommonMark]: https://spec.commonmark.org/0.31.2/
 */
function parse(src: string, options: Partial<ParseOpts> = {}) {}

/* JSX component form of 'parse'. Components receive a single props object
 * and need a capitalized name, so this unpacks 'src' from the props and
 * forwards every remaining prop to 'parse' as its options. */
export function Markdown(props: { src: string } & Partial<ParseOpts>) {
  const { src, ...options } = props;
  return parse(src, options);
}

/* Parse inline markdown into a flat list of render nodes.
 *
 * Rules are tried in declaration order, which doubles as precedence: the
 * first rule that can parse any of its matches wins, even when a later rule
 * matches earlier in the text. For every rule the matches are visited left
 * to right until 'rule.parse' accepts one; a declined match (null) does not
 * prevent later matches of the same rule from being tried. The text in front
 * of the accepted match is parsed recursively so lower-precedence syntax in
 * it is not lost, then parsing continues with 'rest' (or, when the rule does
 * not return one, the text right after the match).
 *
 * - src: inline source text.
 * - options.rules: rule set to use, defaults to 'inlineRules'.
 * - options.links: link reference definitions, defaults to an empty map.
 *
 * Returns the text segments and rule results in source order. An empty
 * string is emitted when a match starts at the very beginning or ends at
 * the very end of the remaining source.
 *
 * TODO: This implementation is flawed because it is impossible to sanely
 * handle emphasis and strong emphasis, and all their edge cases. Instead of
 * making these using the extensions interface, they should be special cased.
 */
function parseInline(
  src: string,
  options: Partial<InlineOpts> = {},
): render.Node[] {
  const { rules = inlineRules, links = new Map() } = options;
  const opts: InlineOpts = { rules, links };
  const parts: render.Node[] = [];
  // Scan with a private global clone of each rule's expression. This allows
  // resuming after a declined match while keeping look-behind context, and
  // never touches 'lastIndex' on the shared rule object. Entries without a
  // 'match' (unimplemented placeholders) are skipped.
  const scanners = Object.values(rules)
    .filter((rule) => rule.match)
    .map((rule) => {
      const { source, flags } = rule.match;
      return {
        rule,
        re: new RegExp(source, flags.includes("g") ? flags : flags + "g"),
      };
    });
  parse: while (true) {
    for (const { rule, re } of scanners) {
      re.lastIndex = 0;
      for (let m = re.exec(src); m !== null; m = re.exec(src)) {
        if (m[0].length === 0) {
          // An empty match consumes nothing; accepting it could never make
          // progress, so step over it.
          re.lastIndex += 1;
          continue;
        }
        const after = src.slice(m.index + m[0].length);
        const parsed = rule.parse({ after, match: m[0], opts });
        if (!parsed) continue;
        const before = src.slice(0, m.index);
        // 'before' is strictly shorter than 'src', so the recursion ends.
        parts.push(
          ...(before.length > 0 ? parseInline(before, opts) : [before]),
          parsed.result,
        );
        src = parsed.rest ?? after;
        continue parse;
      }
    }
    break;
  }
  parts.push(src);
  return parts;
}

// -- interfaces --
/* Options accepted by 'parse' and the 'Markdown' component. */
interface ParseOpts {
  // Presumably toggles GitHub Flavored Markdown extensions; nothing in this
  // file reads it yet. TODO confirm once 'parse' is implemented.
  gfm: boolean;
  blockRules: Record<string, BlockRule>;
  inlineRules: Record<string, InlineRule>;
}
/* Options threaded through 'parseInline' and handed to every inline rule. */
interface InlineOpts {
  // Rules to apply, in precedence order.
  rules: Record<string, InlineRule>;
  // Link reference definitions, looked up by the 'link' rule.
  links: Map<string, LinkRef>;
}
/* One inline syntax extension. */
interface InlineRule {
  // Searched for in the remaining source to locate a candidate.
  match: RegExp;
  // Called with the matched text ('match'), the source following it
  // ('after'), and the active options. Returns null to decline the match.
  parse(opts: {
    after: string;
    match: string;
    opts: InlineOpts;
  }): InlineParse | null;
}
/* Successful result of an inline rule. */
interface InlineParse {
  // Node emitted in place of the matched syntax.
  result: render.Node;
  // Source left to parse; 'parseInline' falls back to 'after' when omitted.
  rest?: string;
}
/* Target of a link: destination plus optional title. */
interface LinkRef {
  href: string;
  title: string | null;
}
/* Block-level rule. Placeholder: the 'parse' contract is not designed yet. */
interface BlockRule {
  match: RegExp;
  parse(opts: {}): unknown;
}
/* Built-in inline rules. 'parseInline' tries them in declaration order, so
 * entries listed first take precedence (code spans bind tighter than links).
 * Each 'parse' returns null to decline a match. */
export const inlineRules: Record<string, InlineRule> = {
  code: {
    match: /`+/,
    // 6.1 - code spans
    parse({ after, match }) {
      // The closing backtick string must be a run of exactly the same
      // length as the opening one; longer or shorter runs are content.
      let end = -1;
      for (const run of after.matchAll(/`+/g)) {
        if (run[0].length === match.length) {
          end = UNWRAP(run.index);
          break;
        }
      }
      if (end === -1) return null;
      // First, line endings are converted to spaces.
      let inner = after.slice(0, end).replace(/\r\n?|\n/g, " ");
      const rest = after.slice(end + match.length);
      // If the resulting string both begins and ends with a space
      // character, but does not consist entirely of space characters,
      // a single space character is removed from the front and back.
      if (inner.startsWith(" ") && inner.endsWith(" ") && /[^ ]/.test(inner)) {
        inner = inner.slice(1, -1);
      }
      return { result: <code>{inner}</code>, rest };
    },
  },
  link: {
    match: /(?<!!)\[/,
    // 6.3 - links
    parse({ after, opts }) {
      // Match '[' to let the inner-most link win.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      const { first: textSrc, rest: afterText } = splitText;
      let href: string,
        title: string | null = null,
        rest: string;
      if (afterText[0] === "(") {
        // Inline link
        const splitTarget = splitFirst(afterText.slice(1), /\)/);
        if (!splitTarget) return null;
        ({ rest } = splitTarget);
        const target = parseLinkTarget(splitTarget.first);
        if (!target) return null;
        ({ href, title } = target);
      } else if (afterText[0] === "[") {
        const splitTarget = splitFirst(afterText.slice(1), /]/);
        if (!splitTarget) return null;
        const name = splitTarget.first.trim().length === 0
          // Collapsed reference link
          ? textSrc.trim()
          // Full Reference Link
          : splitTarget.first.trim();
        const target = opts.links.get(name);
        if (!target) return null;
        ({ href, title } = target);
        ({ rest } = splitTarget);
      } else {
        // Shortcut reference link. The label is trimmed the same way as
        // for the collapsed and full forms above.
        const target = opts.links.get(textSrc.trim());
        if (!target) return null;
        ({ href, title } = target);
        rest = afterText;
      }
      return {
        result: <a {...{ href, title }}>{parseInline(textSrc, opts)}</a>,
        rest,
      };
    },
  },
  image: {
    match: /!\[/,
    // 6.4 - images
    parse({ after }) {
      // Match '[' to let the inner-most link win.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      // TODO: parse the target that follows the description
      // ('splitText.rest') and emit the image node. Until that exists the
      // rule declines explicitly, so the source stays as literal text.
      return null;
    },
  },
  emphasis: {
    // detect left-flanking delimiter runs, but this expression does not
    // consider preceding escapes. instead, those are programmatically
    // checked inside the parse function.
    match: /(?:\*+|(?<!\p{P})_+)(?!\s|\p{P}|$)/u,
    // 6.2 - emphasis and strong emphasis
    parse() {
      // TODO: find out how long the delim sequence is
      // TODO: look for 'ends'
      // Not implemented: decline so the delimiters stay literal text.
      return null;
    },
  },
  // TODO: autolinks (6.5) and raw HTML (6.6) are not implemented. The
  // placeholders use an expression that can never match, which keeps them
  // valid 'InlineRule' values.
  autolink: {
    match: /(?!)/,
    parse() {
      return null;
    },
  },
  html: {
    match: /(?!)/,
    parse() {
      return null;
    },
  },
  br: {
    // Hard line break: two or more trailing spaces, or a backslash,
    // directly before a line ending.
    match: /  +\n|\\\n/,
    parse() {
      return { result: <br /> };
    },
  },
};

/* Parse the text found between the parentheses of an inline link: an
 * optional destination followed by an optional quoted title, with optional
 * whitespace around both.
 *
 * The destination is either wrapped in '<' '>' (and may then contain
 * spaces) or a run without whitespace or control characters. The title is
 * wrapped in double or single quotes; the parenthesized title form is not
 * recognized here. Backslash escapes of ASCII punctuation are resolved in
 * both parts. Entity references are not decoded.
 *
 * Returns null when 'src' is not a well-formed target, e.g. a bare
 * destination containing a space.
 */
function parseLinkTarget(
  src: string,
): { href: string; title: string | null } | null {
  const found =
    /^[ \t\r\n]*(?:<((?:\\[^\n]|[^<>\\\n])*)>|([^\s\x00-\x1f\x7f<][^\s\x00-\x1f\x7f]*)?)(?:[ \t\r\n]+(?:"((?:\\.|[^"\\])*)"|'((?:\\.|[^'\\])*)'))?[ \t\r\n]*$/s
      .exec(src);
  if (!found) return null;
  // A backslash in front of ASCII punctuation stands for that character.
  const unescapePunct = (text: string) =>
    text.replace(/\\([!-\/:-@\[-`{-~])/g, "$1");
  const rawTitle = found[3] ?? found[4];
  return {
    // Group 1 is the '<...>' form, group 2 the bare form; both are absent
    // when the destination is omitted.
    href: unescapePunct(found[1] ?? found[2] ?? ""),
    title: rawTitle === undefined ? null : unescapePunct(rawTitle),
  };
}

/* Find the first unescaped delimiter in 'text'.
 *
 * - text: source to search.
 * - match: expression describing the delimiter. It must not carry the 'g'
 *   flag, because the position of the match is required.
 *
 * A delimiter preceded by an odd number of backslashes is escaped: the
 * escaping backslash is dropped, the delimiter itself is kept as ordinary
 * text, and the search continues behind it.
 *
 * Returns null when no unescaped delimiter exists. Otherwise returns the
 * text in front of the delimiter ('first'), the delimiter that was found
 * ('delim'), and the text following it ('rest').
 */
function splitFirst(text: string, match: RegExp) {
  let first = "",
    delim: string,
    escaped: boolean;
  do {
    const find = text.match(match);
    if (!find) return null;
    delim = find[0];
    const index = UNWRAP(find.index);
    // Count the backslashes directly in front of the delimiter; an odd
    // count means the last one escapes it.
    let i = index - 1;
    escaped = false;
    while (i >= 0 && text[i] === "\\") (escaped = !escaped), (i -= 1);
    first += escaped
      // Drop the escaping backslash but keep the delimiter as literal text.
      ? text.slice(0, index - 1) + delim
      : text.slice(0, index);
    text = text.slice(index + delim.length);
  } while (escaped);
  return { first, delim, rest: text };
}

// Development smoke test: runs whenever this module is loaded and prints
// the rendered output of a few code-span edge cases.
{
  const sample = parseInline("meow `bwaa` `` ` `` `` `z``");
  console.log(render.sync(sample));
}

import * as render from "#engine/render";
feat: dynamic page regeneration (#24) the asset system is reworked to support "dynamic" entries, where each entry is a separate file on disk containing the latest generation's headers+raw+gzip+zstd. when calling view.regenerate, it will look for pages that had "export const regenerate" during generation, and render those pages using the view system, but then store the results as assets instead of sending as a response. pages configured as regenerable are also bundled as views, using the non-aliasing key "page:${page.id}". this cannot alias because file paths may not contain a colon. 2025-08-11 22:43:27 -07:00			`/* Implementation of [CommonMark] specification for markdown with support`
start the markdown parser 2025-07-08 01:09:55 -07:00			`* for custom syntax extensions via the parser options. Instead of`
			`* returning an AST that has a second conversion pass to JSX, the`
			`* returned value of 'parse' is 'engine.Node' which can be stringified`
feat: dynamic page regeneration (#24) the asset system is reworked to support "dynamic" entries, where each entry is a separate file on disk containing the latest generation's headers+raw+gzip+zstd. when calling view.regenerate, it will look for pages that had "export const regenerate" during generation, and render those pages using the view system, but then store the results as assets instead of sending as a response. pages configured as regenerable are also bundled as views, using the non-aliasing key "page:${page.id}". this cannot alias because file paths may not contain a colon. 2025-08-11 22:43:27 -07:00			`* via Clover's SSR engine. This way, generation optimizations, async`
start the markdown parser 2025-07-08 01:09:55 -07:00			`* components, and other features are gained for free here.`
feat: dynamic page regeneration (#24) the asset system is reworked to support "dynamic" entries, where each entry is a separate file on disk containing the latest generation's headers+raw+gzip+zstd. when calling view.regenerate, it will look for pages that had "export const regenerate" during generation, and render those pages using the view system, but then store the results as assets instead of sending as a response. pages configured as regenerable are also bundled as views, using the non-aliasing key "page:${page.id}". this cannot alias because file paths may not contain a colon. 2025-08-11 22:43:27 -07:00			`*`
			`* [CommonMark]: https://spec.commonmark.org/0.31.2/`
start the markdown parser 2025-07-08 01:09:55 -07:00			`*/`
chore: rework Clover Engine API, remove "SSR" term "server side rendering" is a misleading term since it implies there is a server. that isn't neccecarily the case here, since it supports running in the browser. I think "clover engine" is cute, short for "clover html rendering engine". Instead of "server side rendering", it's just rendering. This commit makes things a lot more concise, such as `ssr.ssrAsync` being renamed to `render.async` to play nicely with namespaced imports. `getCurrentRender` and `setCurrentRender` are just `current` and `setCurrent`, and the addon interface has been redesigned to force symbols with a wrapping helper. 2025-08-02 19:22:07 -07:00			`function parse(src: string, options: Partial<ParseOpts> = {}) {}`
start the markdown parser 2025-07-08 01:09:55 -07:00
			`/* Render markdown content. Same function as 'parse', but JSX components`
			`* only take one argument and must start with a capital letter. */`
chore: rework Clover Engine API, remove "SSR" term "server side rendering" is a misleading term since it implies there is a server. that isn't neccecarily the case here, since it supports running in the browser. I think "clover engine" is cute, short for "clover html rendering engine". Instead of "server side rendering", it's just rendering. This commit makes things a lot more concise, such as `ssr.ssrAsync` being renamed to `render.async` to play nicely with namespaced imports. `getCurrentRender` and `setCurrentRender` are just `current` and `setCurrent`, and the addon interface has been redesigned to force symbols with a wrapping helper. 2025-08-02 19:22:07 -07:00			`export function Markdown({`
			`src,`
			`...options`
			`}: { src: string } & Partial<ParseOpts>) {`
finish scan3 2025-07-08 20:48:30 -07:00			`return parse(src, options);`
start the markdown parser 2025-07-08 01:09:55 -07:00			`}`

feat: dynamic page regeneration (#24) the asset system is reworked to support "dynamic" entries, where each entry is a separate file on disk containing the latest generation's headers+raw+gzip+zstd. when calling view.regenerate, it will look for pages that had "export const regenerate" during generation, and render those pages using the view system, but then store the results as assets instead of sending as a response. pages configured as regenerable are also bundled as views, using the non-aliasing key "page:${page.id}". this cannot alias because file paths may not contain a colon. 2025-08-11 22:43:27 -07:00			`// TODO: This implementation is flawed because it is impossible to sanely handle`
			`// emphasis and strong emphasis, and all their edge cases. Instead of making these`
			`// using extensions interface, they should be special cased.`
start the markdown parser 2025-07-08 01:09:55 -07:00			`function parseInline(src: string, options: Partial<InlineOpts> = {}) {`
			`const { rules = inlineRules, links = new Map() } = options;`
			`const opts: InlineOpts = { rules, links };`
chore: rework Clover Engine API, remove "SSR" term "server side rendering" is a misleading term since it implies there is a server. that isn't neccecarily the case here, since it supports running in the browser. I think "clover engine" is cute, short for "clover html rendering engine". Instead of "server side rendering", it's just rendering. This commit makes things a lot more concise, such as `ssr.ssrAsync` being renamed to `render.async` to play nicely with namespaced imports. `getCurrentRender` and `setCurrentRender` are just `current` and `setCurrent`, and the addon interface has been redesigned to force symbols with a wrapping helper. 2025-08-02 19:22:07 -07:00			`const parts: render.Node[] = [];`
start the markdown parser 2025-07-08 01:09:55 -07:00			`const ruleList = Object.values(rules);`
finish scan3 2025-07-08 20:48:30 -07:00			`parse: while (true) {`
start the markdown parser 2025-07-08 01:09:55 -07:00			`for (const rule of ruleList) {`
			`if (!rule.match) continue;`
			`const match = src.match(rule.match);`
			`if (!match) continue;`
			`const index = UNWRAP(match.index);`
			`const after = src.slice(index + match[0].length);`
			`const parse = rule.parse({ after, match: match[0], opts });`
			`if (!parse) continue;`
finish scan3 2025-07-08 20:48:30 -07:00			`// parse before`
start the markdown parser 2025-07-08 01:09:55 -07:00			`parts.push(src.slice(0, index), parse.result);`
			`src = parse.rest ?? after;`
			`continue parse;`
			`}`
			`break;`
finish scan3 2025-07-08 20:48:30 -07:00			`}`
start the markdown parser 2025-07-08 01:09:55 -07:00			`parts.push(src);`
			`return parts;`
			`}`

			`// -- interfaces --`
			`interface ParseOpts {`
finish scan3 2025-07-08 20:48:30 -07:00			`gfm: boolean;`
start the markdown parser 2025-07-08 01:09:55 -07:00			`blockRules: Record<string, BlockRule>;`
			`inlineRules: Record<string, InlineRule>;`
			`}`
			`interface InlineOpts {`
			`rules: Record<string, InlineRule>;`
			`links: Map<string, LinkRef>;`
			`}`
			`interface InlineRule {`
			`match: RegExp;`
			`parse(opts: {`
			`after: string;`
			`match: string;`
			`opts: InlineOpts;`
			`}): InlineParse \| null;`
			`}`
			`interface InlineParse {`
			`result: engine.Node;`
			`rest?: string;`
			`}`
			`interface LinkRef {`
			`href: string;`
			`title: string \| null;`
			`}`
			`interface BlockRule {`
			`match: RegExp;`
			`parse(opts: {}): unknown;`
			`}`
			`export const inlineRules: Record<string, InlineRule> = {`
			`code: {`
			match: /`+/,
			`// 6.1 - code spans`
			`parse({ after, match }) {`
			`const end = after.indexOf(match);`
			`if (end === -1) return null;`
			`let inner = after.slice(0, end);`
			`const rest = after.slice(end + match.length);`
			`// If the resulting string both begins and ends with a space`
			`// character, but does not consist entirely of space characters,`
			`// a single space character is removed from the front and back.`
			`if (inner.match(/^ [^ ]+ $/)) inner = inner.slice(1, -1);`
			`return { result: <code>{inner}</code>, rest };`
			`},`
			`},`
			`link: {`
			`match: /(?<!!)\[/,`
finish scan3 2025-07-08 20:48:30 -07:00			`// 6.3 - links`
start the markdown parser 2025-07-08 01:09:55 -07:00			`parse({ after, opts }) {`
			`// Match '[' to let the inner-most link win.`
			`const splitText = splitFirst(after, /[[\]]/);`
			`if (!splitText) return null;`
			`if (splitText.delim !== "]") return null;`
			`const { first: textSrc, rest: afterText } = splitText;`
chore: rework Clover Engine API, remove "SSR" term "server side rendering" is a misleading term since it implies there is a server. that isn't neccecarily the case here, since it supports running in the browser. I think "clover engine" is cute, short for "clover html rendering engine". Instead of "server side rendering", it's just rendering. This commit makes things a lot more concise, such as `ssr.ssrAsync` being renamed to `render.async` to play nicely with namespaced imports. `getCurrentRender` and `setCurrentRender` are just `current` and `setCurrent`, and the addon interface has been redesigned to force symbols with a wrapping helper. 2025-08-02 19:22:07 -07:00			`let href: string,`
			`title: string \| null = null,`
			`rest: string;`
start the markdown parser 2025-07-08 01:09:55 -07:00			`if (afterText[0] === "(") {`
			`// Inline link`
			`const splitTarget = splitFirst(afterText.slice(1), /\)/);`
			`if (!splitTarget) return null;`
			`({ rest } = splitTarget);`
			`const target = parseLinkTarget(splitTarget.first);`
			`if (!target) return null;`
			`({ href, title } = target);`
			`} else if (afterText[0] === "[") {`
			`const splitTarget = splitFirst(afterText.slice(1), /]/);`
			`if (!splitTarget) return null;`
feat: dynamic page regeneration (#24) the asset system is reworked to support "dynamic" entries, where each entry is a separate file on disk containing the latest generation's headers+raw+gzip+zstd. when calling view.regenerate, it will look for pages that had "export const regenerate" during generation, and render those pages using the view system, but then store the results as assets instead of sending as a response. pages configured as regenerable are also bundled as views, using the non-aliasing key "page:${page.id}". this cannot alias because file paths may not contain a colon. 2025-08-11 22:43:27 -07:00			`const name = splitTarget.first.trim().length === 0`
			`// Collapsed reference link`
			`? textSrc.trim()`
			`// Full Reference Link`
			`: splitTarget.first.trim();`
start the markdown parser 2025-07-08 01:09:55 -07:00			`const target = opts.links.get(name);`
			`if (!target) return null;`
			`({ href, title } = target);`
			`({ rest } = splitTarget);`
			`} else {`
			`// Shortcut reference link`
			`const target = opts.links.get(textSrc);`
			`if (!target) return null;`
			`({ href, title } = target);`
			`rest = afterText;`
			`}`
			`return {`
			`result: <a {...{ href, title }}>{parseInline(textSrc, opts)}</a>,`
			`rest,`
			`};`
			`},`
			`},`
finish scan3 2025-07-08 20:48:30 -07:00			`image: {`
			`match: /!\[/,`
			`// 6.4 - images`
			`parse({ after, opts }) {`
			`// Match '[' to let the inner-most link win.`
			`const splitText = splitFirst(after, /[[\]]/);`
			`if (!splitText) return null;`
			`if (splitText.delim !== "]") return null;`
			`const { first: textSrc, rest: afterText } = splitText;`
			`},`
			`},`
			`emphasis: {`
			`// detect left-flanking delimiter runs, but this expression does not`
			`// consider preceding escapes. instead, those are programatically`
			`// checked inside the parse function.`
			`match: /(?:\*+\|(?<!\p{P})_+)(?!\s\|\p{P}\|$)/u,`
			`// 6.2 - emphasis and strong emphasis`
			`parse({ before, match, after, opts }) {`
			`// find out how long the delim sequence is`
			`// look for 'ends'`
			`},`
			`},`
start the markdown parser 2025-07-08 01:09:55 -07:00			`autolink: {},`
			`html: {},`
			`br: {`
			`match: / +\n\|\\\n/,`
			`parse() {`
			`return { result: <br /> };`
			`},`
			`},`
			`};`

			`function parseLinkTarget(src: string) {`
chore: rework Clover Engine API, remove "SSR" term "server side rendering" is a misleading term since it implies there is a server. that isn't neccecarily the case here, since it supports running in the browser. I think "clover engine" is cute, short for "clover html rendering engine". Instead of "server side rendering", it's just rendering. This commit makes things a lot more concise, such as `ssr.ssrAsync` being renamed to `render.async` to play nicely with namespaced imports. `getCurrentRender` and `setCurrentRender` are just `current` and `setCurrent`, and the addon interface has been redesigned to force symbols with a wrapping helper. 2025-08-02 19:22:07 -07:00			`let href: string,`
			`title: string \| null = null;`
start the markdown parser 2025-07-08 01:09:55 -07:00			`href = src;`
finish scan3 2025-07-08 20:48:30 -07:00			`return { href, title };`
start the markdown parser 2025-07-08 01:09:55 -07:00			`}`

			`/* Find a delimiter while considering backslash escapes. */`
			`function splitFirst(text: string, match: RegExp) {`
chore: rework Clover Engine API, remove "SSR" term "server side rendering" is a misleading term since it implies there is a server. that isn't neccecarily the case here, since it supports running in the browser. I think "clover engine" is cute, short for "clover html rendering engine". Instead of "server side rendering", it's just rendering. This commit makes things a lot more concise, such as `ssr.ssrAsync` being renamed to `render.async` to play nicely with namespaced imports. `getCurrentRender` and `setCurrentRender` are just `current` and `setCurrent`, and the addon interface has been redesigned to force symbols with a wrapping helper. 2025-08-02 19:22:07 -07:00			`let first = "",`
			`delim: string,`
			`escaped: boolean;`
start the markdown parser 2025-07-08 01:09:55 -07:00			`do {`
			`const find = text.match(match);`
			`if (!find) return null;`
			`delim = find[0];`
			`const index = UNWRAP(find.index);`
			`let i = index - 1;`
			`escaped = false;`
chore: rework Clover Engine API, remove "SSR" term "server side rendering" is a misleading term since it implies there is a server. that isn't neccecarily the case here, since it supports running in the browser. I think "clover engine" is cute, short for "clover html rendering engine". Instead of "server side rendering", it's just rendering. This commit makes things a lot more concise, such as `ssr.ssrAsync` being renamed to `render.async` to play nicely with namespaced imports. `getCurrentRender` and `setCurrentRender` are just `current` and `setCurrent`, and the addon interface has been redesigned to force symbols with a wrapping helper. 2025-08-02 19:22:07 -07:00			`while (i >= 0 && text[i] === "\\") (escaped = !escaped), (i -= 1);`
start the markdown parser 2025-07-08 01:09:55 -07:00			`first += text.slice(0, index - +escaped);`
			`text = text.slice(index + find[0].length);`
			`} while (escaped);`
			`return { first, delim, rest: text };`
			`}`

chore: rework Clover Engine API, remove "SSR" term "server side rendering" is a misleading term since it implies there is a server. that isn't neccecarily the case here, since it supports running in the browser. I think "clover engine" is cute, short for "clover html rendering engine". Instead of "server side rendering", it's just rendering. This commit makes things a lot more concise, such as `ssr.ssrAsync` being renamed to `render.async` to play nicely with namespaced imports. `getCurrentRender` and `setCurrentRender` are just `current` and `setCurrent`, and the addon interface has been redesigned to force symbols with a wrapping helper. 2025-08-02 19:22:07 -07:00			console.log(render.sync(parseInline("meow `bwaa` `` ` `` `` `z``")));
start the markdown parser 2025-07-08 01:09:55 -07:00
chore: rework Clover Engine API, remove "SSR" term "server side rendering" is a misleading term since it implies there is a server. that isn't neccecarily the case here, since it supports running in the browser. I think "clover engine" is cute, short for "clover html rendering engine". Instead of "server side rendering", it's just rendering. This commit makes things a lot more concise, such as `ssr.ssrAsync` being renamed to `render.async` to play nicely with namespaced imports. `getCurrentRender` and `setCurrentRender` are just `current` and `setCurrent`, and the addon interface has been redesigned to force symbols with a wrapping helper. 2025-08-02 19:22:07 -07:00			`import * as render from "#engine/render";`