sitegen/framework/lib/markdown.tsx

/* Implementation of the [CommonMark] specification for markdown, with
 * support for custom syntax extensions via the parser options. Instead of
 * returning an AST that needs a second conversion pass to JSX, 'parse' is
 * meant to return a node ('render.Node') that can be stringified directly
 * by Clover's SSR engine. This way, generation optimizations, async
 * components, and other features are gained for free here.
 *
 * NOTE(review): the body below is still an empty stub, so nothing is
 * returned yet and both arguments are currently ignored.
 *
 * [CommonMark]: https://spec.commonmark.org/0.31.2/
 */
function parse(src: string, options: Partial<ParseOpts> = {}) {}

/* Component form of 'parse'. JSX components receive a single props object
 * and need a capitalized name, so this unpacks 'src' from the props and
 * forwards every remaining prop to 'parse' as its options. */
export function Markdown(props: { src: string } & Partial<ParseOpts>) {
  const { src, ...options } = props;
  return parse(src, options);
}

// TODO: This implementation is flawed because it is impossible to sanely handle
// emphasis and strong emphasis, and all their edge cases. Instead of making these
// using extensions interface, they should be special cased.

/* Parse inline markdown into a flat list of nodes.
 *
 * The source is consumed left to right. On every pass each rule's first
 * match is located and the candidates are tried in order of position; the
 * first one whose 'parse' accepts is emitted, preceded by the literal text
 * in front of it, and parsing resumes from the remainder that rule reports
 * ('rest', or the text right after the match when 'rest' is omitted).
 * Whatever is left once no rule accepts is appended as literal text.
 *
 * Candidates are ordered by position rather than by rule declaration order
 * so that syntax appearing earlier in the text is never swallowed as the
 * literal prefix of a later match from an earlier-declared rule.
 *
 * - src: inline markdown source.
 * - options.rules: rule set to apply, defaults to 'inlineRules'.
 * - options.links: link reference definitions, defaults to an empty map.
 *
 * Returns the literal strings and rule results in document order. Empty
 * strings may appear between adjacent results.
 */
function parseInline(src: string, options: Partial<InlineOpts> = {}) {
  const { rules = inlineRules, links = new Map() } = options;
  const opts: InlineOpts = { rules, links };
  const parts: render.Node[] = [];
  // Placeholder rules without a 'match' expression can never apply.
  const ruleList = Object.values(rules).filter((rule) => rule.match);
  while (true) {
    // First match of every rule, earliest first. 'sort' is stable, so rules
    // matching at the same index keep their declaration order.
    const candidates = ruleList
      .flatMap((rule) => {
        const found = src.match(rule.match);
        if (!found) return [];
        return [{ rule, text: found[0], index: UNWRAP(found.index) }];
      })
      .sort((a, b) => a.index - b.index);

    let advanced = false;
    for (const { rule, text, index } of candidates) {
      const after = src.slice(index + text.length);
      const parsed = rule.parse({ after, match: text, opts });
      // The rule rejected this occurrence, fall through to the next one.
      if (!parsed) continue;
      parts.push(src.slice(0, index), parsed.result);
      src = parsed.rest ?? after;
      advanced = true;
      break;
    }
    if (!advanced) break;
  }
  parts.push(src);
  return parts;
}

// -- interfaces --

/* Options accepted by 'parse' and by the 'Markdown' component. */
interface ParseOpts {
  // NOTE(review): presumably toggles GitHub Flavored Markdown; nothing in
  // this file reads it yet.
  gfm: boolean;
  blockRules: Record<string, BlockRule>;
  inlineRules: Record<string, InlineRule>;
}

/* State shared by 'parseInline' and the inline rules it drives. */
interface InlineOpts {
  // Rule set in use, keyed by rule name.
  rules: Record<string, InlineRule>;
  // Link reference definitions, looked up by label.
  links: Map<string, LinkRef>;
}

/* A single inline syntax extension. */
interface InlineRule {
  // Locates the start of the construct within the remaining source.
  match: RegExp;
  // Called with the matched text and everything following it. Returns null
  // when this occurrence is not actually the construct.
  parse(opts: {
    after: string;
    match: string;
    opts: InlineOpts;
  }): InlineParse | null;
}

/* Successful result of an inline rule. */
interface InlineParse {
  // Rendered node for the construct.
  result: render.Node;
  // Source left to parse after the construct. When omitted, parsing resumes
  // right after the matched text.
  rest?: string;
}

/* Target of a link. */
interface LinkRef {
  href: string;
  title: string | null;
}

/* NOTE(review): block rules look like a placeholder; the option bag and
 * the result type are not specified yet. */
interface BlockRule {
  match: RegExp;
  parse(opts: {}): unknown;
}
/* Built-in inline rules, keyed by name. 'parseInline' falls back to this
 * set when no 'rules' option is given.
 *
 * Each rule's 'parse' receives the matched text ('match'), the source that
 * follows it ('after') and the shared inline options. It returns the
 * rendered node plus the unparsed remainder, or null when the occurrence is
 * not a valid instance of the construct.
 */
export const inlineRules: Record<string, InlineRule> = {
  code: {
    match: /`+/,
    // 6.1 - code spans
    parse({ after, match }) {
      // The closing backtick string must be exactly as long as the opening
      // one; a longer or shorter run is part of the content.
      let end = -1;
      for (const run of after.matchAll(/`+/g)) {
        if (run[0].length !== match.length) continue;
        end = UNWRAP(run.index);
        break;
      }
      if (end === -1) return null;
      // Line endings are converted to spaces.
      let inner = after.slice(0, end).replace(/\r\n?|\n/g, " ");
      const rest = after.slice(end + match.length);
      // If the resulting string both begins and ends with a space
      // character, but does not consist entirely of space characters,
      // a single space character is removed from the front and back.
      if (inner.startsWith(" ") && inner.endsWith(" ") && /[^ ]/.test(inner)) {
        inner = inner.slice(1, -1);
      }
      return { result: <code>{inner}</code>, rest };
    },
  },
  link: {
    match: /(?<!!)\[/,
    // 6.3 - links
    parse({ after, opts }) {
      // Match '[' to let the inner-most link win.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      const { first: textSrc, rest: afterText } = splitText;
      let href: string,
        title: string | null = null,
        rest: string;
      if (afterText[0] === "(") {
        // Inline link
        const splitTarget = splitFirst(afterText.slice(1), /\)/);
        if (!splitTarget) return null;
        ({ rest } = splitTarget);
        const target = parseLinkTarget(splitTarget.first);
        if (!target) return null;
        ({ href, title } = target);
      } else if (afterText[0] === "[") {
        const splitTarget = splitFirst(afterText.slice(1), /]/);
        if (!splitTarget) return null;
        const name = splitTarget.first.trim().length === 0
          // Collapsed reference link
          ? textSrc.trim()
          // Full Reference Link
          : splitTarget.first.trim();
        const target = opts.links.get(name);
        if (!target) return null;
        ({ href, title } = target);
        ({ rest } = splitTarget);
      } else {
        // Shortcut reference link. The label is trimmed like the collapsed
        // and full forms above so all three look up the same key.
        const target = opts.links.get(textSrc.trim());
        if (!target) return null;
        ({ href, title } = target);
        rest = afterText;
      }
      return {
        result: <a {...{ href, title }}>{parseInline(textSrc, opts)}</a>,
        rest,
      };
    },
  },
  image: {
    match: /!\[/,
    // 6.4 - images
    parse({ after, opts }) {
      // Match '[' to let the inner-most link win.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      const { first: textSrc, rest: afterText } = splitText;
      // Not implemented yet: report "no match" so the text stays literal.
      return null;
    },
  },
  emphasis: {
    // detect left-flanking delimiter runs, but this expression does not
    // consider preceding escapes. instead, those are programmatically
    // checked inside the parse function.
    match: /(?:\*+|(?<!\p{P})_+)(?!\s|\p{P}|$)/u,
    // 6.2 - emphasis and strong emphasis
    parse({ before, match, after, opts }) {
      // find out how long the delim sequence is
      // look for 'ends'
      // Not implemented yet: report "no match" so the text stays literal.
      return null;
    },
  },
  // Placeholders: without a 'match' expression 'parseInline' skips them.
  autolink: {},
  html: {},
  br: {
    // Hard line break: two or more trailing spaces, or a backslash, before
    // the line ending.
    match: /  +\n|\\\n/,
    parse() {
      return { result: <br /> };
    },
  },
};

/* Turn the text between the parentheses of an inline link into a link
 * target. For now the entire source is used verbatim as the destination
 * and no title is extracted, so 'title' is always null. */
function parseLinkTarget(src: string) {
  const title: string | null = null;
  return { href: src, title };
}

/* Find a delimiter while considering backslash escapes.
 *
 * Scans 'text' for the first occurrence of 'match' that is not escaped. An
 * occurrence is escaped when it is preceded by an odd number of
 * backslashes; in that case the escaping backslash is dropped, the
 * delimiter itself is kept as literal text, and the search continues.
 *
 * Returns the (unescaped) text in front of the delimiter as 'first', the
 * matched delimiter as 'delim' and everything after it as 'rest', or null
 * when no unescaped delimiter exists.
 */
function splitFirst(text: string, match: RegExp) {
  let first = "";
  while (true) {
    const find = text.match(match);
    if (!find) return null;
    const delim = find[0];
    const index = UNWRAP(find.index);
    // Count the run of backslashes directly in front of the delimiter.
    let slashes = 0;
    while (slashes < index && text[index - 1 - slashes] === "\\") slashes += 1;
    const rest = text.slice(index + delim.length);
    if (slashes % 2 === 0) {
      return { first: first + text.slice(0, index), delim, rest };
    }
    // Escaped: drop the escaping backslash but keep the delimiter, which
    // would otherwise vanish from the output.
    first += text.slice(0, index - 1) + delim;
    text = rest;
  }
}

// Smoke test for code spans; it runs whenever this module is loaded.
// NOTE(review): looks like leftover debug output - consider moving it into
// a test before this module is imported by real pages.
{
  const sample = parseInline("meow `bwaa` `` ` `` `` `z``");
  console.log(render.sync(sample));
}

import * as render from "#engine/render";