// sitegen/framework/incremental.ts
// Incremental compilation framework
let running = false;
let jobs = 0;
let newKeys = 0;
let seenWorks = new Set<string>(); // for detecting duplicate work keys
let seenWrites = new Set<string>(); // for detecting conflict vs overwrite
let works = new Map<string, Work>();
let files = new Map<string, TrackedFile>(); // keyed by `toRel` path
let writes = new Map<string, FileWrite>();
let assets = new Map<string, Asset>(); // keyed by hash
export interface Ref<T> {
/** This method is compatible with `await` syntax */
then(
onFulfilled: (value: T) => void,
onRejected: (error: unknown) => void,
): void;
key: string;
}
type Job<I = any, O = any> = (io: Io, input: I) => Promise<O>;
/**
 * Declare a unit of work. The return value is memoized and
 * only rebuilt when inputs (declared via `Io`) change. Outputs
 * are written at the end of a compilation (see `compile`).
 *
 * If the returned `Ref` is never awaited or read via
 * `io.readWork`, the job is never started.
*/
export function work<O>(job: Job<void, O>): Ref<O>;
export function work<I, O>(job: Job<I, O>, input: I): Ref<O>;
export function work<I, O>(job: Job<I, O>, input: I = null as I): Ref<O> {
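  // The memo key hashes the call site plus the inspected input, so the same
  // `work()` call site with the same input maps to the same key across builds.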
const keySource = [
JSON.stringify(util.getCallSites(2)[1]),
util.inspect(input),
].join(":");
const key = crypto.createHash("sha1").update(keySource).digest("base64url");
ASSERT(running);
ASSERT(
!seenWorks.has(key),
`Key '${key}' must be unique during the build. ` +
`To fix this, provide a manual 'key' argument.`,
);
seenWorks.add(key);
const prev = works.get(key) as Work<O> | null;
if (prev) {
return { key, then: (done) => done(prev.value) };
}
async function perform() {
const io = new Io(key);
jobs += 1;
newKeys += 1;
try {
const value = await job(io, input);
validateSerializable(value, "");
const { reads, writes } = io;
works.set(key, {
value,
affects: [],
reads,
writes,
});
for (const add of reads.files) {
const { affects } = UNWRAP(files.get(add));
ASSERT(!affects.includes(key));
affects.push(key);
}
for (const add of reads.works) {
const { affects } = UNWRAP(works.get(add));
ASSERT(!affects.includes(key));
affects.push(key);
}
return value;
} finally {
jobs -= 1;
}
}
let cached: Promise<O>;
return {
key,
    then: (fulfill, reject) => void (cached ??= perform()).then(fulfill, reject),
};
}
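
// Illustrative usage (the job body and its input are hypothetical):
//
//   const page = work(async (io, route: string) => {
//     const source = await io.readFile(route);
//     return { length: source.length };
//   }, "src/index.html");
//   const { length } = await page; // the job only starts once awaited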
export async function compile<T>(compiler: () => Promise<T>) {
ASSERT(!running, `Cannot run twice`);
try {
running = true;
ASSERT(jobs === 0);
const start = performance.now();
    const timerSpinner = new Spinner({
      text: () =>
        `sitegen! [${((performance.now() - start) / 1000).toFixed(1)}s]`,
      fps: 10,
    });
using _endTimerSpinner = { [Symbol.dispose]: () => timerSpinner.stop() };
2025-07-31 21:35:36 -07:00
const value = await compiler();
ASSERT(jobs === 0);
timerSpinner.text = "incremental flush";
await flush(start);
timerSpinner.stop();
    seenWorks.clear();
    seenWrites.clear();
newKeys = 0;
return { value };
} finally {
running = false;
}
}
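
// A typical build loop, sketched with a hypothetical `buildSite` entry point:
//
//   await restore();                    // load .clover/incr.state if present
//   await compile(() => buildSite());   // cold build
//   forceInvalidate("src/index.html");  // e.g. driven by a file watcher
//   await compile(() => buildSite());   // only affected work re-runs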
export async function flush(start: number) {
  // Trim: drop work not re-declared this build, then orphaned files and assets.
  const detachedFiles = new Set<string>();
  const referencedAssets = new Set<string>();
for (const [k, { writes: { assets } }] of works) {
if (seenWorks.has(k)) {
for (const asset of assets.values()) referencedAssets.add(asset.hash);
continue;
}
deleteWork(k);
}
for (const [k, file] of files) {
if (file.affects.length > 0) continue;
files.delete(k);
detachedFiles.add(k);
}
  for (const k of assets.keys()) {
    if (!referencedAssets.has(k)) assets.delete(k);
  }
  const p: Promise<unknown>[] = [];
// File writes
let dist = 0;
for (const [key, { buffer, size }] of writes) {
if (buffer) p.push(fs.writeMkdir(path.join(`.clover/o/${key}`), buffer));
dist += size;
}
// Asset map
{
const { json, blob } = getAssetManifest();
const jsonString = Buffer.from(JSON.stringify(json));
p.push(fs.writeMkdir(".clover/o/static.json", jsonString));
p.push(fs.writeMkdir(".clover/o/static.blob", blob));
dist += blob.byteLength + jsonString.byteLength;
}
await Promise.all(p);
// Incremental state
const serialized = msgpackr.pack(serialize());
await fs.writeMkdir(".clover/incr.state", serialized);
const time = (performance.now() - start).toFixed(0);
console.success(`sitegen! in ${time} ms`);
console.writeLine(` - ${works.size} keys (${works.size - newKeys} cached)`);
console.writeLine(` - ${assets.size} static assets`);
console.writeLine(
` - dist: ${formatSize(dist)}, incremental: ${
formatSize(serialized.byteLength)
}`,
);
}
export async function restore() {
let buffer;
try {
buffer = await fs.readFile(".clover/incr.state");
} catch (err: any) {
if (err.code !== "ENOENT") throw err;
}
if (!buffer) return;
await deserialize(buffer);
}
export function forceInvalidate(file: string) {
const resolved = toAbs(file);
const key = toRel(resolved);
forceInvalidateEntry(UNWRAP(files.get(key), `Untracked file '${file}'`));
}
export function forceInvalidateEntry(entry: { affects: string[] }) {
const queue = [...entry.affects];
let key;
while ((key = queue.shift())) {
const affects = deleteWork(key);
queue.push(...affects);
}
}

function deleteWork(key: string) {
const { reads, affects, writes: w } = UNWRAP(works.get(key));
for (const remove of reads.files) {
const { affects } = UNWRAP(files.get(remove));
ASSERT(affects.includes(key));
affects.splice(affects.indexOf(key), 1);
}
for (const remove of reads.works) {
const { affects } = UNWRAP(works.get(remove), remove);
ASSERT(affects.includes(key));
affects.splice(affects.indexOf(key), 1);
}
for (const remove of affects) {
const { reads: { works: list } } = UNWRAP(works.get(remove), remove);
ASSERT(list.has(key));
list.delete(key);
}
  for (const file of w.files) {
    if (UNWRAP(writes.get(file)).work === key) writes.delete(file);
  }
// Assets are temporarily kept, trimmed via manual GC after compilation.
works.delete(key);
return affects;
}
export function reset() {
ASSERT(!running);
works.clear();
files.clear();
assets.clear();
}
export function serialize() {
const fileEntries = Array.from(files, ([k, v]) =>
[
k,
v.type,
v.type === 'f' ? v.lastModified : v.type === 'd' ? v.contentHash : null,
...v.affects,
] as const);
const workEntries = Array.from(works, ([k, v]) =>
[
k,
v.value,
Array.from(v.reads.files),
Array.from(v.reads.works),
Array.from(v.writes.files),
Array.from(v.writes.assets, ([k, { headers }]) => [k, headers] as const),
v.affects,
] as const);
const expectedFilesOnDisk = Array.from(
writes,
([k, { size, work }]) => [k, size, work] as const,
);
const assetEntries = Array.from(
assets,
([k, asset]) => [k, asset.raw, asset.gzip, asset.zstd] as const,
);
return [
1,
fileEntries,
workEntries,
expectedFilesOnDisk,
assetEntries,
] as const;
}
type SerializedState = ReturnType<typeof serialize>;
/* No-op on failure */
async function deserialize(buffer: Buffer) {
const decoded = msgpackr.decode(buffer) as SerializedState;
if (!Array.isArray(decoded)) return false;
if (decoded[0] !== 1) return false;
const [, fileEntries, workEntries, expectedFilesOnDisk, assetEntries] =
decoded;
for (const [k, type, content, ...affects] of fileEntries) {
if (type === "f") {
ASSERT(typeof content === "number");
files.set(k, { type, affects, lastModified: content });
} else if (type === 'd') {
ASSERT(typeof content === "string");
files.set(k, { type, affects, contentHash: content, contents: [] });
} else {
files.set(k, { type, affects });
}
}
for (const entry of workEntries) {
const [
k,
value,
readFiles,
readWorks,
writeFiles,
writeAssets,
affects,
] = entry;
works.set(k, {
value,
reads: {
files: new Set(readFiles),
works: new Set(readWorks),
},
writes: {
files: new Set(writeFiles),
assets: new Map(Array.from(writeAssets, ([k, headers]) => [k, {
hash: JSON.parse(UNWRAP(headers.etag)),
headers,
}])),
},
affects,
});
}
const statFiles = await Promise.all(expectedFilesOnDisk
.map(([k, size, work]) =>
fs.stat(path.join(".clover/o", k))
.catch((err) => {
if (err.code === "ENOENT") return null;
throw err;
})
.then((stat) => ({ k, size, work, stat }))
));
for (const { k, stat, work, size } of statFiles) {
if (stat?.size === size) {
      writes.set(k, { size, buffer: null, work });
} else {
forceInvalidateEntry({ affects: [work] });
}
}
for (const [hash, raw, gzip, zstd] of assetEntries) {
assets.set(hash, { raw, gzip, zstd });
}
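
  // Revalidate every tracked file against the disk. A mismatch throws, which
  // invalidates the file's dependents; entries for files that previously did
  // not exist are dropped once something appears at that path.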
await Promise.all(Array.from(files, async ([k, file]) => {
try {
if (file.type === "d") {
const contents = file.contents = await fs.readdir(k);
contents.sort();
const contentHash = crypto
.createHash("sha1")
.update(contents.join("\0"))
.digest("base64url");
if (file.contentHash !== contentHash) {
file.contentHash = contentHash;
throw new Error();
}
} else if (file.type === 'f') {
const lastModified = await fs.stat(k)
.then(x => Math.floor(x.mtimeMs), () => 0);
if (file.lastModified !== lastModified) {
file.lastModified = lastModified;
throw new Error();
}
} else {
file.type satisfies 'null';
const stat = await fs.stat(k).catch(() => null);
if (stat) throw new Error();
}
} catch (e) {
forceInvalidateEntry(file);
if (file.type === 'null') files.delete(k);
}
}));
}
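/**
 * Packs every asset referenced by live work into one blob; each manifest
 * entry stores [start, end) views for the raw, gzip, and zstd encodings.
 */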
export function getAssetManifest() {
const writer = new BufferWriter();
const asset = Object.fromEntries(
Array.from(works, (work) => work[1].writes.assets)
.filter((map) => map.size > 0)
.flatMap((map) =>
Array.from(map, ([key, { hash, headers }]) => {
const { raw, gzip, zstd } = UNWRAP(
assets.get(hash),
`Asset ${key} (${hash})`,
);
return [key, {
        raw: writer.write(raw, "raw:" + hash),
        gzip: writer.write(gzip, "gzip:" + hash),
        zstd: writer.write(zstd, "zstd:" + hash),
headers,
}] as const;
})
),
) satisfies BuiltAssetMap;
return { json: asset, blob: writer.get() };
}
/**
 * Input/Output with automatic tracking.
 * - Inputs read with `Io` are tracked to know when to rebuild.
 * - Outputs written with `Io` are deleted when abandoned.
 */
export class Io {
constructor(public key: string) {}
reads: Reads = { files: new Set(), works: new Set() };
writes: Writes = { files: new Set(), assets: new Map() };
2025-07-31 21:35:36 -07:00
#trackFs(file: string) {
const resolved = toAbs(file);
const key = toRel(resolved);
this.reads.files.add(key);
return { resolved, key };
}
async readWork<T>(ref: Ref<T>): Promise<T> {
this.reads.works.add(ref.key);
return await ref;
}
/** Track a file in the compilation without reading it. */
async trackFile(file: string) {
const { key, resolved } = this.#trackFs(file);
if (!files.get(key)) {
let lastModified: number = 0;
try {
lastModified = Math.floor((await fs.stat(file)).mtimeMs);
files.set(key, { type: "f", lastModified, affects: [] });
} catch {
files.set(key, { type: "null", affects: [] });
}
}
return resolved;
}
async readFile(file: string) {
return fs.readFile(await this.trackFile(file), "utf-8");
}
async readJson<T>(file: string) {
return JSON.parse(await this.readFile(file)) as T;
}
async readDir(dir: string) {
const { key, resolved } = this.#trackFs(dir);
const existing = files.get(key);
try {
if (existing?.type === 'd') return existing.contents;
const contents = await fs.readdir(resolved);
contents.sort();
const contentHash = crypto
.createHash("sha1")
.update(contents.join("\0"))
.digest("base64url");
files.set(key, {
type: "d",
affects: [],
contentHash,
contents,
});
return contents;
} catch (err) {
if (!existing) files.set(key, { type: "null", affects: [] });
throw err;
}
}
async readDirRecursive(dir: string): Promise<string[]> {
const dirs = await this.readDir(dir);
return (
await Promise.all(
dirs.map(async (child) => {
const abs = path.join(dir, child);
const stat = await fs.stat(abs);
if (stat.isDirectory()) {
return (await this.readDirRecursive(abs)).map((grand) =>
path.join(child, grand)
);
} else {
return child;
}
}),
)
).flat();
}
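
  // For example, readDirRecursive("src") could resolve to paths such as
  // ["index.ts", "pages/about.tsx"], joined relative to `dir` (names here
  // are hypothetical).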
  /** Track all dependencies of a module. */
async import<T>(file: string): Promise<T> {
const { resolved } = this.#trackFs(file);
try {
return require(resolved) as T;
} finally {
const queue = [resolved];
const seen = new Set<string>();
let current;
while ((current = queue.shift())) {
const stat = hot.getFileStat(current);
if (!stat) continue;
const { key } = this.#trackFs(current);
if (!files.get(key)) {
files.set(key, {
type: "f",
affects: [],
lastModified: stat?.lastModified ?? 0,
});
}
for (const imp of stat.imports) {
if (!seen.has(imp)) {
seen.add(imp);
queue.push(imp);
}
}
}
}
}
async writeAsset(
pathname: string,
blob: string | Buffer,
headersOption?: HeadersInit,
) {
ASSERT(pathname.startsWith("/"));
    ASSERT(!seenWrites.has("a:" + pathname));
    seenWrites.add("a:" + pathname);
const buffer = typeof blob === "string" ? Buffer.from(blob) : blob;
2025-07-31 21:35:36 -07:00
const headers = new Headers(headersOption ?? {});
const hash = crypto.createHash("sha1").update(buffer).digest("hex");
if (!headers.has("Content-Type")) {
headers.set("Content-Type", mime.contentTypeFor(pathname));
2025-06-09 00:12:41 -07:00
}
headers.set("ETag", JSON.stringify(hash));
this.writes.assets.set(pathname, {
hash,
// @ts-expect-error TODO
headers: Object.fromEntries(headers),
});
    if (!assets.has(hash)) {
      jobs += 1;
      // Reserve the slot so a concurrent write of identical bytes
      // does not compress them twice.
      assets.set(hash, undefined!);
const [gzipBuffer, zstdBuffer] = await Promise.all([
gzip(buffer),
zstdCompress(buffer),
]);
assets.set(hash, {
raw: buffer,
gzip: gzipBuffer,
zstd: zstdBuffer,
});
jobs -= 1;
}
}
writeFile(subPath: string, blob: string | Buffer) {
ASSERT(!subPath.startsWith("/"));
ASSERT(
!seenWrites.has("f:" + subPath),
`File overwritten: ${JSON.stringify(subPath)}`,
);
seenWrites.add("f:" + subPath);
const buffer = Buffer.isBuffer(blob) ? blob : Buffer.from(blob);
writes.set(subPath, {
buffer,
size: buffer.byteLength,
work: this.key,
});
}
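
  // Illustrative calls from inside a job (paths and contents are hypothetical):
  //
  //   await io.writeAsset("/favicon.ico", iconBuffer, {
  //     "Cache-Control": "max-age=604800",
  //   });
  //   io.writeFile("backend.js", bundleText); // lands in .clover/o/backend.js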
}
class BufferWriter {
size = 0;
seen = new Map<string, BufferView>();
buffers: Buffer[] = [];
write(buffer: Buffer, hash: string): BufferView {
let view = this.seen.get(hash);
if (view) return view;
view = [this.size, this.size += buffer.byteLength];
this.seen.set(hash, view);
this.buffers.push(buffer);
return view;
}
get() {
return Buffer.concat(this.buffers);
}
}
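
// Deduplication sketch: writing the same hash twice returns the original
// view instead of appending the bytes again.
//
//   const w = new BufferWriter();
//   const a = w.write(Buffer.from("hello"), "raw:abc"); // [0, 5]
//   const b = w.write(Buffer.from("hello"), "raw:abc"); // same [0, 5] view
//   ASSERT(a === b && w.get().byteLength === 5);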
export function validateSerializable(value: unknown, key: string) {
if (typeof value === "string") {
if (value.includes(hot.projectRoot)) {
throw new Error(
`Return value must not contain the CWD for portability, found at ${key}`,
);
}
} else if (value && typeof value === "object") {
if (Array.isArray(value)) {
value.forEach((item, i) => validateSerializable(item, `${key}[${i}]`));
} else if (Object.getPrototypeOf(value) === Object.prototype) {
Object.entries(value).forEach(([k, v]) =>
validateSerializable(v, `${key}.${k}`)
);
} else {
throw new Error(
`Return value must be a plain JS object, found ${
Object.getPrototypeOf(value).constructor.name
} at ${key}`,
);
}
} else if (["bigint", "function", "symbol"].includes(typeof value)) {
throw new Error(
`Return value must be a plain JS object, found ${typeof value} at ${key}`,
);
}
}
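
// Illustrative checks:
//
//   validateSerializable({ a: [1, "two"] }, ""); // ok: plain data
//   validateSerializable(new Date(), "");        // throws: Date at ""
//   validateSerializable({ n: 1n }, "");         // throws: bigint at ".n"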

export function toAbs(filePath: string) {
  return path.resolve(hot.projectRoot, filePath);
}
export function toRel(absPath: string) {
return path.relative(hot.projectRoot, absPath).replaceAll("\\", "/");
}
export type BufferView = [start: number, end: number];
interface Reads {
files: Set<string>;
works: Set<string>;
}
interface FileWrite {
buffer: Buffer | null;
size: number;
work: string;
}
interface Writes {
files: Set<string>;
assets: Map<string, {
hash: string;
headers: Record<string, string>;
}>;
}
interface Asset {
raw: Buffer;
gzip: Buffer;
zstd: Buffer;
}
interface Work<T = unknown> {
value: T;
reads: Reads;
writes: Writes;
affects: string[];
}
type TrackedFile =
& {
affects: string[];
}
& (
| { type: "f"; lastModified: number }
| { type: "d"; contentHash: string; contents: string[] | null }
| { type: "null"; }
);
export interface BuiltAssetMap {
[route: string]: BuiltAsset;
}
export interface BuiltAsset {
2025-07-31 21:35:36 -07:00
raw: BufferView;
gzip: BufferView;
zstd: BufferView;
headers: Record<string, string>;
}
const gzip = util.promisify(zlib.gzip);
const zstdCompress = util.promisify(zlib.zstdCompress);
import * as fs from "#sitegen/fs";
import * as path from "node:path";
import * as hot from "./hot.ts";
import * as util from "node:util";
import * as crypto from "node:crypto";
import * as mime from "#sitegen/mime";
import * as zlib from "node:zlib";
import * as console from "@paperclover/console";
import { Spinner } from "@paperclover/console/Spinner";
import { formatSize } from "@/file-viewer/format.ts";
import * as msgpackr from "msgpackr";