sitegen/framework/incremental.ts
chloe caruso c5ac450f21 feat: dynamic page regeneration (#24)
the asset system is reworked to support "dynamic" entries, where each
entry is a separate file on disk containing the latest generation's
headers+raw+gzip+zstd. when calling view.regenerate, it looks for
pages that had "export const regenerate" during generation, renders
those pages using the view system, and stores the results as assets
instead of sending them as a response.

pages configured as regenerable are also bundled as views, using the
non-aliasing key "page:${page.id}". this cannot alias because file
paths may not contain a colon.
2025-08-11 22:43:27 -07:00
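
a regenerative page might look like the sketch below. generation only scans
for the presence of "export const regenerate"; the file name, the export's
value, and the component body here are illustrative assumptions rather than
the project's actual API:

  // src/pages/stats.tsx (hypothetical page module)
  export const regenerate = true; // only this export's presence matters; value is assumed
  export default function StatsPage() {
    return <main>regenerated content</main>; // re-rendered by view.regenerate, stored as an asset
  }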

// Incremental build system using singleton state.
// See `work()`, `compile()`, and `invalidate()` for details.
//
// All state is serializable so it can be restored across sessions.
// This library special-cases the asset map, but is otherwise agnostic.
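//
// A minimal usage sketch (hypothetical caller; the import name `incr` and the
// file paths are assumptions, not part of this module):
//
//   import * as incr from "./incremental.ts";
//
//   await incr.restore();                       // load .clover/incr.state if present
//   const result = await incr.compile(async () => {
//     const page = incr.work(async (io) => io.readFile("src/index.html"));
//     return await page;                        // Ref<T> is awaitable
//   });
//   await incr.invalidate("src/index.html");    // mark the file as changed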
let running = false;
let jobs = 0;
let newKeys = 0;
let seenWorks = new Set<string>(); // for detecting conflict vs overwrite
let seenWrites = new Set<string>(); // for detecting conflict vs overwrite
let works = new Map<string, Work>();
let files = new Map<string, TrackedFile>(); // keyed by `toRel` path
let writes = new Map<string, FileWrite>();
let assets = new Map<string, Asset>(); // keyed by hash
export interface Ref<T> {
key: string;
/** This method is compatible with `await` syntax */
then(resolve: (value: T) => void, reject: (error: unknown) => void): void;
get value(): T | null;
}
type Job<I = any, O = any> = (io: Io, input: I) => Promise<O>;
/**
* Declare a unit of work. The return value is memoized and only rebuilt when
* inputs change. Inputs are tracked via the `io` interface, as well as a hash
* of the `input` param and caller source code. Outputs are written at the end
* of a compilation (see `compile`).
*
* Work items are lazy, only started when `Ref` is awaited or `io.readWork`ed.
*/
export function work<O>(job: Job<void, O>): Ref<O>;
export function work<I, O>(job: Job<I, O>, input: I): Ref<O>;
export function work<I, O>(job: Job<I, O>, input: I = null as I): Ref<O> {
const source = JSON.stringify(UNWRAP(util.getCallSites(2)[1]));
const keySource = [source, util.inspect(input)].join(":");
const key = crypto.createHash("sha1").update(keySource).digest("base64url");
ASSERT(running);
ASSERT(
!seenWorks.has(key),
`Key '${key}' must be unique during the build. ` +
`Call 'work' from a distinct callsite or vary its 'input' value.`,
);
seenWorks.add(key);
const prev = works.get(key) as Work<O> | null;
if (prev) {
return { key, then: (done) => done(prev.value), value: prev.value };
}
async function perform() {
const io = new Io(key);
jobs += 1;
newKeys += 1;
try {
const value = await job(io, input);
validateSerializable(value, "");
const { reads, writes } = io;
works.set(key, { value, affects: [], reads, writes });
for (const add of reads.files) {
const { affects } = UNWRAP(files.get(add));
ASSERT(!affects.includes(key));
affects.push(key);
}
for (const add of reads.works) {
const { affects } = UNWRAP(works.get(add));
ASSERT(!affects.includes(key));
affects.push(key);
}
return value;
} finally {
jobs -= 1;
}
}
let cached: Promise<O>;
return {
key,
then(fulfill, reject) {
(cached ??= perform()).then(fulfill, reject);
},
get value() {
return (works.get(this.key)?.value as O) ?? null;
},
};
}
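// Example (hypothetical job; the "posts" directory is illustrative):
//
//   const postList = work(async (io) => {
//     const entries = await io.readDir("posts");     // tracked directory read
//     return entries.filter((name) => name.endsWith(".md"));
//   });
//   const posts = await postList;  // the job starts lazily on first await
//
// Because the key hashes the callsite and `input`, calling `work` with the
// same job but a different `input` produces a distinct cached entry.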
export async function compile<T>(compiler: () => Promise<T>) {
ASSERT(!running, `Cannot run twice at the same time.`);
try {
running = true;
ASSERT(jobs === 0);
const start = performance.now();
const timerSpinner = new Spinner({
text: () =>
`sitegen! [${((performance.now() - start) / 1000).toFixed(1)}s]`,
fps: 10,
});
using _endTimerSpinner = { [Symbol.dispose]: () => timerSpinner.stop() };
const value = await compiler();
ASSERT(jobs === 0);
timerSpinner.text = "incremental flush";
await flush(start);
timerSpinner.stop();
return {
value,
watchFiles: new Set(files.keys()),
newOutputs: Array.from(seenWrites)
.filter((x) => x.startsWith("f:"))
.map((x) => x.slice(2)),
newAssets: Array.from(seenWrites).some((x) => x.startsWith("a:")),
};
} finally {
running = false;
newKeys = 0;
seenWrites.clear();
seenWorks.clear();
}
}
export async function flush(start: number) {
// Trim
const detachedFiles = new Set<string>();
const referencedAssets = new Set<string>();
for (const [k, v] of works) {
const assets = v.writes.assets;
if (seenWorks.has(k)) {
for (const asset of assets.values()) referencedAssets.add(asset.hash);
continue;
}
deleteWork(k);
}
for (const [k, file] of files) {
if (file.affects.length > 0) continue;
files.delete(k);
detachedFiles.add(k);
}
for (const k of assets.keys()) if (!referencedAssets.has(k)) assets.delete(k);
const p: Promise<void>[] = [];
// File writes
let dist = 0;
for (const [key, { buffer, size }] of writes) {
if (buffer) p.push(fs.writeMkdir(path.join(`.clover/o/${key}`), buffer));
dist += size;
}
// Asset map
{
const { json, blob, dynamic, dts } = getAssetManifest();
const jsonString = Buffer.from(JSON.stringify(json));
p.push(fs.writeMkdir(".clover/o/asset.json", jsonString));
p.push(fs.writeMkdir(".clover/o/asset.blob", blob));
p.push(fs.writeMkdir(".clover/ts/asset.d.ts", dts));
for (const [k, v] of dynamic) {
p.push(fs.writeMkdir(`.clover/o/dynamic/${k}`, v));
}
dist += blob.byteLength + jsonString.byteLength;
}
await Promise.all(p);
// Incremental state
const serialized = msgpackr.pack(serialize());
await fs.writeMkdir(".clover/incr.state", serialized);
const time = (performance.now() - start).toFixed(0);
console.success(`sitegen! in ${time} ms`);
console.writeLine(` - ${works.size} keys (${works.size - newKeys} cached)`);
console.writeLine(` - ${assets.size} static assets`);
console.writeLine(
` - dist: ${formatSize(dist)},` +
` incremental: ${formatSize(serialized.byteLength)}`,
);
}
export async function restore() {
let buffer;
try {
buffer = await fs.readFile(".clover/incr.state");
} catch (err: any) {
if (err.code !== "ENOENT") throw err;
}
if (!buffer) return;
await deserialize(buffer);
}
function forceInvalidate(entry: { affects: string[] }) {
const queue = [...entry.affects];
let key;
while ((key = queue.shift())) {
const affects = deleteWork(key);
queue.push(...affects);
}
}
function deleteWork(key: string) {
const work = works.get(key);
if (!work) return [];
const { reads, affects, writes: w } = work;
for (const remove of reads.files) {
const { affects } = UNWRAP(files.get(remove));
ASSERT(affects.includes(key));
affects.splice(affects.indexOf(key), 1);
}
for (const remove of reads.works) {
const { affects } = UNWRAP(works.get(remove), remove);
ASSERT(affects.includes(key));
affects.splice(affects.indexOf(key), 1);
}
for (const remove of affects) {
const {
reads: { works: list },
} = UNWRAP(works.get(remove), remove);
ASSERT(list.has(key));
list.delete(key);
}
for (const file of w.files) {
if (UNWRAP(writes.get(file)).work === key) writes.delete(file);
}
// Assets are temporarily kept, trimmed via manual GC after compilation.
works.delete(key);
return affects;
}
export function reset() {
ASSERT(!running);
works.clear();
files.clear();
assets.clear();
}
export function serialize() {
const fileEntries = Array.from(
files,
([k, v]) =>
[
k,
v.type,
v.type === "f" ? v.lastModified : v.type === "d" ? v.contentHash : null,
...v.affects,
] as const,
);
const workEntries = Array.from(
works,
([k, v]) =>
[
k,
v.value,
Array.from(v.reads.files),
Array.from(v.reads.works),
Array.from(v.writes.files),
Array.from(
v.writes.assets,
([k, { headers }]) => [k, headers] as const,
),
v.affects,
] as const,
);
const expectedFilesOnDisk = Array.from(
writes,
([k, { size, work }]) => [k, size, work] as const,
);
const assetEntries = Array.from(
assets,
([k, asset]) => [k, asset.raw, asset.gzip, asset.zstd] as const,
);
return [
1,
fileEntries,
workEntries,
expectedFilesOnDisk,
assetEntries,
] as const;
}
type SerializedState = ReturnType<typeof serialize>;
/* No-op on failure */
async function deserialize(buffer: Buffer) {
const decoded = msgpackr.decode(buffer) as SerializedState;
if (!Array.isArray(decoded)) return false;
if (decoded[0] !== 1) return false;
const [, fileEntries, workEntries, expectedFilesOnDisk, assetEntries] =
decoded;
for (const [k, type, content, ...affects] of fileEntries) {
if (type === "f") {
ASSERT(typeof content === "number");
files.set(k, { type, affects, lastModified: content });
} else if (type === "d") {
ASSERT(typeof content === "string");
files.set(k, { type, affects, contentHash: content, contents: [] });
} else {
files.set(k, { type, affects });
}
}
for (const entry of workEntries) {
const [k, value, readFiles, readWorks, writeFiles, writeAssets, affects] =
entry;
works.set(k, {
value,
reads: {
files: new Set(readFiles),
works: new Set(readWorks),
},
writes: {
files: new Set(writeFiles),
assets: new Map(
Array.from(writeAssets, ([k, headers]) => [
k,
{
hash: JSON.parse(UNWRAP(headers.etag)),
headers,
},
]),
),
},
affects,
});
}
const statFiles = await Promise.all(
expectedFilesOnDisk.map(([k, size, work]) =>
fs
.stat(path.join(".clover/o", k))
.catch((err) => {
if (err.code === "ENOENT") return null;
throw err;
})
.then((stat) => ({ k, size, work, stat }))
),
);
for (const { k, stat, work, size } of statFiles) {
if (stat?.size === size) {
writes.set(k, {
size: size,
buffer: null,
work,
});
} else {
forceInvalidate({ affects: [work] });
}
}
for (const [hash, raw, gzip, zstd] of assetEntries) {
assets.set(hash, { raw, gzip, zstd });
}
await Promise.all(
Array.from(files, ([key, file]) => invalidateEntry(key, file, false)),
);
}
export async function invalidate(
filePath: string,
unloadModule: boolean = true,
): Promise<boolean> {
const key = toRel(toAbs(filePath));
const file = UNWRAP(files.get(key), `Untracked file '${key}'`);
return invalidateEntry(key, file, unloadModule);
}
export async function invalidateEntry(
key: string,
file: TrackedFile,
unloadModule: boolean,
): Promise<boolean> {
try {
// A throw anywhere below means the entry changed; the catch block invalidates.
if (file.type === "d") {
const contents = (file.contents = await fs.readdir(key));
contents.sort();
const contentHash = crypto
.createHash("sha1")
.update(contents.join("\0"))
.digest("base64url");
if (file.contentHash !== contentHash) {
file.contentHash = contentHash;
throw new Error();
}
} else if (file.type === "f") {
const lastModified = await fs.stat(key).then(
(x) => Math.floor(x.mtimeMs),
() => 0,
);
if (file.lastModified !== lastModified) {
file.lastModified = lastModified;
throw new Error();
}
} else {
file.type satisfies "null";
const stat = await fs.stat(key).catch(() => null);
if (stat) throw new Error();
}
return false;
} catch (e) {
forceInvalidate(file);
if (unloadModule) {
// TODO: handle when this triggers unloading of `generate.ts`
hot.unload(toAbs(key));
}
if (file.type === "null") files.delete(key);
return true;
}
}
export function getAssetManifest() {
const dynamic = new Map<string, Buffer>();
const writer = new BufferWriter();
const assetMap = Object.fromEntries(
Array.from(works, (work) => work[1].writes.assets)
.filter((map) => map.size > 0)
.flatMap((map) =>
Array.from(map, ([key, { hash, headers, regenerative }]) => {
const { raw, gzip, zstd } = UNWRAP(
assets.get(hash),
`Asset ${key} (${hash})`,
);
if (regenerative) {
const id = crypto
.createHash("sha1")
.update(key)
.digest("hex")
.slice(0, 16); /* TODO */
dynamic.set(
id,
manifest.packDynamicBuffer(raw, gzip, zstd, headers),
);
return [key, { type: 1, id }] as const;
}
return [
key,
{
type: 0,
raw: writer.write(raw, "raw:" + hash),
gzip: writer.write(gzip, "gzip:" + hash),
zstd: writer.write(zstd, "zstd:" + hash),
headers,
},
] as const;
})
),
) satisfies manifest.Manifest;
return {
json: assetMap,
blob: writer.get(),
dynamic,
dts: "export type AssetKey = " +
Object.keys(assetMap)
.map((key) => JSON.stringify(key))
.join(" | ") +
"\n",
};
}
/* Input/Output with automatic tracking.
* - Inputs read with Io are tracked to know when to rebuild
* - Outputs written with Io are deleted when abandoned.
*/
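// Example (inside a `work` job; the paths shown are illustrative):
//
//   work(async (io) => {
//     const html = await io.readFile("src/index.html"); // tracked input
//     io.writeFile("index.html", html);                  // flushed to .clover/o/index.html
//     await io.writeAsset({ pathname: "/index.html", buffer: html });
//   });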
export class Io {
constructor(public key: string) {}
reads: Reads = { files: new Set(), works: new Set() };
writes: Writes = { files: new Set(), assets: new Map() };
#trackFs(file: string) {
const resolved = toAbs(file);
const key = toRel(resolved);
this.reads.files.add(key);
return { resolved, key };
}
async readWork<T>(ref: Ref<T>): Promise<T> {
this.reads.works.add(ref.key);
return await ref;
}
/** Track a file in the compilation without reading it. */
async trackFile(file: string) {
const { key, resolved } = this.#trackFs(file);
if (!files.get(key)) {
let lastModified: number = 0;
try {
lastModified = Math.floor((await fs.stat(file)).mtimeMs);
files.set(key, { type: "f", lastModified, affects: [] });
} catch {
files.set(key, { type: "null", affects: [] });
}
}
return resolved;
}
async readFile(file: string) {
return fs.readFile(await this.trackFile(file), "utf-8");
}
async readJson<T>(file: string) {
return JSON.parse(await this.readFile(file)) as T;
}
async readDir(dir: string) {
const { key, resolved } = this.#trackFs(dir);
const existing = files.get(key);
try {
if (existing?.type === "d") return existing.contents;
const contents = await fs.readdir(resolved);
contents.sort();
const contentHash = crypto
.createHash("sha1")
.update(contents.join("\0"))
.digest("base64url");
files.set(key, {
type: "d",
affects: [],
contentHash,
contents,
});
return contents;
} catch (err) {
if (!existing) files.set(key, { type: "null", affects: [] });
throw err;
}
}
async readDirRecursive(dir: string): Promise<string[]> {
const dirs = await this.readDir(dir);
return (
await Promise.all(
dirs.map(async (child) => {
const abs = path.join(dir, child);
const stat = await fs.stat(abs);
if (stat.isDirectory()) {
return (await this.readDirRecursive(abs)).map((grand) =>
path.join(child, grand)
);
} else {
return child;
}
}),
)
).flat();
}
/* Track all dependencies of a module. */
async import<T>(file: string): Promise<T> {
const { resolved } = this.#trackFs(file);
try {
return require(resolved) as T;
} finally {
const queue = [resolved];
const seen = new Set<string>();
let current;
while ((current = queue.shift())) {
const stat = hot.getFileStat(current);
if (!stat) continue;
const { key } = this.#trackFs(current);
if (!files.get(key)) {
files.set(key, {
type: "f",
affects: [],
lastModified: stat?.lastModified ?? 0,
});
}
for (const imp of stat.imports) {
if (!seen.has(imp)) {
seen.add(imp);
queue.push(imp);
}
}
}
}
}
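// Example (hypothetical module path): `import` registers the module plus all
// of its transitive imports as tracked files, so editing any of them
// invalidates this work item.
//
//   const config = await io.import<{ siteName: string }>("src/config.ts");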
async writeAsset(asset: {
pathname: string;
buffer: string | Buffer;
regenerative?: boolean;
headers?: HeadersInit;
}) {
ASSERT(asset.pathname.startsWith("/"));
ASSERT(!seenWrites.has("a:" + asset.pathname));
seenWrites.add("a:" + asset.pathname);
const buffer = typeof asset.buffer === "string"
? Buffer.from(asset.buffer)
: asset.buffer;
const headers = new Headers(asset.headers ?? {});
const hash = crypto.createHash("sha1").update(buffer).digest("hex");
if (!headers.has("Content-Type")) {
headers.set("Content-Type", mime.contentTypeFor(asset.pathname));
}
headers.set("etag", JSON.stringify(hash));
this.writes.assets.set(asset.pathname, {
hash,
headers: Object.fromEntries(headers),
regenerative: !!asset.regenerative,
});
if (!assets.has(hash)) {
jobs += 1;
// Placeholder entry so concurrent writes of the same hash skip recompression.
assets.set(hash, undefined!);
const [gzipBuffer, zstdBuffer] = await Promise.all([
gzip(buffer),
zstdCompress(buffer),
]);
assets.set(hash, {
raw: buffer,
gzip: gzipBuffer,
zstd: zstdBuffer,
});
jobs -= 1;
}
}
writeFile(subPath: string, blob: string | Buffer) {
ASSERT(!subPath.startsWith("/"));
ASSERT(
!seenWrites.has("f:" + subPath),
`File overwritten: ${JSON.stringify(subPath)}`,
);
seenWrites.add("f:" + subPath);
const buffer = Buffer.isBuffer(blob) ? blob : Buffer.from(blob);
writes.set(subPath, {
buffer,
size: buffer.byteLength,
work: this.key,
});
}
}
class BufferWriter {
size = 0;
seen = new Map<string, BufferView>();
buffers: Buffer[] = [];
write(buffer: Buffer, hash: string): BufferView {
let view = this.seen.get(hash);
if (view) return view;
view = [this.size, this.size += buffer.byteLength];
this.seen.set(hash, view);
this.buffers.push(buffer);
return view;
}
get() {
return Buffer.concat(this.buffers);
}
}
export function validateSerializable(value: unknown, key: string) {
if (typeof value === "string") {
if (value.includes(hot.projectRoot)) {
throw new Error(
`Return value must not contain the CWD for portability, found at ${key}`,
);
}
} else if (value && typeof value === "object") {
if (Array.isArray(value)) {
value.forEach((item, i) => validateSerializable(item, `${key}[${i}]`));
} else if (
Object.getPrototypeOf(value) === Object.prototype ||
Buffer.isBuffer(value)
) {
Object.entries(value).forEach(([k, v]) =>
validateSerializable(v, `${key}.${k}`)
);
} else {
throw new Error(
`Return value must be a plain JS object, found ${
Object.getPrototypeOf(value).constructor.name
} at ${key}`,
);
}
} else if (["bigint", "function", "symbol"].includes(typeof value)) {
throw new Error(
`Return value must be a plain JS object, found ${typeof value} at ${key}`,
);
}
}
export function toAbs(filePath: string) {
return path.resolve(hot.projectRoot, filePath);
}
export function toRel(absPath: string) {
return path.relative(hot.projectRoot, absPath).replaceAll("\\", "/");
}
export type BufferView = [start: number, end: number];
interface Reads {
files: Set<string>;
works: Set<string>;
}
interface FileWrite {
buffer: Buffer | null;
size: number;
work: string;
}
interface Writes {
files: Set<string>;
assets: Map<string, AssetWrite>;
}
interface Asset {
raw: Buffer;
gzip: Buffer;
zstd: Buffer;
}
interface AssetWrite {
hash: string;
headers: Record<string, string>;
regenerative: boolean;
}
interface Work<T = unknown> {
value: T;
reads: Reads;
writes: Writes;
affects: string[];
}
type TrackedFile =
& { affects: string[] }
& (
| { type: "f"; lastModified: number }
| { type: "d"; contentHash: string; contents: string[] }
| { type: "null" }
);
const gzip = util.promisify(zlib.gzip);
const zstdCompress = util.promisify(zlib.zstdCompress);
import * as hot from "./hot.ts";
import * as fs from "#sitegen/fs";
import * as mime from "#sitegen/mime";
import * as manifest from "#sitegen/assets";
import * as path from "node:path";
import * as util from "node:util";
import * as crypto from "node:crypto";
import * as zlib from "node:zlib";
import * as console from "@paperclover/console";
import { Spinner } from "@paperclover/console/Spinner";
import { formatSize } from "@/file-viewer/format.ts";
import * as msgpackr from "msgpackr";