// This file was started by AI and maintained by hand since.

import "@paperclover/console/inject";
import { Progress } from "@paperclover/console/Progress";
import { Spinner } from "@paperclover/console/Spinner";
import assert from "node:assert";
import { execFile } from "node:child_process";
import { existsSync, Stats } from "node:fs";
import * as fsp from "node:fs/promises";
import * as path from "node:path";
import { promisify } from "node:util";
import { BlobAsset, cache, FilePermissions, MediaFile } from "../db.ts";
import { formatDate, formatSize } from "./share.ts";
import { highlightCode, type Language } from "./highlight.ts";

const execFileAsync = promisify(execFile);

// Configuration
const FILE_ROOT = process.env.SCAN_FILE_ROOT;
if (!FILE_ROOT) {
  throw new Error(
    "SCAN_FILE_ROOT environment variable not set (e.g. '/path/to/files')",
  );
}
const LOCAL_DIR = path.resolve(FILE_ROOT);
const DRY_RUN = process.argv.includes("--dry-run");
const SHOULD_COMPRESS = true;
const VERBOSE = process.argv.includes("--verbose");
const SHOULD_SCRUB = true;
const COMPRESS_STORE = process.env.COMPRESS_STORE ||
  path.join(process.cwd(), ".clover/compressed");

// Helper function for logging that respects the verbose flag
function log(message: string, always = false): void {
  if (always || VERBOSE) {
    console.log(message);
  }
}

// File extensions that need duration metadata
const MEDIA_EXTENSIONS = new Set([
  ".mp4",
  ".mkv",
  ".webm",
  ".avi",
  ".mov",
  ".mp3",
  ".flac",
  ".wav",
  ".ogg",
  ".m4a",
]);

// File extensions that need dimension metadata
const IMAGE_EXTENSIONS = new Set([
  ".jpg",
  ".jpeg",
  ".png",
  ".gif",
  ".webp",
  ".avif",
  ".heic",
  ".svg",
]);

const VIDEO_EXTENSIONS = new Set([".mp4", ".mkv", ".webm", ".avi", ".mov"]);

// File extensions that need metadata scrubbing
const SCRUB_EXTENSIONS = new Set([
  ".jpg",
  ".jpeg",
  ".png",
  ".mov",
  ".mp4",
  ".m4a",
]);

const CODE_EXTENSIONS: Record<string, Language> = {
  ".json": "json",
  ".toml": "toml",
  ".ts": "ts",
  ".js": "ts",
  ".tsx": "tsx",
  ".jsx": "tsx",
  ".css": "css",
  ".py": "python",
  ".lua": "lua",
  ".sh": "shell",
  ".bat": "dosbatch",
  ".ps1": "powershell",
  ".cmd": "dosbatch",
  ".yaml": "yaml",
  ".yml": "yaml",
  ".zig": "zig",
  ".astro": "astro",
  ".mdx": "mdx",
  ".xml": "xml",
  ".jsonc": "json",
  ".php": "php",
  ".patch": "diff",
  ".diff": "diff",
};

const READ_CONTENTS_EXTENSIONS = new Set([".txt", ".chat"]);

// For file types whose indexing logic has changed, update the date here;
// rescanning will then reconstruct the entire file object. This way you can
// incrementally update new file types without having to reindex everything.
const lastUpdateTypes: Record<string, Date> = {};
lastUpdateTypes[".lnk"] = new Date("2025-05-13 13:58:00");
for (const ext in CODE_EXTENSIONS) {
  lastUpdateTypes[ext] = new Date("2025-05-13 13:58:00");
}
for (const ext of READ_CONTENTS_EXTENSIONS) {
  lastUpdateTypes[ext] = new Date("2025-05-13 13:58:00");
}
lastUpdateTypes[".diff"] = new Date("2025-05-18 13:58:00");
lastUpdateTypes[".patch"] = new Date("2025-05-18 13:58:00");

// Helper functions for metadata extraction
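// Hashes file contents by shelling out to the `sha1sum` CLI; like ffprobe,
// exiftool, gzip, and zstd below, the binary is assumed to be on PATH.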
async function calculateHash(filePath: string): Promise<string> {
  try {
    const hash = await execFileAsync("sha1sum", [filePath]);
    return hash.stdout.split(" ")[0];
  } catch (error) {
    console.error(`Error calculating hash for ${filePath}:`, error);
    throw error;
  }
}
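
// Returns the duration of an audio/video file in whole seconds (rounded up),
// or 0 for non-media extensions and on ffprobe failure.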
async function calculateDuration(filePath: string): Promise<number> {
  try {
    const ext = path.extname(filePath).toLowerCase();
    if (!MEDIA_EXTENSIONS.has(ext)) return 0;

    const { stdout } = await execFileAsync("ffprobe", [
      "-v",
      "error",
      "-show_entries",
      "format=duration",
      "-of",
      "default=noprint_wrappers=1:nokey=1",
      filePath,
    ]);
    return Math.ceil(parseFloat(stdout.trim()));
  } catch (error) {
    console.error(`Error calculating duration for ${filePath}:`, error);
    return 0; // Return 0 for duration on error
  }
}
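
// Returns "WIDTHxHEIGHT" for images and videos, or "" when dimensions do not
// apply or cannot be determined. SVG is special-cased with a regex over the
// markup (ffprobe cannot parse it), so only explicit integer width/height
// attributes are detected.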
async function calculateDimensions(filePath: string): Promise<string> {
  const ext = path.extname(filePath).toLowerCase();
  if (!IMAGE_EXTENSIONS.has(ext) && !VIDEO_EXTENSIONS.has(ext)) return "";

  try {
    if (ext === ".svg") {
      // For SVG files, parse the file and extract width/height
      const content = await fsp.readFile(filePath, "utf8");
      const widthMatch = content.match(/width="(\d+)"/);
      const heightMatch = content.match(/height="(\d+)"/);

      if (widthMatch && heightMatch) {
        return `${widthMatch[1]}x${heightMatch[1]}`;
      }
    } else if (IMAGE_EXTENSIONS.has(ext) || VIDEO_EXTENSIONS.has(ext)) {
      // Use ffprobe for images and videos
      const { stdout } = await execFileAsync("ffprobe", [
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-show_entries",
        "stream=width,height",
        "-of",
        "csv=s=x:p=0",
        filePath,
      ]);
      return stdout.trim();
    }
  } catch (error) {
    console.error(`Error calculating dimensions for ${filePath}:`, error);
  }

  return "";
}

// Helper function to check and remove location metadata
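// The scrub works as a copy-and-swap: exiftool's `-o` writes a cleaned copy
// next to the original, which then replaces it. A backup copy is kept until
// the swap succeeds so a failed exiftool run cannot destroy data, and the
// original atime/mtime are reapplied so scrubbing does not disturb the dates
// shown in the file index.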
async function scrubLocationMetadata(
  filePath: string,
  stats: Stats,
): Promise<boolean> {
  try {
    const ext = path.extname(filePath).toLowerCase();
    if (!SCRUB_EXTENSIONS.has(ext)) return false;

    let hasLocation = false;
    let args: string[] = [];

    // Check for location metadata based on file type
    const tempOutput = path.join(
      path.dirname(filePath),
      `.tmp.${path.basename(filePath)}`,
    );
    switch (ext) {
      case ".jpg":
      case ".jpeg":
      case ".png": {
        // Check for GPS tags in EXIF
        const { stdout: gpsCheck } = await execFileAsync("exiftool", [
          "-gps:all",
          filePath,
        ]);
        hasLocation = gpsCheck.trim().length > 0;
        args = ["-gps:all=", filePath, "-o", tempOutput];
        break;
      }
      case ".mov":
      case ".mp4": {
        // Check for GPS metadata in video files
        const { stdout: videoCheck } = await execFileAsync("exiftool", [
          "-ee",
          "-G3",
          "-s",
          filePath,
        ]);
        hasLocation = videoCheck.includes("GPS") ||
          videoCheck.includes("Location");
        args = ["-gps:all=", "-xmp:all=", filePath, "-o", tempOutput];
        break;
      }
      case ".m4a": {
        // Check for location and other metadata in m4a files
        const { stdout: m4aCheck } = await execFileAsync("exiftool", [
          "-ee",
          "-G3",
          "-s",
          filePath,
        ]);
        hasLocation = m4aCheck.includes("GPS") ||
          m4aCheck.includes("Location") ||
          m4aCheck.includes("Filename") ||
          m4aCheck.includes("Title");

        if (hasLocation) {
          args = [
            "-gps:all=",
            "-location:all=",
            "-filename:all=",
            "-title=",
            "-m4a:all=",
            filePath,
            "-o",
            tempOutput,
          ];
        }
        break;
      }
    }

    const accessTime = stats.atime;
    const modTime = stats.mtime;

    let backup: string | null = null;
    try {
      if (hasLocation) {
        if (DRY_RUN) return true;

        // Prepare a backup
        const tmp = path.join(
          path.dirname(filePath),
          `.tmp.backup.${path.basename(filePath)}`,
        );
        await fsp.copyFile(filePath, tmp);
        await fsp.utimes(tmp, accessTime, modTime);
        backup = tmp;

        // Remove metadata
        await execFileAsync("exiftool", args);
        if (!existsSync(tempOutput)) {
          throw new Error(`Failed to create output file: ${tempOutput}`);
        }

        // Swap in the scrubbed copy and restore original timestamps
        await fsp.rename(tempOutput, filePath);
        await fsp.utimes(filePath, accessTime, modTime);

        // Backup is no longer needed
        await fsp.unlink(backup);

        log(
          `Scrubbed location metadata in ${path.relative(LOCAL_DIR, filePath)}`,
          true,
        );
        return true;
      }
    } catch (error) {
      if (backup) {
        await fsp.rename(backup, filePath);
      }
      if (existsSync(tempOutput)) {
        await fsp.unlink(tempOutput);
      }
      throw error;
    }
  } catch (error) {
    console.error(`Error scrubbing metadata for ${filePath}:`, error);
  }

  return false;
}

// Queue implementation for parallel processing
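// Items are processed with at most `maxConcurrent` in flight; each finished
// worker chains into `processNext`, and `waitForCompletion` polls every
// 100ms instead of tracking a promise per item. The progress display renders
// one spinner line per active item.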
type AsyncQueueProcessor<T> = (s: Spinner, item: T) => Promise<void>;
class AsyncQueue<T> {
  private queue: T[] = [];
  private running = 0;
  private maxConcurrent: number;
  private processed = 0;
  private progress?: Progress<{ active: Spinner[] }>;
  private name: string;
  private estimate?: number;

  constructor(name: string, maxConcurrent: number) {
    this.maxConcurrent = maxConcurrent;
    this.name = name;
  }

  setEstimate(estimate: number) {
    this.estimate = estimate;
    if (this.progress) {
      this.progress.total = Math.max(
        this.processed + this.queue.length,
        estimate,
      );
    }
  }

  getProgress() {
    if (!this.progress) {
      this.progress = new Progress({
        spinner: null,
        text: ({ active }) => {
          const now = performance.now();
          let text = `[${this.processed}/${
            this.processed + this.queue.length
          }] ${this.name}`;
          let n = 0;
          for (const item of active) {
            const itemText = "- " + item.format(now);
            text += `\n` +
              itemText.slice(0, Math.max(0, process.stdout.columns - 1));
            if (n > 10) {
              text += `\n ... + ${active.length - n} more`;
              break;
            }
            n++;
          }
          return text;
        },
        props: {
          active: [] as Spinner[],
        },
      });
      this.progress.total = this.estimate ?? 0;
      this.progress.value = 0;
      this.progress.fps = 30;
    }
    return this.progress;
  }

  async add(item: T, processor: AsyncQueueProcessor<T>): Promise<void> {
    this.queue.push(item);
    this.getProgress().total = Math.max(
      this.processed + this.queue.length,
      this.estimate ?? 0,
    );
    return this.processNext(processor);
  }

  async addBatch(items: T[], processor: AsyncQueueProcessor<T>): Promise<void> {
    this.queue.push(...items);
    this.getProgress().total = Math.max(
      this.processed + this.queue.length,
      this.estimate ?? 0,
    );
    return this.processNext(processor);
  }

  private async processNext(processor: AsyncQueueProcessor<T>): Promise<void> {
    if (this.running >= this.maxConcurrent || this.queue.length === 0) {
      return;
    }

    const item = this.queue.shift();
    if (!item) return;

    this.running++;

    let spinner: Spinner | undefined;
    try {
      const progress = this.getProgress();

      let itemText = "";
      if (typeof item === "string") {
        itemText = item;
      } else if (typeof item === "object" && item !== null && "path" in item) {
        itemText = "" + item.path;
      } else {
        itemText = JSON.stringify(item);
      }
      if (itemText.startsWith(LOCAL_DIR)) {
        itemText = path.relative(LOCAL_DIR, itemText);
      }

      spinner = new Spinner(itemText);
      spinner.stop();
      progress.props.active.unshift(spinner);
      await processor(spinner, item);
      this.processed++;
      progress.value = this.processed;
    } catch (error) {
      console.error(`Error processing ${this.name} queue item:`, error);
      this.processed++;
      this.getProgress().value = this.processed;
    } finally {
      // Remove the spinner even when the processor threw, so failed items do
      // not linger in the progress display.
      if (spinner) {
        const progress = this.getProgress();
        progress.props = {
          active: progress.props.active.filter((s) => s !== spinner),
        };
      }
      this.running--;
      await this.processNext(processor);
    }
  }

  async waitForCompletion(): Promise<void> {
    if (this.queue.length === 0 && this.running === 0) {
      if (this.processed > 0) {
        this.#success();
      }
      return;
    }

    return new Promise((resolve) => {
      const checkInterval = setInterval(() => {
        if (this.queue.length === 0 && this.running === 0) {
          clearInterval(checkInterval);
          this.#success();
          resolve();
        }
      }, 100);
    });
  }

  #success() {
    this.getProgress().success(`${this.processed} ${this.name}`);
  }
}

function skipBasename(basename: string): boolean {
  // Dot files are skipped from indexing; .dirsort and .friends are called out
  // explicitly because they are read separately during directory metadata
  // processing rather than tracked as files.
  if (basename === ".dirsort") return true;
  if (basename === ".friends") return true;

  return (
    basename.startsWith(".") ||
    basename.startsWith("._") ||
    basename.startsWith(".tmp") ||
    basename === ".DS_Store" ||
    basename.toLowerCase() === "thumbs.db" ||
    basename.toLowerCase() === "desktop.ini"
  );
}

// File system scanner
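// Walks LOCAL_DIR using three queues: directories are enumerated, files are
// hashed and indexed, and (optionally) compressed. Paths are stored in the
// database as POSIX-style paths rooted at "/", regardless of host OS.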
class FileSystemScanner {
  private visitedPaths = new Set<string>();
  private previousPaths = new Set<string>();
  private dirQueue = new AsyncQueue<string>("Scan Directories", 10);
  private fileQueue = new AsyncQueue<{ path: string; stat: Stats }>(
    "File metadata",
    20,
  );
  private compressQueue: AsyncQueue<{ file: MediaFile; path: string }> | null =
    SHOULD_COMPRESS ? new AsyncQueue("Compress Assets", 10) : null;

  private getDbPath(localPath: string): string {
    // Convert local file system path to database path
    const relativePath = path.relative(LOCAL_DIR, localPath);
    return "/" + relativePath.split(path.sep).join(path.posix.sep);
  }

  private getLocalPath(dbPath: string): string {
    // Convert database path to local file system path
    return path.join(LOCAL_DIR, dbPath.slice(1));
  }
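
  // Indexes a single file. Unchanged files (same size and existing hash) are
  // skipped unless their extension's indexing logic changed after the file
  // was last processed (see lastUpdateTypes above).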
  async scanFile(s: Spinner, filePath: string, stat: Stats): Promise<void> {
    const dbPath = this.getDbPath(filePath);

    // Skip hidden files
    const basename = path.basename(filePath);
    if (skipBasename(basename)) {
      return;
    }

    this.visitedPaths.add(dbPath);

    // Get existing file info from db
    const existingFile = MediaFile.getByPath(dbPath);

    // Determine which date to use (for date protection)
    let dateToUse = stat.mtime;
    const year2025Start = new Date("2025-01-01T00:00:00Z");

    if (
      existingFile &&
      existingFile.date < year2025Start &&
      stat.mtime >= year2025Start
    ) {
      console.error(
        `Error: ${dbPath} is ${formatDate(existingFile.date)}, ` +
          `got modified to ${formatDate(stat.mtime)}`,
      );
      dateToUse = existingFile.date;
    }

    // Check if we need to reprocess the file
    if (existingFile && existingFile.size === stat.size && existingFile.hash) {
      maybe_skip: {
        const lastUpdateDate =
          lastUpdateTypes[path.extname(filePath).toLowerCase()];
        if (lastUpdateDate && existingFile.lastUpdateDate < lastUpdateDate) {
          console.log(
            `Reprocessing ${dbPath} because indexing logic changed after ${
              formatDate(lastUpdateDate)
            }`,
          );
          break maybe_skip;
        }

        if (SHOULD_COMPRESS && existingFile.processed !== 2) {
          this.compressQueue!.add(
            { file: existingFile, path: dbPath },
            this.compressFile.bind(this),
          );
        }

        // File hasn't changed, no need to reprocess
        MediaFile.createFile({
          path: dbPath,
          date: dateToUse,
          hash: existingFile.hash,
          size: stat.size,
          duration: existingFile.duration,
          dimensions: existingFile.dimensions,
          content: existingFile.contents,
        });
        return;
      }
    }

    // Process the file
    log(`Processing file: ${dbPath}`);

    // Scrub location metadata if needed
    if (SHOULD_SCRUB) {
      if (await scrubLocationMetadata(filePath, stat)) {
        // Re-stat the file in case it was modified
        const newStat = await fsp.stat(filePath);
        stat.size = newStat.size;
      }
    }

    // Extract content
    const hash = await calculateHash(filePath);
    let content = "";
    if (filePath.endsWith(".lnk")) {
      content = (await fsp.readFile(filePath, "utf8")).trim();
    }
    const language = CODE_EXTENSIONS[path.extname(filePath).toLowerCase()];
    if (language) {
      read_code: {
        // An issue is that .ts is an overloaded extension, shared between
        // 'transport stream' and 'typescript'.
        //
        // Filter used here is:
        // - more than 1mb
        // - invalid UTF-8
        if (stat.size > 1_000_000) break read_code;
        let code;
        const buf = await fsp.readFile(filePath);
        try {
          code = new TextDecoder("utf-8", { fatal: true }).decode(buf);
        } catch (error) {
          break read_code;
        }
        content = await highlightCode(code, language);
      }
    }
    if (
      !content &&
      READ_CONTENTS_EXTENSIONS.has(path.extname(filePath).toLowerCase())
    ) {
      content = await fsp.readFile(filePath, "utf8");
    }
    // End extract content

    if (hash === existingFile?.hash) {
      MediaFile.createFile({
        path: dbPath,
        date: dateToUse,
        hash,
        size: stat.size,
        duration: existingFile.duration,
        dimensions: existingFile.dimensions,
        content,
      });
      return;
    } else if (existingFile) {
      if (existingFile.processed === 2) {
        if (BlobAsset.decrementOrDelete(existingFile.hash)) {
          log(
            `Deleted compressed asset ${existingFile.hash}.{gz,zstd}`,
            true,
          );
          await fsp.unlink(
            path.join(
              COMPRESS_STORE,
              existingFile.hash.substring(0, 2),
              existingFile.hash + ".gz",
            ),
          );
          await fsp.unlink(
            path.join(
              COMPRESS_STORE,
              existingFile.hash.substring(0, 2),
              existingFile.hash + ".zstd",
            ),
          );
        }
      }
    }
    const [duration, dimensions] = await Promise.all([
      calculateDuration(filePath),
      calculateDimensions(filePath),
    ]);

    // Update database with all metadata
    MediaFile.createFile({
      path: dbPath,
      date: dateToUse,
      hash,
      size: stat.size,
      duration,
      dimensions,
      content,
    });

    if (SHOULD_COMPRESS) {
      this.compressQueue!.add(
        {
          file: MediaFile.getByPath(dbPath)!,
          path: dbPath,
        },
        this.compressFile.bind(this),
      );
    }
  }
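
  // Precompresses the file as .gz and .zstd blobs stored by hash, so
  // identical files share one compressed copy. BlobAsset reference counts
  // (from ../db.ts) are assumed to track how many MediaFiles point at each
  // blob.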
  async compressFile(s: Spinner, { file }: { file: MediaFile }): Promise<void> {
    log(`Compressing file: ${file.path}`);
    if (DRY_RUN) return;

    const filePath = path.join(FILE_ROOT!, file.path);

    const hash = file.hash;
    const firstTwoChars = hash.substring(0, 2);
    const compressDir = `${COMPRESS_STORE}/${firstTwoChars}`;
    const compressPath = `${compressDir}/${hash}`;

    // Create directory structure if it doesn't exist
    await fsp.mkdir(compressDir, { recursive: true });

    // Register a blob reference; only the first reference does the
    // compression work
    const blob = BlobAsset.putOrIncrement(hash);
    if (blob.refs > 1) {
      log(
        `Skipping compression of ${filePath} because it already exists in ${compressPath}`,
      );
      return;
    }
    // Check if already exists
    if (existsSync(compressPath + ".gz")) {
      file.setCompressed(true);
      return;
    }
    try {
      const gzipProcess = Bun.spawn(["gzip", "-c", filePath, "-9"], {
        stdout: Bun.file(compressPath + ".gz"),
      });
      const zstdProcess = Bun.spawn(["zstd", "-c", filePath, "-9"], {
        stdout: Bun.file(compressPath + ".zstd"),
      });
      const [gzipExited, zstdExited] = await Promise.all([
        gzipProcess.exited,
        zstdProcess.exited,
      ]);
      assert(gzipExited === 0);
      assert(zstdExited === 0);
      assert(existsSync(compressPath + ".gz"));
      assert(existsSync(compressPath + ".zstd"));
      file.setCompressed(true);
    } catch (error) {
      console.error(`Error compressing file ${filePath}:`, error);
      BlobAsset.decrementOrDelete(hash);
      file.setCompressed(false);
    }
  }
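
  // Enumerates one directory, queueing subdirectories back onto dirQueue and
  // files onto fileQueue; recursion happens through the queues, not the
  // stack.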
  async scanDirectory(s: Spinner, dirPath: string): Promise<void> {
    const dbPath = this.getDbPath(dirPath);

    this.visitedPaths.add(dbPath);

    // Create or update directory entry
    log(`Scanning directory: ${dbPath}`);
    if (!DRY_RUN) {
      MediaFile.createOrUpdateDirectory(dbPath);
    }

    try {
      const entries = await fsp.readdir(dirPath, { withFileTypes: true });

      // Process files and subdirectories
      for (const entry of entries) {
        const entryPath = path.join(dirPath, entry.name);

        // Skip hidden files and system files
        if (skipBasename(entry.name)) {
          continue;
        }

        if (entry.isDirectory()) {
          // Queue subdirectory for scanning
          this.dirQueue.add(entryPath, this.scanDirectory.bind(this));
        } else if (entry.isFile()) {
          // Queue file for processing
          const stat = await fsp.stat(entryPath);

          this.fileQueue.add(
            { path: entryPath, stat },
            async (s, item) => await this.scanFile(s, item.path, item.stat),
          );
        }
      }
    } catch (error) {
      console.error(`Error scanning directory ${dirPath}:`, error);
    }
  }
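
  // Aggregates child metadata (total size, newest date, combined hash) into
  // the directory row, reads the optional readme.txt / .dirsort / .friends
  // control files, and marks the directory processed. Children must be
  // processed before their parents; scan() guarantees this by sorting
  // deepest-first.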
  async processDirectoryMetadata(dirPath: string): Promise<void> {
    const dbPath = this.getDbPath(dirPath);
    const dir = MediaFile.getByPath(dbPath);

    if (!dir || dir.kind !== MediaFile.Kind.directory) {
      return;
    }

    if (DRY_RUN) return;

    const children = dir.getChildren();

    // Calculate directory metadata
    let totalSize = 0;
    let newestDate = new Date(0);
    let allHashes = "";

    // Check for readme.txt
    let readmeContent = "";

    try {
      readmeContent = await fsp.readFile(
        path.join(dirPath, "readme.txt"),
        "utf8",
      );
    } catch (error: any) {
      log(`no readme ${dirPath}`);
      if (error.code !== "ENOENT") {
        console.error(`Error reading readme.txt in ${dirPath}:`, error);
      }
    }

    let dirsort: string[] | null = null;
    try {
      dirsort = (await fsp.readFile(path.join(dirPath, ".dirsort"), "utf8"))
        .split("\n")
        .map((x) => x.trim())
        .filter(Boolean);
    } catch (error: any) {
      if (error.code !== "ENOENT") {
        console.error(`Error reading .dirsort in ${dirPath}:`, error);
      }
    }

    if (await fsp.exists(path.join(dirPath, ".friends"))) {
      FilePermissions.setPermissions(dbPath, 1);
    } else {
      FilePermissions.setPermissions(dbPath, 0);
    }

    // Process children
    for (const child of children) {
      totalSize += child.size;
      allHashes += child.hash;

      // Update newest date, ignoring readme.txt
      if (!child.path.endsWith("/readme.txt") && child.date > newestDate) {
        newestDate = child.date;
      }
    }

    // Create a hash for the directory
    const dirHash = new Bun.CryptoHasher("sha1")
      .update(dbPath + allHashes)
      .digest("hex");

    // Update directory metadata
    MediaFile.markDirectoryProcessed({
      id: dir.id,
      timestamp: newestDate,
      contents: readmeContent,
      size: totalSize,
      hash: dirHash,
      dirsort,
    });
  }
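
  // Diffs the paths seen this scan against what the database previously held,
  // deleting rows (and any now-unreferenced compressed blobs) for entries
  // that no longer exist on disk.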
  async findDeletedFiles(): Promise<void> {
    if (DRY_RUN) return;

    // Find all paths that exist in the DB but not in the filesystem
    const deletedPaths = Array.from(this.previousPaths).filter(
      (path) => !this.visitedPaths.has(path),
    );

    for (const dbPath of deletedPaths) {
      const file = MediaFile.getByPath(dbPath);
      if (!file) continue;

      log(`Item Deleted: ${dbPath}`, true);
      if (file.processed === 2) {
        if (BlobAsset.decrementOrDelete(file.hash)) {
          log(`Deleted compressed asset ${file.hash}.{gz,zstd}`, true);
          await fsp.unlink(
            path.join(
              COMPRESS_STORE,
              file.hash.substring(0, 2),
              file.hash + ".gz",
            ),
          );
          await fsp.unlink(
            path.join(
              COMPRESS_STORE,
              file.hash.substring(0, 2),
              file.hash + ".zstd",
            ),
          );
        }
      }
      MediaFile.deleteByPath(dbPath);
    }
  }

  async loadPreviousPaths(): Promise<void> {
    // Get all files and directories from the database
    // This uses a custom query to get all paths at once
    const getAllPathsQuery = cache
      .prepare(`SELECT path, kind FROM media_files`)
      .all() as {
        path: string;
        kind: MediaFile.Kind;
      }[];

    let dirs = 0;
    let files = 0;
    for (const row of getAllPathsQuery) {
      this.previousPaths.add(row.path);
      if (row.kind === MediaFile.Kind.directory) {
        dirs++;
      } else {
        files++;
      }
    }

    this.dirQueue.setEstimate(dirs);
    this.fileQueue.setEstimate(files);

    // log(`Loaded ${this.previousPaths.size} paths from database`, true);
  }
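
  // Entry point: validates the root, walks the tree, prunes deleted entries,
  // then fills in directory metadata sequentially from the deepest paths up
  // so every parent sees finalized child sizes and dates.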
  async scan(): Promise<void> {
    log(`Starting file system scan in ${LOCAL_DIR}`, true);

    // Check if the root directory exists and is accessible
    try {
      const rootStat = await fsp.stat(LOCAL_DIR);
      if (!rootStat.isDirectory()) {
        throw new Error(`${LOCAL_DIR} is not a directory`);
      }
    } catch (error) {
      console.error(`Error: Cannot access root directory ${LOCAL_DIR}`, error);
      console.error(
        `Aborting scan to prevent database corruption. Please check if the volume is mounted.`,
      );
      process.exit(1);
    }

    await this.loadPreviousPaths();

    await this.dirQueue.add(LOCAL_DIR, this.scanDirectory.bind(this));

    await this.dirQueue.waitForCompletion();
    await this.fileQueue.waitForCompletion();

    await this.findDeletedFiles();

    const allDirs = Array.from(this.visitedPaths)
      .filter((path) => {
        const file = MediaFile.getByPath(path);
        return file && file.kind === MediaFile.Kind.directory;
      })
      .sort((a, b) => b.length - a.length);

    for (const dirPath of allDirs) {
      await this.processDirectoryMetadata(this.getLocalPath(dirPath));
    }

    if (SHOULD_COMPRESS) {
      await this.compressQueue!.waitForCompletion();
    }

    log("Scan completed successfully!", true);
  }
}

// Main execution
function showHelp() {
  console.log(`
MediaFile Scanner - Index filesystem content for paperclover.net

Environment variables:
  SCAN_FILE_ROOT   Required. Path to the directory to scan
  COMPRESS_STORE   Optional. Path to store compressed files (default: .clover/compressed)

Options:
  --help      Show this help message
  --dry-run   Don't make any changes to the database
  --verbose   Show detailed output

Usage:
  bun ./media/scan.ts [options]
`);
  process.exit(0);
}

{
  // Show help if requested
  if (process.argv.includes("--help")) {
    showHelp();
  }

  // Check if the root directory exists before starting
  if (!existsSync(LOCAL_DIR)) {
    console.error(
      `Error: Root directory ${LOCAL_DIR} does not exist or is not accessible.`,
    );
    console.error(`Please check if the volume is mounted correctly.`);
    process.exit(1);
  }

  const startTime = Date.now();

  try {
    const scanner = new FileSystemScanner();
    await scanner.scan();

    const endTime = Date.now();
    log(`Scan completed in ${(endTime - startTime) / 1000} seconds`, true);

    const rootDir = MediaFile.getByPath("/")!;
    const totalEntries = cache
      .prepare(`SELECT COUNT(*) as count FROM media_files`)
      .get() as { count: number };
    const totalDuration = cache
      .prepare(`SELECT SUM(duration) as duration FROM media_files`)
      .get() as { duration: number };
    console.log();
    console.log("Global Stats");
    console.log(` Entry count: ${totalEntries.count}`);
    console.log(` Uncompressed size: ${formatSize(rootDir.size)}`);
    console.log(
      ` Total audio/video duration: ${
        (totalDuration.duration / 60 / 60).toFixed(1)
      } hours`,
    );
  } catch (error) {
    console.error("Error during scan:", error);
    process.exit(1);
  }
}