// This file was started by AI and maintained by hand since.

import "@paperclover/console/inject";
import { Progress } from "@paperclover/console/Progress";
import { Spinner } from "@paperclover/console/Spinner";
import assert from "node:assert";
import { execFile } from "node:child_process";
import { existsSync, Stats } from "node:fs";
import * as fsp from "node:fs/promises";
import * as path from "node:path";
import { promisify } from "node:util";
import { BlobAsset, cache, FilePermissions, MediaFile } from "../db.ts";
import { formatDate, formatSize } from "./share.ts";
import { highlightCode, type Language } from "./highlight.ts";

const execFileAsync = promisify(execFile);

// Configuration
const FILE_ROOT = process.env.SCAN_FILE_ROOT;
if (!FILE_ROOT) {
  throw new Error(
    "SCAN_FILE_ROOT environment variable not set (e.g. '/path/to/files')",
  );
}
const LOCAL_DIR = path.resolve(FILE_ROOT);
const DRY_RUN = process.argv.includes("--dry-run");
const SHOULD_COMPRESS = true;
const VERBOSE = process.argv.includes("--verbose");
const SHOULD_SCRUB = true;
const COMPRESS_STORE = process.env.COMPRESS_STORE ||
  path.join(process.cwd(), ".clover/compressed");

// Helper function for logging that respects the verbose flag
function log(message: string, always = false): void {
  if (always || VERBOSE) {
    console.log(message);
  }
}

// File extensions that need duration metadata
const MEDIA_EXTENSIONS = new Set([
  ".mp4",
  ".mkv",
  ".webm",
  ".avi",
  ".mov",
  ".mp3",
  ".flac",
  ".wav",
  ".ogg",
  ".m4a",
]);

// File extensions that need dimension metadata
const IMAGE_EXTENSIONS = new Set([
  ".jpg",
  ".jpeg",
  ".png",
  ".gif",
  ".webp",
  ".avif",
  ".heic",
  ".svg",
]);

const VIDEO_EXTENSIONS = new Set([".mp4", ".mkv", ".webm", ".avi", ".mov"]);

// File extensions that need metadata scrubbing
const SCRUB_EXTENSIONS = new Set([
  ".jpg",
  ".jpeg",
  ".png",
  ".mov",
  ".mp4",
  ".m4a",
]);

const CODE_EXTENSIONS: Record<string, Language> = {
  ".json": "json",
  ".toml": "toml",
  ".ts": "ts",
  ".js": "ts",
  ".tsx": "tsx",
  ".jsx": "tsx",
  ".css": "css",
  ".py": "python",
  ".lua": "lua",
  ".sh": "shell",
  ".bat": "dosbatch",
  ".ps1": "powershell",
  ".cmd": "dosbatch",
  ".yaml": "yaml",
  ".yml": "yaml",
  ".zig": "zig",
  ".astro": "astro",
  ".mdx": "mdx",
  ".xml": "xml",
  ".jsonc": "json",
  ".php": "php",
  ".patch": "diff",
  ".diff": "diff",
};

const READ_CONTENTS_EXTENSIONS = new Set([".txt", ".chat"]);

// For file types whose indexing logic has changed, update the date here;
// rescanning will then reconstruct the entire file object. This way you can
// incrementally update new file types without having to reindex everything.
const lastUpdateTypes: Record<string, Date> = {};
lastUpdateTypes[".lnk"] = new Date("2025-05-13 13:58:00");
for (const ext in CODE_EXTENSIONS) {
  lastUpdateTypes[ext] = new Date("2025-05-13 13:58:00");
}
for (const ext of READ_CONTENTS_EXTENSIONS) {
  lastUpdateTypes[ext] = new Date("2025-05-13 13:58:00");
}
lastUpdateTypes[".diff"] = new Date("2025-05-18 13:58:00");
lastUpdateTypes[".patch"] = new Date("2025-05-18 13:58:00");

// Helper functions for metadata extraction
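// Hashes file contents by shelling out to the `sha1sum` CLI; like ffprobe,
// exiftool, gzip, and zstd below, the binary is assumed to be on PATH.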
async function calculateHash(filePath: string): Promise<string> {
  try {
    const hash = await execFileAsync("sha1sum", [filePath]);
    return hash.stdout.split(" ")[0];
  } catch (error) {
    console.error(`Error calculating hash for ${filePath}:`, error);
    throw error;
  }
}
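
// Returns the duration of an audio/video file in whole seconds (rounded up),
// or 0 for non-media extensions and on ffprobe failure.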
async function calculateDuration(filePath: string): Promise<number> {
  try {
    const ext = path.extname(filePath).toLowerCase();
    if (!MEDIA_EXTENSIONS.has(ext)) return 0;

    const { stdout } = await execFileAsync("ffprobe", [
      "-v",
      "error",
      "-show_entries",
      "format=duration",
      "-of",
      "default=noprint_wrappers=1:nokey=1",
      filePath,
    ]);
    return Math.ceil(parseFloat(stdout.trim()));
  } catch (error) {
    console.error(`Error calculating duration for ${filePath}:`, error);
    return 0; // Return 0 for duration on error
  }
}
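
// Returns "WIDTHxHEIGHT" for images and videos, or "" when dimensions do not
// apply or cannot be determined. SVG is special-cased with a regex over the
// markup (ffprobe cannot parse it), so only explicit integer width/height
// attributes are detected.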
async function calculateDimensions(filePath: string): Promise<string> {
  const ext = path.extname(filePath).toLowerCase();
  if (!IMAGE_EXTENSIONS.has(ext) && !VIDEO_EXTENSIONS.has(ext)) return "";

  try {
    if (ext === ".svg") {
      // For SVG files, parse the file and extract width/height
      const content = await fsp.readFile(filePath, "utf8");
      const widthMatch = content.match(/width="(\d+)"/);
      const heightMatch = content.match(/height="(\d+)"/);

      if (widthMatch && heightMatch) {
        return `${widthMatch[1]}x${heightMatch[1]}`;
      }
    } else if (IMAGE_EXTENSIONS.has(ext) || VIDEO_EXTENSIONS.has(ext)) {
      // Use ffprobe for images and videos
      const { stdout } = await execFileAsync("ffprobe", [
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-show_entries",
        "stream=width,height",
        "-of",
        "csv=s=x:p=0",
        filePath,
      ]);
      return stdout.trim();
    }
  } catch (error) {
    console.error(`Error calculating dimensions for ${filePath}:`, error);
  }

  return "";
}

// Helper function to check and remove location metadata
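// The scrub works as a copy-and-swap: exiftool's `-o` writes a cleaned copy
// next to the original, which then replaces it. A backup copy is kept until
// the swap succeeds so a failed exiftool run cannot destroy data, and the
// original atime/mtime are reapplied so scrubbing does not disturb the dates
// shown in the file index.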
async function scrubLocationMetadata(
  filePath: string,
  stats: Stats,
): Promise<boolean> {
  try {
    const ext = path.extname(filePath).toLowerCase();
    if (!SCRUB_EXTENSIONS.has(ext)) return false;

    let hasLocation = false;
    let args: string[] = [];

    // Check for location metadata based on file type
    const tempOutput = path.join(
      path.dirname(filePath),
      `.tmp.${path.basename(filePath)}`,
    );
    switch (ext) {
      case ".jpg":
      case ".jpeg":
      case ".png": {
        // Check for GPS tags in EXIF
        const { stdout: gpsCheck } = await execFileAsync("exiftool", [
          "-gps:all",
          filePath,
        ]);
        hasLocation = gpsCheck.trim().length > 0;
        args = ["-gps:all=", filePath, "-o", tempOutput];
        break;
      }
      case ".mov":
      case ".mp4": {
        // Check for GPS metadata in video files
        const { stdout: videoCheck } = await execFileAsync("exiftool", [
          "-ee",
          "-G3",
          "-s",
          filePath,
        ]);
        hasLocation = videoCheck.includes("GPS") ||
          videoCheck.includes("Location");
        args = ["-gps:all=", "-xmp:all=", filePath, "-o", tempOutput];
        break;
      }
      case ".m4a": {
        // Check for location and other metadata in m4a files
        const { stdout: m4aCheck } = await execFileAsync("exiftool", [
          "-ee",
          "-G3",
          "-s",
          filePath,
        ]);
        hasLocation = m4aCheck.includes("GPS") ||
          m4aCheck.includes("Location") ||
          m4aCheck.includes("Filename") ||
          m4aCheck.includes("Title");

        if (hasLocation) {
          args = [
            "-gps:all=",
            "-location:all=",
            "-filename:all=",
            "-title=",
            "-m4a:all=",
            filePath,
            "-o",
            tempOutput,
          ];
        }
        break;
      }
    }

    const accessTime = stats.atime;
    const modTime = stats.mtime;

    let backup: string | null = null;
    try {
      if (hasLocation) {
        if (DRY_RUN) return true;

        // Prepare a backup
        const tmp = path.join(
          path.dirname(filePath),
          `.tmp.backup.${path.basename(filePath)}`,
        );
        await fsp.copyFile(filePath, tmp);
        await fsp.utimes(tmp, accessTime, modTime);
        backup = tmp;

        // Remove metadata
        await execFileAsync("exiftool", args);
        if (!existsSync(tempOutput)) {
          throw new Error(`Failed to create output file: ${tempOutput}`);
        }

        // Swap in the scrubbed copy and restore original timestamps
        await fsp.rename(tempOutput, filePath);
        await fsp.utimes(filePath, accessTime, modTime);

        // Backup is no longer needed
        await fsp.unlink(backup);

        log(
          `Scrubbed location metadata in ${path.relative(LOCAL_DIR, filePath)}`,
          true,
        );
        return true;
      }
    } catch (error) {
      if (backup) {
        await fsp.rename(backup, filePath);
      }
      if (existsSync(tempOutput)) {
        await fsp.unlink(tempOutput);
      }
      throw error;
    }
  } catch (error) {
    console.error(`Error scrubbing metadata for ${filePath}:`, error);
  }

  return false;
}

// Queue implementation for parallel processing
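// Items are processed with at most `maxConcurrent` in flight; each finished
// worker chains into `processNext`, and `waitForCompletion` polls every
// 100ms instead of tracking a promise per item. The progress display renders
// one spinner line per active item.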
type AsyncQueueProcessor<T> = (s: Spinner, item: T) => Promise<void>;
class AsyncQueue<T> {
  private queue: T[] = [];
  private running = 0;
  private maxConcurrent: number;
  private processed = 0;
  private progress?: Progress<{ active: Spinner[] }>;
  private name: string;
  private estimate?: number;

  constructor(name: string, maxConcurrent: number) {
    this.maxConcurrent = maxConcurrent;
    this.name = name;
  }

  setEstimate(estimate: number) {
    this.estimate = estimate;
    if (this.progress) {
      this.progress.total = Math.max(
        this.processed + this.queue.length,
        estimate,
      );
    }
  }

  getProgress() {
    if (!this.progress) {
      this.progress = new Progress({
        spinner: null,
        text: ({ active }) => {
          const now = performance.now();
          let text = `[${this.processed}/${
            this.processed + this.queue.length
          }] ${this.name}`;
          let n = 0;
          for (const item of active) {
            const itemText = "- " + item.format(now);
            text += `\n` +
              itemText.slice(0, Math.max(0, process.stdout.columns - 1));
            if (n > 10) {
              text += `\n ... + ${active.length - n} more`;
              break;
            }
            n++;
          }
          return text;
        },
        props: {
          active: [] as Spinner[],
        },
      });
      this.progress.total = this.estimate ?? 0;
      this.progress.value = 0;
      this.progress.fps = 30;
    }
    return this.progress;
  }

  async add(item: T, processor: AsyncQueueProcessor<T>): Promise<void> {
    this.queue.push(item);
    this.getProgress().total = Math.max(
      this.processed + this.queue.length,
      this.estimate ?? 0,
    );
    return this.processNext(processor);
  }

  async addBatch(items: T[], processor: AsyncQueueProcessor<T>): Promise<void> {
    this.queue.push(...items);
    this.getProgress().total = Math.max(
      this.processed + this.queue.length,
      this.estimate ?? 0,
    );
    return this.processNext(processor);
  }

  private async processNext(processor: AsyncQueueProcessor<T>): Promise<void> {
    if (this.running >= this.maxConcurrent || this.queue.length === 0) {
      return;
    }

    const item = this.queue.shift();
    if (!item) return;

    this.running++;

    let spinner: Spinner | undefined;
    try {
      const progress = this.getProgress();

      let itemText = "";
      if (typeof item === "string") {
        itemText = item;
      } else if (typeof item === "object" && item !== null && "path" in item) {
        itemText = "" + item.path;
      } else {
        itemText = JSON.stringify(item);
      }
      if (itemText.startsWith(LOCAL_DIR)) {
        itemText = path.relative(LOCAL_DIR, itemText);
      }

      spinner = new Spinner(itemText);
      spinner.stop();
      progress.props.active.unshift(spinner);
      await processor(spinner, item);
      this.processed++;
      progress.value = this.processed;
    } catch (error) {
      console.error(`Error processing ${this.name} queue item:`, error);
      this.processed++;
      this.getProgress().value = this.processed;
    } finally {
      // Remove the spinner even when the processor threw, so failed items do
      // not linger in the progress display.
      if (spinner) {
        const progress = this.getProgress();
        progress.props = {
          active: progress.props.active.filter((s) => s !== spinner),
        };
      }
      this.running--;
      await this.processNext(processor);
    }
  }

  async waitForCompletion(): Promise<void> {
    if (this.queue.length === 0 && this.running === 0) {
      if (this.processed > 0) {
        this.#success();
      }
      return;
    }

    return new Promise((resolve) => {
      const checkInterval = setInterval(() => {
        if (this.queue.length === 0 && this.running === 0) {
          clearInterval(checkInterval);
          this.#success();
          resolve();
        }
      }, 100);
    });
  }

  #success() {
    this.getProgress().success(`${this.processed} ${this.name}`);
  }
}

function skipBasename(basename: string): boolean {
  // Dot files are skipped from indexing; .dirsort and .friends are called out
  // explicitly because they are read separately during directory metadata
  // processing rather than tracked as files.
  if (basename === ".dirsort") return true;
  if (basename === ".friends") return true;

  return (
    basename.startsWith(".") ||
    basename.startsWith("._") ||
    basename.startsWith(".tmp") ||
    basename === ".DS_Store" ||
    basename.toLowerCase() === "thumbs.db" ||
    basename.toLowerCase() === "desktop.ini"
  );
}

// File system scanner
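// Walks LOCAL_DIR using three queues: directories are enumerated, files are
// hashed and indexed, and (optionally) compressed. Paths are stored in the
// database as POSIX-style paths rooted at "/", regardless of host OS.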
class FileSystemScanner {
  private visitedPaths = new Set<string>();
  private previousPaths = new Set<string>();
  private dirQueue = new AsyncQueue<string>("Scan Directories", 10);
  private fileQueue = new AsyncQueue<{ path: string; stat: Stats }>(
    "File metadata",
    20,
  );
  private compressQueue: AsyncQueue<{ file: MediaFile; path: string }> | null =
    SHOULD_COMPRESS ? new AsyncQueue("Compress Assets", 10) : null;

  private getDbPath(localPath: string): string {
    // Convert local file system path to database path
    const relativePath = path.relative(LOCAL_DIR, localPath);
    return "/" + relativePath.split(path.sep).join(path.posix.sep);
  }

  private getLocalPath(dbPath: string): string {
    // Convert database path to local file system path
    return path.join(LOCAL_DIR, dbPath.slice(1));
  }
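
  // Indexes a single file. Unchanged files (same size and existing hash) are
  // skipped unless their extension's indexing logic changed after the file
  // was last processed (see lastUpdateTypes above).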
  async scanFile(s: Spinner, filePath: string, stat: Stats): Promise<void> {
    const dbPath = this.getDbPath(filePath);

    // Skip hidden files
    const basename = path.basename(filePath);
    if (skipBasename(basename)) {
      return;
    }

    this.visitedPaths.add(dbPath);

    // Get existing file info from db
    const existingFile = MediaFile.getByPath(dbPath);

    // Determine which date to use (for date protection)
    let dateToUse = stat.mtime;
    const year2025Start = new Date("2025-01-01T00:00:00Z");

    if (
      existingFile &&
      existingFile.date < year2025Start &&
      stat.mtime >= year2025Start
    ) {
      console.error(
        `Error: ${dbPath} is ${formatDate(existingFile.date)}, ` +
          `got modified to ${formatDate(stat.mtime)}`,
      );
      dateToUse = existingFile.date;
    }

    // Check if we need to reprocess the file
    if (existingFile && existingFile.size === stat.size && existingFile.hash) {
      maybe_skip: {
        const lastUpdateDate =
          lastUpdateTypes[path.extname(filePath).toLowerCase()];
        if (lastUpdateDate && existingFile.lastUpdateDate < lastUpdateDate) {
          console.log(
            `Reprocessing ${dbPath} because indexing logic changed after ${
              formatDate(lastUpdateDate)
            }`,
          );
          break maybe_skip;
        }

        if (SHOULD_COMPRESS && existingFile.processed !== 2) {
          this.compressQueue!.add(
            { file: existingFile, path: dbPath },
            this.compressFile.bind(this),
          );
        }

        // File hasn't changed, no need to reprocess
        MediaFile.createFile({
          path: dbPath,
          date: dateToUse,
          hash: existingFile.hash,
          size: stat.size,
          duration: existingFile.duration,
          dimensions: existingFile.dimensions,
          content: existingFile.contents,
        });
        return;
      }
    }

    // Process the file
    log(`Processing file: ${dbPath}`);

    // Scrub location metadata if needed
    if (SHOULD_SCRUB) {
      if (await scrubLocationMetadata(filePath, stat)) {
        // Re-stat the file in case it was modified
        const newStat = await fsp.stat(filePath);
        stat.size = newStat.size;
      }
    }

    // Extract content
    const hash = await calculateHash(filePath);
    let content = "";
    if (filePath.endsWith(".lnk")) {
      content = (await fsp.readFile(filePath, "utf8")).trim();
    }
    const language = CODE_EXTENSIONS[path.extname(filePath).toLowerCase()];
    if (language) {
      read_code: {
        // An issue is that .ts is an overloaded extension, shared between
        // 'transport stream' and 'typescript'.
        //
        // Filter used here is:
        // - more than 1mb
        // - invalid UTF-8
        if (stat.size > 1_000_000) break read_code;
        let code;
        const buf = await fsp.readFile(filePath);
        try {
          code = new TextDecoder("utf-8", { fatal: true }).decode(buf);
        } catch (error) {
          break read_code;
        }
        content = await highlightCode(code, language);
      }
    }
    if (
      !content &&
      READ_CONTENTS_EXTENSIONS.has(path.extname(filePath).toLowerCase())
    ) {
      content = await fsp.readFile(filePath, "utf8");
    }
    // End extract content

    if (hash === existingFile?.hash) {
      MediaFile.createFile({
        path: dbPath,
        date: dateToUse,
        hash,
        size: stat.size,
        duration: existingFile.duration,
        dimensions: existingFile.dimensions,
        content,
      });
      return;
    } else if (existingFile) {
      if (existingFile.processed === 2) {
        if (BlobAsset.decrementOrDelete(existingFile.hash)) {
          log(
            `Deleted compressed asset ${existingFile.hash}.{gz,zstd}`,
            true,
          );
          await fsp.unlink(
            path.join(
              COMPRESS_STORE,
              existingFile.hash.substring(0, 2),
              existingFile.hash + ".gz",
            ),
          );
          await fsp.unlink(
            path.join(
              COMPRESS_STORE,
              existingFile.hash.substring(0, 2),
              existingFile.hash + ".zstd",
            ),
          );
        }
      }
    }
    const [duration, dimensions] = await Promise.all([
      calculateDuration(filePath),
      calculateDimensions(filePath),
    ]);

    // Update database with all metadata
    MediaFile.createFile({
      path: dbPath,
      date: dateToUse,
      hash,
      size: stat.size,
      duration,
      dimensions,
      content,
    });

    if (SHOULD_COMPRESS) {
      this.compressQueue!.add(
        {
          file: MediaFile.getByPath(dbPath)!,
          path: dbPath,
        },
        this.compressFile.bind(this),
      );
    }
  }
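
  // Precompresses the file as .gz and .zstd blobs stored by hash, so
  // identical files share one compressed copy. BlobAsset reference counts
  // (from ../db.ts) are assumed to track how many MediaFiles point at each
  // blob.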
  async compressFile(s: Spinner, { file }: { file: MediaFile }): Promise<void> {
    log(`Compressing file: ${file.path}`);
    if (DRY_RUN) return;

    const filePath = path.join(FILE_ROOT!, file.path);

    const hash = file.hash;
    const firstTwoChars = hash.substring(0, 2);
    const compressDir = `${COMPRESS_STORE}/${firstTwoChars}`;
    const compressPath = `${compressDir}/${hash}`;

    // Create directory structure if it doesn't exist
    await fsp.mkdir(compressDir, { recursive: true });

    // Register a blob reference; only the first reference does the
    // compression work
    const blob = BlobAsset.putOrIncrement(hash);
    if (blob.refs > 1) {
      log(
        `Skipping compression of ${filePath} because it already exists in ${compressPath}`,
      );
      return;
    }
    // Check if already exists
    if (existsSync(compressPath + ".gz")) {
      file.setCompressed(true);
      return;
    }
    try {
      const gzipProcess = Bun.spawn(["gzip", "-c", filePath, "-9"], {
        stdout: Bun.file(compressPath + ".gz"),
      });
      const zstdProcess = Bun.spawn(["zstd", "-c", filePath, "-9"], {
        stdout: Bun.file(compressPath + ".zstd"),
      });
      const [gzipExited, zstdExited] = await Promise.all([
        gzipProcess.exited,
        zstdProcess.exited,
      ]);
      assert(gzipExited === 0);
      assert(zstdExited === 0);
      assert(existsSync(compressPath + ".gz"));
      assert(existsSync(compressPath + ".zstd"));
      file.setCompressed(true);
    } catch (error) {
      console.error(`Error compressing file ${filePath}:`, error);
      BlobAsset.decrementOrDelete(hash);
      file.setCompressed(false);
    }
  }
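
  // Enumerates one directory, queueing subdirectories back onto dirQueue and
  // files onto fileQueue; recursion happens through the queues, not the
  // stack.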
  async scanDirectory(s: Spinner, dirPath: string): Promise<void> {
    const dbPath = this.getDbPath(dirPath);

    this.visitedPaths.add(dbPath);

    // Create or update directory entry
    log(`Scanning directory: ${dbPath}`);
    if (!DRY_RUN) {
      MediaFile.createOrUpdateDirectory(dbPath);
    }

    try {
      const entries = await fsp.readdir(dirPath, { withFileTypes: true });

      // Process files and subdirectories
      for (const entry of entries) {
        const entryPath = path.join(dirPath, entry.name);

        // Skip hidden files and system files
        if (skipBasename(entry.name)) {
          continue;
        }

        if (entry.isDirectory()) {
          // Queue subdirectory for scanning
          this.dirQueue.add(entryPath, this.scanDirectory.bind(this));
        } else if (entry.isFile()) {
          // Queue file for processing
          const stat = await fsp.stat(entryPath);

          this.fileQueue.add(
            { path: entryPath, stat },
            async (s, item) => await this.scanFile(s, item.path, item.stat),
          );
        }
      }
    } catch (error) {
      console.error(`Error scanning directory ${dirPath}:`, error);
    }
  }
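
  // Aggregates child metadata (total size, newest date, combined hash) into
  // the directory row, reads the optional readme.txt / .dirsort / .friends
  // control files, and marks the directory processed. Children must be
  // processed before their parents; scan() guarantees this by sorting
  // deepest-first.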
  async processDirectoryMetadata(dirPath: string): Promise<void> {
    const dbPath = this.getDbPath(dirPath);
    const dir = MediaFile.getByPath(dbPath);

    if (!dir || dir.kind !== MediaFile.Kind.directory) {
      return;
    }

    if (DRY_RUN) return;

    const children = dir.getChildren();

    // Calculate directory metadata
    let totalSize = 0;
    let newestDate = new Date(0);
    let allHashes = "";

    // Check for readme.txt
    let readmeContent = "";

    try {
      readmeContent = await fsp.readFile(
        path.join(dirPath, "readme.txt"),
        "utf8",
      );
    } catch (error: any) {
      log(`no readme ${dirPath}`);
      if (error.code !== "ENOENT") {
        console.error(`Error reading readme.txt in ${dirPath}:`, error);
      }
    }

    let dirsort: string[] | null = null;
    try {
      dirsort = (await fsp.readFile(path.join(dirPath, ".dirsort"), "utf8"))
        .split("\n")
        .map((x) => x.trim())
        .filter(Boolean);
    } catch (error: any) {
      if (error.code !== "ENOENT") {
        console.error(`Error reading .dirsort in ${dirPath}:`, error);
      }
    }

    if (await fsp.exists(path.join(dirPath, ".friends"))) {
      FilePermissions.setPermissions(dbPath, 1);
    } else {
      FilePermissions.setPermissions(dbPath, 0);
    }

    // Process children
    for (const child of children) {
      totalSize += child.size;
      allHashes += child.hash;

      // Update newest date, ignoring readme.txt
      if (!child.path.endsWith("/readme.txt") && child.date > newestDate) {
        newestDate = child.date;
      }
    }

    // Create a hash for the directory
    const dirHash = new Bun.CryptoHasher("sha1")
      .update(dbPath + allHashes)
      .digest("hex");

    // Update directory metadata
    MediaFile.markDirectoryProcessed({
      id: dir.id,
      timestamp: newestDate,
      contents: readmeContent,
      size: totalSize,
      hash: dirHash,
      dirsort,
    });
  }
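
  // Diffs the paths seen this scan against what the database previously held,
  // deleting rows (and any now-unreferenced compressed blobs) for entries
  // that no longer exist on disk.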
  async findDeletedFiles(): Promise<void> {
    if (DRY_RUN) return;

    // Find all paths that exist in the DB but not in the filesystem
    const deletedPaths = Array.from(this.previousPaths).filter(
      (path) => !this.visitedPaths.has(path),
    );

    for (const dbPath of deletedPaths) {
      const file = MediaFile.getByPath(dbPath);
      if (!file) continue;

      log(`Item Deleted: ${dbPath}`, true);
      if (file.processed === 2) {
        if (BlobAsset.decrementOrDelete(file.hash)) {
          log(`Deleted compressed asset ${file.hash}.{gz,zstd}`, true);
          await fsp.unlink(
            path.join(
              COMPRESS_STORE,
              file.hash.substring(0, 2),
              file.hash + ".gz",
            ),
          );
          await fsp.unlink(
            path.join(
              COMPRESS_STORE,
              file.hash.substring(0, 2),
              file.hash + ".zstd",
            ),
          );
        }
      }
      MediaFile.deleteByPath(dbPath);
    }
  }

  async loadPreviousPaths(): Promise<void> {
    // Get all files and directories from the database
    // This uses a custom query to get all paths at once
    const getAllPathsQuery = cache
      .prepare(`SELECT path, kind FROM media_files`)
      .all() as {
        path: string;
        kind: MediaFile.Kind;
      }[];

    let dirs = 0;
    let files = 0;
    for (const row of getAllPathsQuery) {
      this.previousPaths.add(row.path);
      if (row.kind === MediaFile.Kind.directory) {
        dirs++;
      } else {
        files++;
      }
    }

    this.dirQueue.setEstimate(dirs);
    this.fileQueue.setEstimate(files);

    // log(`Loaded ${this.previousPaths.size} paths from database`, true);
  }
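
  // Entry point: validates the root, walks the tree, prunes deleted entries,
  // then fills in directory metadata sequentially from the deepest paths up
  // so every parent sees finalized child sizes and dates.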
  async scan(): Promise<void> {
    log(`Starting file system scan in ${LOCAL_DIR}`, true);

    // Check if the root directory exists and is accessible
    try {
      const rootStat = await fsp.stat(LOCAL_DIR);
      if (!rootStat.isDirectory()) {
        throw new Error(`${LOCAL_DIR} is not a directory`);
      }
    } catch (error) {
      console.error(`Error: Cannot access root directory ${LOCAL_DIR}`, error);
      console.error(
        `Aborting scan to prevent database corruption. Please check if the volume is mounted.`,
      );
      process.exit(1);
    }

    await this.loadPreviousPaths();

    await this.dirQueue.add(LOCAL_DIR, this.scanDirectory.bind(this));

    await this.dirQueue.waitForCompletion();
    await this.fileQueue.waitForCompletion();

    await this.findDeletedFiles();

    const allDirs = Array.from(this.visitedPaths)
      .filter((path) => {
        const file = MediaFile.getByPath(path);
        return file && file.kind === MediaFile.Kind.directory;
      })
      .sort((a, b) => b.length - a.length);

    for (const dirPath of allDirs) {
      await this.processDirectoryMetadata(this.getLocalPath(dirPath));
    }

    if (SHOULD_COMPRESS) {
      await this.compressQueue!.waitForCompletion();
    }

    log("Scan completed successfully!", true);
  }
}

// Main execution
function showHelp() {
  console.log(`
MediaFile Scanner - Index filesystem content for paperclover.net

Environment variables:
  SCAN_FILE_ROOT   Required. Path to the directory to scan
  COMPRESS_STORE   Optional. Path to store compressed files (default: .clover/compressed)

Options:
  --help      Show this help message
  --dry-run   Don't make any changes to the database
  --verbose   Show detailed output

Usage:
  bun ./media/scan.ts [options]
`);
  process.exit(0);
}

{
  // Show help if requested
  if (process.argv.includes("--help")) {
    showHelp();
  }

  // Check if the root directory exists before starting
  if (!existsSync(LOCAL_DIR)) {
    console.error(
      `Error: Root directory ${LOCAL_DIR} does not exist or is not accessible.`,
    );
    console.error(`Please check if the volume is mounted correctly.`);
    process.exit(1);
  }

  const startTime = Date.now();

  try {
    const scanner = new FileSystemScanner();
    await scanner.scan();

    const endTime = Date.now();
    log(`Scan completed in ${(endTime - startTime) / 1000} seconds`, true);

    const rootDir = MediaFile.getByPath("/")!;
    const totalEntries = cache
      .prepare(`SELECT COUNT(*) as count FROM media_files`)
      .get() as { count: number };
    const totalDuration = cache
      .prepare(`SELECT SUM(duration) as duration FROM media_files`)
      .get() as { duration: number };
    console.log();
    console.log("Global Stats");
    console.log(` Entry count: ${totalEntries.count}`);
    console.log(` Uncompressed size: ${formatSize(rootDir.size)}`);
    console.log(
      ` Total audio/video duration: ${
        (totalDuration.duration / 60 / 60).toFixed(1)
      } hours`,
    );
  } catch (error) {
    console.error("Error during scan:", error);
    process.exit(1);
  }
}