#!/usr/bin/env node /** * scan-project.mjs * * Deterministic file enumeration + language/category detection for the * project-scanner agent. Replaces the LLM-written prose scanner that used to * (a) author a per-run Node.js script (`tmp/ua-project-scan.js`), (b) walk the * file tree, and (c) classify each file via lookup tables in LLM context — a * pure rule-lookup pass that was being billed at LLM rates and adding many * minutes of per-run latency on mid-sized monorepos. * * What the LLM still owns (Step A of project-scanner.md Phase 1): * - Reading README + top-level manifests to synthesize `name`, * `rawDescription`, `readmeHead`, `frameworks`, and the high-level * `languages` narrative. * * What this script owns: * - File enumeration (git ls-files preferred, recursive walk fallback) * - `.understandignore` filtering (delegated to core's createIgnoreFilter) * - Per-file language detection (extension + filename table) * - Per-file category assignment (priority-ordered rules from * project-scanner.md Step 4) * - Line counting * - Complexity estimation (project-scanner.md Step 7 thresholds) * * Usage: * node scan-project.mjs * * Output JSON (subset of what project-scanner.md Phase 1 expects — the LLM * agent merges this with Step A's narrative fields and Step C's importMap to * produce the final scan-result.json): * { * "scriptCompleted": true, * "files": [{ "path": "...", "language": "...", "sizeLines": N, "fileCategory": "..." }, ...], * "totalFiles": N, * "filteredByIgnore": M, * "estimatedComplexity": "small" | "moderate" | "large" | "very-large", * "stats": { "filesScanned": N, "byCategory": {...}, "byLanguage": {...} } * } * * Logging: stderr only (stdout reserved for piped tooling). * Per-file resilience: read/stat failures emit * `Warning: scan-project: — file skipped from output` * to stderr and the file is dropped; the rest of the scan completes. * * Determinism: files are sorted by `path.localeCompare` before emission, and * the underlying enumeration is deterministic (git ls-files returns a stable * order; the fallback walker sorts each directory's entries). */ import { createRequire } from 'node:module'; import { dirname, resolve, join, basename, extname, relative, sep } from 'node:path'; import { fileURLToPath, pathToFileURL } from 'node:url'; import { existsSync, readFileSync, readdirSync, realpathSync, statSync, writeFileSync, } from 'node:fs'; import { spawnSync } from 'node:child_process'; const __dirname = dirname(fileURLToPath(import.meta.url)); // skills/understand/ -> plugin root is two dirs up const pluginRoot = resolve(__dirname, '../..'); const require = createRequire(resolve(pluginRoot, 'package.json')); // --------------------------------------------------------------------------- // Resolve @understand-anything/core // // Two-step resolution: try the workspace-linked package first, fall back to // the installed plugin cache layout. pathToFileURL() is required on Windows // because dynamic import() of raw "C:\..." paths throws // ERR_UNSUPPORTED_ESM_URL_SCHEME (Node parses "C:" as a URL scheme). // --------------------------------------------------------------------------- let core; try { core = await import(pathToFileURL(require.resolve('@understand-anything/core')).href); } catch { core = await import(pathToFileURL(resolve(pluginRoot, 'packages/core/dist/index.js')).href); } const { createIgnoreFilter } = core; // --------------------------------------------------------------------------- // Language detection // // Mirrors the canonical extension list from // understand-anything-plugin/packages/core/src/languages/configs/* and the // project-scanner.md Step 3 table. Extensions are matched lowercase; // filenames (Dockerfile, Makefile, etc.) are matched case-sensitively because // the projects-in-the-wild use canonical capitalizations. // // Where the core configs and project-scanner.md diverge (rare), project- // scanner.md wins because it is the user-facing contract. // --------------------------------------------------------------------------- /** * Extension -> language id. Lowercase keys; lookup is `.ext.toLowerCase()`. * Includes the legacy Step-3 mapping (.cfg/.ini/.env -> `config`) — note * that `config` is a language id here, not a category. Category routing * for these extensions is handled separately in CATEGORY_BY_EXT. */ const LANGUAGE_BY_EXT = Object.freeze({ // TypeScript / JavaScript '.ts': 'typescript', '.tsx': 'typescript', '.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript', // Python '.py': 'python', '.pyi': 'python', // Go / Rust / Java / Kotlin / C# / Swift / Lua '.go': 'go', '.rs': 'rust', '.java': 'java', '.kt': 'kotlin', '.kts': 'kotlin', '.cs': 'csharp', '.swift': 'swift', '.lua': 'lua', // Ruby / PHP '.rb': 'ruby', '.rake': 'ruby', '.php': 'php', // C / C++ '.c': 'c', '.h': 'c', '.cpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp', '.hpp': 'cpp', '.hxx': 'cpp', // Vue / Svelte (no tree-sitter extractor, but project-scanner contract // lists them as code languages — downstream import map will return []) '.vue': 'vue', '.svelte': 'svelte', // Shell / Batch / PowerShell '.sh': 'shell', '.bash': 'shell', '.zsh': 'shell', '.ps1': 'powershell', '.psm1': 'powershell', '.psd1': 'powershell', '.bat': 'batch', '.cmd': 'batch', // Markup / docs '.html': 'html', '.htm': 'html', '.css': 'css', '.scss': 'css', '.sass': 'css', '.less': 'css', '.md': 'markdown', '.mdx': 'markdown', '.rst': 'markdown', // Config / data '.yaml': 'yaml', '.yml': 'yaml', '.json': 'json', '.jsonc': 'jsonc', '.toml': 'toml', '.xml': 'xml', '.xsl': 'xml', '.xsd': 'xml', '.plist': 'xml', '.cfg': 'config', '.ini': 'config', '.env': 'config', // Data / schema '.sql': 'sql', '.graphql': 'graphql', '.gql': 'graphql', '.proto': 'protobuf', '.prisma': 'prisma', '.csv': 'csv', '.tsv': 'csv', // Infra '.tf': 'terraform', '.tfvars': 'terraform', // JVM build files (categorized via filename-or-extension) '.gradle': 'gradle', // .NET project files (mapped to extension-derived ids; downstream // treats them as config — see CATEGORY_BY_EXT) '.csproj': 'csproj', '.sln': 'sln', '.properties': 'properties', '.mod': 'mod', '.sum': 'sum', }); /** * Filename (no extension) -> language id. Compared case-sensitively against * basename(path). Includes the most common no-extension conventions; anything * NOT in this table with no extension falls back to `unknown`. * * Dockerfile.* variants (Dockerfile.dev, Dockerfile.prod) are handled by a * startsWith check in `detectLanguage()` so we don't have to enumerate every * possible suffix. */ const LANGUAGE_BY_FILENAME = Object.freeze({ Dockerfile: 'dockerfile', Makefile: 'makefile', GNUmakefile: 'makefile', makefile: 'makefile', Jenkinsfile: 'jenkinsfile', Procfile: 'procfile', Vagrantfile: 'vagrantfile', }); /** * Detect the language of a file by its path. Lowercase extension lookup, * then no-extension filename lookup. Never returns null — falls back to * the lowercased extension (without dot) or 'unknown' if there is no * extension. Downstream consumers rely on this field always being a string * (see project-scanner.md Step 3 "Fallback" note). */ export function detectLanguage(filePath) { const base = basename(filePath); const ext = extname(filePath).toLowerCase(); // Dockerfile.dev, Dockerfile.prod, etc. — common variant form. if (base === 'Dockerfile' || base.startsWith('Dockerfile.')) return 'dockerfile'; // Dotfile names like .env, .env.local — path.extname returns '' for // single-segment dotfiles (e.g. '.env') and the SECOND segment for // compound dotfiles (e.g. '.local' for '.env.local'). Neither hits the // intended LANGUAGE_BY_EXT['.env'] mapping. Try the leading dotfile // portion first so `.env`, `.env.local`, `.env.production` all map. const dotKey = dotfileKey(base); if (dotKey && LANGUAGE_BY_EXT[dotKey]) return LANGUAGE_BY_EXT[dotKey]; if (ext) { const byExt = LANGUAGE_BY_EXT[ext]; if (byExt) return byExt; // Unknown extension → drop the leading dot, lowercase. Never null. return ext.slice(1); } // No-extension file — try filename table. const byFilename = LANGUAGE_BY_FILENAME[base]; if (byFilename) return byFilename; return 'unknown'; } /** * Extract the canonical dotfile "extension" from a basename, or null. * * `.env` -> `.env` * `.env.local` -> `.env` * `.bashrc` -> `.bashrc` * `package.json` -> null (not a dotfile) * * Used by both detectLanguage and detectCategory so dotfile-style configs * (e.g., `.env`, `.env.local`, `.env.production`) get their leading * segment treated as the implicit extension instead of falling through * to `unknown` / `code`. */ function dotfileKey(base) { if (!base.startsWith('.')) return null; const m = base.match(/^(\.[a-z0-9]+)/i); return m ? m[1].toLowerCase() : null; } // --------------------------------------------------------------------------- // Category detection // // Implements the priority-ordered rules from project-scanner.md Step 4. // Order matters: more specific rules must run before more general ones // (e.g. `docker-compose.yml` is infra, not config). // // Categories: code | config | docs | infra | data | script | markup // --------------------------------------------------------------------------- /** * Extension -> category. Used only after the higher-priority path-based * checks (infra/docs exclusions) in `detectCategory()`. Plain extension * lookup is intentionally last-resort — many configs need their full path * inspected first. */ const CATEGORY_BY_EXT = Object.freeze({ // docs '.md': 'docs', '.mdx': 'docs', '.rst': 'docs', '.txt': 'docs', '.text': 'docs', // config '.yaml': 'config', '.yml': 'config', '.json': 'config', '.jsonc': 'config', '.toml': 'config', '.xml': 'config', '.xsl': 'config', '.xsd': 'config', '.plist': 'config', '.cfg': 'config', '.ini': 'config', '.env': 'config', '.properties': 'config', '.csproj': 'config', '.sln': 'config', '.mod': 'config', '.sum': 'config', '.gradle': 'config', // infra '.tf': 'infra', '.tfvars': 'infra', // data '.sql': 'data', '.graphql': 'data', '.gql': 'data', '.proto': 'data', '.prisma': 'data', '.csv': 'data', '.tsv': 'data', // script '.sh': 'script', '.bash': 'script', '.zsh': 'script', '.ps1': 'script', '.psm1': 'script', '.psd1': 'script', '.bat': 'script', '.cmd': 'script', // markup '.html': 'markup', '.htm': 'markup', '.css': 'markup', '.scss': 'markup', '.sass': 'markup', '.less': 'markup', }); /** * Filenames (no extension or full filename with extension) that always * map to `infra` regardless of their extension. Compared case-sensitively * against basename(path). */ const INFRA_FILENAMES = new Set([ 'Dockerfile', '.dockerignore', 'Makefile', 'GNUmakefile', 'makefile', 'Jenkinsfile', 'Procfile', 'Vagrantfile', '.gitlab-ci.yml', ]); /** * Detect the project-scanner category for a file. Priority order matches * project-scanner.md Step 4 "Priority rule" — most specific wins. * * 1. LICENSE -> code (per the spec note "except LICENSE"). The Step-2 * exclusion table normally removes LICENSE, but if a project chooses to * re-include it via `.understandignore` negation, it should NOT land in * docs. We classify as `code` rather than inventing a new bucket. * 2. Filename-based infra (Dockerfile, Makefile, Jenkinsfile, * docker-compose.*, Vagrantfile, Procfile, .gitlab-ci.yml, * .dockerignore). * 3. Path-based infra (.github/workflows/, .circleci/, k8s/, kubernetes/, * *.k8s.yml, *.k8s.yaml). * 4. Extension-based mapping (CATEGORY_BY_EXT). * 5. Fallback: `code` (matches the spec — "All other extensions"). */ export function detectCategory(filePath) { const base = basename(filePath); const ext = extname(filePath).toLowerCase(); const posix = filePath.split(sep).join('/'); // Rule 1: LICENSE exception (project-scanner.md Step 4 table comment). if (base === 'LICENSE') return 'code'; // Rule 2: infra by filename — Dockerfile + variants, Makefile, // Jenkinsfile, docker-compose.*, Procfile, Vagrantfile, .gitlab-ci.yml, // .dockerignore. if (INFRA_FILENAMES.has(base)) return 'infra'; if (base === 'Dockerfile' || base.startsWith('Dockerfile.')) return 'infra'; if (base.startsWith('docker-compose.')) return 'infra'; if (base === 'compose.yml' || base === 'compose.yaml') return 'infra'; // Rule 3: infra by path. if (posix.startsWith('.github/workflows/')) return 'infra'; if (posix.startsWith('.circleci/')) return 'infra'; // Match a `k8s/` or `kubernetes/` segment anywhere in the path. if (/(^|\/)(k8s|kubernetes)\//.test(posix)) return 'infra'; // `*.k8s.yml` and `*.k8s.yaml` — Kubernetes-flavored YAML. if (/\.k8s\.(ya?ml)$/i.test(base)) return 'infra'; // Rule 4: extension-based lookup. if (ext) { const byExt = CATEGORY_BY_EXT[ext]; if (byExt) return byExt; } // Rule 4.5: dotfile-style configs (.env, .env.local, .env.production). // path.extname misses these — see dotfileKey docstring. const dotKey = dotfileKey(base); if (dotKey) { const byDot = CATEGORY_BY_EXT[dotKey]; if (byDot) return byDot; } // Rule 5: filename-based config catch-all for no-extension config files // commonly seen in JVM/Go/.NET projects (covered above for infra but not // config). We don't enumerate every possible config filename here — that // gets handled by the language map's no-extension entries upstream. // Anything not matched falls through to `code`. return 'code'; } // --------------------------------------------------------------------------- // Complexity estimation (project-scanner.md Step 7) // --------------------------------------------------------------------------- /** * Map a total file count to a complexity tier. Thresholds are inclusive on * the lower bound: * - small: 1-30 * - moderate: 31-150 * - large: 151-500 * - very-large: >500 * * Edge case: 0 files maps to `small` (the lowest tier) so the field is * always set even on empty repos. Downstream consumers treat 0 files as * a sentinel for "nothing to analyze" via `totalFiles`, not complexity. */ export function estimateComplexity(totalFiles) { if (totalFiles <= 30) return 'small'; if (totalFiles <= 150) return 'moderate'; if (totalFiles <= 500) return 'large'; return 'very-large'; } // --------------------------------------------------------------------------- // File enumeration // --------------------------------------------------------------------------- /** * Normalize a path to forward-slash POSIX. The project-scanner contract * emits POSIX paths; we re-normalize so the output is stable across * Windows/macOS/Linux. */ function toPosix(p) { return p.split(sep).join('/'); } /** * Enumerate all files in `projectRoot` via `git ls-files`. Returns an * array of project-relative POSIX paths, or null if the directory is not * a git repository (or git is not installed). Caller falls back to the * recursive walker. * * Why git ls-files first: it respects the repo's `.gitignore`, handles * submodules sensibly, and gives a fast, deterministic listing. The walker * is a strict superset of what git would emit (no .gitignore awareness), * so the ignore filter has to do more work in the fallback path. */ function enumerateViaGit(projectRoot) { // -z = NUL-terminated output. Without it, `git ls-files` C-escapes non-ASCII // bytes in path names — paths containing emoji, accented characters, CJK // codepoints, etc. come back quoted with octal escapes (e.g. // `"30. \360\237\217\227 BD-CCER/file.md"` for a path containing 🏗️). // Those quoted-escaped strings then fail to round-trip back to real disk // paths in downstream consumers, so files in such directories are silently // dropped from the scan. The -z form emits raw bytes between NUL separators, // preserving every codepoint as-is. This is the same approach git itself // uses for `--null` everywhere downstream (xargs -0, etc.). const result = spawnSync('git', ['ls-files', '-z', '-co', '--exclude-standard'], { cwd: projectRoot, encoding: 'utf-8', maxBuffer: 256 * 1024 * 1024, // 256MB — huge monorepos can produce >10MB of paths }); if (result.status !== 0 || !result.stdout) return null; // Each NUL-separated chunk is one path, project-relative, already POSIX on // all platforms because git emits forward slashes regardless of OS. return result.stdout .split('\0') .filter(Boolean) .map(toPosix); } /** * Recursive directory walker — fallback when `git ls-files` is unavailable * (no git, not a repo, or git refused). Skips hard-coded "obviously bad" * directory names BEFORE invoking the ignore filter so we don't waste cycles * descending into `node_modules/` etc. on huge trees. * * Yields project-relative POSIX paths in directory-sorted order so the * output is deterministic without an extra sort pass. */ function enumerateViaWalk(projectRoot) { // Hard skip — these directories are universally non-source and skipping // at the walker level avoids materializing thousands of node_modules // paths before the ignore filter drops them. The ignore filter still // runs on everything else. const HARD_SKIP_DIRS = new Set([ 'node_modules', '.git', '.svn', '.hg', '__pycache__', ]); const out = []; function walk(absDir) { let entries; try { entries = readdirSync(absDir, { withFileTypes: true }); } catch (err) { process.stderr.write( `Warning: scan-project: ${toPosix(relative(projectRoot, absDir)) || '.'} ` + `— directory read failed (${err.message}) — subtree skipped\n`, ); return; } // Sort deterministically by name; mix files and dirs together so the // final output (after the path sort) is identical regardless of // OS-specific readdir order. entries.sort((a, b) => a.name.localeCompare(b.name)); for (const ent of entries) { if (ent.isDirectory()) { if (HARD_SKIP_DIRS.has(ent.name)) continue; walk(join(absDir, ent.name)); } else if (ent.isFile()) { const rel = toPosix(relative(projectRoot, join(absDir, ent.name))); if (rel) out.push(rel); } // Symlinks intentionally ignored — git ls-files doesn't follow them // either, and following them is a classic recursion-bomb footgun. } } walk(projectRoot); return out; } /** * Enumerate all candidate files in `projectRoot`. Tries git ls-files first; * falls back to a recursive walk if git is unavailable or this is not a * repo. Returns an array of project-relative POSIX paths in unspecified * order — caller is responsible for sorting + filtering. */ function enumerateFiles(projectRoot) { const fromGit = enumerateViaGit(projectRoot); if (fromGit !== null) return fromGit; process.stderr.write( `scan-project: git ls-files unavailable — falling back to recursive walk\n`, ); return enumerateViaWalk(projectRoot); } // --------------------------------------------------------------------------- // Filter accounting // // The project-scanner.md contract requires `filteredByIgnore` to count files // dropped *specifically* by user `.understandignore` patterns (the delta // beyond what the hardcoded defaults would have removed). We accomplish this // by building TWO filters: // - `defaultOnly`: defaults only, no user patterns // - `combined`: defaults + user patterns (createIgnoreFilter) // and counting paths that the combined filter excludes but the defaults-only // filter would have kept. // // Negation (`!pattern`) is correctly handled by the combined filter — a file // re-included via `!` won't be in the combined-excluded set, so it WON'T be // counted in filteredByIgnore (it's "kept", not "additionally filtered"). // --------------------------------------------------------------------------- /** * Build a defaults-only IgnoreFilter — same patterns as createIgnoreFilter * would apply, minus any user .understandignore content. We synthesize this * via a temp directory with no .understandignore files so the core function * still drives the matcher. (Re-implementing the ignore-package wiring here * would risk subtle behavior drift from core's matcher.) */ function buildDefaultsOnlyFilter() { // Use the createIgnoreFilter with a path that we KNOW has no .understandignore. // `os.tmpdir()`-based fresh dir guarantees no user patterns leak in. // The directory doesn't need to exist on disk because createIgnoreFilter // only checks existsSync() before reading. const fakeProjectRoot = join( require('node:os').tmpdir(), `ua-scan-defaults-${process.pid}-${Date.now()}`, ); return createIgnoreFilter(fakeProjectRoot); } /** * Determine whether `projectRoot` has any user .understandignore files. * When neither file exists, the combined and defaults-only filters are * identical, so we can skip the dual-filter accounting entirely. */ function hasUserIgnoreFile(projectRoot) { return ( existsSync(join(projectRoot, '.understandignore')) || existsSync(join(projectRoot, '.understand-anything', '.understandignore')) ); } // --------------------------------------------------------------------------- // Line counting // --------------------------------------------------------------------------- /** * Count newline-delimited lines in a file. Returns the number of `\n` * characters; this matches `wc -l` semantics (which counts newlines, not * "lines of content"). Files without a trailing newline therefore report * one fewer than the visible line count — same behavior as wc. * * Per-file failure: emits a Warning: and returns null. Caller decides * whether to drop the file or keep it with sizeLines=0. */ function countLines(absPath, posixPath) { try { const buf = readFileSync(absPath); // Manual newline count beats split('\n').length on large files — no // intermediate array allocation. We count the `\n` byte (0x0a) directly. let count = 0; for (let i = 0; i < buf.length; i++) { if (buf[i] === 0x0a) count++; } return count; } catch (err) { process.stderr.write( `Warning: scan-project: ${posixPath} — line count failed ` + `(${err.message}) — file skipped from output\n`, ); return null; } } // --------------------------------------------------------------------------- // Main // --------------------------------------------------------------------------- async function main() { const [, , projectRoot, outputPath] = process.argv; if (!projectRoot || !outputPath) { process.stderr.write( 'Usage: node scan-project.mjs \n', ); process.exit(1); } if (!existsSync(projectRoot)) { process.stderr.write( `scan-project.mjs failed: projectRoot does not exist: ${projectRoot}\n`, ); process.exit(1); } const projectRootStat = statSync(projectRoot); if (!projectRootStat.isDirectory()) { process.stderr.write( `scan-project.mjs failed: projectRoot is not a directory: ${projectRoot}\n`, ); process.exit(1); } // 1. Enumerate. Either git ls-files or recursive walk. const candidates = enumerateFiles(projectRoot); // 2. Filter via createIgnoreFilter (defaults + user .understandignore). // Build a defaults-only filter in parallel to count user-driven drops. const combined = createIgnoreFilter(projectRoot); const userIgnoresPresent = hasUserIgnoreFile(projectRoot); const defaultsOnly = userIgnoresPresent ? buildDefaultsOnlyFilter() : combined; let filteredByIgnore = 0; const kept = []; for (const rel of candidates) { const isIgnoredCombined = combined.isIgnored(rel); if (!isIgnoredCombined) { kept.push(rel); continue; } // Dropped by combined filter. If defaults-only would have ALSO dropped // it, this is a baseline default drop — not counted. If defaults-only // would have KEPT it, this drop is attributable to the user's // .understandignore content. if (userIgnoresPresent && !defaultsOnly.isIgnored(rel)) { filteredByIgnore++; } } // 3. Per-file: language + category + line count. // Drop files that fail line counting (per-file resilience). const fileEntries = []; for (const rel of kept) { const absPath = join(projectRoot, rel); // Stat first — git ls-files could include paths that vanished between // listing and processing; the walker shouldn't but defensive anyway. try { const st = statSync(absPath); if (!st.isFile()) { // Symlinks-to-dir, special files, etc. — skip silently. Not a // warning condition because git wouldn't have tracked it as a file. continue; } } catch (err) { process.stderr.write( `Warning: scan-project: ${rel} — stat failed (${err.message}) ` + `— file skipped from output\n`, ); continue; } const sizeLines = countLines(absPath, rel); if (sizeLines === null) { // countLines already emitted the Warning: line. continue; } fileEntries.push({ path: rel, language: detectLanguage(rel), sizeLines, fileCategory: detectCategory(rel), }); } // 4. Determinism: sort by path.localeCompare. fileEntries.sort((a, b) => a.path.localeCompare(b.path)); // 5. Stats. const byCategory = {}; const byLanguage = {}; for (const f of fileEntries) { byCategory[f.fileCategory] = (byCategory[f.fileCategory] || 0) + 1; byLanguage[f.language] = (byLanguage[f.language] || 0) + 1; } const estimatedComplexity = estimateComplexity(fileEntries.length); const output = { scriptCompleted: true, files: fileEntries, totalFiles: fileEntries.length, filteredByIgnore, estimatedComplexity, stats: { filesScanned: fileEntries.length, byCategory, byLanguage, }, }; writeFileSync(outputPath, JSON.stringify(output, null, 2), 'utf-8'); if (!existsSync(outputPath)) { throw new Error(`output file missing after write: ${outputPath}`); } process.stderr.write( `scan-project: filesScanned=${fileEntries.length} ` + `filteredByIgnore=${filteredByIgnore} ` + `complexity=${estimatedComplexity}\n`, ); } // --------------------------------------------------------------------------- // Run only when executed directly as a CLI; importing the module (e.g. from // tests) must not trigger main(). // // Canonicalize both sides through realpathSync. Node ESM resolves // import.meta.url through symlinks but pathToFileURL(process.argv[1]) preserves // them, so a raw equality check silently no-ops when the script is invoked via // a symlinked plugin install path (the default in Claude Code / Copilot CLI // caches). See GitHub issue #162. // --------------------------------------------------------------------------- function isCliEntry() { if (!process.argv[1]) return false; try { const modulePath = realpathSync(fileURLToPath(import.meta.url)); const argvPath = realpathSync(process.argv[1]); return modulePath === argvPath; } catch { return false; } } if (isCliEntry()) { try { await main(); } catch (err) { process.stderr.write(`scan-project.mjs failed: ${err.message}\n${err.stack}\n`); process.exit(1); } } // Default export of helpers for testability. export default { detectLanguage, detectCategory, estimateComplexity, };