diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0792d09cb6..e2680707a0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -121,7 +121,7 @@ jobs:
 
   # Build dist once for Node-relevant changes and share it with downstream jobs.
   build-artifacts:
-    needs: [docs-scope, changed-scope, code-analysis, check]
+    needs: [docs-scope, changed-scope, check]
     if: needs.docs-scope.outputs.docs_only != 'true' && (github.event_name == 'push' || needs.changed-scope.outputs.run_node == 'true')
     runs-on: blacksmith-4vcpu-ubuntu-2404
     steps:
@@ -171,7 +171,7 @@ jobs:
         run: pnpm release:check
 
   checks:
-    needs: [docs-scope, changed-scope, code-analysis, check]
+    needs: [docs-scope, changed-scope, check]
    if: needs.docs-scope.outputs.docs_only != 'true' && (github.event_name == 'push' || needs.changed-scope.outputs.run_node == 'true')
     runs-on: blacksmith-4vcpu-ubuntu-2404
     strategy:
@@ -234,37 +234,6 @@ jobs:
       - name: Check docs
         run: pnpm check:docs
 
-  # Check for files that grew past LOC threshold in this PR (delta-only).
-  # On push events, all steps are skipped and the job passes (no-op).
-  # Heavy downstream jobs depend on this to fail fast on violations.
-  code-analysis:
-    runs-on: blacksmith-4vcpu-ubuntu-2404
-    steps:
-      - name: Checkout
-        if: github.event_name == 'pull_request'
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          submodules: false
-
-      - name: Setup Python
-        if: github.event_name == 'pull_request'
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-
-      - name: Fetch base branch
-        if: github.event_name == 'pull_request'
-        run: git fetch origin ${{ github.base_ref }}:refs/remotes/origin/${{ github.base_ref }}
-
-      - name: Check code file sizes
-        if: github.event_name == 'pull_request'
-        run: |
-          python scripts/analyze_code_files.py \
-            --compare-to origin/${{ github.base_ref }} \
-            --threshold 1000 \
-            --strict
-
   secrets:
     runs-on: blacksmith-4vcpu-ubuntu-2404
     steps:
@@ -291,7 +260,7 @@ jobs:
         fi
 
   checks-windows:
-    needs: [docs-scope, changed-scope, build-artifacts, code-analysis, check]
+    needs: [docs-scope, changed-scope, build-artifacts, check]
     if: needs.docs-scope.outputs.docs_only != 'true' && (github.event_name == 'push' || needs.changed-scope.outputs.run_node == 'true')
     runs-on: blacksmith-4vcpu-windows-2025
     env:
@@ -399,7 +368,7 @@ jobs:
   # running 4 separate jobs per PR (as before) starved the queue. One job
   # per PR allows 5 PRs to run macOS checks simultaneously.
   macos:
-    needs: [docs-scope, changed-scope, code-analysis, check]
+    needs: [docs-scope, changed-scope, check]
     if: github.event_name == 'pull_request' && needs.docs-scope.outputs.docs_only != 'true' && needs.changed-scope.outputs.run_macos == 'true'
     runs-on: macos-latest
     steps:
@@ -632,7 +601,7 @@ jobs:
       PY
 
   android:
-    needs: [docs-scope, changed-scope, code-analysis, check]
+    needs: [docs-scope, changed-scope, check]
     if: needs.docs-scope.outputs.docs_only != 'true' && (github.event_name == 'push' || needs.changed-scope.outputs.run_android == 'true')
     runs-on: blacksmith-4vcpu-ubuntu-2404
     strategy:
diff --git a/docs/ci.md b/docs/ci.md
index 145b1284d6..cdf5b126a2 100644
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -32,18 +32,6 @@ Jobs are ordered so cheap checks fail before expensive ones run:
 2. `build-artifacts` (blocked on above)
 3. `checks`, `checks-windows`, `macos`, `android` (blocked on build)
 
-## Code Analysis
-
-The `code-analysis` job runs `scripts/analyze_code_files.py` on PRs to enforce code quality:
-
-- **LOC threshold**: Files that grow past 1000 lines fail the build
-- **Delta-only**: Only checks files changed in the PR, not the entire codebase
-- **Push to main**: Skipped (job passes as no-op) so merges aren't blocked
-
-When `--strict` is set, violations block all downstream jobs. This catches bloated files early before expensive tests run.
-
-Excluded directories: `node_modules`, `dist`, `vendor`, `.git`, `coverage`, `Swabble`, `skills`, `.pi`
-
 ## Runners
 
 | Runner | Jobs |
diff --git a/scripts/analyze_code_files.py b/scripts/analyze_code_files.py
deleted file mode 100644
index 03558cc06a..0000000000
--- a/scripts/analyze_code_files.py
+++ /dev/null
@@ -1,805 +0,0 @@
-#!/usr/bin/env python3
-"""
-Lists the longest and shortest code files in the project, and counts duplicated function names across files. Useful for identifying potential refactoring targets and enforcing code size guidelines.
-Threshold can be set to warn about files longer or shorter than a certain number of lines.
-
-CI mode (--compare-to): Only warns about files that grew past threshold compared to a base ref.
-Use --strict to exit non-zero on violations for CI gating.
-
-GitHub Actions: when GITHUB_ACTIONS=true, emits ::error annotations on flagged files
-and writes a Markdown job summary to $GITHUB_STEP_SUMMARY (if set).
-"""
-
-import os
-import re
-import sys
-import subprocess
-import argparse
-from pathlib import Path
-from typing import List, Tuple, Dict, Set, Optional
-from collections import defaultdict
-
-# File extensions to consider as code files
-CODE_EXTENSIONS = {
-    ".ts",
-    ".tsx",
-    ".js",
-    ".jsx",
-    ".mjs",
-    ".cjs",  # TypeScript/JavaScript
-    ".swift",  # macOS/iOS
-    ".kt",
-    ".java",  # Android
-    ".py",
-    ".sh",  # Scripts
-}
-
-# Directories to skip
-SKIP_DIRS = {
-    "node_modules",
-    ".git",
-    "dist",
-    "build",
-    "coverage",
-    "__pycache__",
-    ".turbo",
-    "out",
-    ".worktrees",
-    "vendor",
-    "Pods",
-    "DerivedData",
-    ".gradle",
-    ".idea",
-    "Swabble",  # Separate Swift package
-    "skills",  # Standalone skill scripts
-    ".pi",  # Pi editor extensions
-}
-
-# Filename patterns to skip in short-file warnings (barrel exports, stubs)
-SKIP_SHORT_PATTERNS = {
-    "index.js",
-    "index.ts",
-    "postinstall.js",
-}
-SKIP_SHORT_SUFFIXES = ("-cli.ts",)
-
-# Function names to skip in duplicate detection.
-# Only list names so generic they're expected to appear independently in many modules.
-# Do NOT use prefix-based skipping — it hides real duplication (e.g. formatDuration,
-# stripPrefix, parseConfig are specific enough to flag).
-SKIP_DUPLICATE_FUNCTIONS = {
-    # Lifecycle / framework plumbing
-    "main",
-    "init",
-    "setup",
-    "teardown",
-    "cleanup",
-    "dispose",
-    "destroy",
-    "open",
-    "close",
-    "connect",
-    "disconnect",
-    "execute",
-    "run",
-    "start",
-    "stop",
-    "render",
-    "update",
-    "refresh",
-    "reset",
-    "clear",
-    "flush",
-    # Too-short / too-generic identifiers
-    "text",
-    "json",
-    "pad",
-    "mask",
-    "digest",
-    "confirm",
-    "intro",
-    "outro",
-    "exists",
-    "send",
-    "receive",
-    "listen",
-    "log",
-    "warn",
-    "error",
-    "info",
-    "help",
-    "version",
-    "config",
-    "configure",
-    "describe",
-    "test",
-    "action",
-}
-SKIP_DUPLICATE_FILE_PATTERNS = (".test.ts", ".test.tsx", ".spec.ts")
-
-# Known packages in the monorepo
-PACKAGES = {"src", "apps", "extensions", "packages", "scripts", "ui", "test", "docs"}
-
-
-def get_package(file_path: Path, root_dir: Path) -> str:
-    """Get the package name for a file, or 'root' if at top level."""
-    try:
-        relative = file_path.relative_to(root_dir)
-        parts = relative.parts
-        if len(parts) > 0 and parts[0] in PACKAGES:
-            return parts[0]
-        return "root"
-    except ValueError:
-        return "root"
-
-
-def count_lines(file_path: Path) -> int:
-    """Count the number of lines in a file."""
-    try:
-        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
-            return sum(1 for _ in f)
-    except Exception:
-        return 0
-
-
-def find_code_files(root_dir: Path) -> List[Tuple[Path, int]]:
-    """Find all code files and their line counts."""
-    files_with_counts = []
-
-    for dirpath, dirnames, filenames in os.walk(root_dir):
-        # Remove skip directories from dirnames to prevent walking into them
-        dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
-
-        for filename in filenames:
-            file_path = Path(dirpath) / filename
-            if file_path.suffix.lower() in CODE_EXTENSIONS:
-                line_count = count_lines(file_path)
-                files_with_counts.append((file_path, line_count))
-
-    return files_with_counts
-
-
-# Regex patterns for TypeScript functions (exported and internal)
-TS_FUNCTION_PATTERNS = [
-    # export function name(...) or function name(...)
-    re.compile(r"^(?:export\s+)?(?:async\s+)?function\s+(\w+)", re.MULTILINE),
-    # export const name = or const name =
-    re.compile(
-        r"^(?:export\s+)?const\s+(\w+)\s*=\s*(?:\([^)]*\)|\w+)\s*=>", re.MULTILINE
-    ),
-]
-
-
-def extract_functions(file_path: Path) -> Set[str]:
-    """Extract function names from a TypeScript file."""
-    if file_path.suffix.lower() not in {".ts", ".tsx"}:
-        return set()
-
-    try:
-        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
-            content = f.read()
-    except Exception:
-        return set()
-
-    return extract_functions_from_content(content)
-
-
-def find_duplicate_functions(
-    files: List[Tuple[Path, int]], root_dir: Path
-) -> Dict[str, List[Path]]:
-    """Find function names that appear in multiple files."""
-    function_locations: Dict[str, List[Path]] = defaultdict(list)
-
-    for file_path, _ in files:
-        # Skip test files for duplicate detection
-        if any(file_path.name.endswith(pat) for pat in SKIP_DUPLICATE_FILE_PATTERNS):
-            continue
-
-        functions = extract_functions(file_path)
-        for func in functions:
-            # Skip known common function names
-            if func in SKIP_DUPLICATE_FUNCTIONS:
-                continue
-            function_locations[func].append(file_path)
-
-    # Filter to only duplicates, ignoring cross-package duplicates.
-    # Independent packages (extensions/*, apps/*, ui/) are treated like separate codebases —
-    # the same function name in extensions/telegram and extensions/discord,
-    # or in apps/ios and apps/macos, is expected, not duplication.
-    result: Dict[str, List[Path]] = {}
-    for name, paths in function_locations.items():
-        if len(paths) < 2:
-            continue
-
-        # Identify which independent package each path belongs to (if any)
-        # Returns a unique package key or None if it's core code
-        def get_independent_package(p: Path) -> Optional[str]:
-            try:
-                rel = p.relative_to(root_dir)
-                parts = rel.parts
-                if len(parts) >= 2:
-                    # extensions/<name>, apps/<name> are each independent
-                    if parts[0] in ("extensions", "apps"):
-                        return f"{parts[0]}/{parts[1]}"
-                # ui/ is a single independent package (browser frontend)
-                if len(parts) >= 1 and parts[0] == "ui":
-                    return "ui"
-                return None
-            except ValueError:
-                return None
-
-        package_keys = set()
-        has_core = False
-        for p in paths:
-            pkg = get_independent_package(p)
-            if pkg:
-                package_keys.add(pkg)
-            else:
-                has_core = True
-
-        # Skip if ALL instances are in different independent packages (no core overlap)
-        if not has_core and len(package_keys) == len(paths):
-            continue
-        result[name] = paths
-    return result
-
-
-def validate_git_ref(root_dir: Path, ref: str) -> bool:
-    """Validate that a git ref exists. Exits with error if not."""
-    try:
-        result = subprocess.run(
-            ["git", "rev-parse", "--verify", ref],
-            capture_output=True,
-            cwd=root_dir,
-            encoding="utf-8",
-        )
-        return result.returncode == 0
-    except Exception:
-        return False
-
-
-def get_file_content_at_ref(file_path: Path, root_dir: Path, ref: str) -> Optional[str]:
-    """Get content of a file at a specific git ref. Returns None if file doesn't exist at ref."""
-    try:
-        relative_path = file_path.relative_to(root_dir)
-        # Use forward slashes for git paths
-        git_path = str(relative_path).replace("\\", "/")
-        result = subprocess.run(
-            ["git", "show", f"{ref}:{git_path}"],
-            capture_output=True,
-            cwd=root_dir,
-            encoding="utf-8",
-            errors="ignore",
-        )
-        if result.returncode != 0:
-            stderr = result.stderr.strip()
-            # "does not exist" or "exists on disk, but not in" = file missing at ref (OK)
-            if "does not exist" in stderr or "exists on disk" in stderr:
-                return None
-            # Other errors (bad ref, git broken) = genuine failure
-            if stderr:
-                print(f"⚠️ git show error for {git_path}: {stderr}", file=sys.stderr)
-            return None
-        return result.stdout
-    except Exception as e:
-        print(f"⚠️ failed to read {file_path} at {ref}: {e}", file=sys.stderr)
-        return None
-
-
-def get_line_count_at_ref(file_path: Path, root_dir: Path, ref: str) -> Optional[int]:
-    """Get line count of a file at a specific git ref. Returns None if file doesn't exist at ref."""
-    content = get_file_content_at_ref(file_path, root_dir, ref)
-    if content is None:
-        return None
-    return len(content.splitlines())
-
-
-def extract_functions_from_content(content: str) -> Set[str]:
-    """Extract function names from TypeScript content string."""
-    functions = set()
-    for pattern in TS_FUNCTION_PATTERNS:
-        for match in pattern.finditer(content):
-            functions.add(match.group(1))
-    return functions
-
-
-def get_changed_files(root_dir: Path, compare_ref: str) -> Set[str]:
-    """Get set of files changed between compare_ref and HEAD (relative paths with forward slashes)."""
-    try:
-        result = subprocess.run(
-            ["git", "diff", "--name-only", compare_ref, "HEAD"],
-            capture_output=True,
-            cwd=root_dir,
-            encoding="utf-8",
-            errors="ignore",
-        )
-        if result.returncode != 0:
-            return set()
-        return {line.strip() for line in result.stdout.splitlines() if line.strip()}
-    except Exception:
-        return set()
-
-
-def find_duplicate_regressions(
-    files: List[Tuple[Path, int]],
-    root_dir: Path,
-    compare_ref: str,
-) -> Dict[str, List[Path]]:
-    """
-    Find new duplicate function names that didn't exist at the base ref.
-    Only checks functions in files that changed to keep CI fast.
-    Returns dict of function_name -> list of current file paths, only for
-    duplicates that are new (weren't duplicated at compare_ref).
-    """
-    # Build current duplicate map
-    current_dupes = find_duplicate_functions(files, root_dir)
-    if not current_dupes:
-        return {}
-
-    # Get changed files to scope the comparison
-    changed_files = get_changed_files(root_dir, compare_ref)
-    if not changed_files:
-        return {}  # Nothing changed, no new duplicates possible
-
-    # Only check duplicate functions that involve at least one changed file
-    relevant_dupes: Dict[str, List[Path]] = {}
-    for func_name, paths in current_dupes.items():
-        involves_changed = any(
-            str(p.relative_to(root_dir)).replace("\\", "/") in changed_files
-            for p in paths
-        )
-        if involves_changed:
-            relevant_dupes[func_name] = paths
-
-    if not relevant_dupes:
-        return {}
-
-    # For relevant duplicates, check if they were already duplicated at base ref
-    # Only need to read base versions of files involved in these duplicates
-    files_to_check: Set[Path] = set()
-    for paths in relevant_dupes.values():
-        files_to_check.update(paths)
-
-    base_function_locations: Dict[str, List[Path]] = defaultdict(list)
-    for file_path in files_to_check:
-        if file_path.suffix.lower() not in {".ts", ".tsx"}:
-            continue
-        content = get_file_content_at_ref(file_path, root_dir, compare_ref)
-        if content is None:
-            continue
-        functions = extract_functions_from_content(content)
-        for func in functions:
-            if func in SKIP_DUPLICATE_FUNCTIONS:
-                continue
-            base_function_locations[func].append(file_path)
-
-    base_dupes = {
-        name for name, paths in base_function_locations.items() if len(paths) > 1
-    }
-
-    # Return only new duplicates
-    return {
-        name: paths for name, paths in relevant_dupes.items() if name not in base_dupes
-    }
-
-
-def find_threshold_regressions(
-    files: List[Tuple[Path, int]],
-    root_dir: Path,
-    compare_ref: str,
-    threshold: int,
-) -> Tuple[List[Tuple[Path, int, Optional[int]]], List[Tuple[Path, int, int]]]:
-    """
-    Find files that crossed the threshold or grew while already over it.
-    Returns two lists:
-    - crossed: (path, current_lines, base_lines) for files that newly crossed the threshold
-    - grew: (path, current_lines, base_lines) for files already over threshold that got larger
-    """
-    crossed = []
-    grew = []
-
-    for file_path, current_lines in files:
-        if current_lines < threshold:
-            continue  # Not over threshold now, skip
-
-        base_lines = get_line_count_at_ref(file_path, root_dir, compare_ref)
-
-        if base_lines is None or base_lines < threshold:
-            # New file or crossed the threshold
-            crossed.append((file_path, current_lines, base_lines))
-        elif current_lines > base_lines:
-            # Already over threshold and grew larger
-            grew.append((file_path, current_lines, base_lines))
-
-    return crossed, grew
-
-
-def _write_github_summary(
-    summary_path: str,
-    crossed: List[Tuple[Path, int, Optional[int]]],
-    grew: List[Tuple[Path, int, int]],
-    new_dupes: Dict[str, List[Path]],
-    root_dir: Path,
-    threshold: int,
-    compare_ref: str,
-) -> None:
-    """Write a Markdown job summary to $GITHUB_STEP_SUMMARY."""
-    lines: List[str] = []
-    lines.append("## Code Size Check Failed\n")
-    lines.append("> ⚠️ **DO NOT trash the code base!** The goal is maintainability.\n")
-
-    if crossed:
-        lines.append(
-            f"### {len(crossed)} file(s) crossed the {threshold}-line threshold\n"
-        )
-        lines.append("| File | Before | After | Delta |")
-        lines.append("|------|-------:|------:|------:|")
-        for file_path, current, base in crossed:
-            rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
-            before = f"{base:,}" if base is not None else "new"
-            lines.append(
-                f"| `{rel}` | {before} | {current:,} | +{current - (base or 0):,} |"
-            )
-        lines.append("")
-
-    if grew:
-        lines.append(f"### {len(grew)} already-large file(s) grew larger\n")
-        lines.append("| File | Before | After | Delta |")
-        lines.append("|------|-------:|------:|------:|")
-        for file_path, current, base in grew:
-            rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
-            lines.append(f"| `{rel}` | {base:,} | {current:,} | +{current - base:,} |")
-        lines.append("")
-
-    if new_dupes:
-        lines.append(f"### {len(new_dupes)} new duplicate function name(s)\n")
-        lines.append("| Function | Files |")
-        lines.append("|----------|-------|")
-        for func_name in sorted(new_dupes.keys()):
-            paths = new_dupes[func_name]
-            file_list = ", ".join(
-                f"`{str(p.relative_to(root_dir)).replace(chr(92), '/')}`" for p in paths
-            )
-            lines.append(f"| `{func_name}` | {file_list} |")
-        lines.append("")
-
How to fix\n") - lines.append("- Split large files into smaller, focused modules") - lines.append("- Extract helpers, types, or constants into separate files") - lines.append("- See `AGENTS.md` for guidelines (~500–700 LOC target)") - lines.append(f"- This check compares your PR against `{compare_ref}`") - lines.append( - f"- Only code files are checked: {', '.join(f'`{e}`' for e in sorted(CODE_EXTENSIONS))}" - ) - lines.append("- Docs, test names, and config files are **not** affected") - lines.append("\n
") - - try: - with open(summary_path, "a", encoding="utf-8") as f: - f.write("\n".join(lines) + "\n") - except Exception as e: - print(f"⚠️ Failed to write job summary: {e}", file=sys.stderr) - - -def main(): - parser = argparse.ArgumentParser( - description="Analyze code files: list longest/shortest files, find duplicate function names" - ) - parser.add_argument( - "-t", - "--threshold", - type=int, - default=1000, - help="Warn about files longer than this many lines (default: 1000)", - ) - parser.add_argument( - "--min-threshold", - type=int, - default=10, - help="Warn about files shorter than this many lines (default: 10)", - ) - parser.add_argument( - "-n", - "--top", - type=int, - default=20, - help="Show top N longest files (default: 20)", - ) - parser.add_argument( - "-b", - "--bottom", - type=int, - default=10, - help="Show bottom N shortest files (default: 10)", - ) - parser.add_argument( - "-d", - "--directory", - type=str, - default=".", - help="Directory to scan (default: current directory)", - ) - parser.add_argument( - "--compare-to", - type=str, - default=None, - help="Git ref to compare against (e.g., origin/main). Only warn about files that grew past threshold.", - ) - parser.add_argument( - "--strict", - action="store_true", - help="Exit with non-zero status if any violations found (for CI)", - ) - - args = parser.parse_args() - - root_dir = Path(args.directory).resolve() - - # CI delta mode: only show regressions - if args.compare_to: - print(f"\n📂 Scanning: {root_dir}") - print(f"🔍 Comparing to: {args.compare_to}\n") - - if not validate_git_ref(root_dir, args.compare_to): - print(f"❌ Invalid git ref: {args.compare_to}", file=sys.stderr) - print( - " Make sure the ref exists (e.g. run 'git fetch origin ')", - file=sys.stderr, - ) - sys.exit(2) - - files = find_code_files(root_dir) - violations = False - - # Check file length regressions - crossed, grew = find_threshold_regressions( - files, root_dir, args.compare_to, args.threshold - ) - - if crossed: - print( - f"⚠️ {len(crossed)} file(s) crossed {args.threshold} line threshold:\n" - ) - for file_path, current, base in crossed: - relative_path = file_path.relative_to(root_dir) - if base is None: - print(f" {relative_path}: {current:,} lines (new file)") - else: - print( - f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})" - ) - print() - violations = True - else: - print(f"✅ No files crossed {args.threshold} line threshold") - - if grew: - print(f"⚠️ {len(grew)} already-large file(s) grew larger:\n") - for file_path, current, base in grew: - relative_path = file_path.relative_to(root_dir) - print( - f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})" - ) - print() - violations = True - else: - print(f"✅ No already-large files grew") - - # Check new duplicate function names - new_dupes = find_duplicate_regressions(files, root_dir, args.compare_to) - - if new_dupes: - print(f"⚠️ {len(new_dupes)} new duplicate function name(s):\n") - for func_name in sorted(new_dupes.keys()): - paths = new_dupes[func_name] - print(f" {func_name}:") - for path in paths: - print(f" {path.relative_to(root_dir)}") - print() - violations = True - else: - print(f"✅ No new duplicate function names") - - print() - if args.strict and violations: - # Emit GitHub Actions file annotations so violations appear inline in the PR diff - in_gha = os.environ.get("GITHUB_ACTIONS") == "true" - if in_gha: - for file_path, current, base in crossed: - rel = str(file_path.relative_to(root_dir)).replace("\\", "/") - if base 
is None: - print( - f"::error file={rel},title=File over {args.threshold} lines::{rel} is {current:,} lines (new file). Split into smaller modules." - ) - else: - print( - f"::error file={rel},title=File crossed {args.threshold} lines::{rel} grew from {base:,} to {current:,} lines (+{current - base:,}). Split into smaller modules." - ) - for file_path, current, base in grew: - rel = str(file_path.relative_to(root_dir)).replace("\\", "/") - print( - f"::error file={rel},title=Large file grew larger::{rel} is already {base:,} lines and grew to {current:,} (+{current - base:,}). Consider refactoring." - ) - for func_name in sorted(new_dupes.keys()): - for p in new_dupes[func_name]: - rel = str(p.relative_to(root_dir)).replace("\\", "/") - print( - f"::error file={rel},title=Duplicate function '{func_name}'::Function '{func_name}' appears in multiple files. Centralize or rename." - ) - - # Write GitHub Actions job summary (visible in the Actions check details) - summary_path = os.environ.get("GITHUB_STEP_SUMMARY") - if summary_path: - _write_github_summary( - summary_path, - crossed, - grew, - new_dupes, - root_dir, - args.threshold, - args.compare_to, - ) - - # Print actionable summary so contributors know what to do - print("─" * 60) - print("❌ Code size check failed\n") - print(" ⚠️ DO NOT just trash the code base!") - print(" The goal is maintainability.\n") - if crossed: - print( - f" {len(crossed)} file(s) grew past the {args.threshold}-line limit." - ) - if grew: - print( - f" {len(grew)} file(s) already over {args.threshold} lines got larger." - ) - print() - print(" How to fix:") - print(" • Split large files into smaller, focused modules") - print(" • Extract helpers, types, or constants into separate files") - print(" • See AGENTS.md for guidelines (~500-700 LOC target)") - print() - print(f" This check compares your PR against {args.compare_to}.") - print( - f" Only code files are checked ({', '.join(sorted(e for e in CODE_EXTENSIONS))})." 
- ) - print(" Docs, tests names, and config files are not affected.") - print("─" * 60) - sys.exit(1) - elif args.strict: - print("─" * 60) - print("✅ Code size check passed — no files exceed thresholds.") - print("─" * 60) - - return - - print(f"\n📂 Scanning: {root_dir}\n") - - # Find and sort files by line count - files = find_code_files(root_dir) - files_desc = sorted(files, key=lambda x: x[1], reverse=True) - files_asc = sorted(files, key=lambda x: x[1]) - - # Show top N longest files - top_files = files_desc[: args.top] - - print(f"📊 Top {min(args.top, len(top_files))} longest code files:\n") - print(f"{'Lines':>8} {'File'}") - print("-" * 60) - - long_warnings = [] - - for file_path, line_count in top_files: - relative_path = file_path.relative_to(root_dir) - - # Check if over threshold - if line_count >= args.threshold: - marker = " ⚠️" - long_warnings.append((relative_path, line_count)) - else: - marker = "" - - print(f"{line_count:>8} {relative_path}{marker}") - - # Show bottom N shortest files - bottom_files = files_asc[: args.bottom] - - print(f"\n📉 Bottom {min(args.bottom, len(bottom_files))} shortest code files:\n") - print(f"{'Lines':>8} {'File'}") - print("-" * 60) - - short_warnings = [] - - for file_path, line_count in bottom_files: - relative_path = file_path.relative_to(root_dir) - filename = file_path.name - - # Skip known barrel exports and stubs - is_expected_short = filename in SKIP_SHORT_PATTERNS or any( - filename.endswith(suffix) for suffix in SKIP_SHORT_SUFFIXES - ) - - # Check if under threshold - if line_count <= args.min_threshold and not is_expected_short: - marker = " ⚠️" - short_warnings.append((relative_path, line_count)) - else: - marker = "" - - print(f"{line_count:>8} {relative_path}{marker}") - - # Summary - total_files = len(files) - total_lines = sum(count for _, count in files) - - print("-" * 60) - print(f"\n📈 Summary:") - print(f" Total code files: {total_files:,}") - print(f" Total lines: {total_lines:,}") - print( - f" Average lines/file: {total_lines // total_files if total_files else 0:,}" - ) - - # Per-package breakdown - package_stats: dict[str, dict] = {} - for file_path, line_count in files: - pkg = get_package(file_path, root_dir) - if pkg not in package_stats: - package_stats[pkg] = {"files": 0, "lines": 0} - package_stats[pkg]["files"] += 1 - package_stats[pkg]["lines"] += line_count - - print(f"\n📦 Per-package breakdown:\n") - print(f"{'Package':<15} {'Files':>8} {'Lines':>10} {'Avg':>8}") - print("-" * 45) - - for pkg in sorted( - package_stats.keys(), key=lambda p: package_stats[p]["lines"], reverse=True - ): - stats = package_stats[pkg] - avg = stats["lines"] // stats["files"] if stats["files"] else 0 - print(f"{pkg:<15} {stats['files']:>8,} {stats['lines']:>10,} {avg:>8,}") - - # Long file warnings - if long_warnings: - print( - f"\n⚠️ Warning: {len(long_warnings)} file(s) exceed {args.threshold} lines (consider refactoring):" - ) - for path, count in long_warnings: - print(f" - {path} ({count:,} lines)") - else: - print(f"\n✅ No files exceed {args.threshold} lines") - - # Short file warnings - if short_warnings: - print( - f"\n⚠️ Warning: {len(short_warnings)} file(s) are {args.min_threshold} lines or less (check if needed):" - ) - for path, count in short_warnings: - print(f" - {path} ({count} lines)") - else: - print(f"\n✅ No files are {args.min_threshold} lines or less") - - # Duplicate function names - duplicates = find_duplicate_functions(files, root_dir) - if duplicates: - print( - f"\n⚠️ Warning: {len(duplicates)} function 
name(s) appear in multiple files (consider renaming):" - ) - for func_name in sorted(duplicates.keys()): - paths = duplicates[func_name] - print(f" - {func_name}:") - for path in paths: - print(f" {path.relative_to(root_dir)}") - else: - print(f"\n✅ No duplicate function names") - - print() - - # Exit with error if --strict and there are violations - if args.strict and long_warnings: - sys.exit(1) - - -if __name__ == "__main__": - main()