mirror of
https://github.com/RichieCahill/dotfiles.git
synced 2026-04-17 04:58:19 -04:00
removed repo_line_counter.py
This commit is contained in:
@@ -1,231 +0,0 @@
|
|||||||
"""Count lines of code in a local directory, grouped by file extension."""
|
|
||||||
|
|
||||||
import shutil
|
|
||||||
import subprocess
|
|
||||||
from collections import defaultdict
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Annotated
|
|
||||||
|
|
||||||
import typer
|
|
||||||
|
|
||||||
# Typer application object; the functions in this file decorated with
# @app.command() attach to it, and the __main__ guard invokes it.
app = typer.Typer(help="Count lines of code by file extension.")
|
|
||||||
|
|
||||||
# Upper bound on the number of extension columns printed in the history table;
# any further extensions are only mentioned in a trailing "not shown" note.
MAX_DISPLAY_EXTENSIONS = 10
|
|
||||||
|
|
||||||
# File suffixes that are skipped when counting lines. Stored as a frozenset so
# this shared lookup table cannot be mutated by accident at runtime.
BINARY_EXTENSIONS = frozenset(
    {
        # images
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".ico",
        ".svg",
        # fonts
        ".woff",
        ".woff2",
        ".ttf",
        ".eot",
        # documents and archives
        ".pdf",
        ".zip",
        ".tar",
        ".gz",
        ".bz2",
        # executables, libraries and object files
        ".exe",
        ".bin",
        ".so",
        ".dylib",
        ".dll",
        ".o",
        ".a",
        # compiled bytecode / packaged classes
        ".pyc",
        ".class",
        ".jar",
    }
)
|
|
||||||
|
|
||||||
|
|
||||||
def _git(*args: str, cwd: Path) -> subprocess.CompletedProcess[str]:
    """Run a git subcommand and capture its output as text.

    Args:
        *args: Arguments passed to the git executable after its path.
        cwd: Directory in which git is executed.

    Returns:
        The completed process. A non-zero exit status does not raise, so
        callers must inspect ``returncode`` themselves.

    Raises:
        typer.BadParameter: If no git executable is found on PATH.
    """
    git = shutil.which("git")
    if not git:
        msg = "git not found on PATH"
        raise typer.BadParameter(msg)
    return subprocess.run(
        [git, *args],
        capture_output=True,
        text=True,
        # Output such as `git show <commit>:<path>` is not guaranteed to be
        # valid text. Decode leniently so one odd blob cannot abort the whole
        # run with UnicodeDecodeError; newline counts are unaffected.
        encoding="utf-8",
        errors="replace",
        check=False,
        cwd=cwd,
    )
|
|
||||||
|
|
||||||
|
|
||||||
def get_git_files(directory: Path) -> set[Path]:
    """Get the set of files tracked/not-ignored by git.

    Args:
        directory: Directory in which ``git ls-files`` is run; the returned
            paths are this directory joined with each listed name.

    Returns:
        Paths of cached files plus untracked files that are not excluded by
        the standard ignore rules.

    Raises:
        typer.BadParameter: If the git command exits with a non-zero status.
    """
    # -z makes git emit names verbatim, separated by NUL. Without it, names
    # with unusual or non-ASCII characters are printed C-quoted and would never
    # compare equal to real filesystem paths.
    result = _git("ls-files", "-z", "--cached", "--others", "--exclude-standard", cwd=directory)
    if result.returncode != 0:
        msg = f"Not a git repository or git error: {directory}"
        raise typer.BadParameter(msg)
    return {directory / name for name in result.stdout.split("\0") if name}
|
|
||||||
|
|
||||||
|
|
||||||
def count_lines(target: Path, *, respect_gitignore: bool) -> dict[str, int]:
    """Walk a directory and count lines per file extension.

    Args:
        target: Root directory that is scanned recursively.
        respect_gitignore: When true, only files returned by
            ``get_git_files(target)`` are counted.

    Returns:
        Mapping of file suffix (or ``"(no extension)"``) to the number of
        newline characters found in files with that suffix. Files that cannot
        be read are skipped.
    """
    # None means "no filtering"; this keeps the name bound on every path.
    allowed_files = get_git_files(target) if respect_gitignore else None

    counts: dict[str, int] = defaultdict(int)
    for filepath in target.rglob("*"):
        # Anything under a .git entry is repository metadata, not source code.
        if ".git" in filepath.relative_to(target).parts:
            continue
        if not filepath.is_file():
            continue
        if allowed_files is not None and filepath not in allowed_files:
            continue
        ext = filepath.suffix or "(no extension)"
        # Compare case-insensitively so e.g. ".PNG" is skipped like ".png".
        if ext.lower() in BINARY_EXTENSIONS:
            continue
        try:
            text = filepath.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        counts[ext] += text.count("\n")
    return dict(counts)
|
|
||||||
|
|
||||||
|
|
||||||
def get_first_commit_date(directory: Path) -> datetime:
    """Return the author date of the first entry in the reversed git log.

    Args:
        directory: Directory in which ``git log`` is run.

    Raises:
        typer.BadParameter: If git exits with a non-zero status or prints
            nothing.
    """
    log = _git("log", "--reverse", "--format=%aI", cwd=directory)
    has_history = log.returncode == 0 and bool(log.stdout.strip())
    if not has_history:
        msg = f"Could not read git history: {directory}"
        raise typer.BadParameter(msg)
    # Only the first line of the reversed log is needed.
    oldest, *_ = log.stdout.splitlines()
    return datetime.fromisoformat(oldest)
|
|
||||||
|
|
||||||
|
|
||||||
def get_weekly_commits(directory: Path) -> list[tuple[str, str]]:
    """Get one commit per week from the repo's history.

    Starting at the date returned by ``get_first_commit_date``, step forward
    one week at a time and ask ``git rev-list -1 --before=<date> HEAD`` for a
    commit. Consecutive steps that resolve to the same commit are collapsed,
    and HEAD is appended at the end if it is not already the last entry.

    Args:
        directory: Directory in which the git commands are run.

    Returns list of (date_str, commit_hash) tuples, where date_str is
    formatted as YYYY-MM-DD.

    Raises:
        typer.BadParameter: Propagated from ``get_first_commit_date`` when the
            history cannot be read.
    """
    from datetime import timezone  # local import keeps this fix self-contained

    first_date = get_first_commit_date(directory)
    # `datetime` in this module is the class, which has no `UTC` attribute;
    # `timezone.utc` is the tz object that was intended.
    now = datetime.now(tz=timezone.utc)

    weeks: list[tuple[str, str]] = []
    current = first_date
    while current <= now:
        iso = current.strftime("%Y-%m-%dT%H:%M:%S%z")
        result = _git("rev-list", "-1", f"--before={iso}", "HEAD", cwd=directory)
        commit = result.stdout.strip() if result.returncode == 0 else ""
        # Skip steps with no commit and steps that repeat the previous commit.
        if commit and (not weeks or weeks[-1][1] != commit):
            weeks.append((current.strftime("%Y-%m-%d"), commit))
        current += timedelta(weeks=1)

    # Always include the latest commit
    result = _git("rev-parse", "HEAD", cwd=directory)
    if result.returncode == 0:
        head = result.stdout.strip()
        if not weeks or weeks[-1][1] != head:
            weeks.append((now.strftime("%Y-%m-%d"), head))

    return weeks
|
|
||||||
|
|
||||||
|
|
||||||
def count_lines_at_commit(directory: Path, commit: str) -> dict[str, int]:
    """List files and count lines at a specific commit using git show.

    Args:
        directory: Directory in which the git commands are run.
        commit: Commit identifier passed to ``git ls-tree`` and ``git show``.

    Returns:
        Mapping of file suffix (or ``"(no extension)"``) to the number of
        newline characters in the blobs with that suffix. Empty if the tree
        listing fails; blobs that ``git show`` cannot print are skipped.
    """
    # -z makes git emit names verbatim, separated by NUL. Without it, unusual
    # names are printed C-quoted and the `git show` lookup below would fail.
    result = _git("ls-tree", "-r", "-z", "--name-only", commit, cwd=directory)
    if result.returncode != 0:
        return {}

    counts: dict[str, int] = defaultdict(int)
    for filepath in result.stdout.split("\0"):
        if not filepath:
            continue  # trailing NUL terminator yields an empty final item
        ext = Path(filepath).suffix or "(no extension)"
        # Compare case-insensitively so e.g. ".PNG" is skipped like ".png".
        if ext.lower() in BINARY_EXTENSIONS:
            continue
        blob = _git("show", f"{commit}:{filepath}", cwd=directory)
        if blob.returncode != 0:
            continue
        counts[ext] += blob.stdout.count("\n")
    return dict(counts)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def snapshot(
    directory: Annotated[Path, typer.Argument(help="Directory to scan")],
    include_gitignored: Annotated[bool, typer.Option(help="Include files ignored by git")] = False,
) -> None:
    """Count lines of code at the current state of the directory."""
    root = directory.resolve()
    if not root.is_dir():
        raise typer.BadParameter(f"Not a directory: {root}")

    # Banner: what is scanned and whether the git file list filters it.
    print(f"Scanning {root}")
    if not include_gitignored:
        print("Respecting .gitignore (use --include-gitignored to include all files)")
    print()

    per_extension = count_lines(root, respect_gitignore=not include_gitignored)
    if not per_extension:
        print("No files found.")
        return

    # Two-column table, largest line count first, closed by a grand total.
    rule = "-" * 32
    print(f"{'Extension':<20} {'Lines':>10}")
    print(rule)
    ranked = sorted(per_extension.items(), key=lambda item: item[1], reverse=True)
    for suffix, total in ranked:
        print(f"{suffix:<20} {total:>10,}")
    print(rule)
    print(f"{'TOTAL':<20} {sum(per_extension.values()):>10,}")
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def history(
    directory: Annotated[Path, typer.Argument(help="Git repo directory to scan")],
) -> None:
    """Walk through git history in 1-week increments, counting lines at each point."""
    target = directory.resolve()
    if not target.is_dir():
        raise typer.BadParameter(f"Not a directory: {target}")

    weeks = get_weekly_commits(target)
    if not weeks:
        print("No commits found.")
        return

    print(f"Scanning {len(weeks)} weekly snapshots in {target}\n")

    # One (date, per-extension counts) entry per sampled commit.
    weekly_data: list[tuple[str, dict[str, int]]] = []
    for date_str, commit in weeks:
        print(f" Processing {date_str} ({commit[:8]})...")
        weekly_data.append((date_str, count_lines_at_commit(target, commit)))

    # Sort extensions by total lines across all weeks
    ext_totals: dict[str, int] = defaultdict(int)
    for _, counts in weekly_data:
        for ext, lines in counts.items():
            ext_totals[ext] += lines
    sorted_exts = sorted(ext_totals, key=lambda e: ext_totals[e], reverse=True)

    # Print table
    top_exts = sorted_exts[:MAX_DISPLAY_EXTENSIONS]
    # Widen a column when its label would otherwise touch the previous column
    # (e.g. "(no extension)" is longer than the default 10 characters).
    widths = {ext: max(10, len(ext) + 2) for ext in top_exts}
    header = f"{'Date':<12}" + "".join(f"{ext:>{widths[ext]}}" for ext in top_exts) + f"{'TOTAL':>10}"
    print(f"\n{header}")
    print("-" * len(header))

    for date_str, counts in weekly_data:
        cells = "".join(f"{counts.get(ext, 0):>{widths[ext]},}" for ext in top_exts)
        print(f"{date_str:<12}{cells}{sum(counts.values()):>10,}")

    remaining = len(sorted_exts) - MAX_DISPLAY_EXTENSIONS
    if remaining > 0:
        print(f"\n({remaining} more extensions not shown)")
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: invoke the Typer application only when run directly.
if __name__ == "__main__":
    app()
|
|
||||||
Reference in New Issue
Block a user