reworked AGENTS.md

2026-04-19 13:49:09 -04:00 · 2025-12-06 20:58:29 -05:00
parent 12e398514b
commit b5ac770003
3 changed files with 178 additions and 81 deletions
--- a/.github/workflows/fix_eval_warnings.yml
+++ b/.github/workflows/fix_eval_warnings.yml
@@ -31,6 +31,7 @@ jobs:
          GITHUB_TOKEN: ${{ github.token }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          RUN_ID: ${{ github.event.workflow_run.id }}
          PYTHONPATH: .
        run: |
          python3 python/tools/fix_eval_warnings.py build.log
--- a/python/tools/fix_eval_warnings.py
+++ b/python/tools/fix_eval_warnings.py
@@ -1,52 +1,75 @@
 #!/usr/bin/env python3
-"""
+"""fix_eval_warnings."""
-Script to detect "evaluation warning:" in logs and suggest fixes using GitHub Models.
+
-"""
+from __future__ import annotations
 import logging
 import os
-import sys
+from dataclasses import dataclass
 import re
 import requests
 import json
 from pathlib import Path
-# Configuration
+import requests
-GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
+import typer
 GITHUB_REPOSITORY = os.environ.get("GITHUB_REPOSITORY")
 PR_NUMBER = os.environ.get("PR_NUMBER") # If triggered by PR
 RUN_ID = os.environ.get("RUN_ID")
-# GitHub Models API Endpoint (OpenAI compatible)
+from python.common import configure_logger
 # https://github.com/marketplace/models
 API_BASE = "https://models.inference.ai.azure.com"
 # Default to gpt-4o, but allow override via env var
 MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
-def get_log_content(run_id):
+logger = logging.getLogger(__name__)
    """Fetches the logs for a specific workflow run."""
    print(f"Fetching logs for run ID: {run_id}")
    headers = {
        "Authorization": f"Bearer {GITHUB_TOKEN}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28"
    }
@dataclass
class Config:
    """Configuration for the script.

    Attributes:
        github_token (str): GitHub token for API authentication.
        model_name (str): The name of the LLM model to use. Defaults to "gpt-4o".
        api_base (str): The base URL for the GitHub Models API.
            Defaults to "https://models.inference.ai.azure.com".
    """

    github_token: str
    model_name: str = "gpt-4o"
    api_base: str = "https://models.inference.ai.azure.com"

    def __repr__(self) -> str:
        """Return a debug representation that never contains the raw token.

        The auto-generated dataclass repr would print ``github_token`` verbatim,
        which leaks the credential if a Config is ever logged or shown in a
        traceback. An explicitly defined ``__repr__`` is kept by ``@dataclass``.
        """
        masked_token = "***" if self.github_token else ""
        return (
            f"{type(self).__name__}(github_token={masked_token!r}, "
            f"model_name={self.model_name!r}, api_base={self.api_base!r})"
        )
 def get_log_content(run_id: str) -> None:
    """Log the requested workflow run ID; log retrieval itself is not implemented.

    This is a placeholder: nothing is downloaded and the function always
    returns None.

    Args:
        run_id (str): The run ID.
    """
    logger.info(f"Fetching logs for run ID: {run_id}")
    # Not implemented: fetching would mean listing artifacts (or using the jobs
    # API), or downloading the logs with the 'gh' CLI in the workflow.
    # Reading the log from a file passed as an argument is easier for the workflow.
-def parse_warnings(log_file_path):
+
-    """Parses the log file for evaluation warnings."""
+def parse_warnings(log_file_path: Path) -> list[str]:
    """Parse the log file for evaluation warnings.
    Args:
        log_file_path (Path): The path to the log file.
    Returns:
        list[str]: A list of warning messages.
    """
    warnings = []
-    with open(log_file_path, 'r', encoding='utf-8', errors='ignore') as f:
+    with log_file_path.open(encoding="utf-8", errors="ignore") as f:
-        for line in f:
+        warnings.extend(line.strip() for line in f if "evaluation warning:" in line)
            if "evaluation warning:" in line:
                warnings.append(line.strip())
    return warnings
-def generate_fix(warning_msg):
+
-    """Calls GitHub Models to generate a fix for the warning."""
+def generate_fix(warning_msg: str, config: Config) -> str | None:
-    print(f"Generating fix for: {warning_msg}")
+    """Call GitHub Models to generate a fix for the warning.
    Args:
        warning_msg (str): The warning message.
        config (Config): The configuration object.
    Returns:
        Optional[str]: The suggested fix or None.
    """
    logger.info(f"Generating fix for: {warning_msg}")
    prompt = f"""
    I encountered the following Nix evaluation warning:
@@ -57,71 +80,82 @@ def generate_fix(warning_msg):
    If possible, provide the exact code change in a diff format or a clear description of what to change.
    """
-    headers = {
+    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {config.github_token}"}
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GITHUB_TOKEN}"
    }
    payload = {
        "messages": [
-            {
+            {"role": "system", "content": "You are an expert NixOS and Nix language developer."},
-                "role": "system",
+            {"role": "user", "content": prompt},
                "content": "You are an expert NixOS and Nix language developer."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
-        "model": MODEL_NAME,
+        "model": config.model_name,
-        "temperature": 0.1
+        "temperature": 0.1,
    }
    try:
-        response = requests.post(
+        response = requests.post(f"{config.api_base}/chat/completions", headers=headers, json=payload, timeout=30)
            f"{API_BASE}/chat/completions",
            headers=headers,
            json=payload
        )
        response.raise_for_status()
        result = response.json()
-        return result['choices'][0]['message']['content']
+        return result["choices"][0]["message"]["content"]  # type: ignore[no-any-return]
-    except Exception as e:
+    except Exception:
-        print(f"Error calling LLM: {e}")
+        logger.exception("Error calling LLM")
        return None
 def main():
    if len(sys.argv) < 2:
        print("Usage: fix_eval_warnings.py <log_file>")
        sys.exit(1)
-    log_file = sys.argv[1]
+def main(
-    if not os.path.exists(log_file):
+    log_file: Path = typer.Argument(..., help="Path to the build log file"),  # noqa: B008
-        print(f"Log file not found: {log_file}")
+    model_name: str = typer.Option("gpt-4o", envvar="MODEL_NAME", help="LLM Model Name"),
-        sys.exit(1)
+) -> None:
    """Detect evaluation warnings in logs and suggest fixes using GitHub Models.
    Args:
        log_file (Path): Path to the build log file containing evaluation warnings.
        model_name (str): The name of the LLM model to use for generating fixes.
            Defaults to "gpt-4o", can be overridden by MODEL_NAME environment variable.
    """
    configure_logger()
    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        logger.warning("GITHUB_TOKEN not set. LLM calls will fail.")
    config = Config(github_token=github_token or "", model_name=model_name)
    if not log_file.exists():
        logger.error(f"Log file not found: {log_file}")
        raise typer.Exit(code=1)
    warnings = parse_warnings(log_file)
    if not warnings:
-        print("No evaluation warnings found.")
+        logger.info("No evaluation warnings found.")
-        sys.exit(0)
+        raise typer.Exit(code=0)
-    print(f"Found {len(warnings)} warnings.")
+    logger.info(f"Found {len(warnings)} warnings.")
    # Process unique warnings to save tokens
    unique_warnings = list(set(warnings))
    fixes = []
    for warning in unique_warnings:
-        fix = generate_fix(warning)
+        if not config.github_token:
            logger.warning("Skipping LLM call due to missing GITHUB_TOKEN")
            continue
        fix = generate_fix(warning, config)
        if fix:
            fixes.append(f"## Warning\n`{warning}`\n\n## Suggested Fix\n{fix}\n")
    # Output fixes to a markdown file for the PR body
-    with open("fix_suggestions.md", "w") as f:
+    if fixes:
-        f.write("# Automated Fix Suggestions\n\n")
+        with Path("fix_suggestions.md").open("w") as f:
-        f.write("\n---\n".join(fixes))
+            f.write("# Automated Fix Suggestions\n\n")
            f.write("\n---\n".join(fixes))
        logger.info("Fix suggestions written to fix_suggestions.md")
    else:
        logger.info("No fixes generated.")
-    print("Fix suggestions written to fix_suggestions.md")
+
 app = typer.Typer()
 app.command()(main)
 if __name__ == "__main__":
-    main()
+    app()
--- a/tests/test_fix_eval_warnings.py
+++ b/tests/test_fix_eval_warnings.py
@@ -0,0 +1,62 @@
 """Tests for fix_eval_warnings."""
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 import pytest
 from typer.testing import CliRunner
 from python.tools.fix_eval_warnings import Config, app, generate_fix, parse_warnings
 runner = CliRunner()
@pytest.fixture
def log_file(tmp_path: Path) -> Path:
    """Write a small build log containing one evaluation warning and return its path."""
    contents = "Some output\nevaluation warning: 'system' is deprecated\nMore output"
    build_log = tmp_path / "build.log"
    build_log.write_text(contents, encoding="utf-8")
    return build_log
 def test_parse_warnings(log_file: Path) -> None:
    """Check that exactly the one warning line in the log is returned, stripped."""
    expected = ["evaluation warning: 'system' is deprecated"]
    found = parse_warnings(log_file)
    assert found == expected
@patch("python.tools.fix_eval_warnings.requests.post")
def test_generate_fix(mock_post: MagicMock) -> None:
    """Test generating a fix, including the request sent to the model endpoint."""
    mock_response = MagicMock()
    mock_response.json.return_value = {"choices": [{"message": {"content": "Use stdenv.hostPlatform.system"}}]}
    mock_post.return_value = mock_response

    config = Config(github_token="dummy_token")
    fix = generate_fix("evaluation warning: 'system' is deprecated", config)

    assert fix == "Use stdenv.hostPlatform.system"
    mock_post.assert_called_once()
    # HTTP errors must be surfaced before the body is parsed.
    mock_response.raise_for_status.assert_called_once()

    # A bare "was called" check would still pass if the URL, credentials or
    # model were wrong, so pin the parts of the request that come from Config.
    args, kwargs = mock_post.call_args
    assert args[0] == f"{config.api_base}/chat/completions"
    assert kwargs["headers"]["Authorization"] == "Bearer dummy_token"
    assert kwargs["json"]["model"] == config.model_name
@patch("python.tools.fix_eval_warnings.logger")
@patch("python.tools.fix_eval_warnings.generate_fix")
def test_main(mock_generate_fix: MagicMock, mock_logger: MagicMock, log_file: Path) -> None:
    """Test the main CLI end to end with the LLM call mocked out."""
    mock_generate_fix.return_value = "Fixed it"

    # main() writes fix_suggestions.md relative to the current directory. Run in
    # a throwaway directory so the test never touches the real CWD and leaves
    # nothing behind when an assertion fails. log_file lives under tmp_path, so
    # it is still reachable after the directory change.
    # GITHUB_TOKEN must be set or the script skips the (mocked) LLM call.
    with runner.isolated_filesystem(), patch.dict("os.environ", {"GITHUB_TOKEN": "dummy"}):
        result = runner.invoke(app, [str(log_file)])
        assert result.exit_code == 0
        suggestions = Path("fix_suggestions.md")
        assert suggestions.exists()
        written = suggestions.read_text()

    mock_generate_fix.assert_called_once()
    assert "Fixed it" in written

    # Verify logger calls instead of stdout, as CliRunner might not capture logging output correctly
    # when logging is configured to write to sys.stdout directly.
    info_calls = [str(call) for call in mock_logger.info.call_args_list]
    assert any("Found 1 warnings" in call for call in info_calls)
    assert any("Fix suggestions written to fix_suggestions.md" in call for call in info_calls)