mirror of
https://github.com/RichieCahill/dotfiles.git
synced 2026-04-21 14:49:10 -04:00
Compare commits
10 Commits
feature/se
...
feature/ad
| Author | SHA1 | Date | |
|---|---|---|---|
| 59cfc0d02f | |||
| 3914a1a7ab | |||
| b5ac770003 | |||
| 12e398514b | |||
| 69f9ef8187 | |||
| 1b171fcd3e | |||
| 16d938dc59 | |||
| c7fe44755f | |||
| bb9200860e | |||
| b91f7c34e1 |
9
.agent/workflows/format_code.md
Normal file
9
.agent/workflows/format_code.md
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
---
|
||||||
|
description: Format code using treefmt
|
||||||
|
---
|
||||||
|
|
||||||
|
// turbo
|
||||||
|
1. Run treefmt
|
||||||
|
```bash
|
||||||
|
treefmt
|
||||||
|
```
|
||||||
48
.github/workflows/fix_eval_warnings.yml
vendored
Normal file
48
.github/workflows/fix_eval_warnings.yml
vendored
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Runs after the "build_systems" workflow completes; scans its logs for Nix
# evaluation warnings and opens a PR with LLM-generated fix suggestions.
name: Fix Evaluation Warnings

on:
  workflow_run:
    workflows: ["build_systems"]
    types:
      - completed

permissions:
  contents: write        # push the auto-fix branch
  pull-requests: write   # open the suggestion PR
  actions: read          # read the triggering run's logs

jobs:
  analyze-and-fix:
    runs-on: self-hosted
    # Run on both success and failure — warnings can appear either way.
    if: ${{ github.event.workflow_run.conclusion == 'success' || github.event.workflow_run.conclusion == 'failure' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Download logs
        env:
          GH_TOKEN: ${{ github.token }}
          RUN_ID: ${{ github.event.workflow_run.id }}
        run: |
          gh run view $RUN_ID --log > build.log

      - name: Run Fix Script
        env:
          GITHUB_TOKEN: ${{ github.token }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          RUN_ID: ${{ github.event.workflow_run.id }}
          # The script imports from `python.common`, so run from the repo root.
          PYTHONPATH: .
        run: |
          python3 python/tools/fix_eval_warnings.py build.log

      - name: Create Pull Request
        # Only open a PR when the script actually wrote suggestions.
        if: hashFiles('fix_suggestions.md') != ''
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ github.token }}
          commit-message: "fix: automated evaluation warning fixes"
          title: "fix: automated evaluation warning fixes"
          body-path: fix_suggestions.md
          branch: "auto-fix-eval-warnings-${{ github.event.workflow_run.id }}"
          base: main
          labels: "automated-fix"
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -165,3 +165,4 @@ test.*
|
|||||||
|
|
||||||
# syncthing
|
# syncthing
|
||||||
.stfolder
|
.stfolder
|
||||||
|
fix_suggestions.md
|
||||||
|
|||||||
11
AGENTS.md
11
AGENTS.md
@@ -1,5 +1,12 @@
|
|||||||
## Dev environment tips
|
## Dev environment tips
|
||||||
|
|
||||||
- use treefmt to format all files
|
- use treefmt to format all files
|
||||||
- make python code ruff compliant
|
- keep new code consistent with the existing style
|
||||||
- use pytest to test python code
|
|
||||||
|
### Python
|
||||||
|
|
||||||
|
- make code `ruff` compliant
|
||||||
|
- use pytest to test python code; tests should be put in the `tests` directory
|
||||||
|
- don't use global state
|
||||||
|
- use google style docstrings
|
||||||
|
- use typer over argparse
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ builtins-ignorelist = ["id"]
|
|||||||
max-args = 9
|
max-args = 9
|
||||||
|
|
||||||
[tool.coverage.run]
|
[tool.coverage.run]
|
||||||
source = ["system_tools"]
|
source = ["python"]
|
||||||
|
|
||||||
[tool.coverage.report]
|
[tool.coverage.report]
|
||||||
exclude_lines = [
|
exclude_lines = [
|
||||||
|
|||||||
161
python/tools/fix_eval_warnings.py
Executable file
161
python/tools/fix_eval_warnings.py
Executable file
@@ -0,0 +1,161 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""fix_eval_warnings."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from python.common import configure_logger
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Config:
    """Settings for talking to the GitHub Models API.

    Attributes:
        github_token (str): GitHub token used as the API bearer credential.
        model_name (str): Name of the LLM model to query. Defaults to "gpt-4o".
        api_base (str): Base URL for the GitHub Models API.
            Defaults to "https://models.inference.ai.azure.com".
    """

    github_token: str
    model_name: str = "gpt-4o"
    api_base: str = "https://models.inference.ai.azure.com"
|
||||||
|
|
||||||
|
|
||||||
|
def get_log_content(run_id: str) -> None:
    """Fetch the logs for a specific workflow run.

    NOTE: currently a placeholder — it only logs the run id. Actual log
    retrieval is done by the `gh` CLI in the workflow, which writes the
    log to a file that is then passed to this script as an argument.

    Args:
        run_id (str): The run ID.
    """
    logger.info(f"Fetching logs for run ID: {run_id}")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_warnings(log_file_path: Path) -> list[str]:
    """Collect evaluation-warning lines from a build log.

    Args:
        log_file_path (Path): The path to the log file.

    Returns:
        list[str]: Stripped lines containing "evaluation warning:".
    """
    # errors="ignore" because CI logs can contain arbitrary bytes.
    with log_file_path.open(encoding="utf-8", errors="ignore") as log:
        return [line.strip() for line in log if "evaluation warning:" in line]
|
||||||
|
|
||||||
|
|
||||||
|
def generate_fix(warning_msg: str, config: Config) -> str | None:
    """Ask the GitHub Models API to suggest a fix for one warning.

    Args:
        warning_msg (str): The warning message.
        config (Config): The configuration object.

    Returns:
        str | None: The suggested fix text, or None if the API call failed.
    """
    logger.info(f"Generating fix for: {warning_msg}")

    prompt = f"""
I encountered the following Nix evaluation warning:

`{warning_msg}`

Please explain what this warning means and suggest how to fix it in the Nix code.
If possible, provide the exact code change in a diff format or a clear description of what to change.
"""

    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {config.github_token}"}
    payload = {
        "messages": [
            {"role": "system", "content": "You are an expert NixOS and Nix language developer."},
            {"role": "user", "content": prompt},
        ],
        "model": config.model_name,
        # Low temperature: we want focused, reproducible suggestions.
        "temperature": 0.1,
    }

    # Best-effort: any failure (network, HTTP, unexpected body shape) is
    # logged and reported as "no fix" rather than aborting the whole run.
    try:
        resp = requests.post(f"{config.api_base}/chat/completions", headers=headers, json=payload, timeout=30)
        resp.raise_for_status()
        body = resp.json()
        return body["choices"][0]["message"]["content"]  # type: ignore[no-any-return]
    except Exception:
        logger.exception("Error calling LLM")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def main(
    log_file: Path = typer.Argument(..., help="Path to the build log file"),  # noqa: B008
    model_name: str = typer.Option("gpt-4o", envvar="MODEL_NAME", help="LLM Model Name"),
) -> None:
    """Detect evaluation warnings in logs and suggest fixes using GitHub Models.

    Args:
        log_file (Path): Path to the build log file containing evaluation warnings.
        model_name (str): The name of the LLM model to use for generating fixes.
            Defaults to "gpt-4o", can be overridden by MODEL_NAME environment variable.

    Raises:
        typer.Exit: code 1 if the log file is missing; code 0 if no warnings found.
    """
    configure_logger()

    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        logger.warning("GITHUB_TOKEN not set. LLM calls will fail.")

    config = Config(github_token=github_token or "", model_name=model_name)

    if not log_file.exists():
        logger.error(f"Log file not found: {log_file}")
        raise typer.Exit(code=1)

    warnings = parse_warnings(log_file)
    if not warnings:
        logger.info("No evaluation warnings found.")
        raise typer.Exit(code=0)

    logger.info(f"Found {len(warnings)} warnings.")

    # Deduplicate to save tokens, preserving first-seen order so the PR body
    # is deterministic (set() iteration order varies between runs).
    unique_warnings = list(dict.fromkeys(warnings))

    fixes = []
    if not config.github_token:
        # The token check is loop-invariant: warn once instead of per warning.
        logger.warning("Skipping LLM call due to missing GITHUB_TOKEN")
    else:
        for warning in unique_warnings:
            fix = generate_fix(warning, config)
            if fix:
                fixes.append(f"## Warning\n`{warning}`\n\n## Suggested Fix\n{fix}\n")

    # Output fixes to a markdown file consumed as the PR body by the workflow.
    if fixes:
        with Path("fix_suggestions.md").open("w", encoding="utf-8") as f:
            f.write("# Automated Fix Suggestions\n\n")
            f.write("\n---\n".join(fixes))
        logger.info("Fix suggestions written to fix_suggestions.md")
    else:
        logger.info("No fixes generated.")
|
||||||
|
|
||||||
|
|
||||||
|
# Typer application entry point; `main` is registered as the sole command.
app = typer.Typer()
app.command()(main)

if __name__ == "__main__":
    app()
|
||||||
6
tests/conftest.py
Normal file
6
tests/conftest.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
"""Fixtures for tests."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
PASSWORD = "password" # noqa: S105
|
||||||
|
TOKEN = "token" # noqa: S105
|
||||||
75
tests/test_fix_eval_warnings.py
Normal file
75
tests/test_fix_eval_warnings.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
"""test_fix_eval_warnings."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from typer.testing import CliRunner
|
||||||
|
|
||||||
|
from python.tools.fix_eval_warnings import Config, app, generate_fix, parse_warnings
|
||||||
|
from tests.conftest import TOKEN
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from pyfakefs.fake_filesystem import FakeFilesystem
|
||||||
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
|
runner = CliRunner()
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_warnings(fs: FakeFilesystem) -> None:
    """parse_warnings extracts only the evaluation-warning lines."""
    log_path = Path("/build.log")
    fs.create_file(
        log_path,
        contents="Some output\nevaluation warning: 'system' is deprecated\nMore output",
        encoding="utf-8",
    )

    found = parse_warnings(log_path)

    assert len(found) == 1
    assert found[0] == "evaluation warning: 'system' is deprecated"
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_fix(mocker: MockerFixture) -> None:
    """generate_fix returns the message content from the API response."""
    fake_response = mocker.MagicMock()
    fake_response.json.return_value = {
        "choices": [{"message": {"content": "Use stdenv.hostPlatform.system"}}]
    }
    mock_post = mocker.patch("python.tools.fix_eval_warnings.requests.post")
    mock_post.return_value = fake_response

    fix = generate_fix("evaluation warning: 'system' is deprecated", Config(github_token=TOKEN))

    assert fix == "Use stdenv.hostPlatform.system"
    mock_post.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
def test_main(mocker: MockerFixture, fs: FakeFilesystem) -> None:
    """End-to-end CLI run: warning found, fix generated, suggestions written."""
    log_path = Path("/build.log")
    fs.create_file(
        log_path,
        contents="Some output\nevaluation warning: 'system' is deprecated\nMore output",
        encoding="utf-8",
    )

    mocker.patch("python.tools.fix_eval_warnings.generate_fix", return_value="Fixed it")
    mock_logger = mocker.patch("python.tools.fix_eval_warnings.logger")
    # Provide GITHUB_TOKEN so the script does not skip the LLM call path.
    mocker.patch.dict("os.environ", {"GITHUB_TOKEN": TOKEN})

    result = runner.invoke(app, [str(log_path)])

    assert result.exit_code == 0
    # Check logger calls rather than stdout: CliRunner might not capture
    # logging output when logging is configured to write to sys.stdout directly.
    logged = [str(call) for call in mock_logger.info.call_args_list]
    assert any("Found 1 warnings" in message for message in logged)
    assert any("Fix suggestions written to fix_suggestions.md" in message for message in logged)
    assert Path("fix_suggestions.md").exists()
|
||||||
Reference in New Issue
Block a user