reworked AGENTS.md

This commit is contained in:
2025-12-06 20:58:29 -05:00
parent 12e398514b
commit b5ac770003
3 changed files with 178 additions and 81 deletions

View File

@@ -31,6 +31,7 @@ jobs:
GITHUB_TOKEN: ${{ github.token }} GITHUB_TOKEN: ${{ github.token }}
GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_REPOSITORY: ${{ github.repository }}
RUN_ID: ${{ github.event.workflow_run.id }} RUN_ID: ${{ github.event.workflow_run.id }}
PYTHONPATH: .
run: | run: |
python3 python/tools/fix_eval_warnings.py build.log python3 python/tools/fix_eval_warnings.py build.log

View File

@@ -1,52 +1,75 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """fix_eval_warnings."""
Script to detect "evaluation warning:" in logs and suggest fixes using GitHub Models.
""" from __future__ import annotations
import logging
import os import os
import sys from dataclasses import dataclass
import re
import requests
import json
from pathlib import Path from pathlib import Path
# Configuration import requests
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN") import typer
GITHUB_REPOSITORY = os.environ.get("GITHUB_REPOSITORY")
PR_NUMBER = os.environ.get("PR_NUMBER") # If triggered by PR
RUN_ID = os.environ.get("RUN_ID")
# GitHub Models API Endpoint (OpenAI compatible) from python.common import configure_logger
# https://github.com/marketplace/models
API_BASE = "https://models.inference.ai.azure.com"
# Default to gpt-4o, but allow override via env var
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
def get_log_content(run_id): logger = logging.getLogger(__name__)
"""Fetches the logs for a specific workflow run."""
print(f"Fetching logs for run ID: {run_id}")
headers = {
"Authorization": f"Bearer {GITHUB_TOKEN}",
"Accept": "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28"
}
@dataclass
class Config:
    """Runtime settings for the warning-fixing script.

    Attributes:
        github_token (str): GitHub token for API authentication.
        model_name (str): Name of the LLM model to query. Defaults to "gpt-4o".
        api_base (str): Base URL of the GitHub Models API.
            Defaults to "https://models.inference.ai.azure.com".
    """

    github_token: str
    model_name: str = "gpt-4o"
    api_base: str = "https://models.inference.ai.azure.com"
def get_log_content(run_id: str) -> None:
    """Fetch the logs for a specific workflow run.

    Currently a placeholder: it only logs the request and downloads nothing.

    Args:
        run_id (str): The run ID.
    """
    logger.info(f"Fetching logs for run ID: {run_id}")
    # Pulling logs would require the artifacts/jobs API (or the 'gh' CLI in the
    # workflow); instead the workflow passes a log file path on the command line,
    # which is simpler, so this helper intentionally returns nothing.
    return None
def parse_warnings(log_file_path: Path) -> list[str]:
    """Collect every evaluation-warning line from a build log.

    Args:
        log_file_path (Path): The path to the log file.

    Returns:
        list[str]: Stripped lines containing "evaluation warning:".
    """
    # errors="ignore" keeps parsing alive on any stray non-UTF-8 bytes in the log.
    with log_file_path.open(encoding="utf-8", errors="ignore") as handle:
        return [raw.strip() for raw in handle if "evaluation warning:" in raw]
def generate_fix(warning_msg):
"""Calls GitHub Models to generate a fix for the warning.""" def generate_fix(warning_msg: str, config: Config) -> str | None:
print(f"Generating fix for: {warning_msg}") """Call GitHub Models to generate a fix for the warning.
Args:
warning_msg (str): The warning message.
config (Config): The configuration object.
Returns:
Optional[str]: The suggested fix or None.
"""
logger.info(f"Generating fix for: {warning_msg}")
prompt = f""" prompt = f"""
I encountered the following Nix evaluation warning: I encountered the following Nix evaluation warning:
@@ -57,71 +80,82 @@ def generate_fix(warning_msg):
If possible, provide the exact code change in a diff format or a clear description of what to change. If possible, provide the exact code change in a diff format or a clear description of what to change.
""" """
headers = { headers = {"Content-Type": "application/json", "Authorization": f"Bearer {config.github_token}"}
"Content-Type": "application/json",
"Authorization": f"Bearer {GITHUB_TOKEN}"
}
payload = { payload = {
"messages": [ "messages": [
{ {"role": "system", "content": "You are an expert NixOS and Nix language developer."},
"role": "system", {"role": "user", "content": prompt},
"content": "You are an expert NixOS and Nix language developer."
},
{
"role": "user",
"content": prompt
}
], ],
"model": MODEL_NAME, "model": config.model_name,
"temperature": 0.1 "temperature": 0.1,
} }
try: try:
response = requests.post( response = requests.post(f"{config.api_base}/chat/completions", headers=headers, json=payload, timeout=30)
f"{API_BASE}/chat/completions",
headers=headers,
json=payload
)
response.raise_for_status() response.raise_for_status()
result = response.json() result = response.json()
return result['choices'][0]['message']['content'] return result["choices"][0]["message"]["content"] # type: ignore[no-any-return]
except Exception as e: except Exception:
print(f"Error calling LLM: {e}") logger.exception("Error calling LLM")
return None return None
def main(
    log_file: Path = typer.Argument(..., help="Path to the build log file"),  # noqa: B008
    model_name: str = typer.Option("gpt-4o", envvar="MODEL_NAME", help="LLM Model Name"),
) -> None:
    """Detect evaluation warnings in logs and suggest fixes using GitHub Models.

    Args:
        log_file (Path): Path to the build log file containing evaluation warnings.
        model_name (str): The name of the LLM model to use for generating fixes.
            Defaults to "gpt-4o", can be overridden by MODEL_NAME environment variable.

    Raises:
        typer.Exit: code 1 when the log file is missing, code 0 when no warnings are found.
    """
    configure_logger()
    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        logger.warning("GITHUB_TOKEN not set. LLM calls will fail.")
    config = Config(github_token=github_token or "", model_name=model_name)

    if not log_file.exists():
        logger.error(f"Log file not found: {log_file}")
        raise typer.Exit(code=1)

    warnings = parse_warnings(log_file)
    if not warnings:
        logger.info("No evaluation warnings found.")
        raise typer.Exit(code=0)

    logger.info(f"Found {len(warnings)} warnings.")

    # Process unique warnings to save tokens
    unique_warnings = list(set(warnings))

    fixes = []
    if not config.github_token:
        # The token check is loop-invariant: warn once, instead of once per warning
        # as the previous per-iteration guard did.
        logger.warning("Skipping LLM call due to missing GITHUB_TOKEN")
    else:
        for warning in unique_warnings:
            fix = generate_fix(warning, config)
            if fix:
                fixes.append(f"## Warning\n`{warning}`\n\n## Suggested Fix\n{fix}\n")

    # Output fixes to a markdown file for the PR body
    if fixes:
        with Path("fix_suggestions.md").open("w") as f:
            f.write("# Automated Fix Suggestions\n\n")
            f.write("\n---\n".join(fixes))
        logger.info("Fix suggestions written to fix_suggestions.md")
    else:
        logger.info("No fixes generated.")


app = typer.Typer()
app.command()(main)

if __name__ == "__main__":
    app()

View File

@@ -0,0 +1,62 @@
"""Tests for fix_eval_warnings."""
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from typer.testing import CliRunner
from python.tools.fix_eval_warnings import Config, app, generate_fix, parse_warnings
# Shared CliRunner used by every CLI-level test below to invoke the Typer app.
runner = CliRunner()
@pytest.fixture
def log_file(tmp_path: Path) -> Path:
    """Create a dummy log file."""
    target = tmp_path / "build.log"
    contents = "Some output\nevaluation warning: 'system' is deprecated\nMore output"
    target.write_text(contents, encoding="utf-8")
    return target
def test_parse_warnings(log_file: Path) -> None:
    """Parsing the fixture log yields exactly the one warning line."""
    found = parse_warnings(log_file)
    assert len(found) == 1
    assert found[0] == "evaluation warning: 'system' is deprecated"
@patch("python.tools.fix_eval_warnings.requests.post")
def test_generate_fix(mock_post: MagicMock) -> None:
    """generate_fix returns the model's message content from the API response."""
    fake_response = MagicMock()
    fake_response.json.return_value = {"choices": [{"message": {"content": "Use stdenv.hostPlatform.system"}}]}
    mock_post.return_value = fake_response
    cfg = Config(github_token="dummy_token")
    suggestion = generate_fix("evaluation warning: 'system' is deprecated", cfg)
    assert suggestion == "Use stdenv.hostPlatform.system"
    mock_post.assert_called_once()
@patch("python.tools.fix_eval_warnings.logger")
@patch("python.tools.fix_eval_warnings.generate_fix")
def test_main(mock_generate_fix: MagicMock, mock_logger: MagicMock, log_file: Path) -> None:
    """Test the main CLI."""
    mock_generate_fix.return_value = "Fixed it"
    try:
        # We need to mock GITHUB_TOKEN env var or the script will warn/fail
        with patch.dict("os.environ", {"GITHUB_TOKEN": "dummy"}):
            result = runner.invoke(app, [str(log_file)])
        assert result.exit_code == 0
        # Verify logger calls instead of stdout, as CliRunner might not capture logging output correctly
        # when logging is configured to write to sys.stdout directly.
        assert any("Found 1 warnings" in str(call) for call in mock_logger.info.call_args_list)
        assert any(
            "Fix suggestions written to fix_suggestions.md" in str(call)
            for call in mock_logger.info.call_args_list
        )
        assert Path("fix_suggestions.md").exists()
    finally:
        # Clean up unconditionally: the old version only unlinked after all asserts
        # passed, leaking fix_suggestions.md into the CWD whenever the test failed.
        Path("fix_suggestions.md").unlink(missing_ok=True)