reworked AGENTS.md

This commit is contained in:
2025-12-06 20:58:29 -05:00
parent 12e398514b
commit b5ac770003
3 changed files with 178 additions and 81 deletions

View File

@@ -31,6 +31,7 @@ jobs:
GITHUB_TOKEN: ${{ github.token }} GITHUB_TOKEN: ${{ github.token }}
GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_REPOSITORY: ${{ github.repository }}
RUN_ID: ${{ github.event.workflow_run.id }} RUN_ID: ${{ github.event.workflow_run.id }}
PYTHONPATH: .
run: | run: |
python3 python/tools/fix_eval_warnings.py build.log python3 python/tools/fix_eval_warnings.py build.log

View File

@@ -1,127 +1,161 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """fix_eval_warnings."""
Script to detect "evaluation warning:" in logs and suggest fixes using GitHub Models.
""" from __future__ import annotations
import logging
import os import os
import sys from dataclasses import dataclass
import re
import requests
import json
from pathlib import Path from pathlib import Path
# Configuration import requests
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN") import typer
GITHUB_REPOSITORY = os.environ.get("GITHUB_REPOSITORY")
PR_NUMBER = os.environ.get("PR_NUMBER") # If triggered by PR
RUN_ID = os.environ.get("RUN_ID")
# GitHub Models API Endpoint (OpenAI compatible) from python.common import configure_logger
# https://github.com/marketplace/models
API_BASE = "https://models.inference.ai.azure.com"
# Default to gpt-4o, but allow override via env var
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
def get_log_content(run_id): logger = logging.getLogger(__name__)
"""Fetches the logs for a specific workflow run."""
print(f"Fetching logs for run ID: {run_id}")
headers = { @dataclass
"Authorization": f"Bearer {GITHUB_TOKEN}", class Config:
"Accept": "application/vnd.github+json", """Configuration for the script.
"X-GitHub-Api-Version": "2022-11-28"
} Attributes:
github_token (str): GitHub token for API authentication.
model_name (str): The name of the LLM model to use. Defaults to "gpt-4o".
api_base (str): The base URL for the GitHub Models API.
Defaults to "https://models.inference.ai.azure.com".
"""
github_token: str
model_name: str = "gpt-4o"
api_base: str = "https://models.inference.ai.azure.com"
def get_log_content(run_id: str) -> None:
"""Fetch the logs for a specific workflow run.
Args:
run_id (str): The run ID.
"""
logger.info(f"Fetching logs for run ID: {run_id}")
# List artifacts to find logs (or use jobs API) # List artifacts to find logs (or use jobs API)
# For simplicity, we might need to use 'gh' cli in the workflow to download logs # For simplicity, we might need to use 'gh' cli in the workflow to download logs
# But let's try to read from a file if passed as argument, which is easier for the workflow # But let's try to read from a file if passed as argument, which is easier for the workflow
return None
def parse_warnings(log_file_path):
"""Parses the log file for evaluation warnings.""" def parse_warnings(log_file_path: Path) -> list[str]:
"""Parse the log file for evaluation warnings.
Args:
log_file_path (Path): The path to the log file.
Returns:
list[str]: A list of warning messages.
"""
warnings = [] warnings = []
with open(log_file_path, 'r', encoding='utf-8', errors='ignore') as f: with log_file_path.open(encoding="utf-8", errors="ignore") as f:
for line in f: warnings.extend(line.strip() for line in f if "evaluation warning:" in line)
if "evaluation warning:" in line:
warnings.append(line.strip())
return warnings return warnings
def generate_fix(warning_msg):
"""Calls GitHub Models to generate a fix for the warning.""" def generate_fix(warning_msg: str, config: Config) -> str | None:
print(f"Generating fix for: {warning_msg}") """Call GitHub Models to generate a fix for the warning.
Args:
warning_msg (str): The warning message.
config (Config): The configuration object.
Returns:
Optional[str]: The suggested fix or None.
"""
logger.info(f"Generating fix for: {warning_msg}")
prompt = f""" prompt = f"""
I encountered the following Nix evaluation warning: I encountered the following Nix evaluation warning:
`{warning_msg}` `{warning_msg}`
Please explain what this warning means and suggest how to fix it in the Nix code. Please explain what this warning means and suggest how to fix it in the Nix code.
If possible, provide the exact code change in a diff format or a clear description of what to change. If possible, provide the exact code change in a diff format or a clear description of what to change.
""" """
headers = { headers = {"Content-Type": "application/json", "Authorization": f"Bearer {config.github_token}"}
"Content-Type": "application/json",
"Authorization": f"Bearer {GITHUB_TOKEN}"
}
payload = { payload = {
"messages": [ "messages": [
{ {"role": "system", "content": "You are an expert NixOS and Nix language developer."},
"role": "system", {"role": "user", "content": prompt},
"content": "You are an expert NixOS and Nix language developer."
},
{
"role": "user",
"content": prompt
}
], ],
"model": MODEL_NAME, "model": config.model_name,
"temperature": 0.1 "temperature": 0.1,
} }
try: try:
response = requests.post( response = requests.post(f"{config.api_base}/chat/completions", headers=headers, json=payload, timeout=30)
f"{API_BASE}/chat/completions",
headers=headers,
json=payload
)
response.raise_for_status() response.raise_for_status()
result = response.json() result = response.json()
return result['choices'][0]['message']['content'] return result["choices"][0]["message"]["content"] # type: ignore[no-any-return]
except Exception as e: except Exception:
print(f"Error calling LLM: {e}") logger.exception("Error calling LLM")
return None return None
def main():
if len(sys.argv) < 2:
print("Usage: fix_eval_warnings.py <log_file>")
sys.exit(1)
log_file = sys.argv[1] def main(
if not os.path.exists(log_file): log_file: Path = typer.Argument(..., help="Path to the build log file"), # noqa: B008
print(f"Log file not found: {log_file}") model_name: str = typer.Option("gpt-4o", envvar="MODEL_NAME", help="LLM Model Name"),
sys.exit(1) ) -> None:
"""Detect evaluation warnings in logs and suggest fixes using GitHub Models.
Args:
log_file (Path): Path to the build log file containing evaluation warnings.
model_name (str): The name of the LLM model to use for generating fixes.
Defaults to "gpt-4o", can be overridden by MODEL_NAME environment variable.
"""
configure_logger()
github_token = os.environ.get("GITHUB_TOKEN")
if not github_token:
logger.warning("GITHUB_TOKEN not set. LLM calls will fail.")
config = Config(github_token=github_token or "", model_name=model_name)
if not log_file.exists():
logger.error(f"Log file not found: {log_file}")
raise typer.Exit(code=1)
warnings = parse_warnings(log_file) warnings = parse_warnings(log_file)
if not warnings: if not warnings:
print("No evaluation warnings found.") logger.info("No evaluation warnings found.")
sys.exit(0) raise typer.Exit(code=0)
logger.info(f"Found {len(warnings)} warnings.")
print(f"Found {len(warnings)} warnings.")
# Process unique warnings to save tokens # Process unique warnings to save tokens
unique_warnings = list(set(warnings)) unique_warnings = list(set(warnings))
fixes = [] fixes = []
for warning in unique_warnings: for warning in unique_warnings:
fix = generate_fix(warning) if not config.github_token:
logger.warning("Skipping LLM call due to missing GITHUB_TOKEN")
continue
fix = generate_fix(warning, config)
if fix: if fix:
fixes.append(f"## Warning\n`{warning}`\n\n## Suggested Fix\n{fix}\n") fixes.append(f"## Warning\n`{warning}`\n\n## Suggested Fix\n{fix}\n")
# Output fixes to a markdown file for the PR body # Output fixes to a markdown file for the PR body
with open("fix_suggestions.md", "w") as f: if fixes:
f.write("# Automated Fix Suggestions\n\n") with Path("fix_suggestions.md").open("w") as f:
f.write("\n---\n".join(fixes)) f.write("# Automated Fix Suggestions\n\n")
f.write("\n---\n".join(fixes))
logger.info("Fix suggestions written to fix_suggestions.md")
else:
logger.info("No fixes generated.")
print("Fix suggestions written to fix_suggestions.md")
app = typer.Typer()
app.command()(main)
if __name__ == "__main__": if __name__ == "__main__":
main() app()

View File

@@ -0,0 +1,62 @@
"""Tests for fix_eval_warnings."""
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from typer.testing import CliRunner
from python.tools.fix_eval_warnings import Config, app, generate_fix, parse_warnings
runner = CliRunner()
@pytest.fixture
def log_file(tmp_path: Path) -> Path:
    """Provide a temporary build log containing exactly one evaluation warning."""
    content = "Some output\nevaluation warning: 'system' is deprecated\nMore output"
    path = tmp_path / "build.log"
    path.write_text(content, encoding="utf-8")
    return path
def test_parse_warnings(log_file: Path) -> None:
    """parse_warnings should return only the 'evaluation warning:' lines, stripped."""
    found = parse_warnings(log_file)
    assert found == ["evaluation warning: 'system' is deprecated"]
@patch("python.tools.fix_eval_warnings.requests.post")
def test_generate_fix(mock_post: MagicMock) -> None:
    """Test that generate_fix returns the LLM answer and issues the expected request.

    Beyond the return value, verify the outgoing request: endpoint URL, bearer
    token, configured model name, and that the warning text is embedded in the
    user prompt — otherwise a regression in request construction would pass
    silently since requests.post is fully mocked.
    """
    mock_response = MagicMock()
    mock_response.json.return_value = {"choices": [{"message": {"content": "Use stdenv.hostPlatform.system"}}]}
    mock_post.return_value = mock_response

    config = Config(github_token="dummy_token")
    warning = "evaluation warning: 'system' is deprecated"
    fix = generate_fix(warning, config)

    assert fix == "Use stdenv.hostPlatform.system"
    mock_post.assert_called_once()
    args, kwargs = mock_post.call_args
    assert args[0] == f"{config.api_base}/chat/completions"
    assert kwargs["headers"]["Authorization"] == "Bearer dummy_token"
    assert kwargs["json"]["model"] == config.model_name
    # The warning must reach the model: it is interpolated into the user prompt.
    assert warning in kwargs["json"]["messages"][1]["content"]
@patch("python.tools.fix_eval_warnings.logger")
@patch("python.tools.fix_eval_warnings.generate_fix")
def test_main(
    mock_generate_fix: MagicMock,
    mock_logger: MagicMock,
    log_file: Path,
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test the main CLI end to end with the LLM call mocked out.

    Runs inside tmp_path so the fix_suggestions.md the CLI writes to the
    current working directory never leaks into the repository — the original
    version unlinked it only after the assertions, so any assertion failure
    left the file behind.
    """
    mock_generate_fix.return_value = "Fixed it"
    monkeypatch.chdir(tmp_path)
    # main() skips LLM calls when GITHUB_TOKEN is absent, so set a dummy one.
    with patch.dict("os.environ", {"GITHUB_TOKEN": "dummy"}):
        result = runner.invoke(app, [str(log_file)])

    assert result.exit_code == 0
    # Verify logger calls instead of stdout, as CliRunner might not capture
    # logging output correctly when logging writes to sys.stdout directly.
    info_calls = [str(call) for call in mock_logger.info.call_args_list]
    assert any("Found 1 warnings" in c for c in info_calls)
    assert any("Fix suggestions written to fix_suggestions.md" in c for c in info_calls)
    assert (tmp_path / "fix_suggestions.md").exists()