# dotfiles/python/sheet_music_ocr/review.py

"""LLM-based MusicXML review and correction.

Supports both Claude (Anthropic) and OpenAI APIs for reviewing
MusicXML output from Audiveris and suggesting/applying fixes.
"""

from __future__ import annotations

import enum
import os
from typing import TYPE_CHECKING

import httpx

if TYPE_CHECKING:
    from pathlib import Path

# Instruction text sent to the LLM. The MusicXML document is concatenated
# directly after it, which is why the string ends with a blank line.
# Backslashes at line ends are line continuations inside the literal, so those
# sentences are sent as single lines.
REVIEW_PROMPT = """\
You are a music notation expert. Review the following MusicXML file produced by \
optical music recognition (Audiveris). Look for and fix common OCR errors including:

- Incorrect note pitches or durations
- Wrong or missing key signatures, time signatures, or clefs
- Incorrect rest durations or placements
- Missing or incorrect accidentals
- Wrong beam groupings or tuplets
- Garbled or misspelled lyrics and text annotations
- Missing or incorrect dynamic markings
- Incorrect measure numbers or barline types
- Voice/staff assignment errors

Return ONLY the corrected MusicXML. Do not include any explanation, commentary, or \
markdown formatting. Output the raw XML directly.

Here is the MusicXML to review:

"""

# Passed as the ``timeout`` argument of every ``httpx.post`` call in this
# module (httpx interprets a bare number as seconds).
_TIMEOUT = 300


class LLMProvider(enum.StrEnum):
    """Supported LLM providers.

    Members are ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    # Requests are sent to the Anthropic Messages API (see ``_call_claude``).
    CLAUDE = "claude"
    # Requests are sent to the OpenAI Chat Completions API (see ``_call_openai``).
    OPENAI = "openai"


class ReviewError(Exception):
    """Raised when LLM review fails.

    Within this module it is raised, for example, when the provider's API-key
    environment variable is unset or empty, and when a provider API responds
    with a non-200 status code.
    """


def _get_api_key(provider: LLMProvider) -> str:
    """Look up the API key for *provider* in the process environment.

    Args:
        provider: The LLM provider whose credential is needed.

    Returns:
        The non-empty value of ``ANTHROPIC_API_KEY`` for Claude, or of
        ``OPENAI_API_KEY`` for any other provider.

    Raises:
        ReviewError: If the environment variable is unset or empty.
    """
    if provider == LLMProvider.CLAUDE:
        variable_name = "ANTHROPIC_API_KEY"
    else:
        variable_name = "OPENAI_API_KEY"

    secret = os.environ.get(variable_name)
    if secret:
        return secret

    # An empty string is treated the same as a missing variable.
    msg = f"{variable_name} environment variable is not set."
    raise ReviewError(msg)


def _call_claude(content: str, api_key: str) -> str:
    """Send MusicXML to the Anthropic Messages API and return the reply text.

    Args:
        content: MusicXML document text; it is appended to ``REVIEW_PROMPT``.
        api_key: Anthropic API key, sent in the ``x-api-key`` header.

    Returns:
        The text of the first content block of the response.

    Raises:
        ReviewError: If the request cannot be completed (timeout, connection
            failure), the API answers with a non-200 status, the body does not
            have the expected JSON shape, or the reply was cut off at the
            ``max_tokens`` limit.
    """
    try:
        response = httpx.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={
                "model": "claude-sonnet-4-20250514",
                "max_tokens": 16384,
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as exc:
        # Surface transport problems as ReviewError so callers only need to
        # handle one exception type for a failed review.
        msg = f"Claude API request failed: {exc}"
        raise ReviewError(msg) from exc

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"Claude API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        text = data["content"][0]["text"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # ValueError covers a body that is not valid JSON; the others cover a
        # JSON body without a first text content block.
        msg = f"Claude API returned an unexpected response: {response.text}"
        raise ReviewError(msg) from exc

    if not isinstance(text, str):
        msg = f"Claude API returned an unexpected response: {response.text}"
        raise ReviewError(msg)

    # A reply stopped by the token limit is an incomplete XML document;
    # returning it would silently hand the caller a corrupt score.
    if data.get("stop_reason") == "max_tokens":
        msg = "Claude response was truncated at the max_tokens limit; the MusicXML is incomplete."
        raise ReviewError(msg)

    return text


def _call_openai(content: str, api_key: str) -> str:
    """Send MusicXML to the OpenAI Chat Completions API and return the reply text.

    Args:
        content: MusicXML document text; it is appended to ``REVIEW_PROMPT``.
        api_key: OpenAI API key, sent as a bearer token.

    Returns:
        The message content of the first choice in the response.

    Raises:
        ReviewError: If the request cannot be completed (timeout, connection
            failure), the API answers with a non-200 status, the body does not
            have the expected JSON shape or carries no text content, or the
            reply was cut off at the ``max_tokens`` limit.
    """
    try:
        response = httpx.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o",
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
                "max_tokens": 16384,
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as exc:
        # Surface transport problems as ReviewError so callers only need to
        # handle one exception type for a failed review.
        msg = f"OpenAI API request failed: {exc}"
        raise ReviewError(msg) from exc

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"OpenAI API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        choice = data["choices"][0]
        text = choice["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # ValueError covers a body that is not valid JSON; the others cover a
        # JSON body without a first choice carrying a message.
        msg = f"OpenAI API returned an unexpected response: {response.text}"
        raise ReviewError(msg) from exc

    # The API may return a null content (e.g. a refusal); the caller is
    # promised a string.
    if not isinstance(text, str):
        msg = f"OpenAI API returned no text content: {response.text}"
        raise ReviewError(msg)

    # A reply stopped by the token limit is an incomplete XML document;
    # returning it would silently hand the caller a corrupt score.
    if choice.get("finish_reason") == "length":
        msg = "OpenAI response was truncated at the max_tokens limit; the MusicXML is incomplete."
        raise ReviewError(msg)

    return text


def review_mxml(mxml_path: Path, provider: LLMProvider) -> str:
    """Have an LLM proofread a MusicXML file and return its corrected version.

    The file is read as UTF-8 before the API key is looked up, so a missing
    file is reported even when no key is configured.

    Args:
        mxml_path: Location of the MusicXML file to send for review.
        provider: The LLM provider that should perform the review.

    Returns:
        The corrected MusicXML text returned by the provider.

    Raises:
        ReviewError: If the API call fails or the key is missing.
        FileNotFoundError: If the input file does not exist.
    """
    score_xml = mxml_path.read_text(encoding="utf-8")
    credential = _get_api_key(provider)

    # Claude is selected explicitly; every other provider goes to OpenAI.
    backend = _call_claude if provider == LLMProvider.CLAUDE else _call_openai
    return backend(score_xml, credential)