mirror of
https://github.com/RichieCahill/dotfiles.git
synced 2026-04-21 06:39:09 -04:00
New `sheet-music-ocr review` command that sends MusicXML output to an LLM (Claude or OpenAI, configurable via --provider flag) for reviewing and fixing common OCR errors like incorrect pitches, rhythms, key signatures, and garbled lyrics. Uses httpx for direct API calls. https://claude.ai/code/session_017GqUbuRDT58toRaxMtfRmf
127 lines
3.6 KiB
Python
127 lines
3.6 KiB
Python
"""LLM-based MusicXML review and correction.
|
|
|
|
Supports both Claude (Anthropic) and OpenAI APIs for reviewing
|
|
MusicXML output from Audiveris and suggesting/applying fixes.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import enum
|
|
import os
|
|
from typing import TYPE_CHECKING
|
|
|
|
import httpx
|
|
|
|
if TYPE_CHECKING:
|
|
from pathlib import Path
|
|
|
|
REVIEW_PROMPT = """\
|
|
You are a music notation expert. Review the following MusicXML file produced by \
|
|
optical music recognition (Audiveris). Look for and fix common OCR errors including:
|
|
|
|
- Incorrect note pitches or durations
|
|
- Wrong or missing key signatures, time signatures, or clefs
|
|
- Incorrect rest durations or placements
|
|
- Missing or incorrect accidentals
|
|
- Wrong beam groupings or tuplets
|
|
- Garbled or misspelled lyrics and text annotations
|
|
- Missing or incorrect dynamic markings
|
|
- Incorrect measure numbers or barline types
|
|
- Voice/staff assignment errors
|
|
|
|
Return ONLY the corrected MusicXML. Do not include any explanation, commentary, or \
|
|
markdown formatting. Output the raw XML directly.
|
|
|
|
Here is the MusicXML to review:
|
|
|
|
"""
|
|
|
|
# Request timeout passed as ``timeout=`` to ``httpx.post`` for both provider calls
# (httpx treats a bare number as seconds).
_TIMEOUT = 300
|
|
|
|
|
|
class LLMProvider(enum.StrEnum):
    """Supported LLM providers.

    Members are ``str`` instances, so each one compares equal to its lowercase
    value and can be constructed from it (``LLMProvider("claude")``).
    """

    # Uses the ANTHROPIC_API_KEY environment variable and the Claude call path.
    CLAUDE = "claude"
    # Uses the OPENAI_API_KEY environment variable and the OpenAI call path.
    OPENAI = "openai"
|
|
|
|
|
|
class ReviewError(Exception):
    """Raised when LLM review fails.

    Raised in this module when the provider's API key environment variable is
    unset or when a provider API responds with a non-200 status.
    """
|
|
|
|
|
|
def _get_api_key(provider: LLMProvider) -> str:
    """Return the API key for *provider* from the process environment.

    Claude reads ``ANTHROPIC_API_KEY``; any other provider reads
    ``OPENAI_API_KEY``.

    Args:
        provider: The LLM provider whose key is needed.

    Returns:
        The non-empty value of the provider's environment variable.

    Raises:
        ReviewError: If the variable is unset or empty.
    """
    if provider == LLMProvider.CLAUDE:
        variable = "ANTHROPIC_API_KEY"
    else:
        variable = "OPENAI_API_KEY"

    value = os.environ.get(variable)
    if value:
        return value

    # Both "unset" and "set to an empty string" are reported the same way.
    msg = f"{variable} environment variable is not set."
    raise ReviewError(msg)
|
|
|
|
|
|
def _call_claude(content: str, api_key: str) -> str:
    """Send MusicXML to the Anthropic Messages API and return the reply text.

    Args:
        content: MusicXML document text; appended to ``REVIEW_PROMPT`` to form
            the single user message.
        api_key: Anthropic API key, sent in the ``x-api-key`` header.

    Returns:
        The text of the model's reply (all text blocks concatenated).

    Raises:
        ReviewError: If the request cannot be completed (timeout, connection
            failure), the API answers with a non-200 status, the response body
            is not shaped as expected, the reply contains no text, or the reply
            was cut off at the ``max_tokens`` limit.
    """
    try:
        response = httpx.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={
                "model": "claude-sonnet-4-20250514",
                "max_tokens": 16384,
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as exc:
        # Timeouts and connection failures never produce a response object;
        # surface them through the module's own error type.
        msg = f"Claude API request failed: {exc}"
        raise ReviewError(msg) from exc

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"Claude API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        stop_reason = data.get("stop_reason")
        text = "".join(block["text"] for block in data["content"] if block.get("type") == "text")
    except (ValueError, LookupError, TypeError, AttributeError) as exc:
        msg = f"Claude API returned an unexpected response: {response.text}"
        raise ReviewError(msg) from exc

    if stop_reason == "max_tokens":
        # A reply that hit the output limit is an incomplete XML document.
        msg = "Claude response was truncated at max_tokens; corrected MusicXML is incomplete."
        raise ReviewError(msg)
    if not text:
        msg = "Claude API returned no text content."
        raise ReviewError(msg)

    return text
|
|
|
|
|
|
def _call_openai(content: str, api_key: str) -> str:
    """Send MusicXML to the OpenAI Chat Completions API and return the reply text.

    Args:
        content: MusicXML document text; appended to ``REVIEW_PROMPT`` to form
            the single user message.
        api_key: OpenAI API key, sent as a bearer token.

    Returns:
        The message content of the first choice in the response.

    Raises:
        ReviewError: If the request cannot be completed (timeout, connection
            failure), the API answers with a non-200 status, the response body
            is not shaped as expected, the reply contains no text, or the reply
            was cut off at the token limit.
    """
    try:
        response = httpx.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o",
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
                "max_tokens": 16384,
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as exc:
        # Timeouts and connection failures never produce a response object;
        # surface them through the module's own error type.
        msg = f"OpenAI API request failed: {exc}"
        raise ReviewError(msg) from exc

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"OpenAI API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        choice = data["choices"][0]
        finish_reason = choice.get("finish_reason")
        text = choice["message"]["content"]
    except (ValueError, LookupError, TypeError, AttributeError) as exc:
        msg = f"OpenAI API returned an unexpected response: {response.text}"
        raise ReviewError(msg) from exc

    if finish_reason == "length":
        # A reply that hit the output limit is an incomplete XML document.
        msg = "OpenAI response was truncated at max_tokens; corrected MusicXML is incomplete."
        raise ReviewError(msg)
    if not isinstance(text, str) or not text:
        # ``content`` may be null (for example on a refusal).
        msg = "OpenAI API returned no text content."
        raise ReviewError(msg)

    return text
|
|
|
|
|
|
def review_mxml(mxml_path: Path, provider: LLMProvider) -> str:
    """Run an LLM review over a MusicXML file and return the model's output.

    The file is read as UTF-8 before the API key is looked up, so a missing
    file is reported ahead of a missing key.

    Args:
        mxml_path: Path to the .mxml file to review.
        provider: Which LLM provider to use.

    Returns:
        The corrected MusicXML content as a string.

    Raises:
        ReviewError: If the API key is missing or the provider call reports a
            failure.
        FileNotFoundError: If the input file does not exist.
    """
    xml_text = mxml_path.read_text(encoding="utf-8")
    key = _get_api_key(provider)

    # Claude is matched explicitly; every other provider goes to OpenAI.
    send = _call_claude if provider == LLMProvider.CLAUDE else _call_openai
    return send(xml_text, key)
|