Add LLM review command for MusicXML correction

New `sheet-music-ocr review` command that sends MusicXML output to an
LLM (Claude or OpenAI, configurable via --provider flag) for reviewing
and fixing common OCR errors like incorrect pitches, rhythms, key
signatures, and garbled lyrics. Uses httpx for direct API calls.

https://claude.ai/code/session_017GqUbuRDT58toRaxMtfRmf
This commit is contained in:
Claude
2026-03-17 11:52:55 +00:00
parent a076cb47f3
commit 3301bb0aea
3 changed files with 307 additions and 6 deletions

View File

@@ -1,8 +1,9 @@
"""CLI tool for converting scanned sheet music to MusicXML.
Usage:
sheet-music-ocr scan.pdf
sheet-music-ocr scan.png -o output.mxml
sheet-music-ocr convert scan.pdf
sheet-music-ocr convert scan.png -o output.mxml
sheet-music-ocr review output.mxml --provider claude
"""
from __future__ import annotations
@@ -15,6 +16,7 @@ from typing import Annotated
import typer
from python.sheet_music_ocr.audiveris import AudiverisError, run_audiveris
from python.sheet_music_ocr.review import LLMProvider, ReviewError, review_mxml
SUPPORTED_EXTENSIONS = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"}
@@ -84,5 +86,38 @@ def convert(
typer.echo(f"Written: {output_path}")
@app.command()
def review(
    input_file: Annotated[Path, typer.Argument(help="Path to MusicXML (.mxml) file to review.")],
    output: Annotated[
        Path | None,
        typer.Option("--output", "-o", help="Output path for corrected .mxml. Defaults to overwriting input."),
    ] = None,
    provider: Annotated[
        LLMProvider,
        typer.Option("--provider", "-p", help="LLM provider to use."),
    ] = LLMProvider.CLAUDE,
) -> None:
    """Review and fix a MusicXML file using an LLM."""
    # NOTE: the docstring above is kept to a single line on purpose -- Typer
    # shows it as the command's --help text.

    # Validate the input up front so the user gets a short message and exit
    # code 1 instead of a traceback.
    if not input_file.exists():
        typer.echo(f"Error: {input_file} does not exist.", err=True)
        raise typer.Exit(code=1)
    if not input_file.is_file():
        # A directory whose name ends in ".mxml" would otherwise pass the
        # checks and fail later while being read.
        typer.echo(f"Error: {input_file} is not a file.", err=True)
        raise typer.Exit(code=1)
    if input_file.suffix.lower() != ".mxml":
        typer.echo("Error: Input file must be a .mxml file.", err=True)
        raise typer.Exit(code=1)

    # Without --output the corrected document replaces the input file.
    output_path = output if output is not None else input_file

    try:
        corrected = review_mxml(input_file, provider)
    except (ReviewError, OSError, UnicodeDecodeError) as e:
        # review_mxml reads the file as UTF-8, so unreadable or non-UTF-8
        # input is reported the same way as an API failure.
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(code=1) from e

    try:
        output_path.write_text(corrected, encoding="utf-8")
    except OSError as e:
        typer.echo(f"Error: could not write {output_path}: {e}", err=True)
        raise typer.Exit(code=1) from e
    typer.echo(f"Reviewed: {output_path}")
# Invoke the CLI application object when this module is executed as a script.
if __name__ == "__main__":
    app()

View File

@@ -0,0 +1,126 @@
"""LLM-based MusicXML review and correction.
Supports both Claude (Anthropic) and OpenAI APIs for reviewing
MusicXML output from Audiveris and suggesting/applying fixes.
"""
from __future__ import annotations
import enum
import os
from typing import TYPE_CHECKING
import httpx
if TYPE_CHECKING:
from pathlib import Path
# Instruction text sent to the LLM. The MusicXML document is concatenated
# directly after it to form the single user message (see the "content" fields
# built in _call_claude and _call_openai).
REVIEW_PROMPT = """\
You are a music notation expert. Review the following MusicXML file produced by \
optical music recognition (Audiveris). Look for and fix common OCR errors including:
- Incorrect note pitches or durations
- Wrong or missing key signatures, time signatures, or clefs
- Incorrect rest durations or placements
- Missing or incorrect accidentals
- Wrong beam groupings or tuplets
- Garbled or misspelled lyrics and text annotations
- Missing or incorrect dynamic markings
- Incorrect measure numbers or barline types
- Voice/staff assignment errors
Return ONLY the corrected MusicXML. Do not include any explanation, commentary, or \
markdown formatting. Output the raw XML directly.
Here is the MusicXML to review:
"""
# Timeout handed to httpx.post for every API request (httpx interprets a bare
# number as seconds).
_TIMEOUT = 300
class LLMProvider(enum.StrEnum):
    """Supported LLM providers.

    The member values are plain strings; the CLI usage example passes one of
    them to ``--provider`` (e.g. ``--provider claude``).
    """

    # Handled by _call_claude (Anthropic Messages API endpoint).
    CLAUDE = "claude"
    # Handled by _call_openai (OpenAI chat completions endpoint).
    OPENAI = "openai"
class ReviewError(Exception):
    """Raised when LLM review fails.

    Within this module it is raised when the provider's API key environment
    variable is not set and when a provider API responds with a non-200
    status code.
    """
def _get_api_key(provider: LLMProvider) -> str:
    """Look up the API key for *provider* in the process environment.

    Args:
        provider: Provider whose key is wanted. ``CLAUDE`` reads
            ``ANTHROPIC_API_KEY``; any other value reads ``OPENAI_API_KEY``.

    Returns:
        The non-empty value of the environment variable.

    Raises:
        ReviewError: If the variable is unset or set to an empty string.
    """
    if provider == LLMProvider.CLAUDE:
        env_var = "ANTHROPIC_API_KEY"
    else:
        env_var = "OPENAI_API_KEY"

    api_key = os.environ.get(env_var)
    if api_key:
        return api_key

    # Missing and empty values are treated alike.
    msg = f"{env_var} environment variable is not set."
    raise ReviewError(msg)
def _call_claude(content: str, api_key: str) -> str:
    """Send the MusicXML to the Anthropic Messages API and return the reply text.

    Args:
        content: MusicXML document text; it is appended to ``REVIEW_PROMPT``
            to form the single user message.
        api_key: Anthropic API key, sent in the ``x-api-key`` header.

    Returns:
        The text of the first content block of the model's reply.

    Raises:
        ReviewError: If the request cannot be completed (timeout, connection
            failure, ...), the API answers with a non-200 status, the reply
            was cut off at the ``max_tokens`` limit, or the response body
            does not have the expected shape.
    """
    try:
        response = httpx.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={
                "model": "claude-sonnet-4-20250514",
                "max_tokens": 16384,
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as e:
        # Transport-level failures are surfaced as ReviewError so callers only
        # have to handle one exception type.
        msg = f"Claude API request failed: {e}"
        raise ReviewError(msg) from e

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"Claude API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        text = data["content"][0]["text"]
        stop_reason = data.get("stop_reason")
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        # ValueError covers a body that is not valid JSON; the others cover
        # JSON that lacks the expected content[0].text structure.
        msg = "Claude API returned an unexpected response body."
        raise ReviewError(msg) from e

    if stop_reason == "max_tokens":
        # A reply cut off mid-document would be written out as broken XML.
        msg = "Claude response was truncated at the max_tokens limit; refusing to return incomplete MusicXML."
        raise ReviewError(msg)
    if not isinstance(text, str):
        msg = "Claude API returned a non-text content block."
        raise ReviewError(msg)
    return text
def _call_openai(content: str, api_key: str) -> str:
    """Send the MusicXML to the OpenAI chat completions API and return the reply text.

    Args:
        content: MusicXML document text; it is appended to ``REVIEW_PROMPT``
            to form the single user message.
        api_key: OpenAI API key, sent as a bearer token.

    Returns:
        The message content of the first choice in the reply.

    Raises:
        ReviewError: If the request cannot be completed (timeout, connection
            failure, ...), the API answers with a non-200 status, the reply
            was cut off at the token limit, or the response body does not
            have the expected shape (including a null message content).
    """
    try:
        response = httpx.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o",
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
                "max_tokens": 16384,
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as e:
        # Transport-level failures are surfaced as ReviewError so callers only
        # have to handle one exception type.
        msg = f"OpenAI API request failed: {e}"
        raise ReviewError(msg) from e

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"OpenAI API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        choice = data["choices"][0]
        text = choice["message"]["content"]
        finish_reason = choice.get("finish_reason")
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        # ValueError covers a body that is not valid JSON; the others cover
        # JSON that lacks the expected choices[0].message.content structure.
        msg = "OpenAI API returned an unexpected response body."
        raise ReviewError(msg) from e

    if finish_reason == "length":
        # A reply cut off mid-document would be written out as broken XML.
        msg = "OpenAI response was truncated at the max_tokens limit; refusing to return incomplete MusicXML."
        raise ReviewError(msg)
    if not isinstance(text, str):
        # The content field can be null, which would violate the -> str contract.
        msg = "OpenAI API returned no text content."
        raise ReviewError(msg)
    return text
def _strip_code_fence(text: str) -> str:
    """Return *text* without surrounding whitespace or a wrapping Markdown code fence."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    lines = stripped.splitlines()[1:]  # drop the opening fence (and any language tag)
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]  # drop the closing fence
    return "\n".join(lines).strip()


def review_mxml(mxml_path: Path, provider: LLMProvider) -> str:
    """Review a MusicXML file using an LLM and return corrected content.

    The model's reply is cleaned before it is returned: surrounding whitespace
    is removed and, if the model wrapped its answer in a Markdown code fence
    despite the prompt's instructions, the fence lines are dropped so the
    result starts directly with the XML.

    Args:
        mxml_path: Path to the .mxml file to review.
        provider: Which LLM provider to use.

    Returns:
        The corrected MusicXML content as a string.

    Raises:
        ReviewError: If the API call fails, the key is missing, or the
            provider returns an empty or non-text reply.
        FileNotFoundError: If the input file does not exist.
    """
    content = mxml_path.read_text(encoding="utf-8")
    api_key = _get_api_key(provider)

    if provider == LLMProvider.CLAUDE:
        raw = _call_claude(content, api_key)
    else:
        raw = _call_openai(content, api_key)

    if not isinstance(raw, str):
        msg = "LLM returned no text content."
        raise ReviewError(msg)

    corrected = _strip_code_fence(raw)
    if not corrected:
        # Returning "" would let the caller overwrite the score with an empty file.
        msg = "LLM returned an empty response."
        raise ReviewError(msg)
    return corrected