mirror of
https://github.com/RichieCahill/dotfiles.git
synced 2026-04-20 22:29:09 -04:00
Add LLM review command for MusicXML correction
New `sheet-music-ocr review` command that sends MusicXML output to an LLM (Claude or OpenAI, configurable via --provider flag) for reviewing and fixing common OCR errors like incorrect pitches, rhythms, key signatures, and garbled lyrics. Uses httpx for direct API calls. https://claude.ai/code/session_017GqUbuRDT58toRaxMtfRmf
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
"""CLI tool for converting scanned sheet music to MusicXML.
|
||||
|
||||
Usage:
|
||||
sheet-music-ocr scan.pdf
|
||||
sheet-music-ocr scan.png -o output.mxml
|
||||
sheet-music-ocr convert scan.pdf
|
||||
sheet-music-ocr convert scan.png -o output.mxml
|
||||
sheet-music-ocr review output.mxml --provider claude
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -15,6 +16,7 @@ from typing import Annotated
|
||||
import typer
|
||||
|
||||
from python.sheet_music_ocr.audiveris import AudiverisError, run_audiveris
|
||||
from python.sheet_music_ocr.review import LLMProvider, ReviewError, review_mxml
|
||||
|
||||
SUPPORTED_EXTENSIONS = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"}
|
||||
|
||||
@@ -84,5 +86,38 @@ def convert(
|
||||
typer.echo(f"Written: {output_path}")
|
||||
|
||||
|
||||
@app.command()
def review(
    input_file: Annotated[Path, typer.Argument(help="Path to MusicXML (.mxml) file to review.")],
    output: Annotated[
        Path | None,
        typer.Option("--output", "-o", help="Output path for corrected .mxml. Defaults to overwriting input."),
    ] = None,
    provider: Annotated[
        LLMProvider,
        typer.Option("--provider", "-p", help="LLM provider to use."),
    ] = LLMProvider.CLAUDE,
) -> None:
    """Review and fix a MusicXML file using an LLM."""
    # NOTE: the docstring is intentionally a single line; Typer shows it as
    # the command's help text.
    #
    # Every failure path prints "Error: ..." to stderr and exits with code 1,
    # so callers and scripts never see a raw traceback.
    if not input_file.exists():
        typer.echo(f"Error: {input_file} does not exist.", err=True)
        raise typer.Exit(code=1)

    if input_file.suffix.lower() != ".mxml":
        typer.echo("Error: Input file must be a .mxml file.", err=True)
        raise typer.Exit(code=1)

    # Without --output the corrected document replaces the input file.
    output_path = output or input_file

    try:
        corrected = review_mxml(input_file, provider)
    except ReviewError as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(code=1) from e
    except (OSError, UnicodeDecodeError) as e:
        # review_mxml reads the file as UTF-8; a directory, a permission
        # problem, or a non-UTF-8 file surfaces here rather than as ReviewError.
        typer.echo(f"Error: could not read {input_file}: {e}", err=True)
        raise typer.Exit(code=1) from e

    try:
        output_path.write_text(corrected, encoding="utf-8")
    except OSError as e:
        typer.echo(f"Error: could not write {output_path}: {e}", err=True)
        raise typer.Exit(code=1) from e

    typer.echo(f"Reviewed: {output_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
||||
126
python/sheet_music_ocr/review.py
Normal file
126
python/sheet_music_ocr/review.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""LLM-based MusicXML review and correction.
|
||||
|
||||
Supports both Claude (Anthropic) and OpenAI APIs for reviewing
|
||||
MusicXML output from Audiveris and suggesting/applying fixes.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import httpx
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
# Instruction text sent to the LLM. The provider call functions in this module
# append the MusicXML file content directly after it to form the single user
# message, which is why the text ends with a blank line.
REVIEW_PROMPT = """\
You are a music notation expert. Review the following MusicXML file produced by \
optical music recognition (Audiveris). Look for and fix common OCR errors including:

- Incorrect note pitches or durations
- Wrong or missing key signatures, time signatures, or clefs
- Incorrect rest durations or placements
- Missing or incorrect accidentals
- Wrong beam groupings or tuplets
- Garbled or misspelled lyrics and text annotations
- Missing or incorrect dynamic markings
- Incorrect measure numbers or barline types
- Voice/staff assignment errors

Return ONLY the corrected MusicXML. Do not include any explanation, commentary, or \
markdown formatting. Output the raw XML directly.

Here is the MusicXML to review:

"""

# Passed as ``timeout=`` to every ``httpx.post`` call in this module
# (httpx interprets a bare number as seconds).
_TIMEOUT = 300
|
||||
|
||||
|
||||
class LLMProvider(enum.StrEnum):
|
||||
"""Supported LLM providers."""
|
||||
|
||||
CLAUDE = "claude"
|
||||
OPENAI = "openai"
|
||||
|
||||
|
||||
class ReviewError(Exception):
    """Signal that an LLM review could not be completed.

    Raised within this module when the provider's API-key environment
    variable is missing and when a provider API call does not succeed.
    """
|
||||
|
||||
|
||||
def _get_api_key(provider: LLMProvider) -> str:
    """Return the API key for ``provider`` from the environment.

    Args:
        provider: The LLM provider whose key is needed.

    Returns:
        The non-empty value of the provider's environment variable.

    Raises:
        ReviewError: If the variable is unset or empty.
    """
    # Claude reads Anthropic's variable; any other provider value uses OpenAI's.
    if provider == LLMProvider.CLAUDE:
        env_var = "ANTHROPIC_API_KEY"
    else:
        env_var = "OPENAI_API_KEY"

    if api_key := os.environ.get(env_var):
        return api_key

    msg = f"{env_var} environment variable is not set."
    raise ReviewError(msg)
|
||||
|
||||
|
||||
def _call_claude(content: str, api_key: str) -> str:
    """Send the review prompt plus ``content`` to the Anthropic Messages API.

    Args:
        content: The MusicXML text to review; appended to ``REVIEW_PROMPT``.
        api_key: Anthropic API key, sent in the ``x-api-key`` header.

    Returns:
        The text of the first content block in the response.

    Raises:
        ReviewError: If the request cannot be completed (timeout, connection
            failure), the API answers with a non-200 status, the response
            body does not have the expected shape, or the reply was cut off
            at the ``max_tokens`` limit.
    """
    try:
        response = httpx.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={
                "model": "claude-sonnet-4-20250514",
                "max_tokens": 16384,
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as e:
        # Timeouts and connection failures must surface as ReviewError so
        # callers that only handle ReviewError still report them cleanly.
        msg = f"Claude API request failed: {e}"
        raise ReviewError(msg) from e

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"Claude API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        text = data["content"][0]["text"]
        truncated = data.get("stop_reason") == "max_tokens"
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        msg = f"Claude API returned an unexpected response: {response.text}"
        raise ReviewError(msg) from e

    if not isinstance(text, str):
        msg = f"Claude API returned an unexpected response: {response.text}"
        raise ReviewError(msg)

    if truncated:
        # A reply cut off at the token limit is incomplete XML; returning it
        # would let the caller overwrite a good file with a broken one.
        msg = "Claude response was truncated at the max_tokens limit; the MusicXML is incomplete."
        raise ReviewError(msg)

    return text
|
||||
|
||||
|
||||
def _call_openai(content: str, api_key: str) -> str:
    """Send the review prompt plus ``content`` to the OpenAI Chat Completions API.

    Args:
        content: The MusicXML text to review; appended to ``REVIEW_PROMPT``.
        api_key: OpenAI API key, sent as a bearer token.

    Returns:
        The message content of the first choice in the response.

    Raises:
        ReviewError: If the request cannot be completed (timeout, connection
            failure), the API answers with a non-200 status, the response
            body does not have the expected shape, or the reply was cut off
            at the token limit.
    """
    try:
        response = httpx.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o",
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
                "max_tokens": 16384,
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as e:
        # Timeouts and connection failures must surface as ReviewError so
        # callers that only handle ReviewError still report them cleanly.
        msg = f"OpenAI API request failed: {e}"
        raise ReviewError(msg) from e

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"OpenAI API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        choice = data["choices"][0]
        text = choice["message"]["content"]
        truncated = choice.get("finish_reason") == "length"
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        msg = f"OpenAI API returned an unexpected response: {response.text}"
        raise ReviewError(msg) from e

    if not isinstance(text, str):
        # The content field can be null (for example when the model declines).
        msg = f"OpenAI API returned an unexpected response: {response.text}"
        raise ReviewError(msg)

    if truncated:
        # A reply cut off at the token limit is incomplete XML; returning it
        # would let the caller overwrite a good file with a broken one.
        msg = "OpenAI response was truncated at the max_tokens limit; the MusicXML is incomplete."
        raise ReviewError(msg)

    return text
|
||||
|
||||
|
||||
def review_mxml(mxml_path: Path, provider: LLMProvider) -> str:
    """Review a MusicXML file using an LLM and return corrected content.

    The file is read as UTF-8, sent to the selected provider, and the reply is
    returned. If the reply is wrapped in a Markdown code fence, the fence is
    removed; otherwise the reply is returned unchanged.

    Args:
        mxml_path: Path to the .mxml file to review.
        provider: Which LLM provider to use.

    Returns:
        The corrected MusicXML content as a string.

    Raises:
        ReviewError: If the API call fails, the key is missing, or the
            provider returns an empty reply.
        FileNotFoundError: If the input file does not exist.
    """
    content = mxml_path.read_text(encoding="utf-8")
    api_key = _get_api_key(provider)

    if provider == LLMProvider.CLAUDE:
        reply = _call_claude(content, api_key)
    else:
        reply = _call_openai(content, api_key)

    corrected = _strip_code_fence(reply)
    if not corrected.strip():
        # An empty result must never be handed back as a "corrected" document.
        msg = "LLM returned an empty response."
        raise ReviewError(msg)
    return corrected


def _strip_code_fence(text: str) -> str:
    """Remove one surrounding Markdown code fence from ``text``, if present.

    Text that does not start with a fence is returned unchanged.
    """
    stripped = text.strip()
    if not stripped.startswith("```"):
        return text

    # Drop the opening fence line (it may carry a language tag such as "xml")
    # and the closing fence line when there is one.
    lines = stripped.splitlines()[1:]
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]
    return "\n".join(lines) + "\n"
|
||||
@@ -1,11 +1,13 @@
|
||||
import zipfile
|
||||
from unittest.mock import patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from python.sheet_music_ocr.audiveris import AudiverisError, find_audiveris, run_audiveris
|
||||
from python.sheet_music_ocr.main import SUPPORTED_EXTENSIONS, app, extract_mxml_from_mxl
|
||||
from python.sheet_music_ocr.review import LLMProvider, ReviewError, review_mxml
|
||||
|
||||
runner = CliRunner()
|
||||
|
||||
@@ -105,14 +107,14 @@ class TestRunAudiveris:
|
||||
|
||||
class TestCli:
|
||||
def test_missing_input_file(self, tmp_path):
|
||||
result = runner.invoke(app, [str(tmp_path / "nonexistent.pdf")])
|
||||
result = runner.invoke(app, ["convert", str(tmp_path / "nonexistent.pdf")])
|
||||
assert result.exit_code == 1
|
||||
assert "does not exist" in result.output
|
||||
|
||||
def test_unsupported_format(self, tmp_path):
|
||||
bad_file = tmp_path / "music.bmp"
|
||||
bad_file.touch()
|
||||
result = runner.invoke(app, [str(bad_file)])
|
||||
result = runner.invoke(app, ["convert", str(bad_file)])
|
||||
assert result.exit_code == 1
|
||||
assert "Unsupported format" in result.output
|
||||
|
||||
@@ -133,7 +135,7 @@ class TestCli:
|
||||
make_mxl(mxl_path, b"<score-partwise/>")
|
||||
|
||||
with patch("python.sheet_music_ocr.main.run_audiveris", return_value=mxl_path):
|
||||
result = runner.invoke(app, [str(input_file), "-o", str(output_file)])
|
||||
result = runner.invoke(app, ["convert", str(input_file), "-o", str(output_file)])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert output_file.exists()
|
||||
@@ -148,7 +150,145 @@ class TestCli:
|
||||
make_mxl(mxl_path)
|
||||
|
||||
with patch("python.sheet_music_ocr.main.run_audiveris", return_value=mxl_path):
|
||||
result = runner.invoke(app, [str(input_file)])
|
||||
result = runner.invoke(app, ["convert", str(input_file)])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert (tmp_path / "score.mxml").exists()
|
||||
|
||||
|
||||
class TestReviewMxml:
    """Exercise ``review_mxml`` directly, with ``httpx.post`` patched out."""

    _POST = "python.sheet_music_ocr.review.httpx.post"
    _CLAUDE_URL = "https://api.anthropic.com/v1/messages"
    _OPENAI_URL = "https://api.openai.com/v1/chat/completions"
    _CORRECTED = "<score-partwise><part/></score-partwise>"

    @staticmethod
    def _make_score(tmp_path):
        """Write a minimal MusicXML document and return its path."""
        score = tmp_path / "score.mxml"
        score.write_text("<score-partwise/>")
        return score

    @staticmethod
    def _canned(status, url, **payload):
        """Build the response object the patched ``httpx.post`` hands back."""
        return httpx.Response(status, request=httpx.Request("POST", url), **payload)

    def test_raises_when_no_api_key(self, tmp_path, monkeypatch):
        score = self._make_score(tmp_path)
        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

        with pytest.raises(ReviewError, match="ANTHROPIC_API_KEY"):
            review_mxml(score, LLMProvider.CLAUDE)

    def test_raises_when_no_openai_key(self, tmp_path, monkeypatch):
        score = self._make_score(tmp_path)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)

        with pytest.raises(ReviewError, match="OPENAI_API_KEY"):
            review_mxml(score, LLMProvider.OPENAI)

    def test_claude_success(self, tmp_path, monkeypatch):
        score = self._make_score(tmp_path)
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
        reply = self._canned(200, self._CLAUDE_URL, json={"content": [{"text": self._CORRECTED}]})

        with patch(self._POST, return_value=reply):
            result = review_mxml(score, LLMProvider.CLAUDE)

        assert result == self._CORRECTED

    def test_openai_success(self, tmp_path, monkeypatch):
        score = self._make_score(tmp_path)
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        reply = self._canned(
            200,
            self._OPENAI_URL,
            json={"choices": [{"message": {"content": self._CORRECTED}}]},
        )

        with patch(self._POST, return_value=reply):
            result = review_mxml(score, LLMProvider.OPENAI)

        assert result == self._CORRECTED

    def test_claude_api_error(self, tmp_path, monkeypatch):
        score = self._make_score(tmp_path)
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
        reply = self._canned(500, self._CLAUDE_URL, text="Internal Server Error")

        with patch(self._POST, return_value=reply):
            with pytest.raises(ReviewError, match="Claude API error"):
                review_mxml(score, LLMProvider.CLAUDE)

    def test_openai_api_error(self, tmp_path, monkeypatch):
        score = self._make_score(tmp_path)
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        reply = self._canned(429, self._OPENAI_URL, text="Rate limited")

        with patch(self._POST, return_value=reply):
            with pytest.raises(ReviewError, match="OpenAI API error"):
                review_mxml(score, LLMProvider.OPENAI)
|
||||
|
||||
|
||||
class TestReviewCli:
    """Drive the ``review`` sub-command through the Typer test runner."""

    _POST = "python.sheet_music_ocr.review.httpx.post"
    _CORRECTED = "<score-partwise><part/></score-partwise>"

    @staticmethod
    def _make_score(tmp_path):
        """Write a minimal MusicXML document and return its path."""
        score = tmp_path / "score.mxml"
        score.write_text("<score-partwise/>")
        return score

    @classmethod
    def _claude_ok(cls):
        """Build a successful Anthropic-style response carrying the corrected XML."""
        return httpx.Response(
            200,
            json={"content": [{"text": cls._CORRECTED}]},
            request=httpx.Request("POST", "https://api.anthropic.com/v1/messages"),
        )

    def test_missing_input_file(self, tmp_path):
        missing = tmp_path / "nonexistent.mxml"

        result = runner.invoke(app, ["review", str(missing)])

        assert result.exit_code == 1
        assert "does not exist" in result.output

    def test_wrong_extension(self, tmp_path):
        not_mxml = tmp_path / "score.pdf"
        not_mxml.touch()

        result = runner.invoke(app, ["review", str(not_mxml)])

        assert result.exit_code == 1
        assert ".mxml" in result.output

    def test_successful_review(self, tmp_path, monkeypatch):
        score = self._make_score(tmp_path)
        destination = tmp_path / "corrected.mxml"
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

        with patch(self._POST, return_value=self._claude_ok()):
            result = runner.invoke(app, ["review", str(score), "-o", str(destination)])

        assert result.exit_code == 0
        assert "Reviewed" in result.output
        assert destination.read_text() == self._CORRECTED

    def test_overwrites_input_by_default(self, tmp_path, monkeypatch):
        score = self._make_score(tmp_path)
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

        with patch(self._POST, return_value=self._claude_ok()):
            result = runner.invoke(app, ["review", str(score)])

        assert result.exit_code == 0
        assert score.read_text() == self._CORRECTED
|
||||
|
||||
Reference in New Issue
Block a user