@app.command()
def review(
    input_file: Annotated[Path, typer.Argument(help="Path to MusicXML (.mxml) file to review.")],
    output: Annotated[
        Path | None,
        typer.Option("--output", "-o", help="Output path for corrected .mxml. Defaults to overwriting input."),
    ] = None,
    provider: Annotated[
        LLMProvider,
        typer.Option("--provider", "-p", help="LLM provider to use."),
    ] = LLMProvider.CLAUDE,
) -> None:
    """Review and fix a MusicXML file using an LLM.

    Checks that ``input_file`` exists and has a ``.mxml`` suffix, passes it to
    ``review_mxml`` with the chosen provider, and writes the returned text to
    ``output`` (or back over ``input_file`` when ``output`` is not given).

    Every failure -- a missing file, a wrong extension, a ``ReviewError`` from
    the provider call, or a filesystem/decoding error while reading or writing
    -- is reported on stderr and ends the command with exit code 1.
    """
    if not input_file.exists():
        typer.echo(f"Error: {input_file} does not exist.", err=True)
        raise typer.Exit(code=1)

    if input_file.suffix.lower() != ".mxml":
        typer.echo("Error: Input file must be a .mxml file.", err=True)
        raise typer.Exit(code=1)

    output_path = output or input_file

    try:
        corrected = review_mxml(input_file, provider)
    except ReviewError as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(code=1) from e
    except (OSError, UnicodeDecodeError) as e:
        # review_mxml reads the file as UTF-8; a directory, a permission
        # problem or non-UTF-8 bytes would otherwise surface as a traceback.
        typer.echo(f"Error: could not read {input_file}: {e}", err=True)
        raise typer.Exit(code=1) from e

    try:
        output_path.write_text(corrected, encoding="utf-8")
    except OSError as e:
        typer.echo(f"Error: could not write {output_path}: {e}", err=True)
        raise typer.Exit(code=1) from e

    typer.echo(f"Reviewed: {output_path}")
Do not include any explanation, commentary, or \ +markdown formatting. Output the raw XML directly. + +Here is the MusicXML to review: + +""" + +_TIMEOUT = 300 + + +class LLMProvider(enum.StrEnum): + """Supported LLM providers.""" + + CLAUDE = "claude" + OPENAI = "openai" + + +class ReviewError(Exception): + """Raised when LLM review fails.""" + + +def _get_api_key(provider: LLMProvider) -> str: + env_var = "ANTHROPIC_API_KEY" if provider == LLMProvider.CLAUDE else "OPENAI_API_KEY" + key = os.environ.get(env_var) + if not key: + msg = f"{env_var} environment variable is not set." + raise ReviewError(msg) + return key + + +def _call_claude(content: str, api_key: str) -> str: + response = httpx.post( + "https://api.anthropic.com/v1/messages", + headers={ + "x-api-key": api_key, + "anthropic-version": "2023-06-01", + "content-type": "application/json", + }, + json={ + "model": "claude-sonnet-4-20250514", + "max_tokens": 16384, + "messages": [{"role": "user", "content": REVIEW_PROMPT + content}], + }, + timeout=_TIMEOUT, + ) + if response.status_code != 200: # noqa: PLR2004 + msg = f"Claude API error ({response.status_code}): {response.text}" + raise ReviewError(msg) + + data = response.json() + return data["content"][0]["text"] + + +def _call_openai(content: str, api_key: str) -> str: + response = httpx.post( + "https://api.openai.com/v1/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={ + "model": "gpt-4o", + "messages": [{"role": "user", "content": REVIEW_PROMPT + content}], + "max_tokens": 16384, + }, + timeout=_TIMEOUT, + ) + if response.status_code != 200: # noqa: PLR2004 + msg = f"OpenAI API error ({response.status_code}): {response.text}" + raise ReviewError(msg) + + data = response.json() + return data["choices"][0]["message"]["content"] + + +def review_mxml(mxml_path: Path, provider: LLMProvider) -> str: + """Review a MusicXML file using an LLM and return corrected content. 
+ + Args: + mxml_path: Path to the .mxml file to review. + provider: Which LLM provider to use. + + Returns: + The corrected MusicXML content as a string. + + Raises: + ReviewError: If the API call fails or the key is missing. + FileNotFoundError: If the input file does not exist. + """ + content = mxml_path.read_text(encoding="utf-8") + api_key = _get_api_key(provider) + + if provider == LLMProvider.CLAUDE: + return _call_claude(content, api_key) + return _call_openai(content, api_key) diff --git a/tests/test_sheet_music_ocr.py b/tests/test_sheet_music_ocr.py index cde8537..cbf39e4 100644 --- a/tests/test_sheet_music_ocr.py +++ b/tests/test_sheet_music_ocr.py @@ -1,11 +1,13 @@ import zipfile from unittest.mock import patch +import httpx import pytest from typer.testing import CliRunner from python.sheet_music_ocr.audiveris import AudiverisError, find_audiveris, run_audiveris from python.sheet_music_ocr.main import SUPPORTED_EXTENSIONS, app, extract_mxml_from_mxl +from python.sheet_music_ocr.review import LLMProvider, ReviewError, review_mxml runner = CliRunner() @@ -105,14 +107,14 @@ class TestRunAudiveris: class TestCli: def test_missing_input_file(self, tmp_path): - result = runner.invoke(app, [str(tmp_path / "nonexistent.pdf")]) + result = runner.invoke(app, ["convert", str(tmp_path / "nonexistent.pdf")]) assert result.exit_code == 1 assert "does not exist" in result.output def test_unsupported_format(self, tmp_path): bad_file = tmp_path / "music.bmp" bad_file.touch() - result = runner.invoke(app, [str(bad_file)]) + result = runner.invoke(app, ["convert", str(bad_file)]) assert result.exit_code == 1 assert "Unsupported format" in result.output @@ -133,7 +135,7 @@ class TestCli: make_mxl(mxl_path, b"") with patch("python.sheet_music_ocr.main.run_audiveris", return_value=mxl_path): - result = runner.invoke(app, [str(input_file), "-o", str(output_file)]) + result = runner.invoke(app, ["convert", str(input_file), "-o", str(output_file)]) assert result.exit_code == 
# Endpoints and patch target shared by the review tests.
_CLAUDE_URL = "https://api.anthropic.com/v1/messages"
_OPENAI_URL = "https://api.openai.com/v1/chat/completions"
_HTTPX_POST = "python.sheet_music_ocr.review.httpx.post"


def _make_score(tmp_path):
    """Create an empty ``score.mxml`` under *tmp_path* and return its path."""
    score = tmp_path / "score.mxml"
    score.write_text("")
    return score


def _fake_response(url, status, **body):
    """Build an ``httpx.Response`` for a POST to *url* with the given status and body."""
    return httpx.Response(status, request=httpx.Request("POST", url), **body)


class TestReviewMxml:
    """Unit tests for ``review_mxml`` with the HTTP layer patched out."""

    def test_raises_when_no_api_key(self, tmp_path, monkeypatch):
        score = _make_score(tmp_path)
        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

        with pytest.raises(ReviewError, match="ANTHROPIC_API_KEY"):
            review_mxml(score, LLMProvider.CLAUDE)

    def test_raises_when_no_openai_key(self, tmp_path, monkeypatch):
        score = _make_score(tmp_path)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)

        with pytest.raises(ReviewError, match="OPENAI_API_KEY"):
            review_mxml(score, LLMProvider.OPENAI)

    def test_claude_success(self, tmp_path, monkeypatch):
        score = _make_score(tmp_path)
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

        expected = ""
        reply = _fake_response(_CLAUDE_URL, 200, json={"content": [{"text": expected}]})

        with patch(_HTTPX_POST, return_value=reply):
            actual = review_mxml(score, LLMProvider.CLAUDE)

        assert actual == expected

    def test_openai_success(self, tmp_path, monkeypatch):
        score = _make_score(tmp_path)
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")

        expected = ""
        reply = _fake_response(
            _OPENAI_URL,
            200,
            json={"choices": [{"message": {"content": expected}}]},
        )

        with patch(_HTTPX_POST, return_value=reply):
            actual = review_mxml(score, LLMProvider.OPENAI)

        assert actual == expected

    def test_claude_api_error(self, tmp_path, monkeypatch):
        score = _make_score(tmp_path)
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

        reply = _fake_response(_CLAUDE_URL, 500, text="Internal Server Error")

        with (
            patch(_HTTPX_POST, return_value=reply),
            pytest.raises(ReviewError, match="Claude API error"),
        ):
            review_mxml(score, LLMProvider.CLAUDE)

    def test_openai_api_error(self, tmp_path, monkeypatch):
        score = _make_score(tmp_path)
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")

        reply = _fake_response(_OPENAI_URL, 429, text="Rate limited")

        with (
            patch(_HTTPX_POST, return_value=reply),
            pytest.raises(ReviewError, match="OpenAI API error"),
        ):
            review_mxml(score, LLMProvider.OPENAI)


class TestReviewCli:
    """End-to-end tests for the ``review`` CLI command via the Typer runner."""

    def test_missing_input_file(self, tmp_path):
        missing = tmp_path / "nonexistent.mxml"
        result = runner.invoke(app, ["review", str(missing)])
        assert result.exit_code == 1
        assert "does not exist" in result.output

    def test_wrong_extension(self, tmp_path):
        wrong = tmp_path / "score.pdf"
        wrong.touch()
        result = runner.invoke(app, ["review", str(wrong)])
        assert result.exit_code == 1
        assert ".mxml" in result.output

    def test_successful_review(self, tmp_path, monkeypatch):
        score = _make_score(tmp_path)
        destination = tmp_path / "corrected.mxml"
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

        expected = ""
        reply = _fake_response(_CLAUDE_URL, 200, json={"content": [{"text": expected}]})

        with patch(_HTTPX_POST, return_value=reply):
            result = runner.invoke(app, ["review", str(score), "-o", str(destination)])

        assert result.exit_code == 0
        assert "Reviewed" in result.output
        assert destination.read_text() == expected

    def test_overwrites_input_by_default(self, tmp_path, monkeypatch):
        score = _make_score(tmp_path)
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

        expected = ""
        reply = _fake_response(_CLAUDE_URL, 200, json={"content": [{"text": expected}]})

        with patch(_HTTPX_POST, return_value=reply):
            result = runner.invoke(app, ["review", str(score)])

        assert result.exit_code == 0
        assert score.read_text() == expected