From 3301bb0aea770bf85541f02996f1d609ccd034ad Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 17 Mar 2026 11:52:55 +0000
Subject: [PATCH] Add LLM review command for MusicXML correction

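New `sheet-music-ocr review` command that sends MusicXML output to an
LLM (Claude or OpenAI, configurable via the --provider flag) to review
and fix common OCR errors such as incorrect pitches, rhythms, key
signatures, and garbled lyrics. Uses httpx for direct API calls; the API
key is read from ANTHROPIC_API_KEY or OPENAI_API_KEY.

Conversion is now invoked as `sheet-music-ocr convert <file>`; the usage
docstring and the existing CLI tests are updated from the bare
`sheet-music-ocr <file>` form accordingly.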

https://claude.ai/code/session_017GqUbuRDT58toRaxMtfRmf
---
 python/sheet_music_ocr/main.py   |  39 +++++++-
 python/sheet_music_ocr/review.py | 126 ++++++++++++++++++++++++++
 tests/test_sheet_music_ocr.py    | 148 ++++++++++++++++++++++++++++++-
 3 files changed, 307 insertions(+), 6 deletions(-)
 create mode 100644 python/sheet_music_ocr/review.py

diff --git a/python/sheet_music_ocr/main.py b/python/sheet_music_ocr/main.py
index 2d957a5..f8131ff 100644
--- a/python/sheet_music_ocr/main.py
+++ b/python/sheet_music_ocr/main.py
@@ -1,8 +1,9 @@
 """CLI tool for converting scanned sheet music to MusicXML.
 
 Usage:
-    sheet-music-ocr scan.pdf
-    sheet-music-ocr scan.png -o output.mxml
+    sheet-music-ocr convert scan.pdf
+    sheet-music-ocr convert scan.png -o output.mxml
+    sheet-music-ocr review output.mxml --provider claude
 """
 
 from __future__ import annotations
@@ -15,6 +16,7 @@ from typing import Annotated
 import typer
 
 from python.sheet_music_ocr.audiveris import AudiverisError, run_audiveris
+from python.sheet_music_ocr.review import LLMProvider, ReviewError, review_mxml
 
 SUPPORTED_EXTENSIONS = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"}
 
@@ -84,5 +86,62 @@ def convert(
     typer.echo(f"Written: {output_path}")
 
 
+@app.command()
+def review(
+    input_file: Annotated[Path, typer.Argument(help="Path to MusicXML (.mxml) file to review.")],
+    output: Annotated[
+        Path | None,
+        typer.Option("--output", "-o", help="Output path for corrected .mxml. Defaults to overwriting input."),
+    ] = None,
+    provider: Annotated[
+        LLMProvider,
+        typer.Option("--provider", "-p", help="LLM provider to use."),
+    ] = LLMProvider.CLAUDE,
+) -> None:
+    """Review and fix a MusicXML file using an LLM.
+
+    The file is sent to the selected provider and the returned text is written
+    to ``--output``, or over the input file when no output path is given.
+    Validation, ReviewError and file I/O failures are reported on stderr with
+    exit code 1, and nothing is written if the returned text is blank.
+    """
+    if not input_file.exists():
+        typer.echo(f"Error: {input_file} does not exist.", err=True)
+        raise typer.Exit(code=1)
+
+    # A directory named like "scores.mxml" would pass the suffix check below
+    # and then fail inside read_text() with an unhandled traceback.
+    if not input_file.is_file():
+        typer.echo(f"Error: {input_file} is not a file.", err=True)
+        raise typer.Exit(code=1)
+
+    if input_file.suffix.lower() != ".mxml":
+        typer.echo("Error: Input file must be a .mxml file.", err=True)
+        raise typer.Exit(code=1)
+
+    output_path = output or input_file
+
+    try:
+        corrected = review_mxml(input_file, provider)
+    except (ReviewError, OSError, UnicodeDecodeError) as e:
+        # review_mxml reads the file itself, so unreadable or non-UTF-8 input
+        # surfaces here alongside provider failures.
+        typer.echo(f"Error: {e}", err=True)
+        raise typer.Exit(code=1) from e
+
+    # The default destination is the input file; never replace it with nothing.
+    if not corrected.strip():
+        typer.echo("Error: LLM returned an empty response; nothing was written.", err=True)
+        raise typer.Exit(code=1)
+
+    try:
+        output_path.write_text(corrected, encoding="utf-8")
+    except OSError as e:
+        typer.echo(f"Error: could not write {output_path}: {e}", err=True)
+        raise typer.Exit(code=1) from e
+
+    typer.echo(f"Reviewed: {output_path}")
+
+
 if __name__ == "__main__":
     app()
diff --git a/python/sheet_music_ocr/review.py b/python/sheet_music_ocr/review.py
new file mode 100644
index 0000000..d596603
--- /dev/null
+++ b/python/sheet_music_ocr/review.py
@@ -0,0 +1,205 @@
+"""LLM-based MusicXML review and correction.
+
+Supports both Claude (Anthropic) and OpenAI APIs for reviewing
+MusicXML output from Audiveris and suggesting/applying fixes.
+"""
+
+from __future__ import annotations
+
+import enum
+import os
+from typing import TYPE_CHECKING, Any
+
+import httpx
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+REVIEW_PROMPT = """\
+You are a music notation expert. Review the following MusicXML file produced by \
+optical music recognition (Audiveris). Look for and fix common OCR errors including:
+
+- Incorrect note pitches or durations
+- Wrong or missing key signatures, time signatures, or clefs
+- Incorrect rest durations or placements
+- Missing or incorrect accidentals
+- Wrong beam groupings or tuplets
+- Garbled or misspelled lyrics and text annotations
+- Missing or incorrect dynamic markings
+- Incorrect measure numbers or barline types
+- Voice/staff assignment errors
+
+Return ONLY the corrected MusicXML. Do not include any explanation, commentary, or \
+markdown formatting. Output the raw XML directly.
+
+Here is the MusicXML to review:
+
+"""
+
+# Timeout, in seconds, passed to every provider request.
+_TIMEOUT = 300
+
+# Output-token cap sent to both providers. A reply that reaches the cap is cut
+# off mid-document, so it is reported as an error instead of being returned.
+_MAX_TOKENS = 16384
+
+
+class LLMProvider(enum.StrEnum):
+    """Supported LLM providers."""
+
+    CLAUDE = "claude"
+    OPENAI = "openai"
+
+
+class ReviewError(Exception):
+    """Raised when LLM review fails."""
+
+
+def _get_api_key(provider: LLMProvider) -> str:
+    """Return the API key for ``provider`` from its environment variable.
+
+    Raises:
+        ReviewError: If the variable is unset or empty.
+    """
+    env_var = "ANTHROPIC_API_KEY" if provider == LLMProvider.CLAUDE else "OPENAI_API_KEY"
+    key = os.environ.get(env_var)
+    if not key:
+        msg = f"{env_var} environment variable is not set."
+        raise ReviewError(msg)
+    return key
+
+
+def _post_json(label: str, url: str, headers: dict[str, str], payload: dict[str, Any]) -> dict[str, Any]:
+    """POST ``payload`` as JSON and return the decoded JSON object.
+
+    Every failure mode is converted to ``ReviewError`` so callers only have
+    one exception type to handle; ``label`` names the provider in messages.
+    """
+    try:
+        response = httpx.post(url, headers=headers, json=payload, timeout=_TIMEOUT)
+    except httpx.HTTPError as e:
+        # Timeouts, DNS failures, refused connections, etc.
+        msg = f"{label} API request failed: {e}"
+        raise ReviewError(msg) from e
+
+    if response.status_code != 200:  # noqa: PLR2004
+        msg = f"{label} API error ({response.status_code}): {response.text}"
+        raise ReviewError(msg)
+
+    try:
+        data = response.json()
+    except ValueError as e:
+        msg = f"{label} API returned a response that is not valid JSON."
+        raise ReviewError(msg) from e
+
+    if not isinstance(data, dict):
+        msg = f"{label} API returned an unexpected response format."
+        raise ReviewError(msg)
+    return data
+
+
+def _strip_code_fence(text: str) -> str:
+    """Trim surrounding whitespace and one enclosing Markdown code fence, if any."""
+    stripped = text.strip()
+    if not stripped.startswith("```"):
+        return stripped
+    # Drop the opening fence line (which may carry a language tag such as
+    # "xml") and the closing fence line when present.
+    lines = stripped.splitlines()[1:]
+    if lines and lines[-1].strip() == "```":
+        lines.pop()
+    return "\n".join(lines).strip()
+
+
+def _finalize(label: str, text: object, *, truncated: bool) -> str:
+    """Validate a model reply and return it as cleaned XML text.
+
+    Raises:
+        ReviewError: If the reply was truncated, is not text, or does not look like XML.
+    """
+    if truncated:
+        msg = f"{label} response was truncated at the {_MAX_TOKENS}-token output limit."
+        raise ReviewError(msg)
+    if not isinstance(text, str):
+        msg = f"{label} API returned no text content."
+        raise ReviewError(msg)
+
+    cleaned = _strip_code_fence(text)
+    # Cheap necessary condition for an XML document; rejects prose replies and
+    # refusals before the caller writes them to disk.
+    if not (cleaned.startswith("<") and cleaned.endswith(">")):
+        msg = f"{label} response does not look like XML."
+        raise ReviewError(msg)
+    return cleaned
+
+
+def _call_claude(content: str, api_key: str) -> str:
+    """Send the review prompt plus ``content`` to the Anthropic Messages API."""
+    data = _post_json(
+        "Claude",
+        "https://api.anthropic.com/v1/messages",
+        headers={
+            "x-api-key": api_key,
+            "anthropic-version": "2023-06-01",
+            "content-type": "application/json",
+        },
+        payload={
+            "model": "claude-sonnet-4-20250514",
+            "max_tokens": _MAX_TOKENS,
+            "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
+        },
+    )
+    try:
+        text = data["content"][0]["text"]
+    except (KeyError, IndexError, TypeError) as e:
+        msg = "Claude API returned an unexpected response format."
+        raise ReviewError(msg) from e
+    return _finalize("Claude", text, truncated=data.get("stop_reason") == "max_tokens")
+
+
+def _call_openai(content: str, api_key: str) -> str:
+    """Send the review prompt plus ``content`` to the OpenAI Chat Completions API."""
+    data = _post_json(
+        "OpenAI",
+        "https://api.openai.com/v1/chat/completions",
+        headers={
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+        },
+        payload={
+            "model": "gpt-4o",
+            "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
+            "max_tokens": _MAX_TOKENS,
+        },
+    )
+    try:
+        choice = data["choices"][0]
+        text = choice["message"]["content"]
+        finish_reason = choice.get("finish_reason")
+    except (KeyError, IndexError, TypeError) as e:
+        msg = "OpenAI API returned an unexpected response format."
+        raise ReviewError(msg) from e
+    return _finalize("OpenAI", text, truncated=finish_reason == "length")
+
+
+def review_mxml(mxml_path: Path, provider: LLMProvider) -> str:
+    """Review a MusicXML file using an LLM and return corrected content.
+
+    Args:
+        mxml_path: Path to the .mxml file to review.
+        provider: Which LLM provider to use.
+
+    Returns:
+        The corrected MusicXML content as a string.
+
+    Raises:
+        ReviewError: If the key is missing, the API call fails, or the reply
+            is truncated or does not look like XML.
+        FileNotFoundError: If the input file does not exist.
+    """
+    content = mxml_path.read_text(encoding="utf-8")
+    api_key = _get_api_key(provider)
+
+    if provider == LLMProvider.CLAUDE:
+        return _call_claude(content, api_key)
+    return _call_openai(content, api_key)
diff --git a/tests/test_sheet_music_ocr.py b/tests/test_sheet_music_ocr.py
index cde8537..cbf39e4 100644
--- a/tests/test_sheet_music_ocr.py
+++ b/tests/test_sheet_music_ocr.py
@@ -1,11 +1,13 @@
 import zipfile
 from unittest.mock import patch
 
+import httpx
 import pytest
 from typer.testing import CliRunner
 
 from python.sheet_music_ocr.audiveris import AudiverisError, find_audiveris, run_audiveris
 from python.sheet_music_ocr.main import SUPPORTED_EXTENSIONS, app, extract_mxml_from_mxl
+from python.sheet_music_ocr.review import LLMProvider, ReviewError, review_mxml
 
 runner = CliRunner()
 
@@ -105,14 +107,14 @@ class TestRunAudiveris:
 
 class TestCli:
     def test_missing_input_file(self, tmp_path):
-        result = runner.invoke(app, [str(tmp_path / "nonexistent.pdf")])
+        result = runner.invoke(app, ["convert", str(tmp_path / "nonexistent.pdf")])
         assert result.exit_code == 1
         assert "does not exist" in result.output
 
     def test_unsupported_format(self, tmp_path):
         bad_file = tmp_path / "music.bmp"
         bad_file.touch()
-        result = runner.invoke(app, [str(bad_file)])
+        result = runner.invoke(app, ["convert", str(bad_file)])
         assert result.exit_code == 1
         assert "Unsupported format" in result.output
 
@@ -133,7 +135,7 @@ class TestCli:
         make_mxl(mxl_path, b"<score-partwise/>")
 
         with patch("python.sheet_music_ocr.main.run_audiveris", return_value=mxl_path):
-            result = runner.invoke(app, [str(input_file), "-o", str(output_file)])
+            result = runner.invoke(app, ["convert", str(input_file), "-o", str(output_file)])
 
         assert result.exit_code == 0
         assert output_file.exists()
@@ -148,7 +150,122 @@ class TestCli:
         make_mxl(mxl_path)
 
         with patch("python.sheet_music_ocr.main.run_audiveris", return_value=mxl_path):
-            result = runner.invoke(app, [str(input_file)])
+            result = runner.invoke(app, ["convert", str(input_file)])
 
         assert result.exit_code == 0
         assert (tmp_path / "score.mxml").exists()
+
+
+_CORRECTED = "<score-partwise><part/></score-partwise>"
+_CLAUDE_URL = "https://api.anthropic.com/v1/messages"
+_OPENAI_URL = "https://api.openai.com/v1/chat/completions"
+_POST_TARGET = "python.sheet_music_ocr.review.httpx.post"
+
+
+def _write_score(directory):
+    """Create a minimal MusicXML file named score.mxml and return its path."""
+    score = directory / "score.mxml"
+    score.write_text("<score-partwise/>")
+    return score
+
+
+def _fake_response(url, status, **body):
+    """Build the ``httpx.Response`` a POST to ``url`` would have produced."""
+    return httpx.Response(status, request=httpx.Request("POST", url), **body)
+
+
+def _claude_success():
+    """Return a 200 Anthropic reply whose text is ``_CORRECTED``."""
+    return _fake_response(_CLAUDE_URL, 200, json={"content": [{"text": _CORRECTED}]})
+
+
+class TestReviewMxml:
+    """Tests for ``review_mxml`` with ``httpx.post`` patched out."""
+
+    def test_raises_when_no_api_key(self, tmp_path, monkeypatch):
+        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+        score = _write_score(tmp_path)
+
+        with pytest.raises(ReviewError, match="ANTHROPIC_API_KEY"):
+            review_mxml(score, LLMProvider.CLAUDE)
+
+    def test_raises_when_no_openai_key(self, tmp_path, monkeypatch):
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        score = _write_score(tmp_path)
+
+        with pytest.raises(ReviewError, match="OPENAI_API_KEY"):
+            review_mxml(score, LLMProvider.OPENAI)
+
+    def test_claude_success(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+        score = _write_score(tmp_path)
+
+        with patch(_POST_TARGET, return_value=_claude_success()):
+            result = review_mxml(score, LLMProvider.CLAUDE)
+
+        assert result == _CORRECTED
+
+    def test_openai_success(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
+        score = _write_score(tmp_path)
+        reply = _fake_response(_OPENAI_URL, 200, json={"choices": [{"message": {"content": _CORRECTED}}]})
+
+        with patch(_POST_TARGET, return_value=reply):
+            result = review_mxml(score, LLMProvider.OPENAI)
+
+        assert result == _CORRECTED
+
+    def test_claude_api_error(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+        score = _write_score(tmp_path)
+        reply = _fake_response(_CLAUDE_URL, 500, text="Internal Server Error")
+
+        with patch(_POST_TARGET, return_value=reply), pytest.raises(ReviewError, match="Claude API error"):
+            review_mxml(score, LLMProvider.CLAUDE)
+
+    def test_openai_api_error(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
+        score = _write_score(tmp_path)
+        reply = _fake_response(_OPENAI_URL, 429, text="Rate limited")
+
+        with patch(_POST_TARGET, return_value=reply), pytest.raises(ReviewError, match="OpenAI API error"):
+            review_mxml(score, LLMProvider.OPENAI)
+
+
+class TestReviewCli:
+    """Tests for the ``review`` subcommand driven through ``CliRunner``."""
+
+    def test_missing_input_file(self, tmp_path):
+        missing = tmp_path / "nonexistent.mxml"
+        result = runner.invoke(app, ["review", str(missing)])
+        assert result.exit_code == 1
+        assert "does not exist" in result.output
+
+    def test_wrong_extension(self, tmp_path):
+        pdf = tmp_path / "score.pdf"
+        pdf.touch()
+        result = runner.invoke(app, ["review", str(pdf)])
+        assert result.exit_code == 1
+        assert ".mxml" in result.output
+
+    def test_successful_review(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+        score = _write_score(tmp_path)
+        destination = tmp_path / "corrected.mxml"
+
+        with patch(_POST_TARGET, return_value=_claude_success()):
+            result = runner.invoke(app, ["review", str(score), "-o", str(destination)])
+
+        assert result.exit_code == 0
+        assert "Reviewed" in result.output
+        assert destination.read_text() == _CORRECTED
+
+    def test_overwrites_input_by_default(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+        score = _write_score(tmp_path)
+
+        with patch(_POST_TARGET, return_value=_claude_success()):
+            result = runner.invoke(app, ["review", str(score)])
+
+        assert result.exit_code == 0
+        assert score.read_text() == _CORRECTED