mirror of
https://github.com/RichieCahill/dotfiles.git
synced 2026-04-21 06:39:09 -04:00
Compare commits
2 Commits
feature/cr
...
claude/she
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3301bb0aea | ||
|
|
a076cb47f3 |
@@ -27,6 +27,7 @@ dependencies = [
|
|||||||
[project.scripts]
|
[project.scripts]
|
||||||
database = "python.database_cli:app"
|
database = "python.database_cli:app"
|
||||||
van-inventory = "python.van_inventory.main:serve"
|
van-inventory = "python.van_inventory.main:serve"
|
||||||
|
sheet-music-ocr = "python.sheet_music_ocr.main:app"
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
dev = [
|
dev = [
|
||||||
|
|||||||
1
python/sheet_music_ocr/__init__.py
Normal file
1
python/sheet_music_ocr/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Sheet music OCR tool using Audiveris."""
|
||||||
62
python/sheet_music_ocr/audiveris.py
Normal file
62
python/sheet_music_ocr/audiveris.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""Audiveris subprocess wrapper for optical music recognition."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class AudiverisError(Exception):
    """Signal that Audiveris could not be located, exited with an error, or produced no output."""
|
||||||
|
|
||||||
|
|
||||||
|
def find_audiveris() -> str:
    """Locate the ``audiveris`` executable through a PATH lookup.

    Returns:
        The location reported by ``shutil.which`` for the ``audiveris`` command.

    Raises:
        AudiverisError: If no ``audiveris`` executable is found on PATH.
    """
    executable = shutil.which("audiveris")
    if executable:
        return executable

    # ``shutil.which`` returned None (or an empty string): nothing usable on PATH.
    msg = "Audiveris not found on PATH. Install it via 'nix develop' or add it to your environment."
    raise AudiverisError(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def run_audiveris(input_path: Path, output_dir: Path) -> Path:
    """Run Audiveris on an input file and return the path to the generated .mxl.

    Audiveris is invoked in batch mode with ``-export`` and told to write under
    ``output_dir``; the directory is then searched recursively for ``*.mxl``.

    Args:
        input_path: Path to the input sheet music file (PDF, PNG, JPG, TIFF).
        output_dir: Directory where Audiveris will write its output.

    Returns:
        Path to the generated .mxl file. When several are produced, the one
        that sorts first by path is returned so the choice is reproducible.

    Raises:
        AudiverisError: If Audiveris is not installed, cannot be launched,
            exits with a non-zero status, or produces no .mxl output.
    """
    audiveris = find_audiveris()
    command = [audiveris, "-batch", "-export", "-output", str(output_dir), str(input_path)]

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as e:
        # The executable was found but could not be started (permissions, bad
        # interpreter, ...). Callers only handle AudiverisError, so translate.
        msg = f"Audiveris failed to start: {e}"
        raise AudiverisError(msg) from e

    if result.returncode != 0:
        # Fall back to stdout when stderr is empty so the message still carries
        # whatever diagnostics were captured.
        details = result.stderr or result.stdout
        msg = f"Audiveris failed (exit {result.returncode}):\n{details}"
        raise AudiverisError(msg)

    # rglob yields entries in arbitrary filesystem order; sort so that the
    # selected file is deterministic when more than one .mxl is written.
    mxl_files = sorted(output_dir.rglob("*.mxl"))
    if not mxl_files:
        msg = f"Audiveris produced no .mxl output in {output_dir}"
        raise AudiverisError(msg)

    return mxl_files[0]
|
||||||
123
python/sheet_music_ocr/main.py
Normal file
123
python/sheet_music_ocr/main.py
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
"""CLI tool for converting scanned sheet music to MusicXML.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
sheet-music-ocr convert scan.pdf
|
||||||
|
sheet-music-ocr convert scan.png -o output.mxml
|
||||||
|
sheet-music-ocr review output.mxml --provider claude
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
import zipfile
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Annotated
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from python.sheet_music_ocr.audiveris import AudiverisError, run_audiveris
|
||||||
|
from python.sheet_music_ocr.review import LLMProvider, ReviewError, review_mxml
|
||||||
|
|
||||||
|
SUPPORTED_EXTENSIONS = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"}
|
||||||
|
|
||||||
|
app = typer.Typer(help="Convert scanned sheet music to MusicXML using Audiveris.")
|
||||||
|
|
||||||
|
|
||||||
|
def extract_mxml_from_mxl(mxl_path: Path, output_path: Path) -> Path:
    """Extract the MusicXML file from an .mxl archive.

    An .mxl file is a ZIP archive containing one or more MusicXML files. If the
    archive has a ``META-INF/container.xml`` whose first ``rootfile`` element
    names an entry present in the archive, that entry is extracted. Otherwise
    the first ``.xml``/``.musicxml`` entry outside ``META-INF`` is used.

    Args:
        mxl_path: Path to the .mxl file.
        output_path: Path where the extracted .mxml file should be written.

    Returns:
        The output path.

    Raises:
        FileNotFoundError: If no MusicXML file is found inside the archive.
    """
    # Local imports: only this function needs them.
    import shutil
    import xml.etree.ElementTree as ET

    container_name = "META-INF/container.xml"

    with zipfile.ZipFile(mxl_path, "r") as zf:
        names = zf.namelist()
        candidates = [
            n for n in names if n.lower().endswith((".xml", ".musicxml")) and not n.startswith("META-INF")
        ]
        if not candidates:
            msg = f"No MusicXML (.xml) file found inside {mxl_path}"
            raise FileNotFoundError(msg)

        # Default to archive order; override with the declared rootfile if usable.
        chosen = candidates[0]
        if container_name in names:
            try:
                container = ET.fromstring(zf.read(container_name))
            except ET.ParseError:
                container = None  # unreadable manifest: keep the fallback choice
            if container is not None:
                for element in container.iter():
                    # Compare the local tag name so a namespaced manifest also matches.
                    if element.tag.rsplit("}", 1)[-1] == "rootfile":
                        declared = element.get("full-path")
                        if declared in candidates:
                            chosen = declared
                        break  # only the first rootfile is considered

        # Stream the entry to disk instead of loading it fully into memory.
        with zf.open(chosen) as src, output_path.open("wb") as dst:
            shutil.copyfileobj(src, dst)
    return output_path
|
||||||
|
|
||||||
|
|
||||||
|
@app.command()
def convert(
    input_file: Annotated[Path, typer.Argument(help="Path to sheet music scan (PDF, PNG, JPG, TIFF).")],
    output: Annotated[
        Path | None,
        typer.Option("--output", "-o", help="Output .mxml file path. Defaults to <input_stem>.mxml."),
    ] = None,
) -> None:
    """Convert a scanned sheet music file to MusicXML."""
    # NOTE: the docstring above doubles as the CLI help text, so details live in comments.
    # Every failure path prints "Error: ..." to stderr and exits with status 1.
    if not input_file.exists():
        typer.echo(f"Error: {input_file} does not exist.", err=True)
        raise typer.Exit(code=1)

    if input_file.suffix.lower() not in SUPPORTED_EXTENSIONS:
        typer.echo(
            f"Error: Unsupported format '{input_file.suffix}'. Supported: {', '.join(sorted(SUPPORTED_EXTENSIONS))}",
            err=True,
        )
        raise typer.Exit(code=1)

    output_path = output or input_file.with_suffix(".mxml")

    # Audiveris writes into a throwaway directory; only the extracted XML is kept.
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            mxl_path = run_audiveris(input_file, Path(tmpdir))
            extract_mxml_from_mxl(mxl_path, output_path)
        except (AudiverisError, OSError, zipfile.BadZipFile) as e:
            # OSError covers FileNotFoundError (no XML in the archive, missing
            # output directory) as well as permission errors; BadZipFile covers
            # a corrupt .mxl. All of them become a clean CLI error.
            typer.echo(f"Error: {e}", err=True)
            raise typer.Exit(code=1) from e

    typer.echo(f"Written: {output_path}")
|
||||||
|
|
||||||
|
|
||||||
|
@app.command()
def review(
    input_file: Annotated[Path, typer.Argument(help="Path to MusicXML (.mxml) file to review.")],
    output: Annotated[
        Path | None,
        typer.Option("--output", "-o", help="Output path for corrected .mxml. Defaults to overwriting input."),
    ] = None,
    provider: Annotated[
        LLMProvider,
        typer.Option("--provider", "-p", help="LLM provider to use."),
    ] = LLMProvider.CLAUDE,
) -> None:
    """Review and fix a MusicXML file using an LLM."""
    # Validate the input first; each failure prints to stderr and exits with status 1.
    if not input_file.exists():
        typer.echo(f"Error: {input_file} does not exist.", err=True)
        raise typer.Exit(code=1)

    has_mxml_suffix = input_file.suffix.lower() == ".mxml"
    if not has_mxml_suffix:
        typer.echo("Error: Input file must be a .mxml file.", err=True)
        raise typer.Exit(code=1)

    try:
        corrected = review_mxml(input_file, provider)
    except ReviewError as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(code=1) from e

    # Without --output the corrected text replaces the input file in place.
    destination = input_file if output is None else output
    destination.write_text(corrected, encoding="utf-8")
    typer.echo(f"Reviewed: {destination}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
app()
|
||||||
126
python/sheet_music_ocr/review.py
Normal file
126
python/sheet_music_ocr/review.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
"""LLM-based MusicXML review and correction.
|
||||||
|
|
||||||
|
Supports both Claude (Anthropic) and OpenAI APIs for reviewing
|
||||||
|
MusicXML output from Audiveris and suggesting/applying fixes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import enum
|
||||||
|
import os
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REVIEW_PROMPT = """\
|
||||||
|
You are a music notation expert. Review the following MusicXML file produced by \
|
||||||
|
optical music recognition (Audiveris). Look for and fix common OCR errors including:
|
||||||
|
|
||||||
|
- Incorrect note pitches or durations
|
||||||
|
- Wrong or missing key signatures, time signatures, or clefs
|
||||||
|
- Incorrect rest durations or placements
|
||||||
|
- Missing or incorrect accidentals
|
||||||
|
- Wrong beam groupings or tuplets
|
||||||
|
- Garbled or misspelled lyrics and text annotations
|
||||||
|
- Missing or incorrect dynamic markings
|
||||||
|
- Incorrect measure numbers or barline types
|
||||||
|
- Voice/staff assignment errors
|
||||||
|
|
||||||
|
Return ONLY the corrected MusicXML. Do not include any explanation, commentary, or \
|
||||||
|
markdown formatting. Output the raw XML directly.
|
||||||
|
|
||||||
|
Here is the MusicXML to review:
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
_TIMEOUT = 300
|
||||||
|
|
||||||
|
|
||||||
|
class LLMProvider(enum.StrEnum):
|
||||||
|
"""Supported LLM providers."""
|
||||||
|
|
||||||
|
CLAUDE = "claude"
|
||||||
|
OPENAI = "openai"
|
||||||
|
|
||||||
|
|
||||||
|
class ReviewError(Exception):
    """Signal that an LLM review could not be completed (missing API key or API error)."""
|
||||||
|
|
||||||
|
|
||||||
|
def _get_api_key(provider: LLMProvider) -> str:
    """Read the API key for ``provider`` from the environment.

    Args:
        provider: Provider whose key is wanted. ``CLAUDE`` maps to
            ``ANTHROPIC_API_KEY``; anything else maps to ``OPENAI_API_KEY``.

    Returns:
        The non-empty value of the matching environment variable.

    Raises:
        ReviewError: If the variable is unset or empty.
    """
    if provider == LLMProvider.CLAUDE:
        env_var = "ANTHROPIC_API_KEY"
    else:
        env_var = "OPENAI_API_KEY"

    key = os.environ.get(env_var)
    if key:
        return key

    msg = f"{env_var} environment variable is not set."
    raise ReviewError(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def _call_claude(content: str, api_key: str) -> str:
    """Send the review prompt plus ``content`` to the Anthropic Messages API.

    Args:
        content: MusicXML text appended to ``REVIEW_PROMPT``.
        api_key: Value sent in the ``x-api-key`` header.

    Returns:
        The text of the first content block in the response.

    Raises:
        ReviewError: If the request cannot be completed, the API answers with a
            non-200 status, the body does not have the expected shape, or the
            reply was cut off at the token limit.
    """
    try:
        response = httpx.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={
                "model": "claude-sonnet-4-20250514",
                "max_tokens": 16384,
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as e:
        # Timeouts and connection failures: callers only handle ReviewError.
        msg = f"Claude API request failed: {e}"
        raise ReviewError(msg) from e

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"Claude API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        text = data["content"][0]["text"]
        stop_reason = data.get("stop_reason")
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        msg = "Claude API returned an unexpected response body."
        raise ReviewError(msg) from e

    # A reply that hit max_tokens is incomplete XML; refuse it rather than
    # letting the caller write a truncated score.
    if stop_reason == "max_tokens":
        msg = "Claude response was truncated at the max_tokens limit; the corrected MusicXML is incomplete."
        raise ReviewError(msg)

    return text
|
||||||
|
|
||||||
|
|
||||||
|
def _call_openai(content: str, api_key: str) -> str:
    """Send the review prompt plus ``content`` to the OpenAI chat completions API.

    Args:
        content: MusicXML text appended to ``REVIEW_PROMPT``.
        api_key: Value sent as the bearer token in the ``Authorization`` header.

    Returns:
        The message content of the first choice in the response.

    Raises:
        ReviewError: If the request cannot be completed, the API answers with a
            non-200 status, the body does not have the expected shape, or the
            reply was cut off at the token limit.
    """
    try:
        response = httpx.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o",
                "messages": [{"role": "user", "content": REVIEW_PROMPT + content}],
                "max_tokens": 16384,
            },
            timeout=_TIMEOUT,
        )
    except httpx.HTTPError as e:
        # Timeouts and connection failures: callers only handle ReviewError.
        msg = f"OpenAI API request failed: {e}"
        raise ReviewError(msg) from e

    if response.status_code != 200:  # noqa: PLR2004
        msg = f"OpenAI API error ({response.status_code}): {response.text}"
        raise ReviewError(msg)

    try:
        data = response.json()
        choice = data["choices"][0]
        text = choice["message"]["content"]
        finish_reason = choice.get("finish_reason")
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        msg = "OpenAI API returned an unexpected response body."
        raise ReviewError(msg) from e

    # finish_reason "length" means the reply hit the token limit, so the XML is
    # incomplete; refuse it rather than letting the caller write it out.
    if finish_reason == "length":
        msg = "OpenAI response was truncated at the max_tokens limit; the corrected MusicXML is incomplete."
        raise ReviewError(msg)

    return text
|
||||||
|
|
||||||
|
|
||||||
|
def review_mxml(mxml_path: Path, provider: LLMProvider) -> str:
    """Review a MusicXML file using an LLM and return corrected content.

    The reply is stripped of surrounding whitespace and of a wrapping markdown
    code fence, then checked for XML well-formedness before it is returned, so
    callers never write out a reply that is not XML.

    Args:
        mxml_path: Path to the .mxml file to review.
        provider: Which LLM provider to use.

    Returns:
        The corrected MusicXML content as a string.

    Raises:
        ReviewError: If the API call fails, the key is missing, or the reply
            is not well-formed XML.
        FileNotFoundError: If the input file does not exist.
    """
    # Local import: only the well-formedness check below needs it.
    import xml.etree.ElementTree as ET

    content = mxml_path.read_text(encoding="utf-8")
    api_key = _get_api_key(provider)

    if provider == LLMProvider.CLAUDE:
        reply = _call_claude(content, api_key)
    else:
        reply = _call_openai(content, api_key)

    corrected = reply.strip()
    if corrected.startswith("```"):
        # Drop the opening fence line (with any language tag) and the closing fence.
        _, _, corrected = corrected.partition("\n")
        corrected = corrected.rstrip().removesuffix("```").rstrip()

    try:
        # Well-formedness only; this does not validate against the MusicXML schema.
        ET.fromstring(corrected)
    except ET.ParseError as e:
        msg = f"{provider.value} returned a response that is not well-formed XML: {e}"
        raise ReviewError(msg) from e

    return corrected
|
||||||
@@ -14,6 +14,8 @@
|
|||||||
ssh-to-age
|
ssh-to-age
|
||||||
gnupg
|
gnupg
|
||||||
age
|
age
|
||||||
|
|
||||||
|
audiveris
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
294
tests/test_sheet_music_ocr.py
Normal file
294
tests/test_sheet_music_ocr.py
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
import zipfile
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
from typer.testing import CliRunner
|
||||||
|
|
||||||
|
from python.sheet_music_ocr.audiveris import AudiverisError, find_audiveris, run_audiveris
|
||||||
|
from python.sheet_music_ocr.main import SUPPORTED_EXTENSIONS, app, extract_mxml_from_mxl
|
||||||
|
from python.sheet_music_ocr.review import LLMProvider, ReviewError, review_mxml
|
||||||
|
|
||||||
|
runner = CliRunner()
|
||||||
|
|
||||||
|
|
||||||
|
def make_mxl(path, xml_content=b"<score-partwise/>"):
    """Write a minimal .mxl (ZIP) archive at ``path`` holding a single ``score.xml`` entry."""
    with zipfile.ZipFile(path, mode="w") as archive:
        archive.writestr("score.xml", xml_content)
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractMxmlFromMxl:
    """Exercise ``extract_mxml_from_mxl`` against small hand-built archives."""

    def test_extracts_xml(self, tmp_path):
        """The archived XML bytes land at the output path, which is returned."""
        payload = b"<score-partwise>hello</score-partwise>"
        archive = tmp_path / "test.mxl"
        target = tmp_path / "output.mxml"
        make_mxl(archive, payload)

        returned = extract_mxml_from_mxl(archive, target)

        assert returned == target
        assert target.read_bytes() == payload

    def test_skips_meta_inf(self, tmp_path):
        """An XML entry under META-INF is not mistaken for the score."""
        archive = tmp_path / "test.mxl"
        target = tmp_path / "output.mxml"
        entries = {
            "META-INF/container.xml": "<container/>",
            "score.xml": b"<score/>",
        }
        with zipfile.ZipFile(archive, "w") as zf:
            for entry_name, entry_data in entries.items():
                zf.writestr(entry_name, entry_data)

        extract_mxml_from_mxl(archive, target)

        assert target.read_bytes() == b"<score/>"

    def test_raises_when_no_xml(self, tmp_path):
        """An archive without any XML entry raises FileNotFoundError."""
        archive = tmp_path / "test.mxl"
        target = tmp_path / "output.mxml"
        with zipfile.ZipFile(archive, "w") as zf:
            zf.writestr("readme.txt", "no xml here")

        with pytest.raises(FileNotFoundError, match="No MusicXML"):
            extract_mxml_from_mxl(archive, target)
|
||||||
|
|
||||||
|
|
||||||
|
class TestFindAudiveris:
    """Exercise ``find_audiveris`` with the PATH lookup patched out."""

    WHICH = "python.sheet_music_ocr.audiveris.shutil.which"

    def test_raises_when_not_found(self):
        """A failed lookup is reported as AudiverisError."""
        with patch(self.WHICH, return_value=None), pytest.raises(AudiverisError, match="not found"):
            find_audiveris()

    def test_returns_path_when_found(self):
        """A successful lookup is returned unchanged."""
        expected = "/usr/bin/audiveris"
        with patch(self.WHICH, return_value=expected):
            located = find_audiveris()
        assert located == expected
|
||||||
|
|
||||||
|
|
||||||
|
class TestRunAudiveris:
    """Exercise ``run_audiveris`` with the executable lookup and subprocess call patched."""

    FIND = "python.sheet_music_ocr.audiveris.find_audiveris"
    RUN = "python.sheet_music_ocr.audiveris.subprocess.run"

    def test_raises_on_nonzero_exit(self, tmp_path):
        """A non-zero exit status surfaces as AudiverisError."""
        source = tmp_path / "input.pdf"
        destination = tmp_path / "output"

        with patch(self.FIND, return_value="audiveris"), patch(self.RUN) as fake_run:
            completed = fake_run.return_value
            completed.returncode = 1
            completed.stderr = "something went wrong"

            with pytest.raises(AudiverisError, match="failed"):
                run_audiveris(source, destination)

    def test_raises_when_no_mxl_produced(self, tmp_path):
        """A clean exit that leaves no .mxl behind is still an error."""
        destination = tmp_path / "output"
        destination.mkdir()

        with patch(self.FIND, return_value="audiveris"), patch(self.RUN) as fake_run:
            fake_run.return_value.returncode = 0

            with pytest.raises(AudiverisError, match=r"no \.mxl output"):
                run_audiveris(tmp_path / "input.pdf", destination)

    def test_returns_mxl_path(self, tmp_path):
        """The .mxl found in the output directory is returned."""
        destination = tmp_path / "output"
        destination.mkdir()
        expected = destination / "score.mxl"
        make_mxl(expected)

        with patch(self.FIND, return_value="audiveris"), patch(self.RUN) as fake_run:
            fake_run.return_value.returncode = 0
            found = run_audiveris(tmp_path / "input.pdf", destination)

        assert found == expected
|
||||||
|
|
||||||
|
|
||||||
|
class TestCli:
    """Exercise the ``convert`` command through typer's CliRunner."""

    def test_missing_input_file(self, tmp_path):
        """A non-existent input path exits 1 with a clear message."""
        result = runner.invoke(app, ["convert", str(tmp_path / "nonexistent.pdf")])
        assert result.exit_code == 1
        assert "does not exist" in result.output

    def test_unsupported_format(self, tmp_path):
        """An existing file with an unsupported suffix is rejected."""
        bad_file = tmp_path / "music.bmp"
        bad_file.touch()
        result = runner.invoke(app, ["convert", str(bad_file)])
        assert result.exit_code == 1
        assert "Unsupported format" in result.output

    def test_supported_extensions_complete(self):
        """Every advertised extension is present, including the short ``.tif`` form."""
        expected = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"}
        assert expected <= SUPPORTED_EXTENSIONS

    def test_successful_conversion(self, tmp_path):
        """With Audiveris patched, the XML inside the .mxl is written to ``-o``."""
        input_file = tmp_path / "score.pdf"
        input_file.touch()
        output_file = tmp_path / "score.mxml"

        mxl_path = tmp_path / "tmp_mxl" / "score.mxl"
        mxl_path.parent.mkdir()
        make_mxl(mxl_path, b"<score-partwise/>")

        with patch("python.sheet_music_ocr.main.run_audiveris", return_value=mxl_path):
            result = runner.invoke(app, ["convert", str(input_file), "-o", str(output_file)])

        assert result.exit_code == 0
        assert output_file.exists()
        # Check the payload too, not just that some file was created.
        assert output_file.read_bytes() == b"<score-partwise/>"
        assert "Written" in result.output

    def test_default_output_path(self, tmp_path):
        """Without ``-o`` the output sits next to the input with a ``.mxml`` suffix."""
        input_file = tmp_path / "score.png"
        input_file.touch()

        mxl_path = tmp_path / "tmp_mxl" / "score.mxl"
        mxl_path.parent.mkdir()
        make_mxl(mxl_path)

        with patch("python.sheet_music_ocr.main.run_audiveris", return_value=mxl_path):
            result = runner.invoke(app, ["convert", str(input_file)])

        assert result.exit_code == 0
        assert (tmp_path / "score.mxml").exists()
|
||||||
|
|
||||||
|
|
||||||
|
class TestReviewMxml:
    """Exercise ``review_mxml`` with ``httpx.post`` patched to return canned responses."""

    POST = "python.sheet_music_ocr.review.httpx.post"
    CLAUDE_URL = "https://api.anthropic.com/v1/messages"
    OPENAI_URL = "https://api.openai.com/v1/chat/completions"
    CORRECTED = "<score-partwise><part/></score-partwise>"

    @staticmethod
    def _write_score(tmp_path):
        """Create the input .mxml used by every test and return its path."""
        score = tmp_path / "score.mxml"
        score.write_text("<score-partwise/>")
        return score

    @staticmethod
    def _response(status, url, **body):
        """Build an httpx.Response bound to a POST request for ``url``."""
        return httpx.Response(status, request=httpx.Request("POST", url), **body)

    def test_raises_when_no_api_key(self, tmp_path, monkeypatch):
        """A missing Anthropic key is reported by variable name."""
        score = self._write_score(tmp_path)
        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

        with pytest.raises(ReviewError, match="ANTHROPIC_API_KEY"):
            review_mxml(score, LLMProvider.CLAUDE)

    def test_raises_when_no_openai_key(self, tmp_path, monkeypatch):
        """A missing OpenAI key is reported by variable name."""
        score = self._write_score(tmp_path)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)

        with pytest.raises(ReviewError, match="OPENAI_API_KEY"):
            review_mxml(score, LLMProvider.OPENAI)

    def test_claude_success(self, tmp_path, monkeypatch):
        """The text of the first Claude content block is returned."""
        score = self._write_score(tmp_path)
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
        canned = self._response(200, self.CLAUDE_URL, json={"content": [{"text": self.CORRECTED}]})

        with patch(self.POST, return_value=canned):
            result = review_mxml(score, LLMProvider.CLAUDE)

        assert result == self.CORRECTED

    def test_openai_success(self, tmp_path, monkeypatch):
        """The message content of the first OpenAI choice is returned."""
        score = self._write_score(tmp_path)
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        canned = self._response(
            200,
            self.OPENAI_URL,
            json={"choices": [{"message": {"content": self.CORRECTED}}]},
        )

        with patch(self.POST, return_value=canned):
            result = review_mxml(score, LLMProvider.OPENAI)

        assert result == self.CORRECTED

    def test_claude_api_error(self, tmp_path, monkeypatch):
        """A non-200 Claude response becomes ReviewError."""
        score = self._write_score(tmp_path)
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
        canned = self._response(500, self.CLAUDE_URL, text="Internal Server Error")

        with patch(self.POST, return_value=canned), pytest.raises(ReviewError, match="Claude API error"):
            review_mxml(score, LLMProvider.CLAUDE)

    def test_openai_api_error(self, tmp_path, monkeypatch):
        """A non-200 OpenAI response becomes ReviewError."""
        score = self._write_score(tmp_path)
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        canned = self._response(429, self.OPENAI_URL, text="Rate limited")

        with patch(self.POST, return_value=canned), pytest.raises(ReviewError, match="OpenAI API error"):
            review_mxml(score, LLMProvider.OPENAI)
|
||||||
|
|
||||||
|
|
||||||
|
class TestReviewCli:
    """Exercise the ``review`` command through typer's CliRunner."""

    POST = "python.sheet_music_ocr.review.httpx.post"
    CORRECTED = "<score-partwise><part/></score-partwise>"

    def _claude_ok(self):
        """Build a 200 response shaped like a Claude reply carrying CORRECTED."""
        return httpx.Response(
            200,
            json={"content": [{"text": self.CORRECTED}]},
            request=httpx.Request("POST", "https://api.anthropic.com/v1/messages"),
        )

    def test_missing_input_file(self, tmp_path):
        """A non-existent input path exits 1 with a clear message."""
        missing = tmp_path / "nonexistent.mxml"
        result = runner.invoke(app, ["review", str(missing)])
        assert result.exit_code == 1
        assert "does not exist" in result.output

    def test_wrong_extension(self, tmp_path):
        """An existing file that is not .mxml is rejected."""
        bad_file = tmp_path / "score.pdf"
        bad_file.touch()
        result = runner.invoke(app, ["review", str(bad_file)])
        assert result.exit_code == 1
        assert ".mxml" in result.output

    def test_successful_review(self, tmp_path, monkeypatch):
        """With ``-o`` the corrected text is written to the given path."""
        score = tmp_path / "score.mxml"
        score.write_text("<score-partwise/>")
        destination = tmp_path / "corrected.mxml"
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

        with patch(self.POST, return_value=self._claude_ok()):
            result = runner.invoke(app, ["review", str(score), "-o", str(destination)])

        assert result.exit_code == 0
        assert "Reviewed" in result.output
        assert destination.read_text() == self.CORRECTED

    def test_overwrites_input_by_default(self, tmp_path, monkeypatch):
        """Without ``-o`` the input file itself receives the corrected text."""
        score = tmp_path / "score.mxml"
        score.write_text("<score-partwise/>")
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

        with patch(self.POST, return_value=self._claude_ok()):
            result = runner.invoke(app, ["review", str(score)])

        assert result.exit_code == 0
        assert score.read_text() == self.CORRECTED
|
||||||
Reference in New Issue
Block a user