built workflow

2026-06-06 15:20:57 -04:00
parent b6395ef18f
commit 1ffc48bb02
5 changed files with 2604 additions and 0 deletions
@@ -0,0 +1 @@
 """Audiobook tools."""
@@ -0,0 +1,428 @@
 """Convert Audible AAX downloads into Audiobookshelf-friendly M4B files."""
 from __future__ import annotations
 import json
 import logging
 import shutil
 import subprocess
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import asdict, dataclass
 from os import getenv
 from pathlib import Path  # noqa: TC003 This is required for the typer CLI
 from typing import TYPE_CHECKING, Annotated, Any
 from uuid import uuid7
 import typer
 from python.common import configure_logger
 from python.orm.common import get_postgres_engine
 from python.tools.audiobook.metadata_agent import (
    AgentConfig,
    StandardBookMetadata,
    standard_book_metadata,
    write_agent_log,
 )
 if TYPE_CHECKING:
    from sqlalchemy.engine import Engine
 logger = logging.getLogger(__name__)
 SENSITIVE_COMMAND_ARGUMENTS = {"-activation_bytes"}
@dataclass(frozen=True)
 class ConversionConfig:
     """Runtime settings for one conversion command."""
     # Root output directory; final M4B files and the work directory live under it.
     resolved_output: Path
     # API key handed to the metadata agent on every metadata lookup.
     ollama_api_key: str
     # Agent settings forwarded unchanged to the metadata agent.
     agent_config: AgentConfig
     # Database engine forwarded to the metadata agent.
     engine: Engine
     # Passed to ffmpeg as -activation_bytes when set; omitted when None or empty.
     activation_bytes: str | None
     # When true, only resolve metadata and print the planned output path.
     dry_run: bool
     # When true, an existing destination M4B may be replaced.
     overwrite: bool
     # Bookkeeping directory created under resolved_output.
     work_directory_name: str = ".audible_convert"
     # Subdirectory of the work directory holding one folder per run for in-progress output.
     temp_directory_name: str = "tmp"
     # Subdirectory of the work directory holding one JSONL agent log per run.
     log_directory_name: str = "logs"
     # Subdirectory of the work directory holding one JSON review file per unresolved run.
     review_directory_name: str = "review"
@dataclass(frozen=True)
 class ConcurrentConversionResult:
     """Result from running ffmpeg and metadata resolution together."""
     # Resolved metadata, or None when metadata resolution raised.
     metadata: StandardBookMetadata | None
     # Exception raised by the ffmpeg conversion task, or None when it succeeded.
     conversion_error: Exception | None
     # Exception raised by the metadata task, or None when it succeeded.
     metadata_error: Exception | None
 class CommandExecutionError(RuntimeError):
     """Raised when a command exits unsuccessfully; its message hides sensitive values."""

     def __init__(self, arguments: list[str], returncode: int) -> None:
         """Build the error message from the redacted form of the command line.

         Args:
             arguments: Command and arguments that were run (kept unredacted on the instance).
             returncode: Exit status reported for the command.
         """
         redacted_command = " ".join(redact_command_arguments(arguments))
         super().__init__(f"Command failed with exit code {returncode}: {redacted_command}")
         self.arguments = tuple(arguments)
         self.returncode = returncode
 def main(
     input_directory: Annotated[Path, typer.Argument(help="Directory audible-cli downloads AAX files into.")],
     output_directory: Annotated[Path, typer.Argument(help="Audiobook output directory.")],
     *,
     dry_run: Annotated[bool, typer.Option("--dry-run", help="Print planned output files without converting.")] = False,
     overwrite: Annotated[bool, typer.Option("--overwrite", help="Overwrite existing M4B files.")] = False,
 ) -> None:
     """Convert AAX files from a download directory into M4B files."""
     configure_logger()
     # strict=True makes a missing input directory fail here, before any work starts.
     resolved_input = input_directory.resolve(strict=True)
     resolved_output = output_directory.resolve()
     # A dry run does not create the output directory at this point.
     if not dry_run:
         resolved_output.mkdir(parents=True, exist_ok=True)
     # The API key is mandatory even for dry runs, because they also resolve metadata.
     ollama_api_key = getenv("OLLAMA_API_KEY")
     if not ollama_api_key:
         msg = "OLLAMA_API_KEY is required for audiobook metadata resolution"
         raise RuntimeError(msg)
     config = ConversionConfig(
         resolved_output=resolved_output,
         ollama_api_key=ollama_api_key,
         agent_config=AgentConfig(),
         # NOTE(review): "RICHIE" presumably selects a named database configuration - confirm.
         engine=get_postgres_engine(name="RICHIE"),
         # Optional: left as None when the environment variable is unset.
         activation_bytes=getenv("AUDIBLE_ACTIVATION_BYTES"),
         dry_run=dry_run,
         overwrite=overwrite,
     )
     # Non-recursive match on the lowercase ".aax" suffix; sorted for a stable processing order.
     aax_files = sorted(resolved_input.glob("*.aax"))
     if not aax_files:
         logger.info("No AAX files found in %s", resolved_input)
         return
     # Files are handled one after another; each call writes its own log and review files.
     for aax_file in aax_files:
         logger.info("Converting %s", aax_file)
         convert_aax_file_with_agent(aax_file, config)
 def run_command(arguments: list[str], *, capture: bool = False) -> subprocess.CompletedProcess[str]:
     """Run a command and return the completed process.

     The command line is logged at debug level with sensitive values redacted.

     Args:
         arguments: Command and arguments to run.
         capture: Whether to capture stdout and stderr.

     Returns:
         The completed process.

     Raises:
         CommandExecutionError: If the command exits with a non-zero status. Its
             message contains only the redacted command line.
     """
     logger.debug("%s", " ".join(redact_command_arguments(arguments)))
     try:
         return subprocess.run(arguments, check=True, capture_output=capture, text=True)
     except subprocess.CalledProcessError as error:
         # CalledProcessError renders the full, unredacted command in its message.
         # Chaining it as the cause would print sensitive values (for example the
         # activation bytes) in every traceback, so the chain is suppressed and
         # only the exit code is carried over.
         raise CommandExecutionError(arguments, error.returncode) from None
 def redact_command_arguments(arguments: list[str]) -> list[str]:
     """Return a copy of the arguments with the value after each sensitive flag masked.

     Args:
         arguments: Command and arguments as they would be executed.

     Returns:
         A new list in which the argument following any flag listed in
         SENSITIVE_COMMAND_ARGUMENTS is replaced by "<redacted>".
     """
     missing = object()
     remaining = iter(arguments)
     sanitized = []
     for token in remaining:
         sanitized.append(token)
         # Consume the flag's value (when there is one) so it is never copied
         # and is never itself interpreted as a flag.
         if token in SENSITIVE_COMMAND_ARGUMENTS and next(remaining, missing) is not missing:
             sanitized.append("<redacted>")
     return sanitized
 def read_metadata(aax_file: Path) -> dict[str, str]:
     """Collect the container-level tags ffprobe reports for an AAX file.

     Args:
         aax_file: AAX file to inspect.

     Returns:
         Tag names lower-cased and mapped to their values as strings; empty
         when ffprobe reports no format tags.
     """
     probe_command = ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", str(aax_file)]
     probe = run_command(probe_command, capture=True)
     ffprobe_data: dict[str, Any] = json.loads(probe.stdout)
     format_section = ffprobe_data.get("format", {})
     normalized: dict[str, str] = {}
     for name, value in format_section.get("tags", {}).items():
         normalized[str(name).lower()] = str(value)
     return normalized
 def output_stem(metadata: StandardBookMetadata) -> str:
     """Compose the file and directory stem used for a book.

     Args:
         metadata: Book metadata.

     Returns:
         The stem "<author>-<series>_<index>-<title>", with the series index
         zero-padded to at least two digits.
     """
     segments = (
         f"{metadata.author}",
         f"{metadata.series}_{metadata.series_index:02}",
         f"{metadata.title}",
     )
     return "-".join(segments)
 def metadata_output_path(output_directory: Path, metadata: StandardBookMetadata) -> Path:
     """Return "<output_directory>/<stem>/<stem>.m4b" for the resolved metadata."""
     book_stem = output_stem(metadata)
     file_name = f"{book_stem}.m4b"
     return output_directory.joinpath(book_stem, file_name)
 def convert_aax_file(
     aax_file: Path,
     destination: Path,
     activation_bytes: str | None,
     *,
     overwrite: bool,
 ) -> None:
     """Remux an AAX file into an M4B file with ffmpeg, copying streams and metadata.

     Args:
         aax_file: Source AAX file.
         destination: Destination M4B file; its parent directory is created when missing.
         activation_bytes: Optional Audible activation bytes for ffmpeg.
         overwrite: Whether to overwrite an existing M4B. When false and the
             destination already exists, nothing is run.
     """
     already_present = destination.exists()
     if already_present and not overwrite:
         logger.info("Skipping existing file %s", destination)
         return
     destination.parent.mkdir(parents=True, exist_ok=True)
     overwrite_flag = "-y" if overwrite else "-n"
     decryption_options = ["-activation_bytes", activation_bytes] if activation_bytes else []
     command = [
         "ffmpeg",
         "-hide_banner",
         overwrite_flag,
         *decryption_options,
         "-i",
         str(aax_file),
         "-map_metadata",
         "0",
         "-c",
         "copy",
         str(destination),
     ]
     run_command(command)
 def write_review_file(
     *,
     destination: Path | None,
     ffprobe_metadata: dict[str, str],
     log_file: Path,
     metadata: StandardBookMetadata | None,
     reason: str,
     review_file: Path,
     source: Path,
     temp_file: Path | None,
 ) -> None:
     """Persist a JSON record describing a conversion that needs manual review.

     The record is written to ``review_file`` (parent directories are created)
     and a "review_written" event is appended to the agent log.
     """
     review_file.parent.mkdir(parents=True, exist_ok=True)
     destination_text = str(destination) if destination else None
     metadata_fields = asdict(metadata) if metadata else None
     temp_file_text = str(temp_file) if temp_file else None
     review_record = {
         "source": str(source),
         "reason": reason,
         "ffprobe_metadata": ffprobe_metadata,
         "metadata": metadata_fields,
         "destination": destination_text,
         "temp_file": temp_file_text,
     }
     # sort_keys makes the serialized form independent of the dict's insertion order.
     serialized = json.dumps(review_record, indent=2, sort_keys=True)
     review_file.write_text(serialized, encoding="utf-8")
     write_agent_log(log_file, "review_written", path=str(review_file), reason=reason)
 def cleanup_temp_output(temp_file: Path) -> None:
     """Delete the directory that contains ``temp_file``, ignoring any removal errors."""
     run_directory = temp_file.parent
     shutil.rmtree(run_directory, ignore_errors=True)
 def dry_run_aax_file_with_agent(
     aax_file: Path,
     ffprobe_metadata: dict[str, str],
     engine: Engine,
     config: ConversionConfig,
     log_file: Path,
     review_file: Path,
 ) -> None:
     """Resolve metadata and report where the file would go, without running ffmpeg.

     Prints "<source> -> <destination>" when the metadata is usable. Otherwise a
     review file is written and "<source> -> REVIEW <review file>" is printed.
     """
     metadata = standard_book_metadata(
         aax_file.name,
         ffprobe_metadata,
         engine,
         log_file,
         config.ollama_api_key,
         config.agent_config,
     )
     if not metadata.needs_review:
         planned_destination = metadata_output_path(config.resolved_output, metadata)
         typer.echo(f"{aax_file} -> {planned_destination}")
         return
     # No destination is planned for metadata that still needs a human decision.
     write_review_file(
         destination=None,
         ffprobe_metadata=ffprobe_metadata,
         log_file=log_file,
         metadata=metadata,
         reason="metadata_needs_review",
         review_file=review_file,
         source=aax_file,
         temp_file=None,
     )
     typer.echo(f"{aax_file} -> REVIEW {review_file}")
 def convert_temp_file_and_resolve_metadata(
     aax_file: Path,
     temp_file: Path,
     ffprobe_metadata: dict[str, str],
     config: ConversionConfig,
     log_file: Path,
 ) -> ConcurrentConversionResult:
     """Convert to the temporary file while the metadata agent runs on a second thread.

     Failures of either task are captured and returned rather than raised.
     """
     with ThreadPoolExecutor(max_workers=2) as pool:
         ffmpeg_task = pool.submit(
             convert_aax_file,
             aax_file,
             temp_file,
             config.activation_bytes,
             overwrite=True,
         )
         agent_task = pool.submit(
             standard_book_metadata,
             aax_file.name,
             ffprobe_metadata,
             config.engine,
             log_file,
             config.ollama_api_key,
             config.agent_config,
         )
         # exception() blocks until the task has finished.
         conversion_error: Exception | None = ffmpeg_task.exception()
         if conversion_error is None:
             ffmpeg_task.result()
         metadata_error: Exception | None = agent_task.exception()
         metadata: StandardBookMetadata | None = agent_task.result() if metadata_error is None else None
     return ConcurrentConversionResult(
         metadata=metadata,
         conversion_error=conversion_error,
         metadata_error=metadata_error,
     )
 def convert_aax_file_with_agent(aax_file: Path, config: ConversionConfig) -> None:
     """Convert one AAX file, letting the metadata agent decide the final path.

     Each call gets its own run id, which names the run's log file, review file
     and temporary directory under the work directory. Any failure (ffprobe,
     ffmpeg, metadata resolution, unresolved metadata, or the final rename) is
     recorded in a review file instead of being raised.
     """
     run_id = uuid7().hex
     work_directory = config.resolved_output / config.work_directory_name
     log_file = work_directory / config.log_directory_name / f"{run_id}.jsonl"
     review_file = work_directory / config.review_directory_name / f"{run_id}.json"

     def request_review(
         reason: str,
         tags: dict[str, str],
         *,
         metadata: StandardBookMetadata | None = None,
         destination: Path | None = None,
         kept_temp_file: Path | None = None,
     ) -> None:
         """Write this run's review file with the given reason and context."""
         write_review_file(
             destination=destination,
             ffprobe_metadata=tags,
             log_file=log_file,
             metadata=metadata,
             reason=reason,
             review_file=review_file,
             source=aax_file,
             temp_file=kept_temp_file,
         )

     write_agent_log(log_file, "conversion_start", source=str(aax_file), dry_run=config.dry_run)
     try:
         ffprobe_metadata = read_metadata(aax_file)
     except Exception as error:
         logger.exception("ffprobe failed")
         request_review(f"ffprobe_failed: {error}", {})
         return
     if config.dry_run:
         dry_run_aax_file_with_agent(aax_file, ffprobe_metadata, config.engine, config, log_file, review_file)
         return
     temp_file = work_directory / config.temp_directory_name / run_id / "converted.m4b"
     temp_file.parent.mkdir(parents=True, exist_ok=True)
     result = convert_temp_file_and_resolve_metadata(aax_file, temp_file, ffprobe_metadata, config, log_file)
     if result.conversion_error:
         # Only reference the temporary file if ffmpeg left one behind.
         request_review(
             f"ffmpeg_failed: {result.conversion_error}",
             ffprobe_metadata,
             metadata=result.metadata,
             kept_temp_file=temp_file if temp_file.exists() else None,
         )
         return
     if result.metadata_error:
         request_review(
             f"metadata_failed: {result.metadata_error}",
             ffprobe_metadata,
             kept_temp_file=temp_file,
         )
         return
     if result.metadata is None or result.metadata.needs_review:
         # The converted audio is kept so it can be filed once metadata is settled.
         request_review(
             "metadata_needs_review",
             ffprobe_metadata,
             metadata=result.metadata,
             kept_temp_file=temp_file,
         )
         return
     destination = metadata_output_path(config.resolved_output, result.metadata)
     if destination.exists() and not config.overwrite:
         write_agent_log(log_file, "destination_exists", destination=str(destination))
         cleanup_temp_output(temp_file)
         return
     destination.parent.mkdir(parents=True, exist_ok=True)
     try:
         temp_file.replace(destination)
     except Exception as error:  # noqa: BLE001
         request_review(
             f"rename_failed: {error}",
             ffprobe_metadata,
             metadata=result.metadata,
             destination=destination,
             kept_temp_file=temp_file if temp_file.exists() else None,
         )
         return
     cleanup_temp_output(temp_file)
     write_agent_log(log_file, "conversion_complete", destination=str(destination))
 # Expose main as a typer command-line program when the module is run as a script.
 if __name__ == "__main__":
     typer.run(main)
@@ -0,0 +1,969 @@
 """test_audible_convert."""
 from __future__ import annotations
 import json
 import subprocess
 import pytest
 from sqlalchemy import create_engine
 from sqlalchemy.orm import Session, sessionmaker
 from python.orm.richie import Audiobook, AudiobookAuthor, AudiobookSeries, RichieBase
 from python.tools.audiobook import audible_convert, metadata_agent
 from python.tools.audiobook.metadata_agent import StandardBookMetadata, standard_book_metadata
 class FakeOllamaResponse:
     """Minimal response double: always "succeeds" and returns a canned JSON payload."""

     def __init__(self, payload):
         """Remember the payload that json() will hand back."""
         self._payload = payload

     def raise_for_status(self):
         """Report success by doing nothing; never raises."""

     def json(self):
         """Return the exact payload object given at construction."""
         return self._payload
 class FakeFfprobeError(RuntimeError):
     """Test exception whose string form is always the fixed text "bad ffprobe"."""
     def __str__(self) -> str:
         return "bad ffprobe"
@pytest.fixture
 def audiobook_engine():
     """Yield an in-memory SQLite engine with the Richie tables created and seeded.

     Three authors (ids 1, 2, 4) and four series (ids 1-4) are inserted before
     the engine is handed to the test; the engine is disposed afterwards.
     """
     engine = create_engine("sqlite+pysqlite:///:memory:", future=True)
     RichieBase.metadata.create_all(engine)
     with sessionmaker(bind=engine, expire_on_commit=False, future=True)() as session:
         session.add_all(
             [
                 AudiobookAuthor(id=1, name="glynn_stewart"),
                 AudiobookAuthor(id=2, name="craig_alanson"),
                 AudiobookAuthor(id=4, name="dennis_e_taylor"),
                 AudiobookSeries(id=1, name="starships_mage", author_id=1),
                 AudiobookSeries(id=2, name="black_fleet_trilogy", author_id=1),
                 AudiobookSeries(id=3, name="expeditionary_force", author_id=2),
                 AudiobookSeries(id=4, name="bobiverse", author_id=4),
             ],
         )
         session.commit()
     yield engine
     # Runs after the test finishes.
     engine.dispose()
 def install_fake_ollama(monkeypatch, payloads):
     """Replace the agent's httpx.post with a stub that replays ``payloads`` in order.

     Each stubbed call removes the first remaining payload from ``payloads``.
     Returns the list that accumulates the (args, kwargs) of every call.
     """
     recorded_calls = []

     def replay_next_payload(*args, **kwargs):
         recorded_calls.append((args, kwargs))
         next_payload = payloads.pop(0)
         return FakeOllamaResponse(next_payload)

     monkeypatch.setattr(metadata_agent.httpx, "post", replay_next_payload)
     return recorded_calls
 def conversion_config(output_directory, *, dry_run=False, overwrite=False):
     """Build a ConversionConfig for tests: default agent, in-memory SQLite, no activation bytes."""
     default_agent_config = metadata_agent.AgentConfig()
     in_memory_engine = create_engine("sqlite+pysqlite:///:memory:")
     settings = {
         "resolved_output": output_directory,
         "ollama_api_key": "test-key",
         "agent_config": default_agent_config,
         "engine": in_memory_engine,
         "activation_bytes": None,
         "dry_run": dry_run,
         "overwrite": overwrite,
     }
     return audible_convert.ConversionConfig(**settings)
 def sqlite_engine():
     """Create a new engine bound to an in-memory SQLite database."""
     in_memory_url = "sqlite+pysqlite:///:memory:"
     return create_engine(in_memory_url)
 def tool_response(name, arguments):
     """Build a fake chat reply in which the assistant requests a single tool call."""
     requested_call = {"function": {"name": name, "arguments": arguments}}
     assistant_message = {"role": "assistant", "content": "", "tool_calls": [requested_call]}
     return {"message": assistant_message}
 def final_response(metadata):
     """Build a fake chat reply whose content is ``metadata`` serialized as JSON."""
     serialized = json.dumps(metadata)
     assistant_message = {"role": "assistant", "content": serialized}
     return {"message": assistant_message}
 def fenced_final_response(metadata):
     """Build a fake chat reply whose JSON content is wrapped in a Markdown json code fence."""
     fenced = "\n".join(("```json", json.dumps(metadata), "```"))
     assistant_message = {"role": "assistant", "content": fenced}
     return {"message": assistant_message}
 def test_output_stem_uses_catalog_slugs() -> None:
     """output_stem joins the author, series, zero-padded index and title slugs."""
     book = StandardBookMetadata(
         author_id=1,
         author="glynn_stewart",
         book_id=None,
         title="title-slug",
         series_id=1,
         series="starships_mage",
         series_index=1,
         confidence=0.96,
         needs_review=False,
         evidence=["test"],
     )
     expected_stem = "glynn_stewart-starships_mage_01-title-slug"
     assert audible_convert.output_stem(book) == expected_stem
 def test_convert_aax_file_runs_ffmpeg(tmp_path, monkeypatch) -> None:
     """convert_aax_file issues one ffmpeg command and creates the destination directory."""
     recorded_commands = []

     def record_command(arguments, *, capture=False):
         assert capture is False
         recorded_commands.append(arguments)
         return subprocess.CompletedProcess(arguments, 0, "", "")

     monkeypatch.setattr(audible_convert, "run_command", record_command)
     source = tmp_path / "book.aax"
     destination = tmp_path / "book" / "book.m4b"
     audible_convert.convert_aax_file(source, destination, "abc123", overwrite=False)
     expected_command = [
         "ffmpeg",
         "-hide_banner",
         "-n",
         "-activation_bytes",
         "abc123",
         "-i",
         str(source),
         "-map_metadata",
         "0",
         "-c",
         "copy",
         str(destination),
     ]
     assert recorded_commands == [expected_command]
     assert destination.parent.is_dir()
 def test_run_command_redacts_activation_bytes_in_logs_and_errors(monkeypatch, caplog) -> None:
     """The activation bytes never appear in the debug log or in the raised error's message."""
     secret = "secret-token"

     def failing_run(arguments, *, check, capture_output, text):
         assert check is True
         assert capture_output is False
         assert text is True
         raise subprocess.CalledProcessError(1, arguments)

     monkeypatch.setattr(audible_convert.subprocess, "run", failing_run)
     caplog.set_level("DEBUG", audible_convert.__name__)
     command = ["ffmpeg", "-activation_bytes", secret, "-i", "book.aax"]
     with pytest.raises(audible_convert.CommandExecutionError) as error:
         audible_convert.run_command(command)
     error_message = str(error.value)
     for rendered in (caplog.text, error_message):
         assert secret not in rendered
     for rendered in (caplog.text, error_message):
         assert "<redacted>" in rendered
 def test_write_agent_log_serializes_metadata_as_json_object(tmp_path) -> None:
     """write_agent_log emits metadata as a nested JSON object and paths as strings."""
     book = StandardBookMetadata(
         author_id=1,
         author="glynn_stewart",
         book_id=None,
         title="starship-mage",
         series_id=1,
         series="starships_mage",
         series_index=1,
         confidence=0.95,
         needs_review=False,
         evidence=["test"],
     )
     log_file = tmp_path / "agent.jsonl"
     metadata_agent.write_agent_log(log_file, "final_metadata", metadata=book, path=tmp_path)
     # The whole file must parse as one JSON document, i.e. exactly one record was written.
     logged = json.loads(log_file.read_text(encoding="utf-8"))
     assert logged["event"] == "final_metadata"
     logged_metadata = logged["metadata"]
     assert logged_metadata["author"] == "glynn_stewart"
     assert logged_metadata["title"] == "starship-mage"
     assert logged["path"] == str(tmp_path)
 def test_standard_book_metadata_accepts_valid_tool_output(tmp_path, monkeypatch, audiobook_engine) -> None:
     """A final answer built from tool-returned ids is accepted, logged and stored as a book row."""
     log_file = tmp_path / "agent.jsonl"
     final_answer = {
         "author_id": 1,
         "book_id": None,
         "title": "starship-mage",
         "series_id": 1,
         "series_index": 1,
         "confidence": 0.95,
         "evidence": ["filename and catalog match"],
     }
     scripted_replies = [
         tool_response("search_authors", {"query": "Glynn Stewart"}),
         tool_response("search_series", {"query": "starships_mage"}),
         final_response(final_answer),
     ]
     install_fake_ollama(monkeypatch, scripted_replies)
     metadata = standard_book_metadata(
         "Starship Mage.aax",
         {"title": "Starship Mage", "artist": "Glynn Stewart"},
         audiobook_engine,
         log_file,
         "test-key",
         config=metadata_agent.AgentConfig(),
     )
     expected = StandardBookMetadata(
         author_id=1,
         author="glynn_stewart",
         book_id=1,
         title="starship-mage",
         series_id=1,
         series="starships_mage",
         series_index=1,
         confidence=0.95,
         needs_review=False,
         evidence=["filename and catalog match"],
     )
     assert metadata == expected
     records = [json.loads(line) for line in log_file.read_text(encoding="utf-8").splitlines()]

     def records_for(event):
         return [record for record in records if record["event"] == event]

     sent = records_for("llm_messages_sent")
     received = records_for("llm_message_received")
     first_sent_messages = sent[0]["messages"]
     assert first_sent_messages[0]["role"] == "system"
     assert "Starship Mage" in first_sent_messages[1]["content"]
     assert received[0]["message"]["tool_calls"][0]["function"]["name"] == "search_authors"
     with Session(audiobook_engine) as session:
         stored_book = session.get(Audiobook, 1)
         assert stored_book.title == "starship-mage"
         assert stored_book.author.name == "glynn_stewart"
 def test_standard_book_metadata_uses_agent_config(tmp_path, monkeypatch, audiobook_engine) -> None:
     """A custom AgentConfig drives the request URL, timeout, model and advertised tools."""
     custom_config = metadata_agent.AgentConfig(
         model="custom-model",
         ollama_chat_url="https://ollama.example.test/api/chat",
         http_timeout_seconds=12,
         max_agent_turns=1,
         min_confidence=0.5,
         tool_names=("search_authors",),
     )
     standalone_answer = {
         "author_id": 1,
         "book_id": None,
         "title": "standalone-book",
         "series_id": None,
         "series_index": 0,
         "confidence": 0.5,
         "evidence": ["custom config"],
     }
     scripted_replies = [
         tool_response("search_authors", {"query": "Glynn Stewart"}),
         final_response(standalone_answer),
     ]
     calls = install_fake_ollama(monkeypatch, scripted_replies)
     metadata = standard_book_metadata(
         "Standalone Book.aax",
         {"title": "Standalone Book", "artist": "Glynn Stewart"},
         audiobook_engine,
         tmp_path / "agent.jsonl",
         "test-key",
         config=custom_config,
     )
     # Each recorded call is an (args, kwargs) pair; only the first request is inspected.
     positional, keyword = calls[0]
     request_body = keyword["json"]
     advertised_tools = [schema["function"]["name"] for schema in request_body["tools"]]
     assert positional[0] == "https://ollama.example.test/api/chat"
     assert keyword["timeout"] == 12
     assert request_body["model"] == "custom-model"
     assert advertised_tools == ["search_authors"]
     assert metadata.needs_review is False
     assert metadata.series == "standalone"
 def test_standard_book_metadata_retries_invalid_json_then_needs_review(
     tmp_path,
     monkeypatch,
     audiobook_engine,
 ) -> None:
     """Two unparseable final answers in a row yield a needs-review result with zero confidence."""

     def truncated_json_reply():
         # A fresh dict per reply, so the two scripted replies never share an object.
         return {"message": {"role": "assistant", "content": "{"}}

     scripted_replies = [
         tool_response("search_authors", {"query": "Glynn Stewart"}),
         tool_response("search_series", {"query": "Starship Mage"}),
         truncated_json_reply(),
         truncated_json_reply(),
     ]
     install_fake_ollama(monkeypatch, scripted_replies)
     log_file = tmp_path / "agent.jsonl"
     metadata = standard_book_metadata(
         "Starship Mage.aax",
         {"title": "Starship Mage"},
         audiobook_engine,
         log_file,
         "test-key",
         config=metadata_agent.AgentConfig(),
     )
     assert metadata.needs_review is True
     assert metadata.confidence == 0
 def test_standard_book_metadata_accepts_fenced_final_json(
     tmp_path,
     monkeypatch,
     audiobook_engine,
 ) -> None:
     """A final answer wrapped in a Markdown json code fence is still accepted."""
     fenced_answer = {
         "author_id": 4,
         "book_id": None,
         "title": "all-these-worlds",
         "series_id": 4,
         "series_index": 3,
         "confidence": 0.95,
         "evidence": ["fenced json from model"],
     }
     scripted_replies = [
         tool_response("search_authors", {"query": "Dennis E. Taylor"}),
         tool_response("search_series", {"query": "Bobiverse", "author_id": 4}),
         tool_response("search_books", {"query": "All These Worlds", "author_id": 4, "series_id": 4}),
         fenced_final_response(fenced_answer),
     ]
     install_fake_ollama(monkeypatch, scripted_replies)
     ffprobe_tags = {"title": "All These Worlds: Bobiverse, Book 3", "artist": "Dennis E. Taylor"}
     metadata = standard_book_metadata(
         "All These Worlds.aax",
         ffprobe_tags,
         audiobook_engine,
         tmp_path / "agent.jsonl",
         "test-key",
         config=metadata_agent.AgentConfig(),
     )
     assert metadata.needs_review is False
     resolved_slugs = (metadata.author, metadata.series, metadata.title)
     assert resolved_slugs[0] == "dennis_e_taylor"
     assert resolved_slugs[1] == "bobiverse"
     assert resolved_slugs[2] == "all-these-worlds"
 def test_standard_book_metadata_recovers_from_tool_validation_error(
     tmp_path,
     monkeypatch,
     audiobook_engine,
 ) -> None:
     """After a series-index validation error is logged, a standalone final answer is accepted.

     The log must contain the validation message, no series row with id 5 may
     exist afterwards, and the stored book must have no series.
     """
     rejected_book_arguments = {
         "title": "The Road",
         "author_id": 5,
         "series_id": 5,
         "series_index": 0,
     }
     standalone_answer = {
         "author_id": 5,
         "book_id": None,
         "title": "The Road",
         "series_id": None,
         "series_index": 0,
         "confidence": 0.9,
         "evidence": ["tool error showed this should be standalone"],
     }
     scripted_replies = [
         tool_response("search_authors", {"query": "Cormac McCarthy"}),
         tool_response("ensure_author", {"name": "Cormac McCarthy"}),
         tool_response("ensure_series", {"name": "The Cormac McCarthy Collection", "author_id": 5}),
         tool_response("ensure_book", rejected_book_arguments),
         final_response(standalone_answer),
     ]
     install_fake_ollama(monkeypatch, scripted_replies)
     log_file = tmp_path / "agent.jsonl"
     metadata = standard_book_metadata(
         "The Road.aax",
         {"title": "The Road", "artist": "Cormac McCarthy"},
         audiobook_engine,
         log_file,
         "test-key",
         config=metadata_agent.AgentConfig(),
     )
     expected = StandardBookMetadata(
         author_id=5,
         author="cormac_mccarthy",
         book_id=1,
         title="the-road",
         series_id=None,
         series="standalone",
         series_index=0,
         confidence=0.9,
         needs_review=False,
         evidence=["tool error showed this should be standalone"],
     )
     assert metadata == expected
     log_text = log_file.read_text(encoding="utf-8")
     assert "series books must use a positive series_index" in log_text
     with Session(audiobook_engine) as session:
         assert session.get(AudiobookSeries, 5) is None
         stored_book = session.get(Audiobook, 1)
         assert stored_book.title == "the-road"
         assert stored_book.series_id is None
 def test_standard_book_metadata_rejects_unknown_tool(tmp_path, monkeypatch, audiobook_engine) -> None:
     """A request for an unknown tool yields a needs-review result and a tool_error log entry."""
     log_file = tmp_path / "agent.jsonl"
     scripted_replies = [tool_response("drop_table", {})]
     install_fake_ollama(monkeypatch, scripted_replies)
     metadata = standard_book_metadata(
         "Book.aax",
         {"title": "Book"},
         audiobook_engine,
         log_file,
         "test-key",
         config=metadata_agent.AgentConfig(),
     )
     assert metadata.needs_review is True
     first_evidence = metadata.evidence[0]
     assert "Unknown audiobook metadata tool" in first_evidence
     log_text = log_file.read_text(encoding="utf-8")
     assert "tool_error" in log_text
 def test_standard_book_metadata_rejects_ids_not_returned_by_tools(
     tmp_path,
     monkeypatch,
     audiobook_engine,
 ) -> None:
     """Repeating a final answer with author_id 2 ends in a needs-review result naming that id."""
     unbacked_answer = {
         "author_id": 2,
         "book_id": None,
         "title": "expeditionary-force",
         "series_id": 1,
         "series_index": 1,
         "confidence": 0.99,
         "evidence": ["bad id"],
     }
     scripted_replies = [
         tool_response("search_authors", {"query": "Glynn Stewart"}),
         tool_response("search_series", {"query": "Starship Mage"}),
         # Serialized twice so the two replies are equal but separate objects.
         final_response(unbacked_answer),
         final_response(unbacked_answer),
     ]
     install_fake_ollama(monkeypatch, scripted_replies)
     metadata = standard_book_metadata(
         "Book.aax",
         {"title": "Book"},
         audiobook_engine,
         tmp_path / "agent.jsonl",
         "test-key",
         config=metadata_agent.AgentConfig(),
     )
     assert metadata.needs_review is True
     first_evidence = metadata.evidence[0]
     assert "author_id 2 was not returned" in first_evidence
 def test_standard_book_metadata_rejects_series_for_wrong_author(
     tmp_path,
     monkeypatch,
     audiobook_engine,
 ) -> None:
     """Pairing series 3 with author 1 twice ends in a needs-review result that names the mismatch."""
     mismatched_answer = {
         "author_id": 1,
         "book_id": None,
         "title": "expeditionary-force",
         "series_id": 3,
         "series_index": 1,
         "confidence": 0.99,
         "evidence": ["wrong author"],
     }
     scripted_replies = [
         tool_response("search_authors", {"query": "Glynn Stewart"}),
         tool_response("search_series", {"query": "expeditionary_force"}),
         # Serialized twice so the two replies are equal but separate objects.
         final_response(mismatched_answer),
         final_response(mismatched_answer),
     ]
     install_fake_ollama(monkeypatch, scripted_replies)
     metadata = standard_book_metadata(
         "Book.aax",
         {"title": "Book"},
         audiobook_engine,
         tmp_path / "agent.jsonl",
         "test-key",
         config=metadata_agent.AgentConfig(),
     )
     assert metadata.needs_review is True
     first_evidence = metadata.evidence[0]
     assert "series_id 3 does not belong to author_id 1" in first_evidence
 def test_standard_book_metadata_forces_final_after_empty_book_searches(
     tmp_path,
     monkeypatch,
     audiobook_engine,
 ) -> None:
     """With max_agent_turns=5, five tool calls are followed by an accepted final answer.

     The agent log must also record a request sent with tools disabled.
     """
     five_turn_config = metadata_agent.AgentConfig(max_agent_turns=5)
     log_file = tmp_path / "agent.jsonl"
     final_answer = {
         "author_id": 4,
         "book_id": None,
         "title": "we-are-legion-we-are-bob",
         "series_id": 4,
         "series_index": 1,
         "confidence": 0.95,
         "evidence": ["author and series tool results; title from ffprobe tags"],
     }
     scripted_replies = [
         tool_response("search_authors", {"query": "Dennis E. Taylor"}),
         tool_response("search_series", {"query": "Bobiverse", "author_id": 4}),
         tool_response("search_books", {"query": "We Are Legion We Are Bob", "author_id": 4, "series_id": 4}),
         tool_response("search_books", {"query": "we are legion", "author_id": 4}),
         tool_response("search_books", {"query": "We Are Legion"}),
         final_response(final_answer),
     ]
     install_fake_ollama(monkeypatch, scripted_replies)
     ffprobe_tags = {
         "album": "We Are Legion (We Are Bob): Bobiverse, Book 1",
         "artist": "Dennis E. Taylor",
         "title": "We Are Legion (We Are Bob): Bobiverse, Book 1",
     }
     metadata = standard_book_metadata(
         "We_Are_Legion_(We_Are_Bob)_Bobiverse_Book_1-LC_128_44100_stereo.aax",
         ffprobe_tags,
         audiobook_engine,
         log_file,
         "test-key",
         config=five_turn_config,
     )
     expected = StandardBookMetadata(
         author_id=4,
         author="dennis_e_taylor",
         book_id=1,
         title="we-are-legion-we-are-bob",
         series_id=4,
         series="bobiverse",
         series_index=1,
         confidence=0.95,
         needs_review=False,
         evidence=["author and series tool results; title from ffprobe tags"],
     )
     assert metadata == expected
     log_text = log_file.read_text(encoding="utf-8")
     assert '"tools_enabled": false' in log_text
 def test_standard_book_metadata_can_create_missing_catalog_rows(
    tmp_path,
    monkeypatch,
    audiobook_engine,
 ) -> None:
    """Resolve metadata when the agent uses ``ensure_author`` and ``ensure_series``.

    After the scripted run the test expects author 5, series 5 and book 1 to
    exist in the database and to reference each other.
    """
    evidence_note = "created missing author and series; title from tags"
    scripted_responses = [
        tool_response("search_authors", {"query": "Martha Wells"}),
        tool_response("ensure_author", {"name": "martha_wells"}),
        tool_response("search_series", {"query": "Murderbot Diaries", "author_id": 5}),
        tool_response("ensure_series", {"name": "murderbot_diaries", "author_id": 5}),
        tool_response("search_books", {"query": "All Systems Red", "author_id": 5, "series_id": 5}),
        final_response(
            {
                "author_id": 5,
                "book_id": None,
                "title": "all-systems-red",
                "series_id": 5,
                "series_index": 1,
                "confidence": 0.96,
                "evidence": [evidence_note],
            },
        ),
    ]
    install_fake_ollama(monkeypatch, scripted_responses)

    resolved = standard_book_metadata(
        "All Systems Red.aax",
        {"title": "All Systems Red", "artist": "Martha Wells"},
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )

    assert resolved == StandardBookMetadata(
        author_id=5,
        author="martha_wells",
        book_id=1,
        title="all-systems-red",
        series_id=5,
        series="murderbot_diaries",
        series_index=1,
        confidence=0.96,
        needs_review=False,
        evidence=[evidence_note],
    )

    # Check the persisted rows while the session is still open.
    with Session(audiobook_engine) as db:
        author_row = db.get(AudiobookAuthor, 5)
        series_row = db.get(AudiobookSeries, 5)
        book_row = db.get(Audiobook, 1)
        assert author_row.name == "martha_wells"
        assert (series_row.name, series_row.author_id) == ("murderbot_diaries", author_row.id)
        assert (book_row.title, book_row.author_id, book_row.series_id) == (
            "all-systems-red",
            author_row.id,
            series_row.id,
        )
 def test_standard_book_metadata_normalizes_noisy_created_catalog_rows(
    tmp_path,
    monkeypatch,
    audiobook_engine,
 ) -> None:
    """Expect noisy agent-supplied names to come back in normalized form.

    The script passes ``charles-lamb``, ``AL:ICE Series`` and
    ``AL:ICE Space War``; the returned metadata and the stored rows are
    expected to read ``charles_lamb``, ``al_ice_series`` and
    ``al-ice-space-war``.
    """
    evidence_note = "created normalized author and series; title from tags"
    tagged_title = "AL:ICE Space War: AL:ICE Series, Book 4"
    scripted_responses = [
        tool_response("search_authors", {"query": "Charles Lamb"}),
        tool_response("ensure_author", {"name": "charles-lamb"}),
        tool_response("search_series", {"query": "AL:ICE Series", "author_id": 5}),
        tool_response("ensure_series", {"name": "AL:ICE Series", "author_id": 5}),
        tool_response("search_books", {"query": "AL:ICE Space War", "author_id": 5, "series_id": 5}),
        final_response(
            {
                "author_id": 5,
                "book_id": None,
                "title": "AL:ICE Space War",
                "series_id": 5,
                "series_index": 4,
                "confidence": 0.95,
                "evidence": [evidence_note],
            },
        ),
    ]
    install_fake_ollama(monkeypatch, scripted_responses)

    resolved = standard_book_metadata(
        "ALICE_Space_War_ALICE_Series_Book_4-LC_64_22050_stereo.aax",
        {"album": tagged_title, "artist": "Charles Lamb", "title": tagged_title},
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )

    assert resolved == StandardBookMetadata(
        author_id=5,
        author="charles_lamb",
        book_id=1,
        title="al-ice-space-war",
        series_id=5,
        series="al_ice_series",
        series_index=4,
        confidence=0.95,
        needs_review=False,
        evidence=[evidence_note],
    )

    # Check the persisted rows while the session is still open.
    with Session(audiobook_engine) as db:
        author_row = db.get(AudiobookAuthor, 5)
        series_row = db.get(AudiobookSeries, 5)
        book_row = db.get(Audiobook, 1)
        assert author_row.name == "charles_lamb"
        assert (series_row.name, series_row.author_id) == ("al_ice_series", author_row.id)
        assert (book_row.title, book_row.author_id, book_row.series_id) == (
            "al-ice-space-war",
            author_row.id,
            series_row.id,
        )
 def test_convert_aax_file_with_agent_success_renames_temp_output(tmp_path, monkeypatch) -> None:
    """Expect confident metadata to place the converted file at its final path.

    ``read_metadata``, ``standard_book_metadata`` and ``convert_aax_file`` are
    replaced with fakes. Afterwards the final ``.m4b`` must hold the fake
    converter's output and no ``converted.m4b`` may remain under the temp
    directory.
    """
    aax_file = tmp_path / "book.aax"
    aax_file.touch()
    library_root = tmp_path / "audiobooks"
    book_name = "glynn_stewart-starships_mage_01-starship-mage"

    def fake_metadata(*_args, **_kwargs):
        return StandardBookMetadata(
            author_id=1,
            author="glynn_stewart",
            book_id=None,
            title="starship-mage",
            series_id=1,
            series="starships_mage",
            series_index=1,
            confidence=0.95,
            needs_review=False,
            evidence=["test"],
        )

    def fake_convert(_source, destination, _activation_bytes, *, overwrite):
        assert overwrite is True
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text("converted", encoding="utf-8")

    monkeypatch.setattr(audible_convert, "read_metadata", lambda _: {"title": "Starship Mage"})
    monkeypatch.setattr(audible_convert, "standard_book_metadata", fake_metadata)
    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)

    audible_convert.convert_aax_file_with_agent(aax_file, conversion_config(library_root))

    final_file = library_root / book_name / f"{book_name}.m4b"
    assert final_file.read_text(encoding="utf-8") == "converted"
    leftover_temp_files = list((library_root / ".audible_convert" / "tmp").glob("*/converted.m4b"))
    assert not leftover_temp_files
 def test_ffprobe_failure_writes_review_without_converting(tmp_path, monkeypatch) -> None:
    """Expect a failing ``read_metadata`` to produce one review file and no conversion.

    The review JSON must carry empty ffprobe metadata, the
    ``ffprobe_failed`` reason and no temp file.
    """
    aax_file = tmp_path / "book.aax"
    library_root = tmp_path / "audiobooks"
    aax_file.touch()
    convert_calls = []

    def fake_read_metadata(_source):
        raise FakeFfprobeError

    def fake_convert(*args, **kwargs):
        # Record every invocation so the test can prove none happened.
        convert_calls.append((args, kwargs))

    monkeypatch.setattr(audible_convert, "read_metadata", fake_read_metadata)
    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)

    audible_convert.convert_aax_file_with_agent(aax_file, conversion_config(library_root))

    review_directory = library_root / ".audible_convert" / "review"
    review_files = list(review_directory.glob("*.json"))
    assert convert_calls == []
    assert len(review_files) == 1

    review = json.loads(review_files[0].read_text(encoding="utf-8"))
    assert review["ffprobe_metadata"] == {}
    assert review["reason"] == "ffprobe_failed: bad ffprobe"
    assert review["temp_file"] is None
 def test_low_confidence_metadata_keeps_temp_output_for_review(tmp_path, monkeypatch) -> None:
    """Expect ``needs_review`` metadata to leave the converted temp file in place.

    With the fake metadata flagged for review, exactly one ``converted.m4b``
    must remain under the temp directory and exactly one review JSON must be
    written.
    """
    aax_file = tmp_path / "book.aax"
    library_root = tmp_path / "audiobooks"
    aax_file.touch()
    work_directory = library_root / ".audible_convert"

    def fake_metadata(*_args, **_kwargs):
        return StandardBookMetadata(
            author_id=0,
            author="unknown_author",
            book_id=None,
            title="unknown-title",
            series_id=None,
            series="standalone",
            series_index=0,
            confidence=0.25,
            needs_review=True,
            evidence=["unclear"],
        )

    def fake_convert(_source, destination, _activation_bytes, *, overwrite):
        assert overwrite is True
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text("converted", encoding="utf-8")

    monkeypatch.setattr(audible_convert, "read_metadata", lambda _: {"title": "Unknown"})
    monkeypatch.setattr(audible_convert, "standard_book_metadata", fake_metadata)
    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)

    audible_convert.convert_aax_file_with_agent(aax_file, conversion_config(library_root))

    kept_temp_files = list((work_directory / "tmp").glob("*/converted.m4b"))
    review_files = list((work_directory / "review").glob("*.json"))
    assert len(kept_temp_files) == 1
    assert kept_temp_files[0].read_text(encoding="utf-8") == "converted"
    assert len(review_files) == 1
 def test_existing_destination_skips_rename_and_removes_temp(tmp_path, monkeypatch) -> None:
    """Expect an existing final file to be left untouched.

    The final ``.m4b`` is created with the text ``existing`` before the
    conversion runs. Afterwards it must still read ``existing`` and no
    ``converted.m4b`` may remain under the temp directory.
    """
    aax_file = tmp_path / "book.aax"
    library_root = tmp_path / "audiobooks"
    aax_file.touch()

    book_name = "glynn_stewart-starships_mage_01-starship-mage"
    existing_file = library_root / book_name / f"{book_name}.m4b"
    existing_file.parent.mkdir(parents=True)
    existing_file.write_text("existing", encoding="utf-8")

    def fake_metadata(*_args, **_kwargs):
        return StandardBookMetadata(
            author_id=1,
            author="glynn_stewart",
            book_id=None,
            title="starship-mage",
            series_id=1,
            series="starships_mage",
            series_index=1,
            confidence=0.95,
            needs_review=False,
            evidence=["test"],
        )

    def fake_convert(_source, destination, _activation_bytes, *, overwrite):
        assert overwrite is True
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text("converted", encoding="utf-8")

    monkeypatch.setattr(audible_convert, "read_metadata", lambda _: {"title": "Starship Mage"})
    monkeypatch.setattr(audible_convert, "standard_book_metadata", fake_metadata)
    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)

    audible_convert.convert_aax_file_with_agent(aax_file, conversion_config(library_root))

    assert existing_file.read_text(encoding="utf-8") == "existing"
    leftover_temp_files = list((library_root / ".audible_convert" / "tmp").glob("*/converted.m4b"))
    assert not leftover_temp_files
 def test_richie_exports_audiobook_models() -> None:
    """Check that ``Audiobook`` is importable from ``python.orm.richie`` with the expected table name."""
    # Imported inside the test so an import failure is reported against this test.
    from python.orm.richie import Audiobook  # noqa: PLC0415

    table_name = Audiobook.__tablename__
    assert table_name == "audiobook"
 def test_main_dry_run_prints_outputs_without_converting(tmp_path, monkeypatch, capsys) -> None:
    """Expect ``main(..., dry_run=True)`` to print the planned path and convert nothing.

    Stdout must be exactly ``<source> -> <destination>`` followed by a
    newline, the fake converter must never be called, and the log directory
    must exist afterwards.
    """
    input_directory = tmp_path / "raw"
    output_directory = tmp_path / "audiobooks"
    input_directory.mkdir()
    source = input_directory / "book.aax"
    source.touch()
    convert_calls = []

    def fake_read_metadata(_):
        return {
            "artist": "Charles Lamb",
            "title": "Alice: Alice Series #1",
        }

    def fake_convert(*args, **kwargs):
        convert_calls.append((args, kwargs))

    def fake_metadata(*_args, **_kwargs):
        return StandardBookMetadata(
            author_id=1,
            author="charles_lamb",
            book_id=None,
            title="alice",
            series_id=1,
            series="alice",
            series_index=1,
            confidence=0.95,
            needs_review=False,
            evidence=["test"],
        )

    def fake_get_postgres_engine(*, name):
        assert name == "RICHIE"
        return create_engine("sqlite+pysqlite:///:memory:")

    monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
    monkeypatch.setattr(audible_convert, "read_metadata", fake_read_metadata)
    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)
    monkeypatch.setattr(audible_convert, "standard_book_metadata", fake_metadata)
    monkeypatch.setattr(audible_convert, "get_postgres_engine", fake_get_postgres_engine)

    audible_convert.main(input_directory, output_directory, dry_run=True)

    planned_destination = output_directory / "charles_lamb-alice_01-alice" / "charles_lamb-alice_01-alice.m4b"
    assert convert_calls == []
    printed = capsys.readouterr().out
    assert printed == f"{source} -> {planned_destination}\n"
    assert (output_directory / ".audible_convert" / "logs").is_dir()
 def test_main_reads_activation_bytes_from_env(tmp_path, monkeypatch) -> None:
    """Expect ``main`` to pass ``AUDIBLE_ACTIVATION_BYTES`` through to the conversion config.

    ``convert_aax_file_with_agent`` is replaced with a fake that records the
    config it receives. The single recorded config must carry the
    environment-provided activation bytes and API key, with ``dry_run`` and
    ``overwrite`` both ``False``.
    """
    input_directory = tmp_path / "raw"
    output_directory = tmp_path / "audiobooks"
    input_directory.mkdir()
    source = input_directory / "book.aax"
    source.touch()
    configs = []

    def fake_convert(_source, config):
        configs.append(config)

    def fake_get_postgres_engine(*, name):
        assert name == "RICHIE"
        return sqlite_engine()

    monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
    monkeypatch.setenv("AUDIBLE_ACTIVATION_BYTES", "activation-secret")
    monkeypatch.setattr(audible_convert, "get_postgres_engine", fake_get_postgres_engine)
    monkeypatch.setattr(audible_convert, "convert_aax_file_with_agent", fake_convert)

    audible_convert.main(input_directory, output_directory)

    # The expected value below borrows agent_config and engine from configs[0].
    # Without this guard, a run that never calls the converter would fail with
    # IndexError instead of a readable assertion failure.
    assert len(configs) == 1
    assert configs == [
        audible_convert.ConversionConfig(
            resolved_output=output_directory,
            ollama_api_key="test-key",
            agent_config=configs[0].agent_config,
            engine=configs[0].engine,
            activation_bytes="activation-secret",
            dry_run=False,
            overwrite=False,
        ),
    ]
@@ -0,0 +1,126 @@
 """test_audiobook_catalog."""
 from __future__ import annotations
 import pytest
 from sqlalchemy import create_engine, select
 from sqlalchemy.orm import sessionmaker
 from python.orm.richie import AudiobookAuthor, AudiobookSeries, RichieBase
 from python.tools.audiobook import catalog
@pytest.fixture
def audiobook_session():
    """Yield a session bound to an in-memory SQLite engine with the ``RichieBase`` tables created.

    The engine is disposed once the session context has exited.
    """
    memory_engine = create_engine("sqlite+pysqlite:///:memory:", future=True)
    RichieBase.metadata.create_all(memory_engine)
    session_factory = sessionmaker(bind=memory_engine, expire_on_commit=False, future=True)
    with session_factory() as session:
        yield session
    memory_engine.dispose()
 def test_upsert_catalog_csv_inserts_and_updates_authors_and_series(tmp_path, audiobook_session) -> None:
    """Import author and series CSVs over a catalog that already holds two authors.

    Authors 10 and 11 are seeded first. The test expects author 10 to be
    renamed by its id row, ``craig_alanson`` to stay at id 11,
    ``glynn_stewart`` to be added as id 12, and both series to be linked to
    their named authors.
    """
    session = audiobook_session
    seeded_authors = [
        AudiobookAuthor(id=10, name="old_author"),
        AudiobookAuthor(id=11, name="craig_alanson"),
    ]
    session.add_all(seeded_authors)
    session.commit()

    authors_csv = tmp_path / "authors.csv"
    series_csv = tmp_path / "series.csv"
    author_lines = ("name,id\n", "glynn_stewart,\n", "craig_alanson,\n", "updated_author,10\n")
    series_lines = (
        "name,author_name,id\n",
        "starships_mage,glynn_stewart,\n",
        "expeditionary_force,craig_alanson,\n",
    )
    authors_csv.write_text("".join(author_lines), encoding="utf-8")
    series_csv.write_text("".join(series_lines), encoding="utf-8")

    author_count = catalog.upsert_authors_from_csv(session, authors_csv)
    series_count = catalog.upsert_series_from_csv(session, series_csv)
    session.commit()

    stored_authors = session.scalars(select(AudiobookAuthor).order_by(AudiobookAuthor.id)).all()
    stored_series = session.scalars(select(AudiobookSeries).order_by(AudiobookSeries.name)).all()

    assert author_count == 3
    assert series_count == 2
    author_pairs = [(entry.id, entry.name) for entry in stored_authors]
    assert author_pairs == [
        (10, "updated_author"),
        (11, "craig_alanson"),
        (12, "glynn_stewart"),
    ]
    series_pairs = [(entry.name, entry.author.name) for entry in stored_series]
    assert series_pairs == [
        ("expeditionary_force", "craig_alanson"),
        ("starships_mage", "glynn_stewart"),
    ]
 def test_upsert_series_csv_updates_series_by_id(tmp_path, audiobook_session) -> None:
    """Expect a series CSV row with id 7 to rename the existing series 7."""
    session = audiobook_session
    owner = AudiobookAuthor(id=1, name="glynn_stewart")
    existing_series = AudiobookSeries(id=7, name="old_series", author=owner)
    session.add_all([owner, existing_series])
    session.commit()

    series_csv = tmp_path / "series.csv"
    csv_lines = ("name,author_name,id\n", "starships_mage,glynn_stewart,7\n")
    series_csv.write_text("".join(csv_lines), encoding="utf-8")

    count = catalog.upsert_series_from_csv(session, series_csv)
    session.commit()

    updated = session.get(AudiobookSeries, 7)
    assert count == 1
    assert updated.name == "starships_mage"
    assert updated.author.name == "glynn_stewart"
 def test_upsert_csv_allows_missing_id_column(tmp_path, audiobook_session) -> None:
    """Expect author and series CSVs without an ``id`` column to import one row each."""
    session = audiobook_session
    authors_csv = tmp_path / "authors.csv"
    series_csv = tmp_path / "series.csv"
    authors_csv.write_text("".join(("name\n", "glynn_stewart\n")), encoding="utf-8")
    series_csv.write_text(
        "".join(("name,author_name\n", "starships_mage,glynn_stewart\n")),
        encoding="utf-8",
    )

    author_count = catalog.upsert_authors_from_csv(session, authors_csv)
    series_count = catalog.upsert_series_from_csv(session, series_csv)
    session.commit()

    stored_series = session.scalar(select(AudiobookSeries))
    assert author_count == 1
    assert series_count == 1
    assert stored_series.name == "starships_mage"
    assert stored_series.author.name == "glynn_stewart"
 def test_upsert_series_csv_rejects_unknown_author(tmp_path, audiobook_session) -> None:
    """Expect ``CatalogImportError`` when a series row names an author that was never added."""
    series_csv = tmp_path / "series.csv"
    csv_lines = ("name,author_name,id\n", "starships_mage,glynn_stewart,\n")
    series_csv.write_text("".join(csv_lines), encoding="utf-8")

    with pytest.raises(catalog.CatalogImportError) as raised:
        catalog.upsert_series_from_csv(audiobook_session, series_csv)

    message = str(raised.value)
    assert "author not found: glynn_stewart" in message