built workflow

This commit is contained in:
2026-06-06 15:20:57 -04:00
parent b6395ef18f
commit 1ffc48bb02
5 changed files with 2604 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
"""Audiobook tools."""
+428
View File
@@ -0,0 +1,428 @@
"""Convert Audible AAX downloads into Audiobookshelf-friendly M4B files."""
from __future__ import annotations

import json
import logging
import shutil
import subprocess
from concurrent.futures import ThreadPoolExecutor
from dataclasses import asdict, dataclass, field
from os import getenv
from pathlib import Path  # noqa: TC003 This is required for the typer CLI
from typing import TYPE_CHECKING, Annotated, Any
from uuid import uuid7

import typer

from python.common import configure_logger
from python.orm.common import get_postgres_engine
from python.tools.audiobook.metadata_agent import (
    AgentConfig,
    StandardBookMetadata,
    standard_book_metadata,
    write_agent_log,
)

if TYPE_CHECKING:
    from sqlalchemy.engine import Engine
logger = logging.getLogger(__name__)
SENSITIVE_COMMAND_ARGUMENTS = {"-activation_bytes"}
@dataclass(frozen=True)
class ConversionConfig:
    """Runtime settings for one conversion command.

    The two credential fields are excluded from the generated ``repr`` so that
    logging or printing a config object cannot leak them.
    """

    # Root directory that final M4B files and the work directory live under.
    resolved_output: Path
    # API key handed to the metadata agent; secret, so hidden from repr().
    ollama_api_key: str = field(repr=False)
    # Settings forwarded unchanged to the metadata agent.
    agent_config: AgentConfig
    # Database engine forwarded to the metadata agent.
    engine: Engine
    # Optional Audible activation bytes for ffmpeg; secret, so hidden from repr().
    activation_bytes: str | None = field(repr=False)
    # When true, only resolve and print the planned output path.
    dry_run: bool
    # When true, an existing final M4B may be replaced.
    overwrite: bool
    # Sub-directory of resolved_output holding temp, log and review files.
    work_directory_name: str = ".audible_convert"
    # Sub-directory of the work directory for per-run temporary output.
    temp_directory_name: str = "tmp"
    # Sub-directory of the work directory for per-run agent logs.
    log_directory_name: str = "logs"
    # Sub-directory of the work directory for manual review files.
    review_directory_name: str = "review"
@dataclass(frozen=True)
class ConcurrentConversionResult:
    """Result from running ffmpeg and metadata resolution together."""

    # Resolved metadata, or None when metadata resolution raised.
    metadata: StandardBookMetadata | None
    # Exception raised by the ffmpeg conversion, or None when it succeeded.
    conversion_error: Exception | None
    # Exception raised by metadata resolution, or None when it succeeded.
    metadata_error: Exception | None
class CommandExecutionError(RuntimeError):
    """Command failed without exposing sensitive arguments."""

    def __init__(self, arguments: list[str], returncode: int) -> None:
        """Build the failure message from a redacted copy of the command.

        Args:
            arguments: Command and arguments that were run.
            returncode: Exit status reported by the command.
        """
        self.arguments = tuple(arguments)
        self.returncode = returncode
        # Only the redacted rendering ever reaches the exception message.
        safe_command_line = " ".join(redact_command_arguments(arguments))
        message = f"Command failed with exit code {returncode}: {safe_command_line}"
        super().__init__(message)
def main(
    input_directory: Annotated[Path, typer.Argument(help="Directory audible-cli downloads AAX files into.")],
    output_directory: Annotated[Path, typer.Argument(help="Audiobook output directory.")],
    *,
    dry_run: Annotated[bool, typer.Option("--dry-run", help="Print planned output files without converting.")] = False,
    overwrite: Annotated[bool, typer.Option("--overwrite", help="Overwrite existing M4B files.")] = False,
) -> None:
    """Convert AAX files from a download directory into M4B files.

    Args:
        input_directory: Directory searched (non-recursively) for ``*.aax`` files.
        output_directory: Directory that receives converted audiobooks.
        dry_run: Only print the planned output files.
        overwrite: Replace M4B files that already exist.

    Raises:
        RuntimeError: If the OLLAMA_API_KEY environment variable is unset or empty.
    """
    configure_logger()
    resolved_input = input_directory.resolve(strict=True)
    resolved_output = output_directory.resolve()

    # Validate required configuration before creating anything on disk so a
    # misconfigured run leaves no empty output directory behind.
    ollama_api_key = getenv("OLLAMA_API_KEY")
    if not ollama_api_key:
        msg = "OLLAMA_API_KEY is required for audiobook metadata resolution"
        raise RuntimeError(msg)

    if not dry_run:
        resolved_output.mkdir(parents=True, exist_ok=True)

    config = ConversionConfig(
        resolved_output=resolved_output,
        ollama_api_key=ollama_api_key,
        agent_config=AgentConfig(),
        engine=get_postgres_engine(name="RICHIE"),
        activation_bytes=getenv("AUDIBLE_ACTIVATION_BYTES"),
        dry_run=dry_run,
        overwrite=overwrite,
    )

    # Sorted so files are processed in a stable order.
    aax_files = sorted(resolved_input.glob("*.aax"))
    if not aax_files:
        logger.info("No AAX files found in %s", resolved_input)
        return

    for aax_file in aax_files:
        logger.info("Converting %s", aax_file)
        convert_aax_file_with_agent(aax_file, config)
def run_command(arguments: list[str], *, capture: bool = False) -> subprocess.CompletedProcess[str]:
    """Run a command and return the completed process.

    Args:
        arguments: Command and arguments to run.
        capture: Whether to capture stdout and stderr.

    Returns:
        The completed process.

    Raises:
        CommandExecutionError: If the command exits with a non-zero status.
    """
    logger.debug("%s", " ".join(redact_command_arguments(arguments)))
    try:
        return subprocess.run(arguments, check=True, capture_output=capture, text=True)
    except subprocess.CalledProcessError as error:
        # CalledProcessError renders the full, unredacted command line in its
        # message. Chaining it as the cause would print that text (including
        # any activation bytes) whenever a traceback is logged, so the chain
        # is suppressed and only the redacted error is surfaced.
        raise CommandExecutionError(arguments, error.returncode) from None
def redact_command_arguments(arguments: list[str]) -> list[str]:
    """Return a copy of the arguments with sensitive option values masked.

    The value that directly follows any option listed in
    SENSITIVE_COMMAND_ARGUMENTS is replaced with ``<redacted>``; every other
    argument is copied through unchanged.
    """
    sanitized: list[str] = []
    previous_was_sensitive = False
    for token in arguments:
        if previous_was_sensitive:
            # This token is the value of a sensitive option: mask it.
            sanitized.append("<redacted>")
            previous_was_sensitive = False
        else:
            sanitized.append(token)
            previous_was_sensitive = token in SENSITIVE_COMMAND_ARGUMENTS
    return sanitized
def read_metadata(aax_file: Path) -> dict[str, str]:
    """Read ffprobe format tags from an AAX file.

    Args:
        aax_file: AAX file to inspect.

    Returns:
        Lower-cased metadata tag names mapped to their values. Empty when
        ffprobe reports no format tags.
    """
    ffprobe_command = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_format",
        str(aax_file),
    ]
    probe = run_command(ffprobe_command, capture=True)
    parsed: dict[str, Any] = json.loads(probe.stdout)
    format_section = parsed.get("format", {})
    raw_tags = format_section.get("tags", {})
    return {str(name).lower(): str(text) for name, text in raw_tags.items()}
def output_stem(metadata: StandardBookMetadata) -> str:
    """Build the output stem for a book.

    Args:
        metadata: Book metadata.

    Returns:
        Output stem in author-series_01-title form; the series index is
        zero-padded to at least two digits.
    """
    series_part = f"{metadata.series}_{metadata.series_index:02}"
    return f"{metadata.author}-{series_part}-{metadata.title}"
def metadata_output_path(output_directory: Path, metadata: StandardBookMetadata) -> Path:
    """Build the final M4B path from resolved metadata.

    The book gets its own directory named after the output stem, and the M4B
    file inside it carries the same stem.
    """
    book_name = output_stem(metadata)
    book_directory = output_directory / book_name
    return book_directory / f"{book_name}.m4b"
def convert_aax_file(
    aax_file: Path,
    destination: Path,
    activation_bytes: str | None,
    *,
    overwrite: bool,
) -> None:
    """Convert an AAX file into an M4B file.

    Streams are copied without re-encoding and the source metadata is mapped
    onto the output.

    Args:
        aax_file: Source AAX file.
        destination: Destination M4B file.
        activation_bytes: Optional Audible activation bytes for ffmpeg.
        overwrite: Whether to overwrite an existing M4B.
    """
    if destination.exists() and not overwrite:
        logger.info("Skipping existing file %s", destination)
        return

    destination.parent.mkdir(parents=True, exist_ok=True)

    # ffmpeg itself is told whether it may replace an existing output (-y / -n).
    overwrite_flag = "-y" if overwrite else "-n"
    decryption_arguments = ["-activation_bytes", activation_bytes] if activation_bytes else []
    command = [
        "ffmpeg",
        "-hide_banner",
        overwrite_flag,
        *decryption_arguments,
        "-i",
        str(aax_file),
        "-map_metadata",
        "0",
        "-c",
        "copy",
        str(destination),
    ]
    run_command(command)
def write_review_file(
    *,
    destination: Path | None,
    ffprobe_metadata: dict[str, str],
    log_file: Path,
    metadata: StandardBookMetadata | None,
    reason: str,
    review_file: Path,
    source: Path,
    temp_file: Path | None,
) -> None:
    """Write a manual review file for an unresolved conversion.

    The review file is a key-sorted, indented JSON document describing the
    source, whatever metadata was available, and the reason for review. A
    ``review_written`` event is appended to the agent log afterwards.
    """
    review_file.parent.mkdir(parents=True, exist_ok=True)

    # Paths and the metadata dataclass are converted to JSON-friendly values.
    destination_text = str(destination) if destination else None
    temp_file_text = str(temp_file) if temp_file else None
    metadata_fields = asdict(metadata) if metadata else None
    review_record = {
        "destination": destination_text,
        "ffprobe_metadata": ffprobe_metadata,
        "metadata": metadata_fields,
        "reason": reason,
        "source": str(source),
        "temp_file": temp_file_text,
    }
    serialized = json.dumps(review_record, indent=2, sort_keys=True)
    review_file.write_text(serialized, encoding="utf-8")
    write_agent_log(log_file, "review_written", path=str(review_file), reason=reason)
def cleanup_temp_output(temp_file: Path) -> None:
    """Delete the directory that contains a run's temporary output file.

    Removal is best-effort: errors while deleting are ignored.
    """
    run_directory = temp_file.parent
    shutil.rmtree(run_directory, ignore_errors=True)
def dry_run_aax_file_with_agent(
    aax_file: Path,
    ffprobe_metadata: dict[str, str],
    engine: Engine,
    config: ConversionConfig,
    log_file: Path,
    review_file: Path,
) -> None:
    """Resolve and print the planned output path without converting.

    When the resolved metadata is flagged for review, a review file is
    written and its path is printed instead of a destination.
    """
    resolved = standard_book_metadata(
        aax_file.name,
        ffprobe_metadata,
        engine,
        log_file,
        config.ollama_api_key,
        config.agent_config,
    )

    if not resolved.needs_review:
        planned_path = metadata_output_path(config.resolved_output, resolved)
        typer.echo(f"{aax_file} -> {planned_path}")
        return

    # No destination is planned for metadata that still needs a human look.
    write_review_file(
        destination=None,
        ffprobe_metadata=ffprobe_metadata,
        log_file=log_file,
        metadata=resolved,
        reason="metadata_needs_review",
        review_file=review_file,
        source=aax_file,
        temp_file=None,
    )
    typer.echo(f"{aax_file} -> REVIEW {review_file}")
def convert_temp_file_and_resolve_metadata(
    aax_file: Path,
    temp_file: Path,
    ffprobe_metadata: dict[str, str],
    config: ConversionConfig,
    log_file: Path,
) -> ConcurrentConversionResult:
    """Run ffmpeg and metadata resolution in parallel.

    Exceptions from either task are captured and returned in the result
    rather than raised.
    """
    with ThreadPoolExecutor(max_workers=2) as pool:
        ffmpeg_job = pool.submit(
            convert_aax_file,
            aax_file,
            temp_file,
            config.activation_bytes,
            overwrite=True,
        )
        agent_job = pool.submit(
            standard_book_metadata,
            aax_file.name,
            ffprobe_metadata,
            config.engine,
            log_file,
            config.ollama_api_key,
            config.agent_config,
        )

    # Both jobs are finished once the pool has shut down; exception() returns
    # None for a job that completed normally.
    ffmpeg_failure = ffmpeg_job.exception()
    agent_failure = agent_job.exception()
    resolved = agent_job.result() if agent_failure is None else None

    return ConcurrentConversionResult(
        metadata=resolved,
        conversion_error=ffmpeg_failure,
        metadata_error=agent_failure,
    )
def convert_aax_file_with_agent(aax_file: Path, config: ConversionConfig) -> None:
    """Convert one AAX file using the metadata agent for the final path.

    The file is converted into a per-run temporary directory while the
    metadata agent resolves the book in parallel. On success the temporary
    M4B is moved to its final location. ffprobe, ffmpeg, metadata and rename
    failures are recorded in a JSON review file instead of being raised, in
    both normal and dry-run mode, so one bad book does not stop a batch.

    Args:
        aax_file: Source AAX file.
        config: Runtime settings for this conversion command.
    """
    run_id = uuid7().hex
    work_directory = config.resolved_output / config.work_directory_name
    log_file = work_directory / config.log_directory_name / f"{run_id}.jsonl"
    review_file = work_directory / config.review_directory_name / f"{run_id}.json"
    write_agent_log(log_file, "conversion_start", source=str(aax_file), dry_run=config.dry_run)

    def request_review(
        reason: str,
        *,
        ffprobe_metadata: dict[str, str],
        metadata: StandardBookMetadata | None = None,
        temp_file: Path | None = None,
        destination: Path | None = None,
    ) -> None:
        """Write this run's review file with the shared source and log paths."""
        write_review_file(
            destination=destination,
            ffprobe_metadata=ffprobe_metadata,
            log_file=log_file,
            metadata=metadata,
            reason=reason,
            review_file=review_file,
            source=aax_file,
            temp_file=temp_file,
        )

    try:
        ffprobe_metadata = read_metadata(aax_file)
    except Exception as error:
        logger.exception("ffprobe failed")
        request_review(f"ffprobe_failed: {error}", ffprobe_metadata={})
        return

    if config.dry_run:
        try:
            dry_run_aax_file_with_agent(
                aax_file,
                ffprobe_metadata,
                config.engine,
                config,
                log_file,
                review_file,
            )
        except Exception as error:
            # Mirror the real-run behaviour: a metadata failure for one book
            # becomes a review file rather than aborting the whole dry run.
            logger.exception("metadata resolution failed")
            request_review(f"metadata_failed: {error}", ffprobe_metadata=ffprobe_metadata)
        return

    temp_file = work_directory / config.temp_directory_name / run_id / "converted.m4b"
    temp_file.parent.mkdir(parents=True, exist_ok=True)
    result = convert_temp_file_and_resolve_metadata(aax_file, temp_file, ffprobe_metadata, config, log_file)

    if result.conversion_error:
        # ffmpeg may have left a partial file behind; reference it only if present.
        request_review(
            f"ffmpeg_failed: {result.conversion_error}",
            ffprobe_metadata=ffprobe_metadata,
            metadata=result.metadata,
            temp_file=temp_file if temp_file.exists() else None,
        )
        return

    if result.metadata_error:
        request_review(
            f"metadata_failed: {result.metadata_error}",
            ffprobe_metadata=ffprobe_metadata,
            temp_file=temp_file,
        )
        return

    if result.metadata is None or result.metadata.needs_review:
        # The converted temp file is kept so a reviewer can place it manually.
        request_review(
            "metadata_needs_review",
            ffprobe_metadata=ffprobe_metadata,
            metadata=result.metadata,
            temp_file=temp_file,
        )
        return

    destination = metadata_output_path(config.resolved_output, result.metadata)
    if destination.exists() and not config.overwrite:
        write_agent_log(log_file, "destination_exists", destination=str(destination))
        cleanup_temp_output(temp_file)
        return

    destination.parent.mkdir(parents=True, exist_ok=True)
    try:
        temp_file.replace(destination)
    except Exception as error:  # noqa: BLE001
        request_review(
            f"rename_failed: {error}",
            ffprobe_metadata=ffprobe_metadata,
            metadata=result.metadata,
            temp_file=temp_file if temp_file.exists() else None,
            destination=destination,
        )
    else:
        # Only a successful move removes the run's temp directory and is
        # logged as complete.
        cleanup_temp_output(temp_file)
        write_agent_log(log_file, "conversion_complete", destination=str(destination))
# Run the converter as a typer command-line program when executed directly.
if __name__ == "__main__":
    typer.run(main)
File diff suppressed because it is too large Load Diff
+969
View File
@@ -0,0 +1,969 @@
"""test_audible_convert."""
from __future__ import annotations
import json
import subprocess
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
from python.orm.richie import Audiobook, AudiobookAuthor, AudiobookSeries, RichieBase
from python.tools.audiobook import audible_convert, metadata_agent
from python.tools.audiobook.metadata_agent import StandardBookMetadata, standard_book_metadata
class FakeOllamaResponse:
    """Minimal stand-in for an HTTP response that returns a canned JSON payload."""

    def __init__(self, payload):
        """Remember the payload that json() will hand back."""
        self._payload = payload

    def raise_for_status(self):
        """Do nothing: the fake response never represents an HTTP error."""

    def json(self):
        """Return the canned payload unchanged."""
        canned = self._payload
        return canned
class FakeFfprobeError(RuntimeError):
    """Error with a fixed message, used to simulate an ffprobe failure."""

    def __str__(self):
        """Return the fixed failure text regardless of constructor arguments."""
        message = "bad ffprobe"
        return message
@pytest.fixture
def audiobook_engine():
    """Yield an in-memory SQLite engine seeded with three authors and four series.

    The engine is disposed after the test finishes.
    """
    engine = create_engine("sqlite+pysqlite:///:memory:", future=True)
    RichieBase.metadata.create_all(engine)
    with sessionmaker(bind=engine, expire_on_commit=False, future=True)() as session:
        session.add_all(
            [
                # NOTE(review): author id 3 is absent; tests in this file expect
                # the next created author to receive id 5 - confirm the gap is
                # deliberate.
                AudiobookAuthor(id=1, name="glynn_stewart"),
                AudiobookAuthor(id=2, name="craig_alanson"),
                AudiobookAuthor(id=4, name="dennis_e_taylor"),
                AudiobookSeries(id=1, name="starships_mage", author_id=1),
                AudiobookSeries(id=2, name="black_fleet_trilogy", author_id=1),
                AudiobookSeries(id=3, name="expeditionary_force", author_id=2),
                AudiobookSeries(id=4, name="bobiverse", author_id=4),
            ],
        )
        session.commit()
    yield engine
    engine.dispose()
def install_fake_ollama(monkeypatch, payloads):
    """Replace the metadata agent's HTTP post with a scripted fake.

    Each call consumes the next payload from the front of ``payloads`` and is
    recorded as an ``(args, kwargs)`` tuple in the returned list.
    """
    recorded_calls = []

    def fake_post(*args, **kwargs):
        recorded_calls.append((args, kwargs))
        next_payload = payloads.pop(0)
        return FakeOllamaResponse(next_payload)

    monkeypatch.setattr(metadata_agent.httpx, "post", fake_post)
    return recorded_calls
def conversion_config(output_directory, *, dry_run=False, overwrite=False):
    """Build a ConversionConfig for tests with a throwaway in-memory engine.

    No activation bytes are set and the API key is a fixed test value.
    """
    in_memory_engine = create_engine("sqlite+pysqlite:///:memory:")
    default_agent_config = metadata_agent.AgentConfig()
    return audible_convert.ConversionConfig(
        resolved_output=output_directory,
        ollama_api_key="test-key",
        agent_config=default_agent_config,
        engine=in_memory_engine,
        activation_bytes=None,
        dry_run=dry_run,
        overwrite=overwrite,
    )
def sqlite_engine():
    """Return a new in-memory SQLite engine with no tables created."""
    return create_engine("sqlite+pysqlite:///:memory:")
def tool_response(name, arguments):
    """Build a fake assistant message that requests a single tool call."""
    requested_call = {"function": {"name": name, "arguments": arguments}}
    assistant_message = {
        "role": "assistant",
        "content": "",
        "tool_calls": [requested_call],
    }
    return {"message": assistant_message}
def final_response(metadata):
    """Build a fake assistant message whose content is the metadata as JSON."""
    encoded = json.dumps(metadata)
    assistant_message = {"role": "assistant", "content": encoded}
    return {"message": assistant_message}
def fenced_final_response(metadata):
    """Build a fake assistant message with the metadata JSON inside a json code fence."""
    fenced = "```json\n" + json.dumps(metadata) + "\n```"
    assistant_message = {"role": "assistant", "content": fenced}
    return {"message": assistant_message}
def test_output_stem_uses_catalog_slugs() -> None:
    """Check output_stem joins author, series with zero-padded index, and title."""
    metadata = StandardBookMetadata(
        author_id=1,
        author="glynn_stewart",
        book_id=None,
        title="title-slug",
        series_id=1,
        series="starships_mage",
        series_index=1,
        confidence=0.96,
        needs_review=False,
        evidence=["test"],
    )
    assert audible_convert.output_stem(metadata) == "glynn_stewart-starships_mage_01-title-slug"
def test_convert_aax_file_runs_ffmpeg(tmp_path, monkeypatch) -> None:
    """Check the exact ffmpeg argument list and that the destination directory is created."""
    commands = []

    def fake_run_command(arguments, *, capture=False):
        # convert_aax_file is expected to run ffmpeg without capturing output.
        assert capture is False
        commands.append(arguments)
        return subprocess.CompletedProcess(arguments, 0, "", "")

    source = tmp_path / "book.aax"
    destination = tmp_path / "book" / "book.m4b"
    monkeypatch.setattr(audible_convert, "run_command", fake_run_command)
    audible_convert.convert_aax_file(source, destination, "abc123", overwrite=False)
    assert commands == [
        [
            "ffmpeg",
            "-hide_banner",
            "-n",
            "-activation_bytes",
            "abc123",
            "-i",
            str(source),
            "-map_metadata",
            "0",
            "-c",
            "copy",
            str(destination),
        ],
    ]
    assert destination.parent.is_dir()
def test_run_command_redacts_activation_bytes_in_logs_and_errors(monkeypatch, caplog) -> None:
    """Check the activation bytes value never appears in debug logs or the error text."""

    def fake_run(arguments, *, check, capture_output, text):
        assert check is True
        assert capture_output is False
        assert text is True
        # Simulate a failing command so run_command has to raise its own error.
        raise subprocess.CalledProcessError(1, arguments)

    monkeypatch.setattr(audible_convert.subprocess, "run", fake_run)
    caplog.set_level("DEBUG", audible_convert.__name__)
    with pytest.raises(audible_convert.CommandExecutionError) as error:
        audible_convert.run_command(["ffmpeg", "-activation_bytes", "secret-token", "-i", "book.aax"])
    assert "secret-token" not in caplog.text
    assert "secret-token" not in str(error.value)
    assert "<redacted>" in caplog.text
    assert "<redacted>" in str(error.value)
def test_write_agent_log_serializes_metadata_as_json_object(tmp_path) -> None:
    """Check write_agent_log stores metadata as a JSON object and a path as a string."""
    metadata = StandardBookMetadata(
        author_id=1,
        author="glynn_stewart",
        book_id=None,
        title="starship-mage",
        series_id=1,
        series="starships_mage",
        series_index=1,
        confidence=0.95,
        needs_review=False,
        evidence=["test"],
    )
    log_file = tmp_path / "agent.jsonl"
    metadata_agent.write_agent_log(log_file, "final_metadata", metadata=metadata, path=tmp_path)
    # Only one record was written, so the whole file parses as a single JSON value.
    record = json.loads(log_file.read_text(encoding="utf-8"))
    assert record["event"] == "final_metadata"
    assert record["metadata"]["author"] == "glynn_stewart"
    assert record["metadata"]["title"] == "starship-mage"
    assert record["path"] == str(tmp_path)
def test_standard_book_metadata_accepts_valid_tool_output(tmp_path, monkeypatch, audiobook_engine) -> None:
    """Check a valid final answer yields catalog slugs, logged LLM traffic, and a stored book."""
    install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Glynn Stewart"}),
            tool_response("search_series", {"query": "starships_mage"}),
            final_response(
                {
                    "author_id": 1,
                    "book_id": None,
                    "title": "starship-mage",
                    "series_id": 1,
                    "series_index": 1,
                    "confidence": 0.95,
                    "evidence": ["filename and catalog match"],
                },
            ),
        ],
    )
    metadata = standard_book_metadata(
        "Starship Mage.aax",
        {"title": "Starship Mage", "artist": "Glynn Stewart"},
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )
    # The model answered with book_id None; the result is expected to carry book_id 1.
    assert metadata == StandardBookMetadata(
        author_id=1,
        author="glynn_stewart",
        book_id=1,
        title="starship-mage",
        series_id=1,
        series="starships_mage",
        series_index=1,
        confidence=0.95,
        needs_review=False,
        evidence=["filename and catalog match"],
    )
    records = [
        json.loads(line)
        for line in (tmp_path / "agent.jsonl").read_text(encoding="utf-8").splitlines()
    ]
    sent = [record for record in records if record["event"] == "llm_messages_sent"]
    received = [record for record in records if record["event"] == "llm_message_received"]
    assert sent[0]["messages"][0]["role"] == "system"
    assert "Starship Mage" in sent[0]["messages"][1]["content"]
    assert received[0]["message"]["tool_calls"][0]["function"]["name"] == "search_authors"
    with Session(audiobook_engine) as session:
        book = session.get(Audiobook, 1)
        assert book.title == "starship-mage"
        assert book.author.name == "glynn_stewart"
def test_standard_book_metadata_uses_agent_config(tmp_path, monkeypatch, audiobook_engine) -> None:
    """Check custom AgentConfig values reach the HTTP request and the confidence check."""
    config = metadata_agent.AgentConfig(
        model="custom-model",
        ollama_chat_url="https://ollama.example.test/api/chat",
        http_timeout_seconds=12,
        max_agent_turns=1,
        min_confidence=0.5,
        tool_names=("search_authors",),
    )
    calls = install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Glynn Stewart"}),
            final_response(
                {
                    "author_id": 1,
                    "book_id": None,
                    "title": "standalone-book",
                    "series_id": None,
                    "series_index": 0,
                    "confidence": 0.5,
                    "evidence": ["custom config"],
                },
            ),
        ],
    )
    metadata = standard_book_metadata(
        "Standalone Book.aax",
        {"title": "Standalone Book", "artist": "Glynn Stewart"},
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=config,
    )
    # Each recorded call is (args, kwargs); the URL is the first positional argument.
    first_request_url = calls[0][0][0]
    first_request_options = calls[0][1]
    tool_names = [
        tool_schema["function"]["name"]
        for tool_schema in first_request_options["json"]["tools"]
    ]
    assert first_request_url == "https://ollama.example.test/api/chat"
    assert first_request_options["timeout"] == 12
    assert first_request_options["json"]["model"] == "custom-model"
    assert tool_names == ["search_authors"]
    assert metadata.needs_review is False
    assert metadata.series == "standalone"
def test_standard_book_metadata_retries_invalid_json_then_needs_review(
    tmp_path,
    monkeypatch,
    audiobook_engine,
) -> None:
    """Check two consecutive unparseable final answers end in needs_review with zero confidence."""
    install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Glynn Stewart"}),
            tool_response("search_series", {"query": "Starship Mage"}),
            # "{" is not valid JSON; it is scripted twice.
            {"message": {"role": "assistant", "content": "{"}},
            {"message": {"role": "assistant", "content": "{"}},
        ],
    )
    metadata = standard_book_metadata(
        "Starship Mage.aax",
        {"title": "Starship Mage"},
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )
    assert metadata.needs_review is True
    assert metadata.confidence == 0
def test_standard_book_metadata_accepts_fenced_final_json(
    tmp_path,
    monkeypatch,
    audiobook_engine,
) -> None:
    """Check a final answer wrapped in a json code fence is still accepted."""
    install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Dennis E. Taylor"}),
            tool_response("search_series", {"query": "Bobiverse", "author_id": 4}),
            tool_response("search_books", {"query": "All These Worlds", "author_id": 4, "series_id": 4}),
            fenced_final_response(
                {
                    "author_id": 4,
                    "book_id": None,
                    "title": "all-these-worlds",
                    "series_id": 4,
                    "series_index": 3,
                    "confidence": 0.95,
                    "evidence": ["fenced json from model"],
                },
            ),
        ],
    )
    metadata = standard_book_metadata(
        "All These Worlds.aax",
        {"title": "All These Worlds: Bobiverse, Book 3", "artist": "Dennis E. Taylor"},
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )
    assert metadata.needs_review is False
    assert metadata.author == "dennis_e_taylor"
    assert metadata.series == "bobiverse"
    assert metadata.title == "all-these-worlds"
def test_standard_book_metadata_recovers_from_tool_validation_error(
    tmp_path,
    monkeypatch,
    audiobook_engine,
) -> None:
    """Check the agent can finish as a standalone book after a tool call is rejected."""
    install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Cormac McCarthy"}),
            tool_response("ensure_author", {"name": "Cormac McCarthy"}),
            tool_response("ensure_series", {"name": "The Cormac McCarthy Collection", "author_id": 5}),
            # A series book with series_index 0; the log is later checked for
            # the matching validation message.
            tool_response(
                "ensure_book",
                {
                    "title": "The Road",
                    "author_id": 5,
                    "series_id": 5,
                    "series_index": 0,
                },
            ),
            final_response(
                {
                    "author_id": 5,
                    "book_id": None,
                    "title": "The Road",
                    "series_id": None,
                    "series_index": 0,
                    "confidence": 0.9,
                    "evidence": ["tool error showed this should be standalone"],
                },
            ),
        ],
    )
    log_file = tmp_path / "agent.jsonl"
    metadata = standard_book_metadata(
        "The Road.aax",
        {"title": "The Road", "artist": "Cormac McCarthy"},
        audiobook_engine,
        log_file,
        "test-key",
        config=metadata_agent.AgentConfig(),
    )
    assert metadata == StandardBookMetadata(
        author_id=5,
        author="cormac_mccarthy",
        book_id=1,
        title="the-road",
        series_id=None,
        series="standalone",
        series_index=0,
        confidence=0.9,
        needs_review=False,
        evidence=["tool error showed this should be standalone"],
    )
    assert "series books must use a positive series_index" in log_file.read_text(encoding="utf-8")
    with Session(audiobook_engine) as session:
        # No series row with id 5 is expected to remain in the catalog.
        assert session.get(AudiobookSeries, 5) is None
        book = session.get(Audiobook, 1)
        assert book.title == "the-road"
        assert book.series_id is None
def test_standard_book_metadata_rejects_unknown_tool(tmp_path, monkeypatch, audiobook_engine) -> None:
    """Check a call to an unknown tool name ends in needs_review and is logged as a tool error."""
    log_file = tmp_path / "agent.jsonl"
    install_fake_ollama(monkeypatch, [tool_response("drop_table", {})])
    metadata = standard_book_metadata(
        "Book.aax",
        {"title": "Book"},
        audiobook_engine,
        log_file,
        "test-key",
        config=metadata_agent.AgentConfig(),
    )
    assert metadata.needs_review is True
    assert "Unknown audiobook metadata tool" in metadata.evidence[0]
    assert "tool_error" in log_file.read_text(encoding="utf-8")
def test_standard_book_metadata_rejects_ids_not_returned_by_tools(
    tmp_path,
    monkeypatch,
    audiobook_engine,
) -> None:
    """Check a final answer citing an author_id no tool returned ends in needs_review."""
    install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Glynn Stewart"}),
            tool_response("search_series", {"query": "Starship Mage"}),
            # The same invalid final answer is scripted twice.
            final_response(
                {
                    "author_id": 2,
                    "book_id": None,
                    "title": "expeditionary-force",
                    "series_id": 1,
                    "series_index": 1,
                    "confidence": 0.99,
                    "evidence": ["bad id"],
                },
            ),
            final_response(
                {
                    "author_id": 2,
                    "book_id": None,
                    "title": "expeditionary-force",
                    "series_id": 1,
                    "series_index": 1,
                    "confidence": 0.99,
                    "evidence": ["bad id"],
                },
            ),
        ],
    )
    metadata = standard_book_metadata(
        "Book.aax",
        {"title": "Book"},
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )
    assert metadata.needs_review is True
    assert "author_id 2 was not returned" in metadata.evidence[0]
def test_standard_book_metadata_rejects_series_for_wrong_author(
    tmp_path,
    monkeypatch,
    audiobook_engine,
) -> None:
    """Check a final answer pairing a series with a different author ends in needs_review."""
    install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Glynn Stewart"}),
            tool_response("search_series", {"query": "expeditionary_force"}),
            # Series 3 is seeded under author 2, but the answer claims author 1.
            final_response(
                {
                    "author_id": 1,
                    "book_id": None,
                    "title": "expeditionary-force",
                    "series_id": 3,
                    "series_index": 1,
                    "confidence": 0.99,
                    "evidence": ["wrong author"],
                },
            ),
            final_response(
                {
                    "author_id": 1,
                    "book_id": None,
                    "title": "expeditionary-force",
                    "series_id": 3,
                    "series_index": 1,
                    "confidence": 0.99,
                    "evidence": ["wrong author"],
                },
            ),
        ],
    )
    metadata = standard_book_metadata(
        "Book.aax",
        {"title": "Book"},
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )
    assert metadata.needs_review is True
    assert "series_id 3 does not belong to author_id 1" in metadata.evidence[0]
def test_standard_book_metadata_forces_final_after_empty_book_searches(
    tmp_path,
    monkeypatch,
    audiobook_engine,
) -> None:
    """Check a final answer is obtained with tools disabled after repeated book searches."""
    config = metadata_agent.AgentConfig(max_agent_turns=5)
    install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Dennis E. Taylor"}),
            tool_response("search_series", {"query": "Bobiverse", "author_id": 4}),
            # The seeded catalog contains no books, so these searches find nothing.
            tool_response("search_books", {"query": "We Are Legion We Are Bob", "author_id": 4, "series_id": 4}),
            tool_response("search_books", {"query": "we are legion", "author_id": 4}),
            tool_response("search_books", {"query": "We Are Legion"}),
            final_response(
                {
                    "author_id": 4,
                    "book_id": None,
                    "title": "we-are-legion-we-are-bob",
                    "series_id": 4,
                    "series_index": 1,
                    "confidence": 0.95,
                    "evidence": ["author and series tool results; title from ffprobe tags"],
                },
            ),
        ],
    )
    metadata = standard_book_metadata(
        "We_Are_Legion_(We_Are_Bob)_Bobiverse_Book_1-LC_128_44100_stereo.aax",
        {
            "album": "We Are Legion (We Are Bob): Bobiverse, Book 1",
            "artist": "Dennis E. Taylor",
            "title": "We Are Legion (We Are Bob): Bobiverse, Book 1",
        },
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=config,
    )
    assert metadata == StandardBookMetadata(
        author_id=4,
        author="dennis_e_taylor",
        book_id=1,
        title="we-are-legion-we-are-bob",
        series_id=4,
        series="bobiverse",
        series_index=1,
        confidence=0.95,
        needs_review=False,
        evidence=["author and series tool results; title from ffprobe tags"],
    )
    # The agent log must show at least one request sent with tools disabled.
    assert '"tools_enabled": false' in (tmp_path / "agent.jsonl").read_text(encoding="utf-8")
def test_standard_book_metadata_can_create_missing_catalog_rows(
    tmp_path,
    monkeypatch,
    audiobook_engine,
) -> None:
    """Check ensure_* tool calls create author and series rows that the final book links to."""
    install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Martha Wells"}),
            tool_response("ensure_author", {"name": "martha_wells"}),
            tool_response("search_series", {"query": "Murderbot Diaries", "author_id": 5}),
            tool_response("ensure_series", {"name": "murderbot_diaries", "author_id": 5}),
            tool_response("search_books", {"query": "All Systems Red", "author_id": 5, "series_id": 5}),
            final_response(
                {
                    "author_id": 5,
                    "book_id": None,
                    "title": "all-systems-red",
                    "series_id": 5,
                    "series_index": 1,
                    "confidence": 0.96,
                    "evidence": ["created missing author and series; title from tags"],
                },
            ),
        ],
    )
    metadata = standard_book_metadata(
        "All Systems Red.aax",
        {"title": "All Systems Red", "artist": "Martha Wells"},
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )
    assert metadata == StandardBookMetadata(
        author_id=5,
        author="martha_wells",
        book_id=1,
        title="all-systems-red",
        series_id=5,
        series="murderbot_diaries",
        series_index=1,
        confidence=0.96,
        needs_review=False,
        evidence=["created missing author and series; title from tags"],
    )
    # Confirm the rows were actually persisted and reference each other.
    with Session(audiobook_engine) as session:
        author = session.get(AudiobookAuthor, 5)
        series = session.get(AudiobookSeries, 5)
        book = session.get(Audiobook, 1)
        assert author.name == "martha_wells"
        assert series.name == "murderbot_diaries"
        assert series.author_id == author.id
        assert book.title == "all-systems-red"
        assert book.author_id == author.id
        assert book.series_id == series.id
def test_standard_book_metadata_normalizes_noisy_created_catalog_rows(
    tmp_path,
    monkeypatch,
    audiobook_engine,
) -> None:
    """Check names with punctuation and mixed case are stored and returned as normalized slugs."""
    install_fake_ollama(
        monkeypatch,
        [
            tool_response("search_authors", {"query": "Charles Lamb"}),
            # The raw names below are deliberately not in the expected slug form.
            tool_response("ensure_author", {"name": "charles-lamb"}),
            tool_response("search_series", {"query": "AL:ICE Series", "author_id": 5}),
            tool_response("ensure_series", {"name": "AL:ICE Series", "author_id": 5}),
            tool_response("search_books", {"query": "AL:ICE Space War", "author_id": 5, "series_id": 5}),
            final_response(
                {
                    "author_id": 5,
                    "book_id": None,
                    "title": "AL:ICE Space War",
                    "series_id": 5,
                    "series_index": 4,
                    "confidence": 0.95,
                    "evidence": ["created normalized author and series; title from tags"],
                },
            ),
        ],
    )
    metadata = standard_book_metadata(
        "ALICE_Space_War_ALICE_Series_Book_4-LC_64_22050_stereo.aax",
        {
            "album": "AL:ICE Space War: AL:ICE Series, Book 4",
            "artist": "Charles Lamb",
            "title": "AL:ICE Space War: AL:ICE Series, Book 4",
        },
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )
    assert metadata == StandardBookMetadata(
        author_id=5,
        author="charles_lamb",
        book_id=1,
        title="al-ice-space-war",
        series_id=5,
        series="al_ice_series",
        series_index=4,
        confidence=0.95,
        needs_review=False,
        evidence=["created normalized author and series; title from tags"],
    )
    with Session(audiobook_engine) as session:
        author = session.get(AudiobookAuthor, 5)
        series = session.get(AudiobookSeries, 5)
        book = session.get(Audiobook, 1)
        assert author.name == "charles_lamb"
        assert series.name == "al_ice_series"
        assert series.author_id == author.id
        assert book.title == "al-ice-space-war"
        assert book.author_id == author.id
        assert book.series_id == series.id
def test_convert_aax_file_with_agent_success_renames_temp_output(tmp_path, monkeypatch) -> None:
    """Check a successful run moves the temp M4B to its final path and leaves no temp file."""
    source = tmp_path / "book.aax"
    output_directory = tmp_path / "audiobooks"
    source.touch()
    monkeypatch.setattr(audible_convert, "read_metadata", lambda _: {"title": "Starship Mage"})
    monkeypatch.setattr(
        audible_convert,
        "standard_book_metadata",
        lambda *_, **__: StandardBookMetadata(
            author_id=1,
            author="glynn_stewart",
            book_id=None,
            title="starship-mage",
            series_id=1,
            series="starships_mage",
            series_index=1,
            confidence=0.95,
            needs_review=False,
            evidence=["test"],
        ),
    )

    def fake_convert(_source, destination, _activation_bytes, *, overwrite):
        # Stand in for ffmpeg by writing a marker file at the temp destination.
        assert overwrite is True
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text("converted", encoding="utf-8")

    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)
    audible_convert.convert_aax_file_with_agent(
        source,
        conversion_config(output_directory),
    )
    expected = output_directory / "glynn_stewart-starships_mage_01-starship-mage"
    destination = expected / "glynn_stewart-starships_mage_01-starship-mage.m4b"
    assert destination.read_text(encoding="utf-8") == "converted"
    assert not list((output_directory / ".audible_convert" / "tmp").glob("*/converted.m4b"))
def test_ffprobe_failure_writes_review_without_converting(tmp_path, monkeypatch) -> None:
    """Check an ffprobe failure produces one review file and never starts a conversion."""
    source = tmp_path / "book.aax"
    output_directory = tmp_path / "audiobooks"
    source.touch()
    calls = []

    def fake_read_metadata(_source):
        raise FakeFfprobeError

    def fake_convert(*args, **kwargs):
        # Record any conversion attempt so the test can assert there were none.
        calls.append((args, kwargs))

    monkeypatch.setattr(audible_convert, "read_metadata", fake_read_metadata)
    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)
    audible_convert.convert_aax_file_with_agent(source, conversion_config(output_directory))
    review_files = list((output_directory / ".audible_convert" / "review").glob("*.json"))
    assert calls == []
    assert len(review_files) == 1
    review = json.loads(review_files[0].read_text(encoding="utf-8"))
    assert review["ffprobe_metadata"] == {}
    assert review["reason"] == "ffprobe_failed: bad ffprobe"
    assert review["temp_file"] is None
def test_low_confidence_metadata_keeps_temp_output_for_review(tmp_path, monkeypatch) -> None:
    """Check that metadata flagged ``needs_review`` leaves the converted temp file plus one review file."""
    aax_file = tmp_path / "book.aax"
    aax_file.touch()
    library_root = tmp_path / "audiobooks"
    work_directory = library_root / ".audible_convert"

    def fake_read_metadata(_):
        return {"title": "Unknown"}

    def fake_standard_book_metadata(*_, **__):
        # Low confidence together with needs_review=True is the case under test.
        return StandardBookMetadata(
            author_id=0,
            author="unknown_author",
            book_id=None,
            title="unknown-title",
            series_id=None,
            series="standalone",
            series_index=0,
            confidence=0.25,
            needs_review=True,
            evidence=["unclear"],
        )

    def fake_convert(_source, destination, _activation_bytes, *, overwrite):
        # Write a marker file where the real converter would write its output.
        assert overwrite is True
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text("converted", encoding="utf-8")

    monkeypatch.setattr(audible_convert, "read_metadata", fake_read_metadata)
    monkeypatch.setattr(audible_convert, "standard_book_metadata", fake_standard_book_metadata)
    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)

    audible_convert.convert_aax_file_with_agent(aax_file, conversion_config(library_root))

    temp_files = list((work_directory / "tmp").glob("*/converted.m4b"))
    review_files = list((work_directory / "review").glob("*.json"))
    assert len(temp_files) == 1
    assert temp_files[0].read_text(encoding="utf-8") == "converted"
    assert len(review_files) == 1
def test_existing_destination_skips_rename_and_removes_temp(tmp_path, monkeypatch) -> None:
    """Check that an existing final file is left untouched and no temp output remains."""
    aax_file = tmp_path / "book.aax"
    aax_file.touch()
    library_root = tmp_path / "audiobooks"

    # Pre-create the final destination so the converted output collides with it.
    final_directory = library_root / "glynn_stewart-starships_mage_01-starship-mage"
    final_file = final_directory / "glynn_stewart-starships_mage_01-starship-mage.m4b"
    final_directory.mkdir(parents=True)
    final_file.write_text("existing", encoding="utf-8")

    def fake_read_metadata(_):
        return {"title": "Starship Mage"}

    def fake_standard_book_metadata(*_, **__):
        return StandardBookMetadata(
            author_id=1,
            author="glynn_stewart",
            book_id=None,
            title="starship-mage",
            series_id=1,
            series="starships_mage",
            series_index=1,
            confidence=0.95,
            needs_review=False,
            evidence=["test"],
        )

    def fake_convert(_source, destination, _activation_bytes, *, overwrite):
        # Write a marker file where the real converter would write its output.
        assert overwrite is True
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text("converted", encoding="utf-8")

    monkeypatch.setattr(audible_convert, "read_metadata", fake_read_metadata)
    monkeypatch.setattr(audible_convert, "standard_book_metadata", fake_standard_book_metadata)
    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)

    audible_convert.convert_aax_file_with_agent(aax_file, conversion_config(library_root))

    leftover_temp_files = list((library_root / ".audible_convert" / "tmp").glob("*/converted.m4b"))
    assert final_file.read_text(encoding="utf-8") == "existing"
    assert not leftover_temp_files
def test_richie_exports_audiobook_models() -> None:
    """Check that ``Audiobook`` is importable from ``python.orm.richie`` with table name ``audiobook``."""
    from python.orm.richie import Audiobook  # noqa: PLC0415

    table_name = Audiobook.__tablename__
    assert table_name == "audiobook"
def test_main_dry_run_prints_outputs_without_converting(tmp_path, monkeypatch, capsys) -> None:
    """Check that ``main(..., dry_run=True)`` prints the planned destination and never converts."""
    input_directory = tmp_path / "raw"
    input_directory.mkdir()
    source = input_directory / "book.aax"
    source.touch()
    output_directory = tmp_path / "audiobooks"
    convert_invocations = []

    def fake_read_metadata(_):
        return {
            "artist": "Charles Lamb",
            "title": "Alice: Alice Series #1",
        }

    def fake_convert(*args, **kwargs):
        # Record every invocation so the test can prove a dry run converts nothing.
        convert_invocations.append((args, kwargs))

    def fake_standard_book_metadata(*_, **__):
        return StandardBookMetadata(
            author_id=1,
            author="charles_lamb",
            book_id=None,
            title="alice",
            series_id=1,
            series="alice",
            series_index=1,
            confidence=0.95,
            needs_review=False,
            evidence=["test"],
        )

    def fake_get_postgres_engine(*, name):
        assert name == "RICHIE"
        return create_engine("sqlite+pysqlite:///:memory:")

    monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
    monkeypatch.setattr(audible_convert, "read_metadata", fake_read_metadata)
    monkeypatch.setattr(audible_convert, "convert_aax_file", fake_convert)
    monkeypatch.setattr(audible_convert, "standard_book_metadata", fake_standard_book_metadata)
    monkeypatch.setattr(audible_convert, "get_postgres_engine", fake_get_postgres_engine)

    audible_convert.main(input_directory, output_directory, dry_run=True)

    expected_destination = output_directory / "charles_lamb-alice_01-alice" / "charles_lamb-alice_01-alice.m4b"
    assert convert_invocations == []
    assert capsys.readouterr().out == f"{source} -> {expected_destination}\n"
    assert (output_directory / ".audible_convert" / "logs").is_dir()
def test_main_reads_activation_bytes_from_env(tmp_path, monkeypatch) -> None:
    """Check that ``main`` copies ``AUDIBLE_ACTIVATION_BYTES`` from the environment into the config.

    ``convert_aax_file_with_agent`` is replaced by a recorder, so the test only
    inspects the ``ConversionConfig`` that ``main`` builds for the single input file.
    """
    input_directory = tmp_path / "raw"
    output_directory = tmp_path / "audiobooks"
    input_directory.mkdir()
    source = input_directory / "book.aax"
    source.touch()
    configs = []

    def fake_convert(_source, config):
        # Capture the config handed to the per-file converter.
        configs.append(config)

    def fake_get_postgres_engine(*, name):
        assert name == "RICHIE"
        return sqlite_engine()

    monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
    monkeypatch.setenv("AUDIBLE_ACTIVATION_BYTES", "activation-secret")
    monkeypatch.setattr(audible_convert, "get_postgres_engine", fake_get_postgres_engine)
    monkeypatch.setattr(audible_convert, "convert_aax_file_with_agent", fake_convert)

    audible_convert.main(input_directory, output_directory)

    # The expected value below reads configs[0]; assert the length first so a
    # missing converter call fails as a readable assertion, not an IndexError.
    assert len(configs) == 1
    assert configs == [
        audible_convert.ConversionConfig(
            resolved_output=output_directory,
            ollama_api_key="test-key",
            # agent_config and engine are built inside main; reuse the captured
            # objects so the equality check focuses on the remaining fields.
            agent_config=configs[0].agent_config,
            engine=configs[0].engine,
            activation_bytes="activation-secret",
            dry_run=False,
            overwrite=False,
        ),
    ]
+126
View File
@@ -0,0 +1,126 @@
"""test_audiobook_catalog."""
from __future__ import annotations
import pytest
from sqlalchemy import create_engine, select
from sqlalchemy.orm import sessionmaker
from python.orm.richie import AudiobookAuthor, AudiobookSeries, RichieBase
from python.tools.audiobook import catalog
@pytest.fixture
def audiobook_session():
    """Yield a session on an in-memory SQLite engine with the ``RichieBase`` tables created."""
    memory_engine = create_engine("sqlite+pysqlite:///:memory:", future=True)
    RichieBase.metadata.create_all(memory_engine)
    session_factory = sessionmaker(bind=memory_engine, expire_on_commit=False, future=True)
    with session_factory() as session:
        yield session
    # Teardown: release the engine once the session context has exited.
    memory_engine.dispose()
def test_upsert_catalog_csv_inserts_and_updates_authors_and_series(tmp_path, audiobook_session) -> None:
    """Check that author and series CSV upserts insert new rows, reuse rows by name, and rename by id."""
    existing_authors = [
        AudiobookAuthor(id=10, name="old_author"),
        AudiobookAuthor(id=11, name="craig_alanson"),
    ]
    audiobook_session.add_all(existing_authors)
    audiobook_session.commit()

    authors_csv = tmp_path / "authors.csv"
    authors_csv.write_text(
        "name,id\n"
        "glynn_stewart,\n"
        "craig_alanson,\n"
        "updated_author,10\n",
        encoding="utf-8",
    )
    series_csv = tmp_path / "series.csv"
    series_csv.write_text(
        "name,author_name,id\n"
        "starships_mage,glynn_stewart,\n"
        "expeditionary_force,craig_alanson,\n",
        encoding="utf-8",
    )

    # Authors go first: the series rows refer to authors by name.
    author_count = catalog.upsert_authors_from_csv(audiobook_session, authors_csv)
    series_count = catalog.upsert_series_from_csv(audiobook_session, series_csv)
    audiobook_session.commit()

    authors_by_id = select(AudiobookAuthor).order_by(AudiobookAuthor.id)
    series_by_name = select(AudiobookSeries).order_by(AudiobookSeries.name)
    stored_authors = audiobook_session.scalars(authors_by_id).all()
    stored_series = audiobook_session.scalars(series_by_name).all()

    assert author_count == 3
    assert series_count == 2
    assert [(stored.id, stored.name) for stored in stored_authors] == [
        (10, "updated_author"),
        (11, "craig_alanson"),
        (12, "glynn_stewart"),
    ]
    assert [(stored.name, stored.author.name) for stored in stored_series] == [
        ("expeditionary_force", "craig_alanson"),
        ("starships_mage", "glynn_stewart"),
    ]
def test_upsert_series_csv_updates_series_by_id(tmp_path, audiobook_session) -> None:
    """Check that a series CSV row carrying an existing id renames that series in place."""
    existing_author = AudiobookAuthor(id=1, name="glynn_stewart")
    existing_series = AudiobookSeries(id=7, name="old_series", author=existing_author)
    audiobook_session.add_all([existing_author, existing_series])
    audiobook_session.commit()

    series_csv = tmp_path / "series.csv"
    series_csv.write_text(
        "name,author_name,id\n"
        "starships_mage,glynn_stewart,7\n",
        encoding="utf-8",
    )

    upserted = catalog.upsert_series_from_csv(audiobook_session, series_csv)
    audiobook_session.commit()

    stored_series = audiobook_session.get(AudiobookSeries, 7)
    assert upserted == 1
    assert stored_series.name == "starships_mage"
    assert stored_series.author.name == "glynn_stewart"
def test_upsert_csv_allows_missing_id_column(tmp_path, audiobook_session) -> None:
    """Check that author and series CSVs without an ``id`` column are still imported."""
    authors_csv = tmp_path / "authors.csv"
    authors_csv.write_text(
        "name\n"
        "glynn_stewart\n",
        encoding="utf-8",
    )
    series_csv = tmp_path / "series.csv"
    series_csv.write_text(
        "name,author_name\n"
        "starships_mage,glynn_stewart\n",
        encoding="utf-8",
    )

    author_count = catalog.upsert_authors_from_csv(audiobook_session, authors_csv)
    series_count = catalog.upsert_series_from_csv(audiobook_session, series_csv)
    audiobook_session.commit()

    stored_series = audiobook_session.scalar(select(AudiobookSeries))
    assert author_count == 1
    assert series_count == 1
    assert stored_series.name == "starships_mage"
    assert stored_series.author.name == "glynn_stewart"
def test_upsert_series_csv_rejects_unknown_author(tmp_path, audiobook_session) -> None:
    """Check that a series row naming an author absent from the session raises ``CatalogImportError``."""
    series_csv = tmp_path / "series.csv"
    csv_text = (
        "name,author_name,id\n"
        "starships_mage,glynn_stewart,\n"
    )
    series_csv.write_text(csv_text, encoding="utf-8")

    with pytest.raises(catalog.CatalogImportError) as excinfo:
        catalog.upsert_series_from_csv(audiobook_session, series_csv)

    message = str(excinfo.value)
    assert "author not found: glynn_stewart" in message