updated series_index to float and added UniqueConstraint to audiobook and audiobook_author
treefmt / nix fmt (pull_request) Failing after 5s
pytest / pytest (pull_request) Successful in 26s
build_systems / build-bob (pull_request) Successful in 45s
build_systems / build-leviathan (pull_request) Successful in 55s
build_systems / build-rhapsody-in-green (pull_request) Successful in 56s
build_systems / build-brain (pull_request) Successful in 47s
build_systems / build-jeeves (pull_request) Successful in 2m36s

This commit is contained in:
2026-06-10 20:07:27 -04:00
parent 5b9da9258c
commit 4d2a017f2e
6 changed files with 307 additions and 20 deletions
@@ -0,0 +1,63 @@
"""updated series_index to float and added UniqueConstraint to audiobook and audiobook_author.
Revision ID: b3c60cc5beb5
Revises: d7864d1ffc17
Create Date: 2026-06-10 20:02:43.073725
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# Alembic revision identifiers; they mirror the "Revision ID" / "Revises" lines in the module docstring.
revision: str = "b3c60cc5beb5"
down_revision: str | None = "d7864d1ffc17"
# This revision declares no branch labels and no cross-revision dependencies.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
# Schema name read from RichieBase; passed as ``schema=`` to every operation in this migration.
schema = RichieBase.schema_name
def upgrade() -> None:
    """Widen ``audiobook.series_index`` to Float and add the author/series/title unique constraint."""
    table_name = "audiobook"

    # Step 1: change the column type from INTEGER to Float; nullability is left as NOT NULL.
    op.alter_column(
        table_name,
        "series_index",
        existing_type=sa.INTEGER(),
        type_=sa.Float(),
        existing_nullable=False,
        schema=schema,
    )

    # Step 2: one row per (author_id, series_id, title).
    # NOTE(review): postgresql_nulls_not_distinct presumably needs a PostgreSQL server that supports
    # NULLS NOT DISTINCT, and existing duplicate rows would make this step fail - confirm before deploying.
    unique_columns = ["author_id", "series_id", "title"]
    op.create_unique_constraint(
        op.f("uq_audiobook_author_id"),
        table_name,
        unique_columns,
        schema=schema,
        postgresql_nulls_not_distinct=True,
    )
def downgrade() -> None:
    """Drop the author/series/title unique constraint and narrow ``audiobook.series_index`` back to INTEGER."""
    table_name = "audiobook"

    # Reverse order of upgrade(): remove the constraint first, then restore the column type.
    op.drop_constraint(
        op.f("uq_audiobook_author_id"),
        table_name,
        schema=schema,
        type_="unique",
    )

    # NOTE(review): rows holding half-step values (for example 1.5) cannot survive this narrowing
    # unchanged; how the database converts them is not visible here - confirm before downgrading.
    op.alter_column(
        table_name,
        "series_index",
        existing_type=sa.Float(),
        type_=sa.INTEGER(),
        existing_nullable=False,
        schema=schema,
    )
+10 -1
View File
@@ -12,6 +12,7 @@ class AudiobookAuthor(TableBase):
"""Canonical audiobook author.""" """Canonical audiobook author."""
__tablename__ = "audiobook_author" __tablename__ = "audiobook_author"
__table_args__ = (UniqueConstraint("name"),)
name: Mapped[str] = mapped_column(String, unique=True) name: Mapped[str] = mapped_column(String, unique=True)
@@ -36,11 +37,19 @@ class Audiobook(TableBase):
"""Canonical audiobook title.""" """Canonical audiobook title."""
__tablename__ = "audiobook" __tablename__ = "audiobook"
__table_args__ = (
UniqueConstraint(
"author_id",
"series_id",
"title",
postgresql_nulls_not_distinct=True,
),
)
title: Mapped[str] = mapped_column(String) title: Mapped[str] = mapped_column(String)
author_id: Mapped[int] = mapped_column(ForeignKey("main.audiobook_author.id", ondelete="CASCADE")) author_id: Mapped[int] = mapped_column(ForeignKey("main.audiobook_author.id", ondelete="CASCADE"))
series_id: Mapped[int | None] = mapped_column(ForeignKey("main.audiobook_series.id", ondelete="SET NULL")) series_id: Mapped[int | None] = mapped_column(ForeignKey("main.audiobook_series.id", ondelete="SET NULL"))
series_index: Mapped[int] = mapped_column(default=0) series_index: Mapped[float] = mapped_column(default=0.0)
author: Mapped[AudiobookAuthor] = relationship("AudiobookAuthor", back_populates="books") author: Mapped[AudiobookAuthor] = relationship("AudiobookAuthor", back_populates="books")
series: Mapped[AudiobookSeries | None] = relationship("AudiobookSeries", back_populates="books") series: Mapped[AudiobookSeries | None] = relationship("AudiobookSeries", back_populates="books")
+28 -1
View File
@@ -4,6 +4,7 @@ from __future__ import annotations
import json import json
import logging import logging
import re
import shutil import shutil
import subprocess import subprocess
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
@@ -30,6 +31,7 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
SENSITIVE_COMMAND_ARGUMENTS = {"-activation_bytes"} SENSITIVE_COMMAND_ARGUMENTS = {"-activation_bytes"}
BOOK_RANGE_PATTERN = re.compile(r"(?:^|-)books?-(?P<start>[1-9]\d*)-(?P<end>[1-9]\d*)(?:-|$)")
@dataclass(frozen=True) @dataclass(frozen=True)
@@ -178,7 +180,32 @@ def output_stem(metadata: StandardBookMetadata) -> str:
Returns: Returns:
Output stem in author-series_01-title form. Output stem in author-series_01-title form.
""" """
return f"{metadata.author}-{metadata.series}_{metadata.series_index:02}-{metadata.title}" index_slug = series_index_slug(metadata.series_index, metadata.title)
return f"{metadata.author}-{metadata.series}_{index_slug}-{metadata.title}"
def series_index_slug(series_index: float, title: str = "") -> str:
    """Return a filename-safe series index.

    Args:
        series_index: Position of the book within its series.
        title: Title slug. When ``title_series_range_slug`` finds an omnibus range for this
            index in the title, that range slug is returned instead of the plain index.

    Returns:
        The range slug when one is found, otherwise the index with the whole part zero-padded
        to two digits (``01``) followed by its real fractional digits when present (``01.5``).
    """
    if title_range := title_series_range_slug(series_index, title):
        return title_range

    index = float(series_index)
    if index.is_integer():
        return f"{int(index):02}"

    # Render the fraction that is actually stored instead of assuming ".5": a value such as
    # 2.25 must not be written out as "02.5". Three decimals are kept and trailing zeros dropped.
    # NOTE(review): the slug contains a "." - callers that build a path from it should append the
    # extension rather than use Path.with_suffix; confirm against metadata_output_path.
    whole, _, fraction = f"{index:.3f}".rstrip("0").rstrip(".").partition(".")
    padded_whole = f"{int(whole):02}"
    return f"{padded_whole}.{fraction}" if fraction else padded_whole
def title_series_range_slug(series_index: float, title: str) -> str | None:
    """Look for an omnibus book range in ``title`` that begins at ``series_index``.

    Args:
        series_index: Series position expected to be the first book of the range.
        title: Title slug searched with ``BOOK_RANGE_PATTERN``.

    Returns:
        ``"SS-EE"`` (both numbers zero-padded to two digits) for the first range whose start
        equals the whole-number index and whose end is greater than its start; ``None`` when
        the index is not a whole number or no such range is present.
    """
    as_float = float(series_index)
    if as_float.is_integer():
        wanted_start = int(as_float)
        for found in BOOK_RANGE_PATTERN.finditer(title):
            low = int(found["start"])
            high = int(found["end"])
            # Only a real multi-book span that begins at this book counts as an omnibus range.
            if low == wanted_start and high > low:
                return f"{low:02}-{high:02}"
    return None
def metadata_output_path(output_directory: Path, metadata: StandardBookMetadata) -> Path: def metadata_output_path(output_directory: Path, metadata: StandardBookMetadata) -> Path:
+44 -10
View File
@@ -144,7 +144,7 @@ class CatalogToolRegistry:
"title": {"type": "string"}, "title": {"type": "string"},
"author_id": {"type": "integer"}, "author_id": {"type": "integer"},
"series_id": {"type": ["integer", "null"]}, "series_id": {"type": ["integer", "null"]},
"series_index": {"type": "integer"}, "series_index": {"type": "number", "multipleOf": 0.5},
}, },
"required": ["title", "author_id", "series_id", "series_index"], "required": ["title", "author_id", "series_id", "series_index"],
}, },
@@ -306,12 +306,7 @@ class CatalogToolRegistry:
author_id = required_int(arguments, "author_id") author_id = required_int(arguments, "author_id")
validate_catalog_slug(name, "series") validate_catalog_slug(name, "series")
author = self.required_author(author_id) author = self.required_author(author_id)
series = self.session.scalar( series = self.find_series_by_catalog_slug(name, author.id)
select(AudiobookSeries).where(
AudiobookSeries.name == name,
AudiobookSeries.author_id == author.id,
),
)
action = "existing" action = "existing"
if series is None: if series is None:
series = AudiobookSeries(name=name, author=author) series = AudiobookSeries(name=name, author=author)
@@ -329,7 +324,7 @@ class CatalogToolRegistry:
title = required_string(arguments, "title") title = required_string(arguments, "title")
author_id = required_int(arguments, "author_id") author_id = required_int(arguments, "author_id")
series_id = optional_int(arguments.get("series_id"), "series_id") series_id = optional_int(arguments.get("series_id"), "series_id")
series_index = required_int(arguments, "series_index") series_index = required_series_index(arguments, "series_index")
ensured = self.ensure_book(title, author_id, series_id, series_index) ensured = self.ensure_book(title, author_id, series_id, series_index)
return [self.book_result(ensured.book, ensured.action)] return [self.book_result(ensured.book, ensured.action)]
@@ -338,7 +333,7 @@ class CatalogToolRegistry:
title: str, title: str,
author_id: int, author_id: int,
series_id: int | None, series_id: int | None,
series_index: int, series_index: float,
) -> EnsuredBook: ) -> EnsuredBook:
"""Return an existing book row, or create it after validating ownership.""" """Return an existing book row, or create it after validating ownership."""
title = normalize_title_slug(title) title = normalize_title_slug(title)
@@ -398,6 +393,26 @@ class CatalogToolRegistry:
raise MetadataResolutionError(msg) raise MetadataResolutionError(msg)
return series return series
def find_series_by_catalog_slug(self, name: str, author_id: int) -> AudiobookSeries | None:
    """Find an author's series by slug, falling back to an underscore-insensitive comparison.

    Args:
        name: Series slug to look up.
        author_id: Author whose series are searched.

    Returns:
        The series whose name equals ``name`` exactly when one exists; otherwise the first of
        the author's series (ordered by name) whose compacted slug equals the compacted
        ``name``; ``None`` when neither lookup matches.
    """
    exact_query = select(AudiobookSeries).where(
        AudiobookSeries.name == name,
        AudiobookSeries.author_id == author_id,
    )
    exact_match = self.session.scalar(exact_query)
    if exact_match is not None:
        return exact_match

    # No exact hit: compare every series of this author on the underscore-free key.
    wanted_key = compact_catalog_slug(name)
    author_query = select(AudiobookSeries).where(AudiobookSeries.author_id == author_id).order_by(AudiobookSeries.name)
    candidates = self.session.scalars(author_query).all()
    return next(
        (candidate for candidate in candidates if compact_catalog_slug(candidate.name) == wanted_key),
        None,
    )
def series_result(self, series: AudiobookSeries, action: str) -> dict[str, object]: def series_result(self, series: AudiobookSeries, action: str) -> dict[str, object]:
"""Build a normalized series tool result.""" """Build a normalized series tool result."""
return { return {
@@ -513,6 +528,11 @@ def normalize_catalog_slug(value: str) -> str:
return re.sub(r"[^a-z0-9]+", "_", value.strip().casefold()).strip("_") return re.sub(r"[^a-z0-9]+", "_", value.strip().casefold()).strip("_")
def compact_catalog_slug(value: str) -> str:
    """Build a comparison key from ``value``: its normalized catalog slug with every underscore removed."""
    normalized = normalize_catalog_slug(value)
    return "".join(normalized.split("_"))
def normalize_title_slug(value: str) -> str: def normalize_title_slug(value: str) -> str:
"""Normalize noisy book titles into lower kebab-case slugs.""" """Normalize noisy book titles into lower kebab-case slugs."""
return re.sub(r"[^a-z0-9]+", "-", value.strip().casefold()).strip("-") return re.sub(r"[^a-z0-9]+", "-", value.strip().casefold()).strip("-")
@@ -533,8 +553,9 @@ def query_terms(query: str) -> tuple[str, ...]:
"""Return text variants useful for matching noisy audiobook metadata.""" """Return text variants useful for matching noisy audiobook metadata."""
normalized = query.strip().casefold() normalized = query.strip().casefold()
underscore_slug = normalize_catalog_slug(normalized) underscore_slug = normalize_catalog_slug(normalized)
compact_slug = compact_catalog_slug(normalized)
hyphen_slug = normalize_title_slug(normalized) hyphen_slug = normalize_title_slug(normalized)
return tuple(dict.fromkeys(term for term in (normalized, underscore_slug, hyphen_slug) if term)) return tuple(dict.fromkeys(term for term in (normalized, underscore_slug, compact_slug, hyphen_slug) if term))
def required_string(data: dict[str, object], key: str) -> str: def required_string(data: dict[str, object], key: str) -> str:
@@ -555,6 +576,19 @@ def required_int(data: dict[str, object], key: str) -> int:
return value return value
def required_series_index(data: dict[str, object], key: str) -> float:
    """Read a required whole-number or half-number series index.

    Args:
        data: Mapping the value is read from.
        key: Name of the field inside ``data``.

    Returns:
        The value as a float.

    Raises:
        MetadataResolutionError: If the field is missing, is a bool or not an int/float, is too
            large to represent as a float, or is not a multiple of 0.5.
    """
    value = data.get(key)
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(value, bool) or not isinstance(value, int | float):
        msg = f"{key} must be a number"
        raise MetadataResolutionError(msg)
    try:
        series_index = float(value)
    except OverflowError as error:
        # An arbitrarily large int cannot be converted; report it as a validation error
        # instead of letting OverflowError escape to the caller.
        msg = f"{key} is too large"
        raise MetadataResolutionError(msg) from error
    # Doubling maps whole and half values onto integers; anything else (including inf/nan) is rejected.
    if not (series_index * 2).is_integer():
        msg = f"{key} must be a whole number or .5 increment"
        raise MetadataResolutionError(msg)
    return series_index
def optional_int(value: object, key: str) -> int | None: def optional_int(value: object, key: str) -> int | None:
"""Read an optional integer field.""" """Read an optional integer field."""
if value is None: if value is None:
+10 -7
View File
@@ -19,6 +19,7 @@ from python.tools.audiobook.llm_tool_calling import (
optional_int, optional_int,
parse_tool_calls, parse_tool_calls,
required_int, required_int,
required_series_index,
required_string, required_string,
run_tool_calls, run_tool_calls,
validate_catalog_slug, validate_catalog_slug,
@@ -67,7 +68,7 @@ class StandardBookMetadata:
title: str title: str
series_id: int | None series_id: int | None
series: str series: str
series_index: int series_index: float
confidence: float confidence: float
needs_review: bool needs_review: bool
evidence: list[str] evidence: list[str]
@@ -81,7 +82,7 @@ class FinalMetadataFields:
book_id: int | None book_id: int | None
title: str title: str
series_id: int | None series_id: int | None
series_index: int series_index: float
confidence: float confidence: float
evidence: list[str] evidence: list[str]
@@ -93,7 +94,7 @@ class ResolvedBookFields:
book_id: int | None book_id: int | None
title: str title: str
series_id: int | None series_id: int | None
series_index: int series_index: float
@dataclass(frozen=True) @dataclass(frozen=True)
@@ -283,7 +284,7 @@ class AudiobookMetadataAgent:
"model": self._config.model, "model": self._config.model,
"messages": messages, "messages": messages,
"stream": False, "stream": False,
"options": {"temperature": 0}, "options": {"temperature": 0.1},
} }
tool_names = [] tool_names = []
if tools_enabled: if tools_enabled:
@@ -403,7 +404,7 @@ class AudiobookMetadataAgent:
series_index=book.series_index, series_index=book.series_index,
) )
def validate_series(self, author_id: int, series_id: int | None, series_index: int) -> str: def validate_series(self, author_id: int, series_id: int | None, series_index: float) -> str:
"""Validate final series fields and return the canonical series slug.""" """Validate final series fields and return the canonical series slug."""
if series_id is None: if series_id is None:
if series_index != 0: if series_index != 0:
@@ -467,7 +468,9 @@ Rules:
- The final JSON object must contain author_id, book_id, title, series_id, series_index, confidence, and evidence. - The final JSON object must contain author_id, book_id, title, series_id, series_index, confidence, and evidence.
- title must be a canonical title slug using lower-case words separated by hyphens. - title must be a canonical title slug using lower-case words separated by hyphens.
- Use series_id null and series_index 0 for standalone books. - Use series_id null and series_index 0 for standalone books.
- If you use a series_id, series_index must be an integer greater than or equal to 1. - If you use a series_id, series_index must be a whole number or .5 value greater than 0.
- Treat series slugs that differ only by underscores as the same series. Prefer the existing catalog row instead of
creating a new series.
- Detect omnibus or box-set editions that contain multiple numbered novels, books, or novellas. - Detect omnibus or box-set editions that contain multiple numbered novels, books, or novellas.
- For an omnibus, make a best-effort range from the filename, tags, and catalog rows. Keep series_index as the - For an omnibus, make a best-effort range from the filename, tags, and catalog rows. Keep series_index as the
first covered book number and include the range in the title when the source title includes it, for example first covered book number and include the range in the title when the source title includes it, for example
@@ -524,7 +527,7 @@ def parse_final_metadata_fields(raw_metadata: object) -> FinalMetadataFields:
book_id=optional_int(data.get("book_id"), "book_id"), book_id=optional_int(data.get("book_id"), "book_id"),
title=required_string(data, "title"), title=required_string(data, "title"),
series_id=optional_int(data.get("series_id"), "series_id"), series_id=optional_int(data.get("series_id"), "series_id"),
series_index=required_int(data, "series_index"), series_index=required_series_index(data, "series_index"),
confidence=required_float(data, "confidence"), confidence=required_float(data, "confidence"),
evidence=required_string_list(data, "evidence"), evidence=required_string_list(data, "evidence"),
) )
+152 -1
View File
@@ -6,7 +6,8 @@ import json
import subprocess import subprocess
import pytest import pytest
from sqlalchemy import create_engine from sqlalchemy import create_engine, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, sessionmaker from sqlalchemy.orm import Session, sessionmaker
from python.orm.richie import Audiobook, AudiobookAuthor, AudiobookSeries, RichieBase from python.orm.richie import Audiobook, AudiobookAuthor, AudiobookSeries, RichieBase
@@ -113,6 +114,62 @@ def test_output_stem_uses_catalog_slugs() -> None:
assert audible_convert.output_stem(metadata) == "glynn_stewart-starships_mage_01-title-slug" assert audible_convert.output_stem(metadata) == "glynn_stewart-starships_mage_01-title-slug"
def test_output_stem_formats_half_series_index() -> None:
    """A series index of 1.5 is rendered as ``01.5`` in the output stem."""
    half_step_book = StandardBookMetadata(
        author_id=1,
        author="glynn_stewart",
        book_id=None,
        title="title-slug",
        series_id=1,
        series="starships_mage",
        series_index=1.5,
        confidence=0.96,
        needs_review=False,
        evidence=["test"],
    )
    expected_stem = "glynn_stewart-starships_mage_01.5-title-slug"

    assert audible_convert.output_stem(half_step_book) == expected_stem
@pytest.mark.parametrize(
    ("metadata", "expected"),
    [
        pytest.param(
            StandardBookMetadata(
                author_id=1,
                author="mark_e_cooper",
                book_id=None,
                title="merkiaari-wars-series-books-1-3",
                series_id=1,
                series="merkiaari_wars",
                series_index=1,
                confidence=0.96,
                needs_review=False,
                evidence=["test"],
            ),
            "mark_e_cooper-merkiaari_wars_01-03-merkiaari-wars-series-books-1-3",
        ),
        pytest.param(
            StandardBookMetadata(
                author_id=1,
                author="rhett_c_bruno",
                book_id=None,
                title="the-circuit-books-1-3",
                series_id=1,
                series="the_circuit",
                series_index=1,
                confidence=0.96,
                needs_review=False,
                evidence=["test"],
            ),
            "rhett_c_bruno-the_circuit_01-03-the-circuit-books-1-3",
        ),
    ],
)
def test_output_stem_formats_omnibus_book_range(metadata, expected) -> None:
    """A ``books-1-3`` range in the title replaces the plain index with ``01-03`` in the stem."""
    produced_stem = audible_convert.output_stem(metadata)
    assert produced_stem == expected
def test_convert_aax_file_runs_ffmpeg(tmp_path, monkeypatch) -> None: def test_convert_aax_file_runs_ffmpeg(tmp_path, monkeypatch) -> None:
"""test_convert_aax_file_runs_ffmpeg.""" """test_convert_aax_file_runs_ffmpeg."""
commands = [] commands = []
@@ -196,6 +253,8 @@ def test_system_prompt_instructs_agent_to_detect_omnibuses() -> None:
assert "Detect omnibus or box-set editions" in prompt assert "Detect omnibus or box-set editions" in prompt
assert "books-1-3" in prompt assert "books-1-3" in prompt
assert "Keep series_index as the" in prompt assert "Keep series_index as the" in prompt
assert "series_index must be a whole number or .5 value" in prompt
assert "differ only by underscores" in prompt
def test_standard_book_metadata_accepts_valid_tool_output(tmp_path, monkeypatch, audiobook_engine) -> None: def test_standard_book_metadata_accepts_valid_tool_output(tmp_path, monkeypatch, audiobook_engine) -> None:
@@ -666,6 +725,85 @@ def test_standard_book_metadata_can_create_missing_catalog_rows(
assert book.series_id == series.id assert book.series_id == series.id
def test_standard_book_metadata_accepts_half_series_index(tmp_path, monkeypatch, audiobook_engine) -> None:
    """A final answer with series_index 1.5 is accepted without review and stored on the book row."""
    scripted_responses = [
        tool_response("search_series", {"query": "bobiverse", "author_id": 4}),
        final_response(
            {
                "author_id": 4,
                "book_id": None,
                "title": "bobiverse-short",
                "series_id": 4,
                "series_index": 1.5,
                "confidence": 0.95,
                "evidence": ["series novella from tags"],
            },
        ),
    ]
    install_fake_ollama(monkeypatch, scripted_responses)

    source_tags = {"title": "Bobiverse Short", "artist": "Dennis E Taylor"}
    resolved = standard_book_metadata(
        "Bobiverse Short.aax",
        source_tags,
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )

    assert resolved.series_index == 1.5
    assert resolved.needs_review is False
    # The half-step index must also be what was written to the catalog.
    with Session(audiobook_engine) as session:
        stored_book = session.get(Audiobook, 1)
        assert stored_book.series_index == 1.5
def test_standard_book_metadata_reuses_series_with_only_underscore_difference(
    tmp_path,
    monkeypatch,
    audiobook_engine,
) -> None:
    """``ensure_series`` for ``starship_s`` reuses the existing ``starships`` row instead of adding one."""
    # Seed the catalog with the series the agent is expected to reuse.
    with Session(audiobook_engine) as session:
        existing_series = AudiobookSeries(id=5, name="starships", author_id=1)
        session.add(existing_series)
        session.commit()

    scripted_responses = [
        tool_response("ensure_series", {"name": "starship_s", "author_id": 1}),
        final_response(
            {
                "author_id": 1,
                "book_id": None,
                "title": "starships-short",
                "series_id": 5,
                "series_index": 1,
                "confidence": 0.95,
                "evidence": ["reused existing series with equivalent slug"],
            },
        ),
    ]
    install_fake_ollama(monkeypatch, scripted_responses)

    source_tags = {"title": "Starship S Short", "artist": "Glynn Stewart"}
    resolved = standard_book_metadata(
        "Starship S Short.aax",
        source_tags,
        audiobook_engine,
        tmp_path / "agent.jsonl",
        "test-key",
        config=metadata_agent.AgentConfig(),
    )

    assert resolved.series == "starships"
    names_query = select(AudiobookSeries.name).where(AudiobookSeries.author_id == 1).order_by(AudiobookSeries.name)
    with Session(audiobook_engine) as session:
        series_names = session.scalars(names_query).all()
    assert "starship_s" not in series_names
    assert series_names == ["black_fleet_trilogy", "starships", "starships_mage"]
def test_standard_book_metadata_normalizes_noisy_created_catalog_rows( def test_standard_book_metadata_normalizes_noisy_created_catalog_rows(
tmp_path, tmp_path,
monkeypatch, monkeypatch,
@@ -888,6 +1026,19 @@ def test_richie_exports_audiobook_models() -> None:
assert Audiobook.__tablename__ == "audiobook" assert Audiobook.__tablename__ == "audiobook"
def test_audiobook_title_author_series_is_unique(audiobook_engine) -> None:
    """Two books sharing title, author and series are rejected on commit even if the index differs."""
    shared_key = {"title": "duplicate-title", "author_id": 1, "series_id": 1}
    with Session(audiobook_engine) as session:
        session.add(Audiobook(**shared_key, series_index=1))
        session.add(Audiobook(**shared_key, series_index=2))

        with pytest.raises(IntegrityError):
            session.commit()
def test_main_dry_run_prints_outputs_without_converting(tmp_path, monkeypatch, capsys) -> None: def test_main_dry_run_prints_outputs_without_converting(tmp_path, monkeypatch, capsys) -> None:
input_directory = tmp_path / "raw" input_directory = tmp_path / "raw"
output_directory = tmp_path / "audiobooks" output_directory = tmp_path / "audiobooks"