From 9290cb46ee786b76ce497da8aa186ec5ced44699 Mon Sep 17 00:00:00 2001
From: Richie Cahill <Richie@tmmworkshop.com>
Date: Wed, 10 Jun 2026 20:07:27 -0400
Subject: [PATCH] updated series_index to float and added UniqueConstraint to
 audiobook and audiobook_author

---
 ..._index_to_float_and_added__b3c60cc5beb5.py |  63 ++++++++
 python/orm/richie/audiobook.py                |  11 +-
 python/tools/audiobook/audible_convert.py     |  29 +++-
 python/tools/audiobook/llm_tool_calling.py    |  54 +++++--
 python/tools/audiobook/metadata_agent.py      |  17 +-
 tests/test_audible_convert.py                 | 153 +++++++++++++++++-
 6 files changed, 307 insertions(+), 20 deletions(-)
 create mode 100644 python/alembic/richie/versions/2026_06_10-updated_series_index_to_float_and_added__b3c60cc5beb5.py

diff --git a/python/alembic/richie/versions/2026_06_10-updated_series_index_to_float_and_added__b3c60cc5beb5.py b/python/alembic/richie/versions/2026_06_10-updated_series_index_to_float_and_added__b3c60cc5beb5.py
new file mode 100644
index 0000000..6c8d67e
--- /dev/null
+++ b/python/alembic/richie/versions/2026_06_10-updated_series_index_to_float_and_added__b3c60cc5beb5.py
@@ -0,0 +1,63 @@
+"""updated series_index to float and added UniqueConstraint to audiobook and audiobook_author.
+
+Revision ID: b3c60cc5beb5
+Revises: d7864d1ffc17
+Create Date: 2026-06-10 20:02:43.073725
+
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import sqlalchemy as sa
+from alembic import op
+
+from python.orm import RichieBase
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+# revision identifiers, used by Alembic.
+revision: str = "b3c60cc5beb5"
+down_revision: str | None = "d7864d1ffc17"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+schema = RichieBase.schema_name
+
+
+def upgrade() -> None:
+    """Change audiobook.series_index to Float and add the author/series/title unique constraint."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.alter_column(
+        "audiobook",
+        "series_index",
+        existing_type=sa.INTEGER(),
+        type_=sa.Float(),
+        existing_nullable=False,
+        schema=schema,
+    )
+    op.create_unique_constraint(
+        op.f("uq_audiobook_author_id"),
+        "audiobook",
+        ["author_id", "series_id", "title"],
+        schema=schema,
+        postgresql_nulls_not_distinct=True,  # NULL series_id values compare as equal for this constraint
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Drop the unique constraint and revert series_index to INTEGER, which cannot keep fractional values."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_constraint(op.f("uq_audiobook_author_id"), "audiobook", schema=schema, type_="unique")
+    op.alter_column(
+        "audiobook",
+        "series_index",
+        existing_type=sa.Float(),
+        type_=sa.INTEGER(),
+        existing_nullable=False,
+        schema=schema,
+    )
+    # ### end Alembic commands ###
diff --git a/python/orm/richie/audiobook.py b/python/orm/richie/audiobook.py
index 8ab78ea..0b8d6d4 100644
--- a/python/orm/richie/audiobook.py
+++ b/python/orm/richie/audiobook.py
@@ -12,6 +12,7 @@ class AudiobookAuthor(TableBase):
     """Canonical audiobook author."""
 
     __tablename__ = "audiobook_author"
+    __table_args__ = (UniqueConstraint("name"),)  # NOTE(review): name also sets unique=True; confirm both needed
 
     name: Mapped[str] = mapped_column(String, unique=True)
 
@@ -36,11 +37,19 @@ class Audiobook(TableBase):
     """Canonical audiobook title."""
 
     __tablename__ = "audiobook"
+    __table_args__ = (
+        UniqueConstraint(
+            "author_id",
+            "series_id",
+            "title",
+            postgresql_nulls_not_distinct=True,
+        ),
+    )
 
     title: Mapped[str] = mapped_column(String)
     author_id: Mapped[int] = mapped_column(ForeignKey("main.audiobook_author.id", ondelete="CASCADE"))
     series_id: Mapped[int | None] = mapped_column(ForeignKey("main.audiobook_series.id", ondelete="SET NULL"))
-    series_index: Mapped[int] = mapped_column(default=0)
+    series_index: Mapped[float] = mapped_column(default=0.0)
 
     author: Mapped[AudiobookAuthor] = relationship("AudiobookAuthor", back_populates="books")
     series: Mapped[AudiobookSeries | None] = relationship("AudiobookSeries", back_populates="books")
diff --git a/python/tools/audiobook/audible_convert.py b/python/tools/audiobook/audible_convert.py
index 4d38c3a..4957b04 100644
--- a/python/tools/audiobook/audible_convert.py
+++ b/python/tools/audiobook/audible_convert.py
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 import json
 import logging
+import re
 import shutil
 import subprocess
 from concurrent.futures import ThreadPoolExecutor
@@ -30,6 +31,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 SENSITIVE_COMMAND_ARGUMENTS = {"-activation_bytes"}
+BOOK_RANGE_PATTERN = re.compile(r"(?:^|-)books?-(?P<start>[1-9]\d*)-(?P<end>[1-9]\d*)(?:-|$)")
 
 
 @dataclass(frozen=True)
@@ -178,7 +180,32 @@ def output_stem(metadata: StandardBookMetadata) -> str:
     Returns:
         Output stem in author-series_01-title form.
     """
-    return f"{metadata.author}-{metadata.series}_{metadata.series_index:02}-{metadata.title}"
+    index_slug = series_index_slug(metadata.series_index, metadata.title)
+    return f"{metadata.author}-{metadata.series}_{index_slug}-{metadata.title}"
+
+
+def series_index_slug(series_index: float, title: str = "") -> str:
+    """Return a filename-safe series index such as ``01``, ``01.5``, or an omnibus range like ``01-03``."""
+    if title_range := title_series_range_slug(series_index, title):
+        return title_range
+    index = float(series_index)
+    if index.is_integer():
+        return f"{int(index):02}"
+    return f"{int(index):02}.{str(index).partition('.')[2]}"  # keep the real fraction instead of assuming .5
+
+
+def title_series_range_slug(series_index: float, title: str) -> str | None:
+    """Return a zero-padded ``start-end`` range from an omnibus title whose range starts at series_index."""
+    index = float(series_index)
+    if not index.is_integer():
+        return None  # a fractional index never matches a whole-number "books-N-M" range
+    first_index = int(index)
+    for match in BOOK_RANGE_PATTERN.finditer(title):
+        start = int(match.group("start"))
+        end = int(match.group("end"))
+        if start == first_index and end > start:  # range must begin at this book and span more than one
+            return f"{start:02}-{end:02}"
+    return None
 
 
 def metadata_output_path(output_directory: Path, metadata: StandardBookMetadata) -> Path:
diff --git a/python/tools/audiobook/llm_tool_calling.py b/python/tools/audiobook/llm_tool_calling.py
index 03aebb3..fd790c0 100644
--- a/python/tools/audiobook/llm_tool_calling.py
+++ b/python/tools/audiobook/llm_tool_calling.py
@@ -144,7 +144,7 @@ class CatalogToolRegistry:
                             "title": {"type": "string"},
                             "author_id": {"type": "integer"},
                             "series_id": {"type": ["integer", "null"]},
-                            "series_index": {"type": "integer"},
+                            "series_index": {"type": "number", "multipleOf": 0.5},
                         },
                         "required": ["title", "author_id", "series_id", "series_index"],
                     },
@@ -306,12 +306,7 @@ class CatalogToolRegistry:
         author_id = required_int(arguments, "author_id")
         validate_catalog_slug(name, "series")
         author = self.required_author(author_id)
-        series = self.session.scalar(
-            select(AudiobookSeries).where(
-                AudiobookSeries.name == name,
-                AudiobookSeries.author_id == author.id,
-            ),
-        )
+        series = self.find_series_by_catalog_slug(name, author.id)
         action = "existing"
         if series is None:
             series = AudiobookSeries(name=name, author=author)
@@ -329,7 +324,7 @@ class CatalogToolRegistry:
         title = required_string(arguments, "title")
         author_id = required_int(arguments, "author_id")
         series_id = optional_int(arguments.get("series_id"), "series_id")
-        series_index = required_int(arguments, "series_index")
+        series_index = required_series_index(arguments, "series_index")
         ensured = self.ensure_book(title, author_id, series_id, series_index)
         return [self.book_result(ensured.book, ensured.action)]
 
@@ -338,7 +333,7 @@ class CatalogToolRegistry:
         title: str,
         author_id: int,
         series_id: int | None,
-        series_index: int,
+        series_index: float,
     ) -> EnsuredBook:
         """Return an existing book row, or create it after validating ownership."""
         title = normalize_title_slug(title)
@@ -398,6 +393,26 @@ class CatalogToolRegistry:
             raise MetadataResolutionError(msg)
         return series
 
+    def find_series_by_catalog_slug(self, name: str, author_id: int) -> AudiobookSeries | None:
+        """Return the author's series matching name exactly, else the first match ignoring underscores."""
+        exact = self.session.scalar(
+            select(AudiobookSeries).where(
+                AudiobookSeries.name == name,
+                AudiobookSeries.author_id == author_id,
+            ),
+        )
+        if exact is not None:
+            return exact
+
+        compact_name = compact_catalog_slug(name)  # fall back to an underscore-insensitive comparison
+        series_rows = self.session.scalars(
+            select(AudiobookSeries).where(AudiobookSeries.author_id == author_id).order_by(AudiobookSeries.name),
+        ).all()
+        for series in series_rows:  # rows are ordered by name, so the first match wins deterministically
+            if compact_catalog_slug(series.name) == compact_name:
+                return series
+        return None
+
     def series_result(self, series: AudiobookSeries, action: str) -> dict[str, object]:
         """Build a normalized series tool result."""
         return {
@@ -513,6 +528,11 @@ def normalize_catalog_slug(value: str) -> str:
     return re.sub(r"[^a-z0-9]+", "_", value.strip().casefold()).strip("_")
 
 
+def compact_catalog_slug(value: str) -> str:
+    """Return the normalized catalog slug with underscores removed, for underscore-insensitive comparison."""
+    return normalize_catalog_slug(value).replace("_", "")
+
+
 def normalize_title_slug(value: str) -> str:
     """Normalize noisy book titles into lower kebab-case slugs."""
     return re.sub(r"[^a-z0-9]+", "-", value.strip().casefold()).strip("-")
@@ -533,8 +553,9 @@ def query_terms(query: str) -> tuple[str, ...]:
     """Return text variants useful for matching noisy audiobook metadata."""
     normalized = query.strip().casefold()
     underscore_slug = normalize_catalog_slug(normalized)
+    compact_slug = compact_catalog_slug(normalized)
     hyphen_slug = normalize_title_slug(normalized)
-    return tuple(dict.fromkeys(term for term in (normalized, underscore_slug, hyphen_slug) if term))
+    return tuple(dict.fromkeys(term for term in (normalized, underscore_slug, compact_slug, hyphen_slug) if term))
 
 
 def required_string(data: dict[str, object], key: str) -> str:
@@ -555,6 +576,19 @@ def required_int(data: dict[str, object], key: str) -> int:
     return value
 
 
+def required_series_index(data: dict[str, object], key: str) -> float:
+    """Read a required series index that is a whole number or a .5 step, else raise MetadataResolutionError."""
+    value = data.get(key)
+    if isinstance(value, bool) or not isinstance(value, int | float):  # bool is an int subclass; reject it
+        msg = f"{key} must be a number"
+        raise MetadataResolutionError(msg)
+    series_index = float(value)
+    if not (series_index * 2).is_integer():  # doubling maps valid .0/.5 values onto integers
+        msg = f"{key} must be a whole number or .5 increment"
+        raise MetadataResolutionError(msg)
+    return series_index
+
+
 def optional_int(value: object, key: str) -> int | None:
     """Read an optional integer field."""
     if value is None:
diff --git a/python/tools/audiobook/metadata_agent.py b/python/tools/audiobook/metadata_agent.py
index 1828c79..63a2035 100644
--- a/python/tools/audiobook/metadata_agent.py
+++ b/python/tools/audiobook/metadata_agent.py
@@ -19,6 +19,7 @@ from python.tools.audiobook.llm_tool_calling import (
     optional_int,
     parse_tool_calls,
     required_int,
+    required_series_index,
     required_string,
     run_tool_calls,
     validate_catalog_slug,
@@ -67,7 +68,7 @@ class StandardBookMetadata:
     title: str
     series_id: int | None
     series: str
-    series_index: int
+    series_index: float
     confidence: float
     needs_review: bool
     evidence: list[str]
@@ -81,7 +82,7 @@ class FinalMetadataFields:
     book_id: int | None
     title: str
     series_id: int | None
-    series_index: int
+    series_index: float
     confidence: float
     evidence: list[str]
 
@@ -93,7 +94,7 @@ class ResolvedBookFields:
     book_id: int | None
     title: str
     series_id: int | None
-    series_index: int
+    series_index: float
 
 
 @dataclass(frozen=True)
@@ -283,7 +284,7 @@ class AudiobookMetadataAgent:
             "model": self._config.model,
             "messages": messages,
             "stream": False,
-            "options": {"temperature": 0},
+            "options": {"temperature": 0.1},
         }
         tool_names = []
         if tools_enabled:
@@ -403,7 +404,7 @@ class AudiobookMetadataAgent:
             series_index=book.series_index,
         )
 
-    def validate_series(self, author_id: int, series_id: int | None, series_index: int) -> str:
+    def validate_series(self, author_id: int, series_id: int | None, series_index: float) -> str:
         """Validate final series fields and return the canonical series slug."""
         if series_id is None:
             if series_index != 0:
@@ -467,7 +468,9 @@ Rules:
 - The final JSON object must contain author_id, book_id, title, series_id, series_index, confidence, and evidence.
 - title must be a canonical title slug using lower-case words separated by hyphens.
 - Use series_id null and series_index 0 for standalone books.
-- If you use a series_id, series_index must be an integer greater than or equal to 1.
+- If you use a series_id, series_index must be a whole number or .5 value greater than 0.
+- Treat series slugs that differ only by underscores as the same series. Prefer the existing catalog row instead of
+  creating a new series.
 - Detect omnibus or box-set editions that contain multiple numbered novels, books, or novellas.
 - For an omnibus, make a best-effort range from the filename, tags, and catalog rows. Keep series_index as the
   first covered book number and include the range in the title when the source title includes it, for example
@@ -524,7 +527,7 @@ def parse_final_metadata_fields(raw_metadata: object) -> FinalMetadataFields:
         book_id=optional_int(data.get("book_id"), "book_id"),
         title=required_string(data, "title"),
         series_id=optional_int(data.get("series_id"), "series_id"),
-        series_index=required_int(data, "series_index"),
+        series_index=required_series_index(data, "series_index"),
         confidence=required_float(data, "confidence"),
         evidence=required_string_list(data, "evidence"),
     )
diff --git a/tests/test_audible_convert.py b/tests/test_audible_convert.py
index 22cdbe3..28e9c5f 100644
--- a/tests/test_audible_convert.py
+++ b/tests/test_audible_convert.py
@@ -6,7 +6,8 @@ import json
 import subprocess
 
 import pytest
-from sqlalchemy import create_engine
+from sqlalchemy import create_engine, select
+from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session, sessionmaker
 
 from python.orm.richie import Audiobook, AudiobookAuthor, AudiobookSeries, RichieBase
@@ -113,6 +114,62 @@ def test_output_stem_uses_catalog_slugs() -> None:
     assert audible_convert.output_stem(metadata) == "glynn_stewart-starships_mage_01-title-slug"
 
 
+def test_output_stem_formats_half_series_index() -> None:
+    metadata = StandardBookMetadata(
+        author_id=1,
+        author="glynn_stewart",
+        book_id=None,
+        title="title-slug",
+        series_id=1,
+        series="starships_mage",
+        series_index=1.5,  # half index is expected to render as "01.5" in the stem
+        confidence=0.96,
+        needs_review=False,
+        evidence=["test"],
+    )
+
+    assert audible_convert.output_stem(metadata) == "glynn_stewart-starships_mage_01.5-title-slug"
+
+
+@pytest.mark.parametrize(
+    ("metadata", "expected"),
+    [
+        (
+            StandardBookMetadata(
+                author_id=1,
+                author="mark_e_cooper",
+                book_id=None,
+                title="merkiaari-wars-series-books-1-3",
+                series_id=1,
+                series="merkiaari_wars",
+                series_index=1,  # first book covered by the "books-1-3" range in the title
+                confidence=0.96,
+                needs_review=False,
+                evidence=["test"],
+            ),
+            "mark_e_cooper-merkiaari_wars_01-03-merkiaari-wars-series-books-1-3",
+        ),
+        (
+            StandardBookMetadata(
+                author_id=1,
+                author="rhett_c_bruno",
+                book_id=None,
+                title="the-circuit-books-1-3",
+                series_id=1,
+                series="the_circuit",
+                series_index=1,  # first book covered by the "books-1-3" range in the title
+                confidence=0.96,
+                needs_review=False,
+                evidence=["test"],
+            ),
+            "rhett_c_bruno-the_circuit_01-03-the-circuit-books-1-3",
+        ),
+    ],
+)
+def test_output_stem_formats_omnibus_book_range(metadata: StandardBookMetadata, expected: str) -> None:
+    assert audible_convert.output_stem(metadata) == expected
+
+
 def test_convert_aax_file_runs_ffmpeg(tmp_path, monkeypatch) -> None:
     """test_convert_aax_file_runs_ffmpeg."""
     commands = []
@@ -196,6 +253,8 @@ def test_system_prompt_instructs_agent_to_detect_omnibuses() -> None:
     assert "Detect omnibus or box-set editions" in prompt
     assert "books-1-3" in prompt
     assert "Keep series_index as the" in prompt
+    assert "series_index must be a whole number or .5 value" in prompt
+    assert "differ only by underscores" in prompt
 
 
 def test_standard_book_metadata_accepts_valid_tool_output(tmp_path, monkeypatch, audiobook_engine) -> None:
@@ -666,6 +725,85 @@ def test_standard_book_metadata_can_create_missing_catalog_rows(
         assert book.series_id == series.id
 
 
+def test_standard_book_metadata_accepts_half_series_index(tmp_path, monkeypatch, audiobook_engine) -> None:
+    install_fake_ollama(
+        monkeypatch,
+        [
+            tool_response("search_series", {"query": "bobiverse", "author_id": 4}),
+            final_response(
+                {
+                    "author_id": 4,
+                    "book_id": None,
+                    "title": "bobiverse-short",
+                    "series_id": 4,
+                    "series_index": 1.5,  # fractional index supplied by the fake model response
+                    "confidence": 0.95,
+                    "evidence": ["series novella from tags"],
+                },
+            ),
+        ],
+    )
+
+    metadata = standard_book_metadata(
+        "Bobiverse Short.aax",
+        {"title": "Bobiverse Short", "artist": "Dennis E Taylor"},
+        audiobook_engine,
+        tmp_path / "agent.jsonl",
+        "test-key",
+        config=metadata_agent.AgentConfig(),
+    )
+
+    assert metadata.series_index == 1.5
+    assert metadata.needs_review is False
+    with Session(audiobook_engine) as session:
+        book = session.get(Audiobook, 1)  # NOTE(review): assumes the new row gets id 1 — confirm against fixture
+        assert book.series_index == 1.5
+
+
+def test_standard_book_metadata_reuses_series_with_only_underscore_difference(
+    tmp_path,
+    monkeypatch,
+    audiobook_engine,
+) -> None:
+    with Session(audiobook_engine) as session:
+        session.add(AudiobookSeries(id=5, name="starships", author_id=1))  # existing row the agent should reuse
+        session.commit()
+    install_fake_ollama(
+        monkeypatch,
+        [
+            tool_response("ensure_series", {"name": "starship_s", "author_id": 1}),
+            final_response(
+                {
+                    "author_id": 1,
+                    "book_id": None,
+                    "title": "starships-short",
+                    "series_id": 5,
+                    "series_index": 1,
+                    "confidence": 0.95,
+                    "evidence": ["reused existing series with equivalent slug"],
+                },
+            ),
+        ],
+    )
+
+    metadata = standard_book_metadata(
+        "Starship S Short.aax",
+        {"title": "Starship S Short", "artist": "Glynn Stewart"},
+        audiobook_engine,
+        tmp_path / "agent.jsonl",
+        "test-key",
+        config=metadata_agent.AgentConfig(),
+    )
+
+    assert metadata.series == "starships"
+    with Session(audiobook_engine) as session:
+        series_names = session.scalars(
+            select(AudiobookSeries.name).where(AudiobookSeries.author_id == 1).order_by(AudiobookSeries.name),
+        ).all()
+        assert "starship_s" not in series_names  # ensure_series must not have created a near-duplicate row
+        assert series_names == ["black_fleet_trilogy", "starships", "starships_mage"]
+
+
 def test_standard_book_metadata_normalizes_noisy_created_catalog_rows(
     tmp_path,
     monkeypatch,
@@ -888,6 +1026,19 @@ def test_richie_exports_audiobook_models() -> None:
     assert Audiobook.__tablename__ == "audiobook"
 
 
+def test_audiobook_title_author_series_is_unique(audiobook_engine) -> None:
+    with Session(audiobook_engine) as session:
+        session.add_all(
+            [
+                Audiobook(title="duplicate-title", author_id=1, series_id=1, series_index=1),
+                Audiobook(title="duplicate-title", author_id=1, series_id=1, series_index=2),
+            ],
+        )
+
+        with pytest.raises(IntegrityError):  # same author/series/title must violate the unique constraint
+            session.commit()
+
+
 def test_main_dry_run_prints_outputs_without_converting(tmp_path, monkeypatch, capsys) -> None:
     input_directory = tmp_path / "raw"
     output_directory = tmp_path / "audiobooks"