updated series_index to float and added UniqueConstraint to audiobook and audiobook_author

This commit is contained in:
2026-06-10 20:07:27 -04:00
parent c25f973d4a
commit 5f08932007
6 changed files with 307 additions and 20 deletions
+28 -1
View File
@@ -4,6 +4,7 @@ from __future__ import annotations
import json
import logging
import re
import shutil
import subprocess
from concurrent.futures import ThreadPoolExecutor
@@ -30,6 +31,7 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
SENSITIVE_COMMAND_ARGUMENTS = {"-activation_bytes"}
BOOK_RANGE_PATTERN = re.compile(r"(?:^|-)books?-(?P<start>[1-9]\d*)-(?P<end>[1-9]\d*)(?:-|$)")
@dataclass(frozen=True)
@@ -178,7 +180,32 @@ def output_stem(metadata: StandardBookMetadata) -> str:
Returns:
Output stem in author-series_01-title form.
"""
return f"{metadata.author}-{metadata.series}_{metadata.series_index:02}-{metadata.title}"
index_slug = series_index_slug(metadata.series_index, metadata.title)
return f"{metadata.author}-{metadata.series}_{index_slug}-{metadata.title}"
def series_index_slug(series_index: float, title: str = "") -> str:
    """Return a filename-safe series index.

    An omnibus range found in ``title`` by ``title_series_range_slug`` takes
    precedence. Otherwise whole numbers are zero-padded to two digits
    (``3`` -> ``"03"``) and fractional numbers keep their own fraction
    (``1.5`` -> ``"01.5"``, ``1.25`` -> ``"01.25"``).

    Args:
        series_index: Position of the book in its series.
        title: Title slug that may carry an omnibus range.

    Returns:
        The range slug, or the zero-padded index with its fraction if any.
    """
    if title_range := title_series_range_slug(series_index, title):
        return title_range
    index = float(series_index)
    if index.is_integer():
        return f"{int(index):02}"
    # Render the real fraction instead of a fixed ".5" so that distinct
    # indexes such as 1.25 and 1.5 do not collapse onto the same file name.
    fraction = f"{index:.6f}".rstrip("0").partition(".")[2]
    if not fraction:
        # The fraction vanishes at six decimals; use the nearest whole number.
        return f"{round(index):02}"
    return f"{int(index):02}.{fraction}"
def title_series_range_slug(series_index: float, title: str) -> str | None:
    """Find the omnibus range in ``title`` that begins at ``series_index``.

    Only whole-number indexes are considered. The first range matched by
    ``BOOK_RANGE_PATTERN`` whose start equals the index and whose end is
    greater than its start is returned as ``"SS-EE"`` (two-digit padded).

    Returns:
        The range slug, or None when the index is fractional or no range fits.
    """
    numeric_index = float(series_index)
    if numeric_index.is_integer():
        wanted_start = int(numeric_index)
        for found in BOOK_RANGE_PATTERN.finditer(title):
            range_start, range_end = (int(found.group(name)) for name in ("start", "end"))
            if range_start == wanted_start and range_end > range_start:
                return f"{range_start:02}-{range_end:02}"
    return None
def metadata_output_path(output_directory: Path, metadata: StandardBookMetadata) -> Path:
+44 -10
View File
@@ -144,7 +144,7 @@ class CatalogToolRegistry:
"title": {"type": "string"},
"author_id": {"type": "integer"},
"series_id": {"type": ["integer", "null"]},
"series_index": {"type": "integer"},
"series_index": {"type": "number", "multipleOf": 0.5},
},
"required": ["title", "author_id", "series_id", "series_index"],
},
@@ -306,12 +306,7 @@ class CatalogToolRegistry:
author_id = required_int(arguments, "author_id")
validate_catalog_slug(name, "series")
author = self.required_author(author_id)
series = self.session.scalar(
select(AudiobookSeries).where(
AudiobookSeries.name == name,
AudiobookSeries.author_id == author.id,
),
)
series = self.find_series_by_catalog_slug(name, author.id)
action = "existing"
if series is None:
series = AudiobookSeries(name=name, author=author)
@@ -329,7 +324,7 @@ class CatalogToolRegistry:
title = required_string(arguments, "title")
author_id = required_int(arguments, "author_id")
series_id = optional_int(arguments.get("series_id"), "series_id")
series_index = required_int(arguments, "series_index")
series_index = required_series_index(arguments, "series_index")
ensured = self.ensure_book(title, author_id, series_id, series_index)
return [self.book_result(ensured.book, ensured.action)]
@@ -338,7 +333,7 @@ class CatalogToolRegistry:
title: str,
author_id: int,
series_id: int | None,
series_index: int,
series_index: float,
) -> EnsuredBook:
"""Return an existing book row, or create it after validating ownership."""
title = normalize_title_slug(title)
@@ -398,6 +393,26 @@ class CatalogToolRegistry:
raise MetadataResolutionError(msg)
return series
def find_series_by_catalog_slug(self, name: str, author_id: int) -> AudiobookSeries | None:
    """Look up an author's series by slug, tolerating underscore differences.

    An exact name match for the author is tried first. When there is none,
    the author's series are loaded ordered by name and the first one whose
    compacted slug equals the compacted form of ``name`` is returned.

    Returns:
        The matching series row, or None when nothing matches.
    """
    exact_query = select(AudiobookSeries).where(
        AudiobookSeries.name == name,
        AudiobookSeries.author_id == author_id,
    )
    exact_match = self.session.scalar(exact_query)
    if exact_match is not None:
        return exact_match

    wanted_key = compact_catalog_slug(name)
    author_query = (
        select(AudiobookSeries)
        .where(AudiobookSeries.author_id == author_id)
        .order_by(AudiobookSeries.name)
    )
    candidates = self.session.scalars(author_query).all()
    return next(
        (candidate for candidate in candidates if compact_catalog_slug(candidate.name) == wanted_key),
        None,
    )
def series_result(self, series: AudiobookSeries, action: str) -> dict[str, object]:
"""Build a normalized series tool result."""
return {
@@ -513,6 +528,11 @@ def normalize_catalog_slug(value: str) -> str:
return re.sub(r"[^a-z0-9]+", "_", value.strip().casefold()).strip("_")
def compact_catalog_slug(value: str) -> str:
    """Build a comparison key from a catalog slug with every underscore removed."""
    slug = normalize_catalog_slug(value)
    return "".join(slug.split("_"))
def normalize_title_slug(value: str) -> str:
    """Turn a noisy book title into a lower-case, hyphen-separated slug.

    The value is trimmed and case-folded, each run of characters outside
    ``a-z0-9`` becomes a single hyphen, and edge hyphens are removed.
    """
    lowered = value.strip().casefold()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    return hyphenated.strip("-")
@@ -533,8 +553,9 @@ def query_terms(query: str) -> tuple[str, ...]:
"""Return text variants useful for matching noisy audiobook metadata."""
normalized = query.strip().casefold()
underscore_slug = normalize_catalog_slug(normalized)
compact_slug = compact_catalog_slug(normalized)
hyphen_slug = normalize_title_slug(normalized)
return tuple(dict.fromkeys(term for term in (normalized, underscore_slug, hyphen_slug) if term))
return tuple(dict.fromkeys(term for term in (normalized, underscore_slug, compact_slug, hyphen_slug) if term))
def required_string(data: dict[str, object], key: str) -> str:
@@ -555,6 +576,19 @@ def required_int(data: dict[str, object], key: str) -> int:
return value
def required_series_index(data: dict[str, object], key: str) -> float:
"""Read a required whole-number or half-number series index."""
value = data.get(key)
if isinstance(value, bool) or not isinstance(value, int | float):
msg = f"{key} must be a number"
raise MetadataResolutionError(msg)
series_index = float(value)
if not (series_index * 2).is_integer():
msg = f"{key} must be a whole number or .5 increment"
raise MetadataResolutionError(msg)
return series_index
def optional_int(value: object, key: str) -> int | None:
"""Read an optional integer field."""
if value is None:
+10 -7
View File
@@ -19,6 +19,7 @@ from python.tools.audiobook.llm_tool_calling import (
optional_int,
parse_tool_calls,
required_int,
required_series_index,
required_string,
run_tool_calls,
validate_catalog_slug,
@@ -67,7 +68,7 @@ class StandardBookMetadata:
title: str
series_id: int | None
series: str
series_index: int
series_index: float
confidence: float
needs_review: bool
evidence: list[str]
@@ -81,7 +82,7 @@ class FinalMetadataFields:
book_id: int | None
title: str
series_id: int | None
series_index: int
series_index: float
confidence: float
evidence: list[str]
@@ -93,7 +94,7 @@ class ResolvedBookFields:
book_id: int | None
title: str
series_id: int | None
series_index: int
series_index: float
@dataclass(frozen=True)
@@ -283,7 +284,7 @@ class AudiobookMetadataAgent:
"model": self._config.model,
"messages": messages,
"stream": False,
"options": {"temperature": 0},
"options": {"temperature": 0.1},
}
tool_names = []
if tools_enabled:
@@ -403,7 +404,7 @@ class AudiobookMetadataAgent:
series_index=book.series_index,
)
def validate_series(self, author_id: int, series_id: int | None, series_index: int) -> str:
def validate_series(self, author_id: int, series_id: int | None, series_index: float) -> str:
"""Validate final series fields and return the canonical series slug."""
if series_id is None:
if series_index != 0:
@@ -467,7 +468,9 @@ Rules:
- The final JSON object must contain author_id, book_id, title, series_id, series_index, confidence, and evidence.
- title must be a canonical title slug using lower-case words separated by hyphens.
- Use series_id null and series_index 0 for standalone books.
- If you use a series_id, series_index must be an integer greater than or equal to 1.
- If you use a series_id, series_index must be a whole number or .5 value greater than 0.
- Treat series slugs that differ only by underscores as the same series. Prefer the existing catalog row instead of
creating a new series.
- Detect omnibus or box-set editions that contain multiple numbered novels, books, or novellas.
- For an omnibus, make a best-effort range from the filename, tags, and catalog rows. Keep series_index as the
first covered book number and include the range in the title when the source title includes it, for example
@@ -524,7 +527,7 @@ def parse_final_metadata_fields(raw_metadata: object) -> FinalMetadataFields:
book_id=optional_int(data.get("book_id"), "book_id"),
title=required_string(data, "title"),
series_id=optional_int(data.get("series_id"), "series_id"),
series_index=required_int(data, "series_index"),
series_index=required_series_index(data, "series_index"),
confidence=required_float(data, "confidence"),
evidence=required_string_list(data, "evidence"),
)