Convert configuration to pydantic-settings

This commit is contained in:
2026-06-14 21:29:45 -04:00
parent a5d7c3be4f
commit 68b3a38b81
10 changed files with 111 additions and 110 deletions
+10 -5
View File
@@ -79,17 +79,17 @@ def ingest_configured_paths(session: Session, config: EbookSearchConfig) -> int:
path = Path(library_path).expanduser()
logger.info("ebook_ingest_path_start path=%s", path)
if path.is_file() and path.suffix.lower() == ".epub":
count += int(ingest_file(session, path))
count += int(ingest_file(session, path, config))
elif path.is_dir():
for epub_path in sorted(path.rglob("*.epub")):
count += int(ingest_file(session, epub_path))
count += int(ingest_file(session, epub_path, config))
else:
logger.warning("ebook_ingest_path_missing path=%s", path)
logger.info("ebook_ingest_paths_complete changed_files=%s configured_paths=%s", count, len(config.library_paths))
return count
def ingest_file(session: Session, path: Path) -> bool:
def ingest_file(session: Session, path: Path, config: EbookSearchConfig) -> bool:
"""Ingest one EPUB file. Return True when the database changed."""
resolved_path = path.expanduser().resolve()
logger.info("ebook_ingest_file_start path=%s", resolved_path)
@@ -134,7 +134,7 @@ def ingest_file(session: Session, path: Path) -> bool:
)
session.add(chapter)
session.flush()
chunk_index = add_chapter_chunks(session, source, chapter, parsed_chapter, chunk_index)
chunk_index = add_chapter_chunks(session, source, chapter, parsed_chapter, chunk_index, config)
session.flush()
logger.info(
@@ -160,10 +160,15 @@ def add_chapter_chunks(
chapter: EbookChapter,
parsed_chapter: ParsedChapter,
chunk_index: int,
config: EbookSearchConfig,
) -> int:
"""Add chunk rows for one parsed chapter and return the next chunk index."""
page_label = parsed_chapter.page_labels[0] if parsed_chapter.page_labels else None
for text_chunk in chunk_text(parsed_chapter.text):
for text_chunk in chunk_text(
parsed_chapter.text,
chunk_tokens=config.chunk_tokens,
overlap_tokens=config.chunk_overlap,
):
session.add(
EbookChunk(
source_id=source.id,