converting to pydantic-settings
This commit is contained in:
@@ -79,17 +79,17 @@ def ingest_configured_paths(session: Session, config: EbookSearchConfig) -> int:
|
||||
path = Path(library_path).expanduser()
|
||||
logger.info("ebook_ingest_path_start path=%s", path)
|
||||
if path.is_file() and path.suffix.lower() == ".epub":
|
||||
- count += int(ingest_file(session, path))
+ count += int(ingest_file(session, path, config))
|
||||
elif path.is_dir():
|
||||
for epub_path in sorted(path.rglob("*.epub")):
|
||||
- count += int(ingest_file(session, epub_path))
+ count += int(ingest_file(session, epub_path, config))
|
||||
else:
|
||||
logger.warning("ebook_ingest_path_missing path=%s", path)
|
||||
logger.info("ebook_ingest_paths_complete changed_files=%s configured_paths=%s", count, len(config.library_paths))
|
||||
return count
|
||||
|
||||
|
||||
- def ingest_file(session: Session, path: Path) -> bool:
+ def ingest_file(session: Session, path: Path, config: EbookSearchConfig) -> bool:
|
||||
"""Ingest one EPUB file. Return True when the database changed."""
|
||||
resolved_path = path.expanduser().resolve()
|
||||
logger.info("ebook_ingest_file_start path=%s", resolved_path)
|
||||
@@ -134,7 +134,7 @@ def ingest_file(session: Session, path: Path) -> bool:
|
||||
)
|
||||
session.add(chapter)
|
||||
session.flush()
|
||||
- chunk_index = add_chapter_chunks(session, source, chapter, parsed_chapter, chunk_index)
+ chunk_index = add_chapter_chunks(session, source, chapter, parsed_chapter, chunk_index, config)
|
||||
|
||||
session.flush()
|
||||
logger.info(
|
||||
@@ -160,10 +160,15 @@ def add_chapter_chunks(
|
||||
chapter: EbookChapter,
|
||||
parsed_chapter: ParsedChapter,
|
||||
chunk_index: int,
|
||||
+ config: EbookSearchConfig,
|
||||
) -> int:
|
||||
"""Add chunk rows for one parsed chapter and return the next chunk index."""
|
||||
page_label = parsed_chapter.page_labels[0] if parsed_chapter.page_labels else None
|
||||
- for text_chunk in chunk_text(parsed_chapter.text):
+ for text_chunk in chunk_text(
+     parsed_chapter.text,
+     chunk_tokens=config.chunk_tokens,
+     overlap_tokens=config.chunk_overlap,
+ ):
|
||||
session.add(
|
||||
EbookChunk(
|
||||
source_id=source.id,
|
||||
|
||||
Reference in New Issue
Block a user