Convert ebook search configuration to pydantic-settings

This commit is contained in:
2026-06-14 21:29:45 -04:00
parent a5d7c3be4f
commit 68b3a38b81
10 changed files with 111 additions and 110 deletions
+1
View File
@@ -55,6 +55,7 @@
polars
psycopg
pydantic
pydantic-settings
pyfakefs
pytest
pytest-cov
+1
View File
@@ -18,6 +18,7 @@ dependencies = [
"polars",
"psycopg[binary]",
"pydantic",
"pydantic-settings",
"python-multipart",
"sqlalchemy",
"tenacity",
+2 -4
View File
@@ -3,7 +3,6 @@
from __future__ import annotations
import logging
from dataclasses import replace
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
@@ -17,7 +16,6 @@ from python.ebook_search.ingest import ingest_configured_paths
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/admin")
EMBED_ALL_BATCH_SIZE = 32
@router.get("", response_class=HTMLResponse)
@@ -70,7 +68,7 @@ def embed_all(request: Request) -> HTMLResponse:
"""Embed all chunks missing vectors in fixed-size batches."""
total = 0
batches = 0
config = replace(request.app.state.config, embedding_batch_size=EMBED_ALL_BATCH_SIZE)
config = request.app.state.config
try:
with Session(request.app.state.engine) as session:
while True:
@@ -103,5 +101,5 @@ def embed_all(request: Request) -> HTMLResponse:
return templates.TemplateResponse(
request,
"partials/admin_status.html",
{"message": f"Embedded {total} chunks in {batches} batches of {EMBED_ALL_BATCH_SIZE}"},
{"message": f"Embedded {total} chunks in {batches} batches of {config.embedding_batch_size}"},
)
+82 -89
View File
@@ -2,88 +2,15 @@
from __future__ import annotations
from dataclasses import dataclass
from os import getenv
from typing import Annotated
from pydantic import AliasChoices, Field, field_validator
from pydantic_settings import BaseSettings, NoDecode, SettingsConfigDict
def getenv_bool(name: str, *, default: bool) -> bool:
    """Read a boolean environment variable with a default fallback.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        True when the trimmed, lower-cased value is one of "1", "true",
        "yes" or "on"; False for any other non-blank value.
    """
    value = getenv(name)
    # A blank value is treated like an unset one, the same way getenv_int
    # handles it, so an empty assignment cannot silently flip a True default.
    if value is None or not value.strip():
        return default
    return value.strip().lower() in {"1", "true", "yes", "on"}
def getenv_int(name: str, *, default: int) -> int:
    """Read an integer environment variable with a default fallback.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed integer, or ``default`` when nothing usable is set.

    Raises:
        ValueError: If the variable is set to something that is not an integer.
    """
    value = getenv(name)
    if value is None or not value.strip():
        return default
    try:
        return int(value)
    except ValueError as err:
        # Name the variable so a bad deployment setting can be found quickly;
        # int()'s own message only shows the offending literal.
        error = f"Environment variable {name} must be an integer, got {value!r}"
        raise ValueError(error) from err
@dataclass(frozen=True)
class RerankConfig:
    """Immutable settings for the vLLM reranker."""

    # Whether the reranking stage runs at all.
    enabled: bool = False
    # Root URL of the reranker service.
    base_url: str = "http://192.168.90.25:8001"
    # Model name sent to the reranker service.
    model: str = "qwen3-reranker-06b"
    # Number of candidates handed to the reranker.
    candidates: int = 24
    # Request timeout for reranker calls, in seconds.
    timeout_seconds: float = 30.0
@dataclass(frozen=True)
class EbookSearchConfig:
    """Immutable runtime settings for EPUB search."""

    # Reranker settings; the only field without a default.
    rerank: RerankConfig

    # Retrieval
    top_k: int = 12
    library_paths: tuple[str, ...] = ()

    # Chat / answer generation endpoint
    vllm_base_url: str = "https://ollama.com/v1"
    vllm_api_key: str = "not-needed"
    chat_model: str = "deepseek-v4-flash"
    answer_enabled: bool = True

    # Embedding endpoint
    embedding_base_url: str = "http://192.168.90.25:8000/v1"
    embedding_api_key: str = "not-needed"
    embedding_model: str = "qwen3-embedding-0.6b"
    embedding_batch_size: int = 32

    # BM25 lexical index
    bm25_index_dir: str = ".ebook_search_bm25"
    bm25_refresh_delay_seconds: int = 60
def load_rerank_config() -> RerankConfig:
    """Load reranker config from ``EBOOK_SEARCH_RERANK_*`` environment variables.

    Returns:
        A RerankConfig whose unset fields keep their built-in defaults.

    Raises:
        ValueError: If the candidate count is not an integer or the timeout
            is not a number.
    """
    enabled = getenv_bool("EBOOK_SEARCH_RERANK_ENABLED", default=False)
    base_url = getenv("EBOOK_SEARCH_RERANK_BASE_URL", "http://192.168.90.25:8001")
    model = getenv("EBOOK_SEARCH_RERANK_MODEL", "qwen3-reranker-06b")
    candidates = getenv_int("EBOOK_SEARCH_RERANK_CANDIDATES", default=24)
    # The timeout field is a float, so parse it as one. Going through
    # getenv_int rejected fractional values such as "2.5". Blank or unset
    # still means the 30 second default.
    raw_timeout = getenv("EBOOK_SEARCH_RERANK_TIMEOUT_SECONDS")
    timeout_seconds = float(raw_timeout) if raw_timeout and raw_timeout.strip() else 30.0
    return RerankConfig(
        enabled=enabled,
        base_url=base_url,
        model=model,
        candidates=candidates,
        timeout_seconds=timeout_seconds,
    )
def load_config() -> EbookSearchConfig:
    """Build the EPUB search settings from ``EBOOK_SEARCH_*`` environment variables."""
    # First non-empty of: the dedicated chat key, the shared Ollama key, the placeholder.
    chat_api_key = getenv("EBOOK_SEARCH_VLLM_API_KEY") or getenv("OLLAMA_API_KEY") or "not-needed"
    # The dict keeps the original evaluation order of the individual readers.
    settings = {
        "rerank": load_rerank_config(),
        "top_k": getenv_int("EBOOK_SEARCH_TOP_K", default=12),
        "library_paths": library_paths_from_env(),
        "vllm_base_url": getenv("EBOOK_SEARCH_VLLM_BASE_URL", "https://ollama.com/v1"),
        "vllm_api_key": chat_api_key,
        "chat_model": getenv("EBOOK_SEARCH_CHAT_MODEL", "deepseek-v4-flash"),
        "answer_enabled": getenv_bool("EBOOK_SEARCH_ANSWER_ENABLED", default=True),
        "embedding_base_url": getenv("EBOOK_SEARCH_EMBEDDING_BASE_URL", "http://192.168.90.25:8000/v1"),
        "embedding_api_key": getenv("EBOOK_SEARCH_EMBEDDING_API_KEY", "not-needed"),
        "embedding_model": normalize_embedding_model(),
        "embedding_batch_size": getenv_int("EBOOK_SEARCH_EMBEDDING_BATCH_SIZE", default=32),
        "bm25_index_dir": getenv("EBOOK_SEARCH_BM25_INDEX_DIR", ".ebook_search_bm25"),
        "bm25_refresh_delay_seconds": getenv_int("EBOOK_SEARCH_BM25_REFRESH_DELAY_SECONDS", default=60),
    }
    return EbookSearchConfig(**settings)
def normalize_embedding_model(default: str = "qwen3-embedding-0.6b") -> str:
"""Normalize supported embedding aliases to provider model names."""
def normalize_embedding_alias(model: str) -> str:
"""Normalize a supported embedding alias to its provider model name."""
aliases = {
"Qwen3-Embedding-0.6B": "qwen3-embedding-0.6b",
"Qwen3-Embedding-4B": "qwen3-embedding-4b",
@@ -98,20 +25,86 @@ def normalize_embedding_model(default: str = "qwen3-embedding-0.6b") -> str:
"qwen3-embedding-4b": "qwen3-embedding-4b",
"qwen3-embedding-8b": "qwen3-embedding-8b",
}
model = getenv("EBOOK_SEARCH_EMBEDDING_MODEL", default)
standard_model = aliases.get(model)
if standard_model is None:
error = f"Embedding model {model} is not supported. Supported models are {aliases.keys()}"
raise ValueError(error)
return standard_model
def library_paths_from_env() -> tuple[str, ...]:
    """Return the EPUB library paths listed in ``EBOOK_SEARCH_LIBRARY_PATHS``.

    The variable holds a colon-separated list; empty segments are dropped.
    An unset variable yields an empty tuple.
    """
    raw_paths = getenv("EBOOK_SEARCH_LIBRARY_PATHS")
    # filter(None, ...) discards the empty strings left by doubled or trailing colons.
    return () if raw_paths is None else tuple(filter(None, raw_paths.split(":")))
def normalize_embedding_model(default: str = "qwen3-embedding-0.6b") -> str:
    """Resolve ``EBOOK_SEARCH_EMBEDDING_MODEL`` to its provider model name.

    ``default`` is the alias used when the variable is unset.
    """
    configured_alias = getenv("EBOOK_SEARCH_EMBEDDING_MODEL", default)
    return normalize_embedding_alias(configured_alias)
class RerankConfig(BaseSettings):
    """vLLM reranker settings, read from ``EBOOK_SEARCH_RERANK_*`` variables.

    Instances are frozen. Every field has a default, so ``RerankConfig()``
    works with no environment configured.
    """

    model_config = SettingsConfigDict(
        env_prefix="EBOOK_SEARCH_RERANK_",
        # A blank variable (e.g. `EBOOK_SEARCH_RERANK_CANDIDATES=`) counts as
        # unset and falls back to the default instead of failing validation.
        env_ignore_empty=True,
        frozen=True,
        protected_namespaces=(),
    )

    # Whether the reranking stage runs at all.
    enabled: bool = False
    # Root URL of the reranker service.
    base_url: str = "http://192.168.90.25:8001"
    # Model name sent to the reranker service.
    model: str = "qwen3-reranker-06b"
    # Number of candidates handed to the reranker.
    candidates: int = 24
    # Request timeout for reranker calls, in seconds.
    timeout_seconds: float = 30.0
    # Weights for blending the rerank score with the hybrid retrieval score.
    score_weight: float = 0.7
    hybrid_weight: float = 0.3
class EbookSearchConfig(BaseSettings):
    """Runtime settings for EPUB search, read from ``EBOOK_SEARCH_*`` variables.

    Instances are frozen; derive a modified copy with ``model_copy(update=...)``.
    """

    model_config = SettingsConfigDict(
        env_prefix="EBOOK_SEARCH_",
        # Blank variables count as unset. This keeps two tolerances of the
        # previous getenv-based loader: blank integers fall back to their
        # defaults, and an empty EBOOK_SEARCH_VLLM_API_KEY falls through to
        # OLLAMA_API_KEY instead of being used as the key.
        env_ignore_empty=True,
        frozen=True,
        # Lets code construct the model with `vllm_api_key=...` even though
        # that field declares a validation alias.
        populate_by_name=True,
        protected_namespaces=(),
    )

    # Nested reranker settings; the factory builds them from their own
    # EBOOK_SEARCH_RERANK_* variables.
    rerank: RerankConfig = Field(default_factory=RerankConfig)

    # Retrieval and ingestion
    top_k: int = 12
    # NoDecode disables JSON decoding so split_library_paths receives the raw string.
    library_paths: Annotated[tuple[str, ...], NoDecode] = ()
    chunk_tokens: int = 700
    chunk_overlap: int = 100

    # Chat / answer generation endpoint
    vllm_base_url: str = "https://ollama.com/v1"
    # Alias names are matched as written (env_prefix is not applied); the first one set wins.
    vllm_api_key: str = Field(
        default="not-needed",
        validation_alias=AliasChoices("EBOOK_SEARCH_VLLM_API_KEY", "OLLAMA_API_KEY"),
    )
    chat_model: str = "deepseek-v4-flash"
    answer_enabled: bool = True
    chat_timeout_seconds: float = 60.0

    # Embedding endpoint
    embedding_base_url: str = "http://192.168.90.25:8000/v1"
    embedding_api_key: str = "not-needed"
    embedding_model: str = "qwen3-embedding-0.6b"
    embedding_batch_size: int = 32
    embedding_timeout_seconds: float = 60.0

    # Candidate generation and rank fusion
    vector_candidate_multiplier: int = 4
    bm25_candidate_limit: int = 120
    rrf_rank_constant: int = 60

    # BM25 lexical index
    bm25_index_dir: str = ".ebook_search_bm25"
    bm25_refresh_delay_seconds: int = 60

    @field_validator("library_paths", mode="before")
    @classmethod
    def split_library_paths(cls, value: object) -> object:
        """Split a colon-separated library path string into a tuple of paths.

        Empty segments are dropped. Non-string input is passed through
        unchanged for normal validation.
        """
        if isinstance(value, str):
            return tuple(path for path in value.split(":") if path)
        return value

    @field_validator("embedding_model")
    @classmethod
    def normalize_embedding(cls, value: str) -> str:
        """Normalize the configured embedding alias to its provider model name."""
        return normalize_embedding_alias(value)
def load_rerank_config() -> RerankConfig:
    """Build the reranker settings from the current environment."""
    rerank_settings = RerankConfig()
    return rerank_settings
def load_config() -> EbookSearchConfig:
    """Build the EPUB search settings from the current environment."""
    search_settings = EbookSearchConfig()
    return search_settings
+10 -5
View File
@@ -79,17 +79,17 @@ def ingest_configured_paths(session: Session, config: EbookSearchConfig) -> int:
path = Path(library_path).expanduser()
logger.info("ebook_ingest_path_start path=%s", path)
if path.is_file() and path.suffix.lower() == ".epub":
count += int(ingest_file(session, path))
count += int(ingest_file(session, path, config))
elif path.is_dir():
for epub_path in sorted(path.rglob("*.epub")):
count += int(ingest_file(session, epub_path))
count += int(ingest_file(session, epub_path, config))
else:
logger.warning("ebook_ingest_path_missing path=%s", path)
logger.info("ebook_ingest_paths_complete changed_files=%s configured_paths=%s", count, len(config.library_paths))
return count
def ingest_file(session: Session, path: Path) -> bool:
def ingest_file(session: Session, path: Path, config: EbookSearchConfig) -> bool:
"""Ingest one EPUB file. Return True when the database changed."""
resolved_path = path.expanduser().resolve()
logger.info("ebook_ingest_file_start path=%s", resolved_path)
@@ -134,7 +134,7 @@ def ingest_file(session: Session, path: Path) -> bool:
)
session.add(chapter)
session.flush()
chunk_index = add_chapter_chunks(session, source, chapter, parsed_chapter, chunk_index)
chunk_index = add_chapter_chunks(session, source, chapter, parsed_chapter, chunk_index, config)
session.flush()
logger.info(
@@ -160,10 +160,15 @@ def add_chapter_chunks(
chapter: EbookChapter,
parsed_chapter: ParsedChapter,
chunk_index: int,
config: EbookSearchConfig,
) -> int:
"""Add chunk rows for one parsed chapter and return the next chunk index."""
page_label = parsed_chapter.page_labels[0] if parsed_chapter.page_labels else None
for text_chunk in chunk_text(parsed_chapter.text):
for text_chunk in chunk_text(
parsed_chapter.text,
chunk_tokens=config.chunk_tokens,
overlap_tokens=config.chunk_overlap,
):
session.add(
EbookChunk(
source_id=source.id,
+2 -2
View File
@@ -29,7 +29,7 @@ def request_embeddings(texts: Sequence[str], config: EbookSearchConfig) -> list[
f"{config.embedding_base_url.rstrip('/')}/embeddings",
headers=auth_headers(config.embedding_api_key),
json={"model": config.embedding_model, "input": list(texts)},
timeout=60,
timeout=config.embedding_timeout_seconds,
)
response.raise_for_status()
return embedding_vectors_from_response(response.json())
@@ -106,7 +106,7 @@ def request_chat_completion(
"messages": list(messages),
"temperature": 0,
},
timeout=60,
timeout=config.chat_timeout_seconds,
)
response.raise_for_status()
return chat_content_from_response(response.json())
+8 -5
View File
@@ -13,8 +13,6 @@ if TYPE_CHECKING:
from python.ebook_search.search import SearchResult
logger = logging.getLogger(__name__)
RERANK_SCORE_WEIGHT = 0.7
HYBRID_SCORE_WEIGHT = 0.3
@dataclass(frozen=True)
@@ -41,7 +39,7 @@ def rerank_chunks(query: str, candidates: list[SearchResult], config: RerankConf
(
replace(
result,
score=final_rerank_score(result, scores[result.chunk_id].score, candidates),
score=final_rerank_score(result, scores[result.chunk_id].score, candidates, config),
rerank_score=scores[result.chunk_id].score,
)
for result in candidates
@@ -110,9 +108,14 @@ def clamp_score(score: float) -> float:
return min(max(score, 0.0), 1.0)
def final_rerank_score(result: SearchResult, rerank_score: float, candidates: list[SearchResult]) -> float:
def final_rerank_score(
result: SearchResult,
rerank_score: float,
candidates: list[SearchResult],
config: RerankConfig,
) -> float:
"""Combine rerank relevance with normalized hybrid retrieval evidence."""
return (RERANK_SCORE_WEIGHT * rerank_score) + (HYBRID_SCORE_WEIGHT * normalized_hybrid_score(result, candidates))
return (config.score_weight * rerank_score) + (config.hybrid_weight * normalized_hybrid_score(result, candidates))
def normalized_hybrid_score(result: SearchResult, candidates: list[SearchResult]) -> float:
+3 -3
View File
@@ -35,7 +35,6 @@ if TYPE_CHECKING:
from python.ebook_search.config import EbookSearchConfig
logger = logging.getLogger(__name__)
BM25_CANDIDATE_LIMIT = 120
@dataclass(frozen=True)
@@ -111,6 +110,7 @@ def search_ebooks(
reciprocal_rank_fusion,
retrieval.vector_results,
retrieval.lexical_results,
rank_constant=config.rrf_rank_constant,
)
timings.append(timing)
if config.rerank.enabled and rerank:
@@ -216,7 +216,7 @@ def vector_candidates(engine: Engine, query: str, config: EbookSearchConfig) ->
raise ValueError(msg)
embedding = embed_query(query, config)
limit = max(config.rerank.candidates, config.top_k) * 4
limit = max(config.rerank.candidates, config.top_k) * config.vector_candidate_multiplier
embedding_table = get_embedding_table(model.dimension)
embedding_param = literal(embedding, type_=Vector(model.dimension))
@@ -263,7 +263,7 @@ def bm25_candidates(query: str, config: EbookSearchConfig) -> list[SearchResult]
logger.info("ebook_bm25_search_complete corpus=0 candidates=0")
return []
scored_records = score_bm25_corpus(query, corpus, limit=BM25_CANDIDATE_LIMIT)
scored_records = score_bm25_corpus(query, corpus, limit=config.bm25_candidate_limit)
results = [
replace(search_result_from_row(record), score=score, vector_score=None, bm25_score=score)
for record, score in scored_records
+1 -2
View File
@@ -3,7 +3,6 @@
from __future__ import annotations
import logging
from dataclasses import replace
from datetime import UTC, datetime
from os import environ
from pathlib import Path
@@ -528,7 +527,7 @@ def test_chat_api_key_falls_back_to_ollama_api_key(monkeypatch) -> None:
def test_answer_query_does_not_call_model_when_disabled() -> None:
config = replace(load_config(), answer_enabled=False)
config = load_config().model_copy(update={"answer_enabled": False})
result = SearchResult(chunk_id=1, text="source text", source_title="Book")
answer = answer_query("question", [result], config)
@@ -80,6 +80,7 @@
"fastapi",
"Michal",
"Nornsight",
"pydantic",
"sandboxing",
"syncthing",
],