test(ebook-search): organize tests under dedicated package
Move ebook search tests into tests/ebook_search and standardize mocking on pytest-mock.
This commit is contained in:
@@ -0,0 +1,312 @@
|
||||
"""Tests for EPUB search HTMX routes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from compression import zstd
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from python.ebook_search.api.bm25_tasks import refresh_bm25_for_engine
|
||||
from python.ebook_search.api.main import create_app
|
||||
from python.ebook_search.config import EbookSearchConfig, RerankConfig
|
||||
from python.ebook_search.embeddings import EmbeddingModelStats
|
||||
from python.ebook_search.search import SearchResponse, SearchResult
|
||||
from python.ebook_search.timing import RuntimeStep
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
|
||||
def patch_app_runtime(mocker: MockerFixture):
    """Stub the two ``api.main`` collaborators that the route tests must not run for real.

    ``get_postgres_engine`` is redirected to ``fake_get_postgres_engine`` and
    ``ensure_bm25_corpus`` is replaced by a function that does nothing.
    """

    def skip_bm25_corpus(_session, _config):
        return None

    mocker.patch(
        "python.ebook_search.api.main.get_postgres_engine",
        side_effect=fake_get_postgres_engine,
    )
    mocker.patch(
        "python.ebook_search.api.main.ensure_bm25_corpus",
        side_effect=skip_bm25_corpus,
    )
|
||||
|
||||
|
||||
def fake_get_postgres_engine(**_kwargs):
    """Create a fresh in-memory SQLite engine, ignoring every keyword argument.

    Used as the stand-in for ``get_postgres_engine`` in the route tests.
    """
    sqlite_engine = create_engine("sqlite+pysqlite:///:memory:", future=True)
    return sqlite_engine
|
||||
|
||||
|
||||
def test_search_page_uses_zstd_when_requested(mocker: MockerFixture) -> None:
    """GET ``/`` with ``accept-encoding: zstd`` must answer with a zstd-encoded body."""
    patch_app_runtime(mocker)
    app = create_app()
    no_rerank = RerankConfig(enabled=False)
    app.state.config = EbookSearchConfig(rerank=no_rerank)

    request_headers = {"accept-encoding": "zstd"}
    with TestClient(app) as client:
        response = client.get("/", headers=request_headers)

    assert response.status_code == 200
    assert response.headers["content-encoding"] == "zstd"
    # The body is compressed, so decode it before looking for the page text.
    decoded_body = zstd.decompress(response.content)
    assert b"EPUB Search" in decoded_body
|
||||
|
||||
|
||||
def test_ui_form_passes_rerank_flag_to_search_handler(mocker: MockerFixture) -> None:
    """POST ``/search`` with ``rerank=true`` and check what ``search_ebooks`` receives."""
    captured: dict[str, object] = {}

    def fake_search_ebooks(_engine, query, config, *, rerank=False):
        # Record the arguments the route handed over so they can be asserted below.
        captured.update(query=query, rerank=rerank, config=config)
        return SearchResponse(query=query, results=[], rank_label="Hybrid + rerank")

    def fake_answer_query(_query, _results, _config):
        return "answer"

    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
    mocker.patch("python.ebook_search.api.routes.search.answer_query", side_effect=fake_answer_query)
    patch_app_runtime(mocker)
    app = create_app()
    app.state.config = EbookSearchConfig(
        rerank=RerankConfig(enabled=False),
        top_k=12,
        answer_enabled=True,
    )

    form = {"query": "where is the quote?", "rerank": "true"}
    with TestClient(app) as client:
        response = client.post("/search", data=form)

    assert response.status_code == 200
    assert "Hybrid + rerank" in response.text
    assert captured["query"] == "where is the quote?"
    assert captured["rerank"] is True
|
||||
|
||||
|
||||
def test_ui_search_failure_returns_visible_error(mocker: MockerFixture) -> None:
    """When ``search_ebooks`` raises, the response is a 500 that includes the error message."""

    def fake_search_ebooks(_engine, _query, _config, *, rerank=False):
        del rerank
        failure = "search exploded"
        raise RuntimeError(failure)

    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
    patch_app_runtime(mocker)
    app = create_app()
    app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), top_k=12)

    form = {"query": "where is the quote?"}
    with TestClient(app) as client:
        response = client.post("/search", data=form)

    assert response.status_code == 500
    assert "search exploded" in response.text
|
||||
|
||||
|
||||
def test_ui_answer_failure_still_returns_sources(mocker: MockerFixture) -> None:
    """A raising ``answer_query`` must not fail the request: expect 200 plus a failure notice."""

    def fake_answer_query(_query, _results, _config):
        failure = "answer exploded"
        raise RuntimeError(failure)

    def fake_search_ebooks(_engine, query, _config, *, rerank=False):
        del rerank
        return SearchResponse(query=query, results=[], rank_label="Hybrid")

    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
    mocker.patch("python.ebook_search.api.routes.search.answer_query", side_effect=fake_answer_query)
    patch_app_runtime(mocker)
    app = create_app()
    app.state.config = EbookSearchConfig(
        rerank=RerankConfig(enabled=False),
        top_k=12,
        answer_enabled=True,
    )

    form = {"query": "where is the quote?"}
    with TestClient(app) as client:
        response = client.post("/search", data=form)

    assert response.status_code == 200
    assert "Answer generation failed" in response.text
|
||||
|
||||
|
||||
def test_ui_skips_answer_when_disabled(mocker: MockerFixture) -> None:
    """With ``answer_enabled=False`` the route must not call ``answer_query`` at all."""

    def fake_search_ebooks(_engine, query, _config, *, rerank=False):
        del rerank
        return SearchResponse(query=query, results=[], rank_label="Hybrid")

    def fake_answer_query(_query, _results, _config):
        return "answer"

    def fake_load_config():
        return config

    config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=False)
    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
    # Keep a handle on the patched answer function; the mock itself records whether it ran.
    answer_mock = mocker.patch(
        "python.ebook_search.api.routes.search.answer_query",
        side_effect=fake_answer_query,
    )
    mocker.patch("python.ebook_search.api.main.load_config", side_effect=fake_load_config)
    patch_app_runtime(mocker)
    app = create_app()

    form = {"query": "where is the quote?"}
    with TestClient(app) as client:
        response = client.post("/search", data=form)

    assert response.status_code == 200
    assert answer_mock.called is False
    assert "Answer generation is disabled" in response.text
|
||||
|
||||
|
||||
def test_ui_shows_component_scores(mocker: MockerFixture) -> None:
    """A result carrying rerank, vector, BM25 and fused scores must surface each label in the page."""

    def fake_search_ebooks(_engine, query, _config, *, rerank=False):
        del rerank
        scored_hit = SearchResult(
            chunk_id=1,
            text="source text",
            source_title="Book",
            score=0.9,
            rerank_score=0.9,
            vector_score=0.8,
            bm25_score=2.5,
            fused_score=0.03,
        )
        return SearchResponse(query=query, rank_label="Hybrid + rerank", results=[scored_hit])

    def fake_answer_query(_query, _results, _config):
        return "answer"

    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
    mocker.patch("python.ebook_search.api.routes.search.answer_query", side_effect=fake_answer_query)
    patch_app_runtime(mocker)
    app = create_app()
    app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=True)

    form = {"query": "where is the quote?"}
    with TestClient(app) as client:
        response = client.post("/search", data=form)

    assert response.status_code == 200
    for score_label in ("rerank", "vector cosine", "BM25", "RRF"):
        assert score_label in response.text
|
||||
|
||||
|
||||
def test_ui_shows_search_runtime_chart(mocker: MockerFixture) -> None:
    """Timings returned by ``search_ebooks`` must appear in the rendered search response."""

    def fake_search_ebooks(_engine, query, _config, *, rerank=False):
        del rerank
        step_timings = (
            RuntimeStep(name="Embedding + vector search", duration_ms=12.5),
            RuntimeStep(name="BM25 search", duration_ms=4.0),
        )
        return SearchResponse(query=query, rank_label="Hybrid", results=[], timings=step_timings)

    def fake_answer_query(_query, _results, _config):
        return "answer"

    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
    mocker.patch("python.ebook_search.api.routes.search.answer_query", side_effect=fake_answer_query)
    patch_app_runtime(mocker)
    app = create_app()
    app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=True)

    form = {"query": "where is the quote?"}
    with TestClient(app) as client:
        response = client.post("/search", data=form)

    assert response.status_code == 200
    expected_fragments = (
        "Runtime",
        "Total",
        "Embedding + vector search",
        "BM25 search",
        "Answer generation",
        "ms left",
    )
    for fragment in expected_fragments:
        assert fragment in response.text
|
||||
|
||||
|
||||
def test_ui_embed_all_batches_until_complete(mocker: MockerFixture) -> None:
    """POST ``/admin/embed-all`` keeps calling ``embed_missing_chunks`` until it returns 0.

    The fake returns 32, 32, 5 and then 0 on successive calls and records the
    ``embedding_batch_size`` of the config it was given each time.
    """
    per_call_counts = iter([32, 32, 5, 0])
    batch_sizes: list[int] = []

    def fake_embed_missing_chunks(_session, config):
        batch_sizes.append(config.embedding_batch_size)
        embedded_now = next(per_call_counts)
        return embedded_now

    mocker.patch(
        "python.ebook_search.api.routes.admin.embed_missing_chunks",
        side_effect=fake_embed_missing_chunks,
    )
    patch_app_runtime(mocker)
    app = create_app()

    with TestClient(app) as client:
        response = client.post("/admin/embed-all")

    assert response.status_code == 200
    assert "Embedded 69 chunks in 3 batches of 32" in response.text
    # Four calls in total: three that embedded something plus the final one that returned 0.
    assert batch_sizes == [32, 32, 32, 32]
|
||||
|
||||
|
||||
def test_ui_scan_schedules_bm25_refresh_after_database_change(mocker: MockerFixture) -> None:
    """POST ``/admin/scan`` that ingests one EPUB must call ``schedule_bm25_refresh``."""

    def fake_ingest_configured_paths(_session, _config):
        return 1

    def fake_schedule_bm25_refresh(_app):
        return None

    mocker.patch(
        "python.ebook_search.api.routes.admin.ingest_configured_paths",
        side_effect=fake_ingest_configured_paths,
    )
    # The patched mock tracks whether the route asked for a refresh.
    schedule_mock = mocker.patch(
        "python.ebook_search.api.routes.admin.schedule_bm25_refresh",
        side_effect=fake_schedule_bm25_refresh,
    )
    patch_app_runtime(mocker)
    app = create_app()

    with TestClient(app) as client:
        response = client.post("/admin/scan")

    assert response.status_code == 200
    assert "Indexed 1 EPUBs" in response.text
    assert schedule_mock.called is True
|
||||
|
||||
|
||||
def test_bm25_refresh_clears_loaded_corpus_cache(mocker: MockerFixture) -> None:
    """``refresh_bm25_for_engine`` must refresh the corpus once and call ``load_bm25_corpus.cache_clear``."""
    refresh_calls: list[tuple[object, object]] = []

    def fake_refresh_bm25_corpus(session, config):
        refresh_calls.append((session, config))

    def fake_cache_clear():
        return None

    mocker.patch(
        "python.ebook_search.api.bm25_tasks.refresh_bm25_corpus",
        side_effect=fake_refresh_bm25_corpus,
    )
    cache_clear_mock = mocker.patch(
        "python.ebook_search.api.bm25_tasks.load_bm25_corpus.cache_clear",
        side_effect=fake_cache_clear,
    )
    config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
    engine = create_engine("sqlite+pysqlite:///:memory:", future=True)

    refresh_bm25_for_engine(engine, config)

    assert len(refresh_calls) == 1
    _session, config_used = refresh_calls[0]
    assert config_used == config
    assert cache_clear_mock.called is True
|
||||
|
||||
|
||||
def test_admin_page_shows_embedding_counts_by_model(mocker: MockerFixture) -> None:
    """GET ``/admin`` must render the per-model embedding statistics.

    ``embedding_model_stats`` is replaced by a fake returning two models. The
    page is expected to mention each model's name and dimension, plus the
    first model's embedded count (40) and the value 24.
    """

    def fake_embedding_model_stats(_session):
        return [
            EmbeddingModelStats(
                model_name="qwen3-embedding-0.6b",
                dimension=1024,
                embedded_chunks=40,
                total_chunks=64,
            ),
            EmbeddingModelStats(
                model_name="qwen3-embedding-4b",
                dimension=2560,
                embedded_chunks=8,
                total_chunks=64,
            ),
        ]

    mocker.patch(
        "python.ebook_search.api.routes.admin.embedding_model_stats",
        side_effect=fake_embedding_model_stats,
    )
    patch_app_runtime(mocker)
    app = create_app()

    with TestClient(app) as client:
        response = client.get("/admin")

    assert response.status_code == 200
    page = response.text
    assert "qwen3-embedding-0.6b" in page
    assert "1024" in page
    assert "40" in page
    # A plain `"24" in page` check is already satisfied by the dimension "1024"
    # asserted above, so it could never fail. Remove that value before looking.
    # NOTE(review): 24 is presumably the missing-chunk count (64 - 40) — confirm
    # against the admin template.
    assert "24" in page.replace("1024", "")
    assert "qwen3-embedding-4b" in page
    assert "2560" in page
|
||||
Reference in New Issue
Block a user