From dbc6b5b53b901847f2c8c1918c27f6e012c5e0fa Mon Sep 17 00:00:00 2001
From: Richie Cahill <Richie@tmmworkshop.com>
Date: Tue, 16 Jun 2026 21:47:40 -0400
Subject: [PATCH] test(ebook-search): organize tests under dedicated package

Move ebook search tests into tests/ebook_search and standardize mocking on
pytest-mock.

---
 tests/ebook_search/__init__.py                |   1 +
 .../test_core.py}                             | 116 ++++++-----------
 .../test_guardrails.py}                       |  46 ++++---
 tests/ebook_search/test_health.py             | 122 ++++++++++++++++++
 .../test_http.py}                             |  17 ++-
 tests/ebook_search/test_rag_pipeline.py       |  50 +++++++
 .../test_rerank.py}                           |  39 +++---
 .../test_ui.py}                               |  99 +++++++-------
 tests/test_ebook_search_health.py             | 116 -----------------
 9 files changed, 330 insertions(+), 276 deletions(-)
 create mode 100644 tests/ebook_search/__init__.py
 rename tests/{test_ebook_search_core.py => ebook_search/test_core.py} (77%)
 rename tests/{test_ebook_search_guardrails.py => ebook_search/test_guardrails.py} (74%)
 create mode 100644 tests/ebook_search/test_health.py
 rename tests/{test_ebook_search_http.py => ebook_search/test_http.py} (83%)
 create mode 100644 tests/ebook_search/test_rag_pipeline.py
 rename tests/{test_ebook_search_rerank.py => ebook_search/test_rerank.py} (77%)
 rename tests/{test_ebook_search_ui.py => ebook_search/test_ui.py} (70%)
 delete mode 100644 tests/test_ebook_search_health.py

diff --git a/tests/ebook_search/__init__.py b/tests/ebook_search/__init__.py
new file mode 100644
index 0000000..2de467e
--- /dev/null
+++ b/tests/ebook_search/__init__.py
@@ -0,0 +1 @@
+"""Tests for the ``python.ebook_search`` package."""
diff --git a/tests/test_ebook_search_core.py b/tests/ebook_search/test_core.py
similarity index 77%
rename from tests/test_ebook_search_core.py
rename to tests/ebook_search/test_core.py
index a66cef9..f1a8b54 100644
--- a/tests/test_ebook_search_core.py
+++ b/tests/ebook_search/test_core.py
@@ -6,8 +6,8 @@ import logging
 from datetime import UTC, datetime
 from os import environ
 from pathlib import Path
-from threading import Event
 from types import ModuleType
+from typing import TYPE_CHECKING
 
 import pytest
 from sqlalchemy import create_engine, select
@@ -34,7 +34,6 @@ from python.ebook_search.search import (
     bm25_candidates,
     reciprocal_rank_fusion,
     retrieval_query_from_text,
-    search_ebooks,
 )
 from python.ebook_search.timing import RuntimeStep
 from python.orm.richie import (
@@ -46,6 +45,9 @@ from python.orm.richie import (
     RichieBase,
 )
 
+if TYPE_CHECKING:
+    from pytest_mock import MockerFixture
+
 
 def test_chunk_text_uses_overlap() -> None:
     chunks = chunk_text(" ".join(str(index) for index in range(100)), chunk_tokens=20, overlap_tokens=5)
@@ -164,49 +166,13 @@ def test_search_response_sums_runtime_steps() -> None:
     assert response.total_runtime_ms == 4.0
 
 
-def test_search_ebooks_runs_vector_and_bm25_in_parallel(monkeypatch) -> None:
-    engine = create_engine("sqlite+pysqlite:///:memory:", future=True)
-    vector_started = Event()
-    bm25_started = Event()
-    received_engines: list[object] = []
-
-    def fake_vector_candidates(received_engine, query, _config):
-        """Return vector candidates after confirming BM25 has started."""
-        received_engines.append(received_engine)
-        assert query == "what is parallel"
-        vector_started.set()
-        assert bm25_started.wait(timeout=2)
-        return [SearchResult(chunk_id=1, text="vector", source_title="Vector", vector_score=0.9)]
-
-    def fake_bm25_candidates(query, _config):
-        """Return BM25 candidates after confirming vector search has started."""
-        assert query == "parallel"
-        bm25_started.set()
-        assert vector_started.wait(timeout=2)
-        return [SearchResult(chunk_id=2, text="bm25", source_title="BM25", bm25_score=2.0)]
-
-    monkeypatch.setattr("python.ebook_search.search.vector_candidates", fake_vector_candidates)
-    monkeypatch.setattr("python.ebook_search.search.bm25_candidates", fake_bm25_candidates)
-    config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
-
-    response = search_ebooks(engine, "what is parallel", config)
-
-    timings = {step.name: step for step in response.timings}
-    assert [result.chunk_id for result in response.results] == [1, 2]
-    assert timings["Embedding + vector search"].counts_toward_total is False
-    assert timings["BM25 search"].counts_toward_total is False
-    assert timings["Hybrid retrieval"].counts_toward_total is True
-    assert timings["BM25 query preparation"].counts_toward_total is True
-    assert received_engines == [engine]
-
-
 def test_retrieval_query_keeps_entity_and_series_terms() -> None:
     assert retrieval_query_from_text("what does Damien Montgomery stand for in starship mage") == (
         "damien montgomery stand starship mage"
     )
 
 
-def test_bm25_candidates_scores_whole_corpus(monkeypatch) -> None:
+def test_bm25_candidates_scores_whole_corpus(mocker: MockerFixture) -> None:
     record = {
         "chunk_id": 2,
         "text": "high",
@@ -226,8 +192,8 @@ def test_bm25_candidates_scores_whole_corpus(monkeypatch) -> None:
         captured["limit"] = limit
         return [(record, 1.5)]
 
-    monkeypatch.setattr("python.ebook_search.search.load_bm25_corpus", lambda _config: corpus)
-    monkeypatch.setattr("python.ebook_search.search.score_bm25_corpus", fake_score_bm25_corpus)
+    mocker.patch("python.ebook_search.search.load_bm25_corpus", side_effect=lambda _config: corpus)
+    mocker.patch("python.ebook_search.search.score_bm25_corpus", side_effect=fake_score_bm25_corpus)
     config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
 
     results = bm25_candidates("high", config)
@@ -239,11 +205,11 @@ def test_bm25_candidates_scores_whole_corpus(monkeypatch) -> None:
     assert [result.bm25_score for result in results] == [1.5]
 
 
-def test_bm25_candidates_returns_empty_when_corpus_is_unavailable(monkeypatch, caplog) -> None:
+def test_bm25_candidates_returns_empty_when_corpus_is_unavailable(mocker: MockerFixture, caplog) -> None:
     def fake_load_bm25_corpus(_config):
         raise BM25CorpusUnavailableError
 
-    monkeypatch.setattr("python.ebook_search.search.load_bm25_corpus", fake_load_bm25_corpus)
+    mocker.patch("python.ebook_search.search.load_bm25_corpus", side_effect=fake_load_bm25_corpus)
     config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
 
     with caplog.at_level(logging.WARNING):
@@ -279,7 +245,7 @@ def test_write_bm25_corpus_publishes_dated_generation(tmp_path) -> None:
     assert read_bm25_manifest(index_path) == manifest
 
 
-def test_write_bm25_corpus_keeps_current_generation_when_publish_fails(monkeypatch, tmp_path) -> None:
+def test_write_bm25_corpus_keeps_current_generation_when_publish_fails(mocker: MockerFixture, tmp_path) -> None:
     index_path = tmp_path / "bm25"
     index_path.mkdir()
     generations_path = index_path / "generations"
@@ -297,7 +263,7 @@ def test_write_bm25_corpus_keeps_current_generation_when_publish_fails(monkeypat
             raise OSError(msg)
         return original_replace(self, target)
 
-    monkeypatch.setattr(Path, "replace", fail_current_replace)
+    mocker.patch.object(Path, "replace", fail_current_replace)
     manifest = BM25Manifest(
         created_at=datetime(2026, 6, 12, 1, 2, 3, 456789, tzinfo=UTC),
         db_updated_at=None,
@@ -341,7 +307,7 @@ def test_load_bm25_corpus_uses_current_generation(tmp_path) -> None:
     assert score_bm25_corpus("cached", corpus, limit=10)
 
 
-def test_load_bm25_corpus_caches_disk_load(monkeypatch, tmp_path) -> None:
+def test_load_bm25_corpus_caches_disk_load(mocker: MockerFixture, tmp_path) -> None:
     load_bm25_corpus.cache_clear()
     manifest = BM25Manifest(created_at=datetime.now(tz=UTC), db_updated_at=None, chunk_count=1)
     record = {
@@ -374,9 +340,9 @@ def test_load_bm25_corpus_caches_disk_load(monkeypatch, tmp_path) -> None:
 
     fake_bm25s = ModuleType("bm25s")
     fake_bm25s.BM25 = FakeBM25
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.read_bm25_manifest", lambda _path: manifest)
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.bm25_index_exists", lambda _path, _manifest: True)
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.bm25s", fake_bm25s)
+    mocker.patch("python.ebook_search.bm25_corpus.read_bm25_manifest", side_effect=lambda _path: manifest)
+    mocker.patch("python.ebook_search.bm25_corpus.bm25_index_exists", side_effect=lambda _path, _manifest: True)
+    mocker.patch("python.ebook_search.bm25_corpus.bm25s", fake_bm25s)
     config = EbookSearchConfig(rerank=RerankConfig(enabled=False), bm25_index_dir=str(tmp_path))
 
     try:
@@ -391,10 +357,10 @@ def test_load_bm25_corpus_caches_disk_load(monkeypatch, tmp_path) -> None:
     assert load_count == 1
 
 
-def test_load_bm25_corpus_raises_when_index_is_missing(monkeypatch, tmp_path) -> None:
+def test_load_bm25_corpus_raises_when_index_is_missing(mocker: MockerFixture, tmp_path) -> None:
     load_bm25_corpus.cache_clear()
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.read_bm25_manifest", lambda _path: None)
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.bm25_index_exists", lambda _path, _manifest: False)
+    mocker.patch("python.ebook_search.bm25_corpus.read_bm25_manifest", side_effect=lambda _path: None)
+    mocker.patch("python.ebook_search.bm25_corpus.bm25_index_exists", side_effect=lambda _path, _manifest: False)
     config = EbookSearchConfig(rerank=RerankConfig(enabled=False), bm25_index_dir=str(tmp_path))
 
     try:
@@ -404,16 +370,16 @@ def test_load_bm25_corpus_raises_when_index_is_missing(monkeypatch, tmp_path) ->
         load_bm25_corpus.cache_clear()
 
 
-def test_ensure_bm25_corpus_refreshes_missing_index(monkeypatch) -> None:
+def test_ensure_bm25_corpus_refreshes_missing_index(mocker: MockerFixture) -> None:
     refreshed: list[object] = []
     db_updated_at = datetime.now(tz=UTC)
 
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.read_bm25_manifest", lambda _path: None)
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.bm25_index_exists", lambda _path, _manifest: False)
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.corpus_last_updated_at", lambda _session: db_updated_at)
-    monkeypatch.setattr(
+    mocker.patch("python.ebook_search.bm25_corpus.read_bm25_manifest", side_effect=lambda _path: None)
+    mocker.patch("python.ebook_search.bm25_corpus.bm25_index_exists", side_effect=lambda _path, _manifest: False)
+    mocker.patch("python.ebook_search.bm25_corpus.corpus_last_updated_at", side_effect=lambda _session: db_updated_at)
+    mocker.patch(
         "python.ebook_search.bm25_corpus.refresh_bm25_corpus",
-        lambda session, config, *, db_updated_at: refreshed.append((session, config, db_updated_at)),
+        side_effect=lambda session, config, *, db_updated_at: refreshed.append((session, config, db_updated_at)),
     )
 
     config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
@@ -424,18 +390,18 @@ def test_ensure_bm25_corpus_refreshes_missing_index(monkeypatch) -> None:
     assert refreshed == [(session, config, db_updated_at)]
 
 
-def test_ensure_bm25_corpus_refreshes_stale_index(monkeypatch) -> None:
+def test_ensure_bm25_corpus_refreshes_stale_index(mocker: MockerFixture) -> None:
     refreshed: list[object] = []
     created_at = datetime(2026, 1, 1, tzinfo=UTC)
     db_updated_at = datetime(2026, 1, 2, tzinfo=UTC)
     manifest = BM25Manifest(created_at=created_at, db_updated_at=created_at, chunk_count=10)
 
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.read_bm25_manifest", lambda _path: manifest)
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.bm25_index_exists", lambda _path, _manifest: True)
-    monkeypatch.setattr("python.ebook_search.bm25_corpus.corpus_last_updated_at", lambda _session: db_updated_at)
-    monkeypatch.setattr(
+    mocker.patch("python.ebook_search.bm25_corpus.read_bm25_manifest", side_effect=lambda _path: manifest)
+    mocker.patch("python.ebook_search.bm25_corpus.bm25_index_exists", side_effect=lambda _path, _manifest: True)
+    mocker.patch("python.ebook_search.bm25_corpus.corpus_last_updated_at", side_effect=lambda _session: db_updated_at)
+    mocker.patch(
         "python.ebook_search.bm25_corpus.refresh_bm25_corpus",
-        lambda session, config, *, db_updated_at: refreshed.append((session, config, db_updated_at)),
+        side_effect=lambda session, config, *, db_updated_at: refreshed.append((session, config, db_updated_at)),
     )
 
     config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
@@ -479,7 +445,9 @@ def test_1024_embedding_table_has_cosine_hnsw_index() -> None:
     assert index.dialect_options["postgresql"]["ops"] == {"embedding": "vector_cosine_ops"}
 
 
-def test_embedding_model_aliases_normalize_to_provider_names() -> None:
+def test_embedding_model_aliases_normalize_to_provider_names(mocker: MockerFixture) -> None:
+    mocker.patch.dict(environ, {}, clear=False)
+
     assert normalize_embedding_model() == "qwen3-embedding-0.6b"
 
     environ["EBOOK_SEARCH_EMBEDDING_MODEL"] = "qwen3-embedding-0.6b"
@@ -499,17 +467,19 @@ def test_embedding_model_aliases_normalize_to_provider_names() -> None:
     assert normalize_embedding_model() == "qwen3-embedding-8b"
 
 
-def test_answer_generation_is_enabled_by_default(monkeypatch) -> None:
-    monkeypatch.delenv("EBOOK_SEARCH_ANSWER_ENABLED", raising=False)
+def test_answer_generation_is_enabled_by_default(mocker: MockerFixture) -> None:
+    mocker.patch.dict(environ, {}, clear=False)
+    environ.pop("EBOOK_SEARCH_ANSWER_ENABLED", None)
 
     config = load_config()
 
     assert config.answer_enabled is True
 
 
-def test_chat_defaults_use_ollama_cloud(monkeypatch) -> None:
-    monkeypatch.delenv("EBOOK_SEARCH_VLLM_BASE_URL", raising=False)
-    monkeypatch.delenv("EBOOK_SEARCH_CHAT_MODEL", raising=False)
+def test_chat_defaults_use_ollama_cloud(mocker: MockerFixture) -> None:
+    mocker.patch.dict(environ, {}, clear=False)
+    environ.pop("EBOOK_SEARCH_VLLM_BASE_URL", None)
+    environ.pop("EBOOK_SEARCH_CHAT_MODEL", None)
 
     config = load_config()
 
@@ -517,9 +487,9 @@ def test_chat_defaults_use_ollama_cloud(monkeypatch) -> None:
     assert config.chat_model == "deepseek-v4-flash"
 
 
-def test_chat_api_key_falls_back_to_ollama_api_key(monkeypatch) -> None:
-    monkeypatch.delenv("EBOOK_SEARCH_VLLM_API_KEY", raising=False)
-    monkeypatch.setenv("OLLAMA_API_KEY", "ollama-key")
+def test_chat_api_key_falls_back_to_ollama_api_key(mocker: MockerFixture) -> None:
+    mocker.patch.dict(environ, {"OLLAMA_API_KEY": "ollama-key"}, clear=False)
+    environ.pop("EBOOK_SEARCH_VLLM_API_KEY", None)
 
     config = load_config()
 
diff --git a/tests/test_ebook_search_guardrails.py b/tests/ebook_search/test_guardrails.py
similarity index 74%
rename from tests/test_ebook_search_guardrails.py
rename to tests/ebook_search/test_guardrails.py
index cec944d..03cddfd 100644
--- a/tests/test_ebook_search_guardrails.py
+++ b/tests/ebook_search/test_guardrails.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+
 from fastapi.testclient import TestClient
 from sqlalchemy import create_engine
 
@@ -10,6 +12,9 @@ from python.ebook_search.config import EbookSearchConfig, RerankConfig
 from python.ebook_search.guardrails import is_confident, retrieval_confidence, validate_citations
 from python.ebook_search.search import SearchResponse, SearchResult
 
+if TYPE_CHECKING:
+    from pytest_mock import MockerFixture
+
 
 def make_results(count, *, vector_score=0.8):
     return [
@@ -59,15 +64,15 @@ def test_is_confident_against_threshold() -> None:
     assert is_confident(make_results(1, vector_score=0.4), config) is False
 
 
-def patch_app_runtime(monkeypatch):
-    monkeypatch.setattr(
+def patch_app_runtime(mocker: MockerFixture):
+    mocker.patch(
         "python.ebook_search.api.main.get_postgres_engine",
-        lambda **_kwargs: create_engine("sqlite+pysqlite:///:memory:", future=True),
+        side_effect=lambda **_kwargs: create_engine("sqlite+pysqlite:///:memory:", future=True),
     )
-    monkeypatch.setattr("python.ebook_search.api.main.ensure_bm25_corpus", lambda _session, _config: None)
+    mocker.patch("python.ebook_search.api.main.ensure_bm25_corpus", side_effect=lambda _session, _config: None)
 
 
-def test_low_confidence_skips_answer_generation(monkeypatch) -> None:
+def test_low_confidence_skips_answer_generation(mocker: MockerFixture) -> None:
     called = False
 
     def fake_search_ebooks(_engine, query, _config, *, rerank=False):
@@ -79,15 +84,16 @@ def test_low_confidence_skips_answer_generation(monkeypatch) -> None:
         called = True
         return "answer"
 
-    monkeypatch.setattr("python.ebook_search.api.routes.search.search_ebooks", fake_search_ebooks)
-    monkeypatch.setattr("python.ebook_search.api.routes.search.answer_query", fake_answer_query)
-    patch_app_runtime(monkeypatch)
-    app = create_app()
-    app.state.config = EbookSearchConfig(
+    config = EbookSearchConfig(
         rerank=RerankConfig(enabled=False),
         answer_enabled=True,
         min_retrieval_confidence=0.5,
     )
+    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
+    mocker.patch("python.ebook_search.api.routes.search.answer_query", side_effect=fake_answer_query)
+    mocker.patch("python.ebook_search.api.main.load_config", side_effect=lambda: config)
+    patch_app_runtime(mocker)
+    app = create_app()
 
     with TestClient(app) as client:
         response = client.post("/search", data={"query": "q"})
@@ -97,17 +103,17 @@ def test_low_confidence_skips_answer_generation(monkeypatch) -> None:
     assert "Low retrieval confidence" in response.text
 
 
-def test_invalid_citation_is_flagged(monkeypatch) -> None:
+def test_invalid_citation_is_flagged(mocker: MockerFixture) -> None:
     def fake_search_ebooks(_engine, query, _config, *, rerank=False):
         del rerank
         return SearchResponse(query=query, rank_label="Hybrid", results=make_results(2, vector_score=0.9))
 
-    monkeypatch.setattr("python.ebook_search.api.routes.search.search_ebooks", fake_search_ebooks)
-    monkeypatch.setattr(
+    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
+    mocker.patch(
         "python.ebook_search.api.routes.search.answer_query",
-        lambda _query, _results, _config: "Per the text [9].",
+        side_effect=lambda _query, _results, _config: "Per the text [9].",
     )
-    patch_app_runtime(monkeypatch)
+    patch_app_runtime(mocker)
     app = create_app()
     app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=True)
 
@@ -119,17 +125,17 @@ def test_invalid_citation_is_flagged(monkeypatch) -> None:
     assert "9" in response.text
 
 
-def test_grounded_answer_has_no_warning_badge(monkeypatch) -> None:
+def test_grounded_answer_has_no_warning_badge(mocker: MockerFixture) -> None:
     def fake_search_ebooks(_engine, query, _config, *, rerank=False):
         del rerank
         return SearchResponse(query=query, rank_label="Hybrid", results=make_results(2, vector_score=0.9))
 
-    monkeypatch.setattr("python.ebook_search.api.routes.search.search_ebooks", fake_search_ebooks)
-    monkeypatch.setattr(
+    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
+    mocker.patch(
         "python.ebook_search.api.routes.search.answer_query",
-        lambda _query, _results, _config: "Grounded in [1] and [2].",
+        side_effect=lambda _query, _results, _config: "Grounded in [1] and [2].",
     )
-    patch_app_runtime(monkeypatch)
+    patch_app_runtime(mocker)
     app = create_app()
     app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=True)
 
diff --git a/tests/ebook_search/test_health.py b/tests/ebook_search/test_health.py
new file mode 100644
index 0000000..be0ef35
--- /dev/null
+++ b/tests/ebook_search/test_health.py
@@ -0,0 +1,130 @@
+"""Tests for EPUB search health and readiness routes."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from fastapi.testclient import TestClient
+from sqlalchemy import create_engine
+
+from python.ebook_search.api.main import create_app
+from python.ebook_search.config import EbookSearchConfig, RerankConfig
+
+if TYPE_CHECKING:
+    from pytest_mock import MockerFixture
+
+HEALTH_MODULE = "python.ebook_search.api.routes.health"
+
+
+def fake_get_postgres_engine(**_kwargs):
+    """Return an in-memory engine for route tests."""
+    return create_engine("sqlite+pysqlite:///:memory:", future=True)
+
+
+def patch_app_runtime(mocker: MockerFixture) -> None:
+    """Swap the app module's engine factory and BM25 bootstrap for test doubles."""
+    mocker.patch("python.ebook_search.api.main.get_postgres_engine", side_effect=fake_get_postgres_engine)
+    mocker.patch("python.ebook_search.api.main.ensure_bm25_corpus", return_value=None)
+
+
+def patch_dependencies(mocker: MockerFixture, *, database=True, embedding=True, chat=True, bm25="ok") -> None:
+    """Pin each readiness probe in the health routes module to a fixed result."""
+    mocker.patch(f"{HEALTH_MODULE}.check_database", return_value=database)
+    mocker.patch(f"{HEALTH_MODULE}.check_embedding_endpoint", return_value=embedding)
+    mocker.patch(f"{HEALTH_MODULE}.check_chat_endpoint", return_value=chat)
+    mocker.patch(f"{HEALTH_MODULE}.check_bm25_status", return_value=bm25)
+
+
+def build_client(mocker: MockerFixture, config: EbookSearchConfig | None = None) -> TestClient:
+    """Build a TestClient for an app created with patched config and runtime hooks.
+
+    ``load_config`` is patched to hand back ``config``, or a rerank-disabled
+    default when none is given. The client is returned un-entered so callers
+    can use it as a context manager.
+    """
+    resolved = config if config is not None else EbookSearchConfig(rerank=RerankConfig(enabled=False))
+    mocker.patch("python.ebook_search.api.main.load_config", return_value=resolved)
+    patch_app_runtime(mocker)
+    app = create_app()
+    return TestClient(app)
+
+
+def test_health_returns_ok(mocker: MockerFixture) -> None:
+    with build_client(mocker) as client:
+        response = client.get("/health")
+
+    assert response.status_code == 200
+    assert response.json() == {"status": "ok"}
+
+
+def test_ready_all_dependencies_ok(mocker: MockerFixture) -> None:
+    patch_dependencies(mocker)
+
+    with build_client(mocker) as client:
+        response = client.get("/ready")
+
+    assert response.status_code == 200
+    body = response.json()
+    assert body["status"] == "ready"
+    assert body["checks"] == {"database": "ok", "embedding": "ok", "chat": "ok", "bm25": "ok"}
+
+
+def test_ready_embedding_down_is_degraded(mocker: MockerFixture) -> None:
+    patch_dependencies(mocker, embedding=False)
+
+    with build_client(mocker) as client:
+        response = client.get("/ready")
+
+    assert response.status_code == 200
+    body = response.json()
+    assert body["status"] == "degraded"
+    assert body["checks"]["embedding"] == "fail"
+
+
+def test_ready_chat_down_is_degraded(mocker: MockerFixture) -> None:
+    patch_dependencies(mocker, chat=False)
+
+    with build_client(mocker) as client:
+        response = client.get("/ready")
+
+    assert response.status_code == 200
+    body = response.json()
+    assert body["status"] == "degraded"
+    assert body["checks"]["chat"] == "fail"
+
+
+def test_ready_chat_disabled_when_answers_off(mocker: MockerFixture) -> None:
+    patch_dependencies(mocker)
+    config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=False)
+
+    with build_client(mocker, config) as client:
+        response = client.get("/ready")
+
+    assert response.status_code == 200
+    body = response.json()
+    assert body["status"] == "ready"
+    assert body["checks"]["chat"] == "disabled"
+
+
+def test_ready_database_down_is_unavailable(mocker: MockerFixture) -> None:
+    patch_dependencies(mocker, database=False)
+
+    with build_client(mocker) as client:
+        response = client.get("/ready")
+
+    assert response.status_code == 503
+    body = response.json()
+    assert body["status"] == "unavailable"
+    assert body["checks"]["database"] == "fail"
+
+
+def test_ready_bm25_missing_is_degraded(mocker: MockerFixture) -> None:
+    patch_dependencies(mocker, bm25="missing")
+
+    with build_client(mocker) as client:
+        response = client.get("/ready")
+
+    assert response.status_code == 200
+    body = response.json()
+    assert body["status"] == "degraded"
+    assert body["checks"]["bm25"] == "missing"
diff --git a/tests/test_ebook_search_http.py b/tests/ebook_search/test_http.py
similarity index 83%
rename from tests/test_ebook_search_http.py
rename to tests/ebook_search/test_http.py
index 993f7bd..2d2adec 100644
--- a/tests/test_ebook_search_http.py
+++ b/tests/ebook_search/test_http.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+
 import httpx
 import pytest
 
@@ -10,8 +12,11 @@ from python.ebook_search.config import EbookSearchConfig, RerankConfig
 from python.ebook_search.embeddings import embed_texts
 from python.ebook_search.search import SearchResult
 
+if TYPE_CHECKING:
+    from pytest_mock import MockerFixture
 
-def test_answer_query_uses_httpx_chat_completions(monkeypatch) -> None:
+
+def test_answer_query_uses_httpx_chat_completions(mocker: MockerFixture) -> None:
     captured: dict[str, object] = {}
 
     def fake_post(url: str, **kwargs: object) -> httpx.Response:
@@ -23,7 +28,7 @@ def test_answer_query_uses_httpx_chat_completions(monkeypatch) -> None:
             request=httpx.Request("POST", url),
         )
 
-    monkeypatch.setattr(httpx, "post", fake_post)
+    mocker.patch.object(httpx, "post", side_effect=fake_post)
     config = EbookSearchConfig(
         rerank=RerankConfig(enabled=False),
         vllm_base_url="https://ollama.com/v1",
@@ -43,7 +48,7 @@ def test_answer_query_uses_httpx_chat_completions(monkeypatch) -> None:
     assert payload["model"] == "deepseek-v4-flash"
 
 
-def test_embed_texts_uses_httpx_embeddings(monkeypatch) -> None:
+def test_embed_texts_uses_httpx_embeddings(mocker: MockerFixture) -> None:
     captured: dict[str, object] = {}
     vector = [0.0] * 1024
 
@@ -56,7 +61,7 @@ def test_embed_texts_uses_httpx_embeddings(monkeypatch) -> None:
             request=httpx.Request("POST", url),
         )
 
-    monkeypatch.setattr(httpx, "post", fake_post)
+    mocker.patch.object(httpx, "post", side_effect=fake_post)
     config = EbookSearchConfig(
         rerank=RerankConfig(enabled=False),
         embedding_base_url="http://bob:8000/v1",
@@ -73,11 +78,11 @@ def test_embed_texts_uses_httpx_embeddings(monkeypatch) -> None:
     assert kwargs["json"] == {"model": "qwen3-embedding-0.6b", "input": ["hello"]}
 
 
-def test_embed_texts_rejects_bad_response_shape(monkeypatch) -> None:
+def test_embed_texts_rejects_bad_response_shape(mocker: MockerFixture) -> None:
     def fake_post(url: str, **_kwargs: object) -> httpx.Response:
         return httpx.Response(200, json={"data": [{}]}, request=httpx.Request("POST", url))
 
-    monkeypatch.setattr(httpx, "post", fake_post)
+    mocker.patch.object(httpx, "post", side_effect=fake_post)
     config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
 
     with pytest.raises(RuntimeError, match="Embedding request failed"):
diff --git a/tests/ebook_search/test_rag_pipeline.py b/tests/ebook_search/test_rag_pipeline.py
new file mode 100644
index 0000000..57620e5
--- /dev/null
+++ b/tests/ebook_search/test_rag_pipeline.py
@@ -0,0 +1,51 @@
+"""Tests for the ebook search RAG pipeline orchestration."""
+
+from __future__ import annotations
+
+from threading import Event
+from typing import TYPE_CHECKING
+
+from sqlalchemy import create_engine
+
+from python.ebook_search.config import EbookSearchConfig, RerankConfig
+from python.ebook_search.search import SearchResult, search_ebooks
+
+if TYPE_CHECKING:
+    from pytest_mock import MockerFixture
+
+
+def test_search_ebooks_runs_vector_and_bm25_in_parallel(mocker: MockerFixture) -> None:
+    """Fakes wait on each other's start event, so the test passes only if both retrievers overlap."""
+    sqlite_engine = create_engine("sqlite+pysqlite:///:memory:", future=True)
+    vector_entered = Event()
+    bm25_entered = Event()
+    engines_seen: list[object] = []
+
+    def vector_stub(received_engine, query, _config):
+        """Record the engine, signal entry, then wait for the BM25 stub to begin."""
+        engines_seen.append(received_engine)
+        assert query == "what is parallel"
+        vector_entered.set()
+        assert bm25_entered.wait(timeout=2)
+        return [SearchResult(chunk_id=1, text="vector", source_title="Vector", vector_score=0.9)]
+
+    def bm25_stub(query, _config):
+        """Signal entry, then wait for the vector stub to begin."""
+        assert query == "parallel"
+        bm25_entered.set()
+        assert vector_entered.wait(timeout=2)
+        return [SearchResult(chunk_id=2, text="bm25", source_title="BM25", bm25_score=2.0)]
+
+    mocker.patch("python.ebook_search.search.vector_candidates", side_effect=vector_stub)
+    mocker.patch("python.ebook_search.search.bm25_candidates", side_effect=bm25_stub)
+    search_config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
+
+    response = search_ebooks(sqlite_engine, "what is parallel", search_config)
+
+    assert [result.chunk_id for result in response.results] == [1, 2]
+    counted = {step.name: step.counts_toward_total for step in response.timings}
+    assert counted["Embedding + vector search"] is False
+    assert counted["BM25 search"] is False
+    assert counted["Hybrid retrieval"] is True
+    assert counted["BM25 query preparation"] is True
+    assert engines_seen == [sqlite_engine]
diff --git a/tests/test_ebook_search_rerank.py b/tests/ebook_search/test_rerank.py
similarity index 77%
rename from tests/test_ebook_search_rerank.py
rename to tests/ebook_search/test_rerank.py
index db53333..15ccf3f 100644
--- a/tests/test_ebook_search_rerank.py
+++ b/tests/ebook_search/test_rerank.py
@@ -2,6 +2,9 @@
 
 from __future__ import annotations
 
+from os import environ
+from typing import TYPE_CHECKING
+
 import httpx
 import pytest
 
@@ -9,6 +12,9 @@ from python.ebook_search.config import EbookSearchConfig, RerankConfig, load_rer
 from python.ebook_search.rerank import rerank_chunks
 from python.ebook_search.search import SearchResult, apply_rerank, skip_rerank
 
+if TYPE_CHECKING:
+    from pytest_mock import MockerFixture
+
 
 def candidates() -> list[SearchResult]:
     return [
@@ -27,12 +33,13 @@ def rerank_response(payload: dict[str, object] | None = None, *, content: bytes
     )
 
 
-def test_config_defaults_keep_reranking_optional(monkeypatch: pytest.MonkeyPatch) -> None:
-    monkeypatch.delenv("EBOOK_SEARCH_RERANK_ENABLED", raising=False)
-    monkeypatch.delenv("EBOOK_SEARCH_RERANK_BASE_URL", raising=False)
-    monkeypatch.delenv("EBOOK_SEARCH_RERANK_MODEL", raising=False)
-    monkeypatch.delenv("EBOOK_SEARCH_RERANK_CANDIDATES", raising=False)
-    monkeypatch.delenv("EBOOK_SEARCH_RERANK_TIMEOUT_SECONDS", raising=False)
+def test_config_defaults_keep_reranking_optional(mocker: MockerFixture) -> None:
+    mocker.patch.dict(environ, {}, clear=False)
+    environ.pop("EBOOK_SEARCH_RERANK_ENABLED", None)
+    environ.pop("EBOOK_SEARCH_RERANK_BASE_URL", None)
+    environ.pop("EBOOK_SEARCH_RERANK_MODEL", None)
+    environ.pop("EBOOK_SEARCH_RERANK_CANDIDATES", None)
+    environ.pop("EBOOK_SEARCH_RERANK_TIMEOUT_SECONDS", None)
 
     config = load_rerank_config()
 
@@ -52,7 +59,7 @@ def test_reranking_disabled_returns_original_fused_order() -> None:
     assert [result.chunk_id for result in response.results] == [1, 2]
 
 
-def test_reranking_enabled_reorders_candidates(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_reranking_enabled_reorders_candidates(mocker: MockerFixture) -> None:
     def fake_post(_url: str, *, json: dict[str, object], timeout: float) -> httpx.Response:
         assert timeout == 30
         assert json == {
@@ -70,7 +77,7 @@ def test_reranking_enabled_reorders_candidates(monkeypatch: pytest.MonkeyPatch)
             }
         )
 
-    monkeypatch.setattr(httpx, "post", fake_post)
+    mocker.patch.object(httpx, "post", side_effect=fake_post)
 
     results = rerank_chunks("query", candidates(), RerankConfig())
 
@@ -79,7 +86,7 @@ def test_reranking_enabled_reorders_candidates(monkeypatch: pytest.MonkeyPatch)
     assert [result.rerank_score for result in results] == [0.9, 0.1, 0.4]
 
 
-def test_reranking_cannot_ignore_hybrid_score(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_reranking_cannot_ignore_hybrid_score(mocker: MockerFixture) -> None:
     candidates = [
         SearchResult(chunk_id=1, text="strong hybrid", source_title="A", score=1.0),
         SearchResult(chunk_id=2, text="weak hybrid", source_title="B", score=0.1),
@@ -95,7 +102,7 @@ def test_reranking_cannot_ignore_hybrid_score(monkeypatch: pytest.MonkeyPatch) -
             }
         )
 
-    monkeypatch.setattr(httpx, "post", fake_post)
+    mocker.patch.object(httpx, "post", side_effect=fake_post)
 
     results = rerank_chunks("query", candidates, RerankConfig())
 
@@ -105,7 +112,7 @@ def test_reranking_cannot_ignore_hybrid_score(monkeypatch: pytest.MonkeyPatch) -
     assert results[1].rerank_score == 1.0
 
 
-def test_vllm_rerank_timeout_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_vllm_rerank_timeout_raises(mocker: MockerFixture) -> None:
     def fake_rerank_chunks(
         _query: str,
         _candidates: list[SearchResult],
@@ -114,25 +121,25 @@ def test_vllm_rerank_timeout_raises(monkeypatch: pytest.MonkeyPatch) -> None:
         message = "timeout"
         raise httpx.TimeoutException(message)
 
-    monkeypatch.setattr("python.ebook_search.search.rerank_chunks", fake_rerank_chunks)
+    mocker.patch("python.ebook_search.search.rerank_chunks", side_effect=fake_rerank_chunks)
     config = EbookSearchConfig(rerank=RerankConfig(enabled=True), top_k=2)
 
     with pytest.raises(httpx.TimeoutException, match="timeout"):
         apply_rerank("query", candidates(), config)
 
 
-def test_malformed_vllm_rerank_json_does_not_crash_search(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_malformed_vllm_rerank_json_does_not_crash_search(mocker: MockerFixture) -> None:
     def fake_post(_url: str, **_kwargs: object) -> httpx.Response:
         return rerank_response(content=b"not-json")
 
-    monkeypatch.setattr(httpx, "post", fake_post)
+    mocker.patch.object(httpx, "post", side_effect=fake_post)
 
     results = rerank_chunks("query", candidates()[:1], RerankConfig())
 
     assert results[0].score == 0.3
 
 
-def test_vllm_rerank_scores_are_clamped(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_vllm_rerank_scores_are_clamped(mocker: MockerFixture) -> None:
     def fake_post(_url: str, **_kwargs: object) -> httpx.Response:
         return rerank_response(
             {
@@ -143,7 +150,7 @@ def test_vllm_rerank_scores_are_clamped(monkeypatch: pytest.MonkeyPatch) -> None
             }
         )
 
-    monkeypatch.setattr(httpx, "post", fake_post)
+    mocker.patch.object(httpx, "post", side_effect=fake_post)
 
     results = rerank_chunks("query", candidates()[:2], RerankConfig())
 
diff --git a/tests/test_ebook_search_ui.py b/tests/ebook_search/test_ui.py
similarity index 70%
rename from tests/test_ebook_search_ui.py
rename to tests/ebook_search/test_ui.py
index 025d867..8db5094 100644
--- a/tests/test_ebook_search_ui.py
+++ b/tests/ebook_search/test_ui.py
@@ -3,6 +3,8 @@
 from __future__ import annotations
 
 from compression import zstd
+from typing import TYPE_CHECKING
+
 from fastapi.testclient import TestClient
 from sqlalchemy import create_engine
 
@@ -13,11 +15,14 @@ from python.ebook_search.embeddings import EmbeddingModelStats
 from python.ebook_search.search import SearchResponse, SearchResult
 from python.ebook_search.timing import RuntimeStep
 
+if TYPE_CHECKING:
+    from pytest_mock import MockerFixture
 
-def patch_app_runtime(monkeypatch):
+
+def patch_app_runtime(mocker: MockerFixture) -> None:
     """Patch app startup dependencies used by UI route tests."""
-    monkeypatch.setattr("python.ebook_search.api.main.get_postgres_engine", fake_get_postgres_engine)
-    monkeypatch.setattr("python.ebook_search.api.main.ensure_bm25_corpus", lambda _session, _config: None)
+    mocker.patch("python.ebook_search.api.main.get_postgres_engine", side_effect=fake_get_postgres_engine)
+    mocker.patch("python.ebook_search.api.main.ensure_bm25_corpus", side_effect=lambda _session, _config: None)
 
 
 def fake_get_postgres_engine(**_kwargs):
@@ -25,8 +30,8 @@ def fake_get_postgres_engine(**_kwargs):
     return create_engine("sqlite+pysqlite:///:memory:", future=True)
 
 
-def test_search_page_uses_zstd_when_requested(monkeypatch) -> None:
-    patch_app_runtime(monkeypatch)
+def test_search_page_uses_zstd_when_requested(mocker: MockerFixture) -> None:
+    patch_app_runtime(mocker)
     app = create_app()
     app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
 
@@ -38,7 +43,7 @@ def test_search_page_uses_zstd_when_requested(monkeypatch) -> None:
     assert b"EPUB Search" in zstd.decompress(response.content)
 
 
-def test_ui_form_passes_rerank_flag_to_search_handler(monkeypatch) -> None:
+def test_ui_form_passes_rerank_flag_to_search_handler(mocker: MockerFixture) -> None:
     captured: dict[str, object] = {}
 
     def fake_search_ebooks(_engine, query, config, *, rerank=False):
@@ -47,12 +52,12 @@ def test_ui_form_passes_rerank_flag_to_search_handler(monkeypatch) -> None:
         captured["config"] = config
         return SearchResponse(query=query, results=[], rank_label="Hybrid + rerank")
 
-    monkeypatch.setattr("python.ebook_search.api.routes.search.search_ebooks", fake_search_ebooks)
-    monkeypatch.setattr(
+    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
+    mocker.patch(
         "python.ebook_search.api.routes.search.answer_query",
-        lambda _query, _results, _config: "answer",
+        side_effect=lambda _query, _results, _config: "answer",
     )
-    patch_app_runtime(monkeypatch)
+    patch_app_runtime(mocker)
     app = create_app()
     app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), top_k=12, answer_enabled=True)
 
@@ -65,14 +70,14 @@ def test_ui_form_passes_rerank_flag_to_search_handler(monkeypatch) -> None:
     assert captured["rerank"] is True
 
 
-def test_ui_search_failure_returns_visible_error(monkeypatch) -> None:
+def test_ui_search_failure_returns_visible_error(mocker: MockerFixture) -> None:
     def fake_search_ebooks(_engine, _query, _config, *, rerank=False):
         del rerank
         msg = "search exploded"
         raise RuntimeError(msg)
 
-    monkeypatch.setattr("python.ebook_search.api.routes.search.search_ebooks", fake_search_ebooks)
-    patch_app_runtime(monkeypatch)
+    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
+    patch_app_runtime(mocker)
     app = create_app()
     app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), top_k=12)
 
@@ -83,7 +88,7 @@ def test_ui_search_failure_returns_visible_error(monkeypatch) -> None:
     assert "search exploded" in response.text
 
 
-def test_ui_answer_failure_still_returns_sources(monkeypatch) -> None:
+def test_ui_answer_failure_still_returns_sources(mocker: MockerFixture) -> None:
     def fake_search_ebooks(_engine, query, _config, *, rerank=False):
         del rerank
         return SearchResponse(query=query, results=[], rank_label="Hybrid")
@@ -92,9 +97,9 @@ def test_ui_answer_failure_still_returns_sources(monkeypatch) -> None:
         msg = "answer exploded"
         raise RuntimeError(msg)
 
-    monkeypatch.setattr("python.ebook_search.api.routes.search.search_ebooks", fake_search_ebooks)
-    monkeypatch.setattr("python.ebook_search.api.routes.search.answer_query", fake_answer_query)
-    patch_app_runtime(monkeypatch)
+    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
+    mocker.patch("python.ebook_search.api.routes.search.answer_query", side_effect=fake_answer_query)
+    patch_app_runtime(mocker)
     app = create_app()
     app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), top_k=12, answer_enabled=True)
 
@@ -105,7 +110,7 @@ def test_ui_answer_failure_still_returns_sources(monkeypatch) -> None:
     assert "Answer generation failed" in response.text
 
 
-def test_ui_skips_answer_when_disabled(monkeypatch) -> None:
+def test_ui_skips_answer_when_disabled(mocker: MockerFixture) -> None:
     called = False
 
     def fake_search_ebooks(_engine, query, _config, *, rerank=False):
@@ -117,11 +122,12 @@ def test_ui_skips_answer_when_disabled(monkeypatch) -> None:
         called = True
         return "answer"
 
-    monkeypatch.setattr("python.ebook_search.api.routes.search.search_ebooks", fake_search_ebooks)
-    monkeypatch.setattr("python.ebook_search.api.routes.search.answer_query", fake_answer_query)
-    patch_app_runtime(monkeypatch)
+    config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=False)
+    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
+    mocker.patch("python.ebook_search.api.routes.search.answer_query", side_effect=fake_answer_query)
+    mocker.patch("python.ebook_search.api.main.load_config", side_effect=lambda: config)
+    patch_app_runtime(mocker)
     app = create_app()
-    app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=False)
 
     with TestClient(app) as client:
         response = client.post("/search", data={"query": "where is the quote?"})
@@ -131,7 +137,7 @@ def test_ui_skips_answer_when_disabled(monkeypatch) -> None:
     assert "Answer generation is disabled" in response.text
 
 
-def test_ui_shows_component_scores(monkeypatch) -> None:
+def test_ui_shows_component_scores(mocker: MockerFixture) -> None:
     def fake_search_ebooks(_engine, query, _config, *, rerank=False):
         del rerank
         return SearchResponse(
@@ -151,12 +157,12 @@ def test_ui_shows_component_scores(monkeypatch) -> None:
             ],
         )
 
-    monkeypatch.setattr("python.ebook_search.api.routes.search.search_ebooks", fake_search_ebooks)
-    monkeypatch.setattr(
+    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
+    mocker.patch(
         "python.ebook_search.api.routes.search.answer_query",
-        lambda _query, _results, _config: "answer",
+        side_effect=lambda _query, _results, _config: "answer",
     )
-    patch_app_runtime(monkeypatch)
+    patch_app_runtime(mocker)
     app = create_app()
     app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=True)
 
@@ -170,7 +176,7 @@ def test_ui_shows_component_scores(monkeypatch) -> None:
     assert "RRF" in response.text
 
 
-def test_ui_shows_search_runtime_chart(monkeypatch) -> None:
+def test_ui_shows_search_runtime_chart(mocker: MockerFixture) -> None:
     def fake_search_ebooks(_engine, query, _config, *, rerank=False):
         del rerank
         return SearchResponse(
@@ -183,12 +189,12 @@ def test_ui_shows_search_runtime_chart(monkeypatch) -> None:
             ),
         )
 
-    monkeypatch.setattr("python.ebook_search.api.routes.search.search_ebooks", fake_search_ebooks)
-    monkeypatch.setattr(
+    mocker.patch("python.ebook_search.api.routes.search.search_ebooks", side_effect=fake_search_ebooks)
+    mocker.patch(
         "python.ebook_search.api.routes.search.answer_query",
-        lambda _query, _results, _config: "answer",
+        side_effect=lambda _query, _results, _config: "answer",
     )
-    patch_app_runtime(monkeypatch)
+    patch_app_runtime(mocker)
     app = create_app()
     app.state.config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=True)
 
@@ -204,7 +210,7 @@ def test_ui_shows_search_runtime_chart(monkeypatch) -> None:
     assert "ms left" in response.text
 
 
-def test_ui_embed_all_batches_until_complete(monkeypatch) -> None:
+def test_ui_embed_all_batches_until_complete(mocker: MockerFixture) -> None:
     counts = iter([32, 32, 5, 0])
     batch_sizes: list[int] = []
 
@@ -212,8 +218,8 @@ def test_ui_embed_all_batches_until_complete(monkeypatch) -> None:
         batch_sizes.append(config.embedding_batch_size)
         return next(counts)
 
-    monkeypatch.setattr("python.ebook_search.api.routes.admin.embed_missing_chunks", fake_embed_missing_chunks)
-    patch_app_runtime(monkeypatch)
+    mocker.patch("python.ebook_search.api.routes.admin.embed_missing_chunks", side_effect=fake_embed_missing_chunks)
+    patch_app_runtime(mocker)
     app = create_app()
 
     with TestClient(app) as client:
@@ -224,7 +230,7 @@ def test_ui_embed_all_batches_until_complete(monkeypatch) -> None:
     assert batch_sizes == [32, 32, 32, 32]
 
 
-def test_ui_scan_schedules_bm25_refresh_after_database_change(monkeypatch) -> None:
+def test_ui_scan_schedules_bm25_refresh_after_database_change(mocker: MockerFixture) -> None:
     scheduled = False
 
     def fake_ingest_configured_paths(_session, _config):
@@ -234,9 +240,12 @@ def test_ui_scan_schedules_bm25_refresh_after_database_change(monkeypatch) -> No
         nonlocal scheduled
         scheduled = True
 
-    monkeypatch.setattr("python.ebook_search.api.routes.admin.ingest_configured_paths", fake_ingest_configured_paths)
-    monkeypatch.setattr("python.ebook_search.api.routes.admin.schedule_bm25_refresh", fake_schedule_bm25_refresh)
-    patch_app_runtime(monkeypatch)
+    mocker.patch(
+        "python.ebook_search.api.routes.admin.ingest_configured_paths",
+        side_effect=fake_ingest_configured_paths,
+    )
+    mocker.patch("python.ebook_search.api.routes.admin.schedule_bm25_refresh", side_effect=fake_schedule_bm25_refresh)
+    patch_app_runtime(mocker)
     app = create_app()
 
     with TestClient(app) as client:
@@ -247,7 +256,7 @@ def test_ui_scan_schedules_bm25_refresh_after_database_change(monkeypatch) -> No
     assert scheduled is True
 
 
-def test_bm25_refresh_clears_loaded_corpus_cache(monkeypatch) -> None:
+def test_bm25_refresh_clears_loaded_corpus_cache(mocker: MockerFixture) -> None:
     refreshed: list[object] = []
     cache_cleared = False
 
@@ -258,8 +267,8 @@ def test_bm25_refresh_clears_loaded_corpus_cache(monkeypatch) -> None:
         nonlocal cache_cleared
         cache_cleared = True
 
-    monkeypatch.setattr("python.ebook_search.api.bm25_tasks.refresh_bm25_corpus", fake_refresh_bm25_corpus)
-    monkeypatch.setattr("python.ebook_search.api.bm25_tasks.load_bm25_corpus.cache_clear", fake_cache_clear)
+    mocker.patch("python.ebook_search.api.bm25_tasks.refresh_bm25_corpus", side_effect=fake_refresh_bm25_corpus)
+    mocker.patch("python.ebook_search.api.bm25_tasks.load_bm25_corpus.cache_clear", side_effect=fake_cache_clear)
     engine = create_engine("sqlite+pysqlite:///:memory:", future=True)
     config = EbookSearchConfig(rerank=RerankConfig(enabled=False))
 
@@ -270,7 +279,7 @@ def test_bm25_refresh_clears_loaded_corpus_cache(monkeypatch) -> None:
     assert cache_cleared is True
 
 
-def test_admin_page_shows_embedding_counts_by_model(monkeypatch) -> None:
+def test_admin_page_shows_embedding_counts_by_model(mocker: MockerFixture) -> None:
     def fake_embedding_model_stats(_session):
         return [
             EmbeddingModelStats(
@@ -287,8 +296,8 @@ def test_admin_page_shows_embedding_counts_by_model(monkeypatch) -> None:
             ),
         ]
 
-    monkeypatch.setattr("python.ebook_search.api.routes.admin.embedding_model_stats", fake_embedding_model_stats)
-    patch_app_runtime(monkeypatch)
+    mocker.patch("python.ebook_search.api.routes.admin.embedding_model_stats", side_effect=fake_embedding_model_stats)
+    patch_app_runtime(mocker)
     app = create_app()
 
     with TestClient(app) as client:
diff --git a/tests/test_ebook_search_health.py b/tests/test_ebook_search_health.py
deleted file mode 100644
index 036bd5c..0000000
--- a/tests/test_ebook_search_health.py
+++ /dev/null
@@ -1,116 +0,0 @@
-"""Tests for EPUB search health and readiness routes."""
-
-from __future__ import annotations
-
-from fastapi.testclient import TestClient
-from sqlalchemy import create_engine
-
-from python.ebook_search.api.main import create_app
-from python.ebook_search.config import EbookSearchConfig, RerankConfig
-
-HEALTH_MODULE = "python.ebook_search.api.routes.health"
-
-
-def fake_get_postgres_engine(**_kwargs):
-    """Return an in-memory engine for route tests."""
-    return create_engine("sqlite+pysqlite:///:memory:", future=True)
-
-
-def patch_app_runtime(monkeypatch):
-    monkeypatch.setattr("python.ebook_search.api.main.get_postgres_engine", fake_get_postgres_engine)
-    monkeypatch.setattr("python.ebook_search.api.main.ensure_bm25_corpus", lambda _session, _config: None)
-
-
-def patch_dependencies(monkeypatch, *, database=True, embedding=True, chat=True, bm25="ok"):
-    monkeypatch.setattr(f"{HEALTH_MODULE}.check_database", lambda _session: database)
-    monkeypatch.setattr(f"{HEALTH_MODULE}.check_embedding_endpoint", lambda _config: embedding)
-    monkeypatch.setattr(f"{HEALTH_MODULE}.check_chat_endpoint", lambda _config: chat)
-    monkeypatch.setattr(f"{HEALTH_MODULE}.check_bm25_status", lambda _config: bm25)
-
-
-def build_client(monkeypatch, config=None):
-    patch_app_runtime(monkeypatch)
-    app = create_app()
-    app.state.config = config or EbookSearchConfig(rerank=RerankConfig(enabled=False))
-    return TestClient(app)
-
-
-def test_health_returns_ok(monkeypatch) -> None:
-    with build_client(monkeypatch) as client:
-        response = client.get("/health")
-
-    assert response.status_code == 200
-    assert response.json() == {"status": "ok"}
-
-
-def test_ready_all_dependencies_ok(monkeypatch) -> None:
-    patch_dependencies(monkeypatch)
-
-    with build_client(monkeypatch) as client:
-        response = client.get("/ready")
-
-    assert response.status_code == 200
-    body = response.json()
-    assert body["status"] == "ready"
-    assert body["checks"] == {"database": "ok", "embedding": "ok", "chat": "ok", "bm25": "ok"}
-
-
-def test_ready_embedding_down_is_degraded(monkeypatch) -> None:
-    patch_dependencies(monkeypatch, embedding=False)
-
-    with build_client(monkeypatch) as client:
-        response = client.get("/ready")
-
-    assert response.status_code == 200
-    body = response.json()
-    assert body["status"] == "degraded"
-    assert body["checks"]["embedding"] == "fail"
-
-
-def test_ready_chat_down_is_degraded(monkeypatch) -> None:
-    patch_dependencies(monkeypatch, chat=False)
-
-    with build_client(monkeypatch) as client:
-        response = client.get("/ready")
-
-    assert response.status_code == 200
-    body = response.json()
-    assert body["status"] == "degraded"
-    assert body["checks"]["chat"] == "fail"
-
-
-def test_ready_chat_disabled_when_answers_off(monkeypatch) -> None:
-    patch_dependencies(monkeypatch)
-    config = EbookSearchConfig(rerank=RerankConfig(enabled=False), answer_enabled=False)
-
-    with build_client(monkeypatch, config) as client:
-        response = client.get("/ready")
-
-    assert response.status_code == 200
-    body = response.json()
-    assert body["status"] == "ready"
-    assert body["checks"]["chat"] == "disabled"
-
-
-def test_ready_database_down_is_unavailable(monkeypatch) -> None:
-    patch_dependencies(monkeypatch, database=False)
-
-    with build_client(monkeypatch) as client:
-        response = client.get("/ready")
-
-    assert response.status_code == 503
-    body = response.json()
-    assert body["status"] == "unavailable"
-    assert body["checks"]["database"] == "fail"
-
-
-def test_ready_bm25_missing_is_degraded(monkeypatch) -> None:
-    patch_dependencies(monkeypatch, bm25="missing")
-
-    with build_client(monkeypatch) as client:
-        response = client.get("/ready")
-
-    assert response.status_code == 200
-    body = response.json()
-    assert body["status"] == "degraded"
-    assert body["checks"]["bm25"] == "missing"