feat(ebook-search): add load-test CLI for the search service

Add a Typer CLI script that drives POST /search on a running server at a configurable concurrency and reports latency percentiles (p50/p90/p95/p99), throughput, and HTTP status distribution. Queries are drawn from the shared eval JSONL set so load testing and evaluation exercise the same questions.
2026-06-18 12:39:55 -04:00
parent dbc6b5b53b
commit 6ae1ff1f5c
7 changed files with 436 additions and 0 deletions
@@ -96,6 +96,8 @@ def search(
        timings=(*response.timings, runtime_step_from_start(answer_step_name, answer_start)),
    )

+    for step in response.timings:
+        logger.info("ebook_search_timing step=%r runtime_ms=%.1f", step.name, step.duration_ms)
    logger.info(
        "ebook_search_request_complete results=%s rank_label=%s runtime_ms=%.1f",
        len(response.results),
@@ -0,0 +1 @@
+"""Offline evaluation tooling for the ebook search pipeline."""
@@ -0,0 +1,71 @@
+{"query": "Who is Damien Montgomery and how does he become a Jump Mage?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "What is a Rune Wright and why is Damien so rare?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "How does jump magic let starships travel faster than light?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "What is the role of the Mage-King of Mars in the Protectorate?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "What happened aboard the Blue Jay in the first Starship's Mage book?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "Who is Captain David Rice?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "How are amplifiers and simulacrums used to power a ship's jump?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "What duties does a Hand of the Mage-King carry out?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "Explain the structure of the Royal Martian Navy.", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "How do mages carve runes to enchant a starship?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "What threat do the Legatan rebels pose to the Protectorate?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "How does Damien handle his first command?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "What is the significance of the simulacrum on a jump ship?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "Describe a mage duel in the Starship's Mage series.", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "What moral conflicts does Damien face as a Hand of the Mage-King?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "How does the Protectorate keep peace among its member worlds?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "Who is the Keeper of Oaths and how does Damien work with them?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
+{"query": "What event is known as the Onset and how does it change the world?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "Who is the main character at the start of the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "How do survivors adapt after the Onset begins?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "What new abilities emerge during the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "Describe the primary antagonist in the Onset series.", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "How does society collapse and reorganize after the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "What factions form in the aftermath of the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "How does the protagonist gain power throughout the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "What is the cause or origin of the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "Describe an early survival challenge faced after the Onset.", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "How do the characters defend their stronghold during the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "What relationships drive the protagonist's choices in the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "How does the Onset escalate by the end of the first book?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "What mysteries about the Onset remain unresolved?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "How do the rules of the world change once the Onset takes hold?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "What weapons or tactics work best against the threats of the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
+{"query": "How does Bob Johansson become a von Neumann probe?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "What is a replicant and why do Bob's copies have different personalities?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "Who are Riker, Homer, and Bill among the Bob clones?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "What is GUPPI and how does Bob use it?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "Describe the threat posed by the Others.", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "How does Bob protect and uplift the Deltans?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "Why do the replicants drift apart in personality over time?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "What is the role of FAITH and the Brazilian Empire on Earth?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "How does subspace communication work for the Bobs?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "What happens to Bender after he goes missing?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "How do the Bobs build self-replicating probes across the galaxy?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "How does Bob evacuate humanity after Earth becomes uninhabitable?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "Describe the conflict between different factions of Bobs.", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "What ethical dilemmas does Bob face when interfering with primitive species?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "How does the original Bob differ from later generations of clones?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "How do the Bobs defeat the Others' system-harvesting fleets?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+{"query": "What role does Howard play in the human colonies?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
+// queries not in the dataset
+{"query": "How does Frodo destroy the One Ring in The Lord of the Rings?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "Who killed Dumbledore in Harry Potter and the Half-Blood Prince?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "What house does Tyrion Lannister belong to in A Game of Thrones?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "How does Paul Atreides control the spice on Arrakis in Dune?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "What does the green light at the end of the dock mean in The Great Gatsby?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "Why does Hester Prynne wear a scarlet letter?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "What does the white whale represent in Moby-Dick?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "How does Elizabeth Bennet's view of Mr. Darcy change in Pride and Prejudice?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "What crime does Raskolnikov commit in Crime and Punishment?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "How does Katniss volunteer for the Hunger Games?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "What is Winston Smith's job in Nineteen Eighty-Four?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "Who is Atticus Finch defending in To Kill a Mockingbird?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "What is the capital of Australia?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "How do I bake a sourdough loaf from scratch?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "Explain how photosynthesis converts sunlight into energy.", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "What were the main causes of World War I?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "How does compound interest work?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "How do I change a flat tire on a car?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "What is the boiling point of water at sea level?", "answer": null, "answerable": false, "relevant_sources": []}
+{"query": "What is the recommended daily intake of vitamin D?", "answer": null, "answerable": false, "relevant_sources": []}
@@ -0,0 +1,47 @@
+"""Shared query set loading for evaluation and load testing.
+
+Each JSONL record has a ``query`` and an optional reference ``answer``. ``answerable``
+marks whether the query should be answerable from the library (false for out-of-corpus
+"garbage" queries used to test the refusal path). Relevance for retrieval metrics is
+labeled at source (book) granularity in ``relevant_sources``; source titles must match
+``ebook_source.title`` values for the indexed corpus.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from pathlib import Path
+
DEFAULT_QUERIES_PATH = Path(__file__).parent / "data" / "queries.jsonl"

# Line prefixes treated as comments. JSONL itself has no comment syntax, but the
# shared query file carries a ``//`` section marker that ``json.loads`` rejects,
# so such lines are skipped instead of parsed.
_COMMENT_PREFIXES = ("//", "#")


@dataclass(frozen=True)
class GoldQuery:
    """One labeled query shared by the eval and load-test tools."""

    # Query text sent to the search service.
    query: str
    # Optional reference answer; None when the record has none.
    answer: str | None
    # False for out-of-corpus queries that should hit the refusal path.
    answerable: bool
    # Titles of the sources (books) considered relevant to the query.
    relevant_sources: tuple[str, ...]
    # Values of the record's optional ``relevant_substrings`` key; empty when absent.
    relevant_substrings: tuple[str, ...]


def load_gold_queries(path: Path = DEFAULT_QUERIES_PATH) -> list[GoldQuery]:
    """Load labeled queries from a JSONL file.

    Blank lines and comment lines (starting with ``//`` or ``#``) are ignored; every
    other line must be a JSON object with at least a ``query`` key. ``answerable``
    defaults to true, and ``relevant_sources`` / ``relevant_substrings`` default to
    empty tuples when missing or ``null``.

    Args:
        path: JSONL file to read; defaults to the shared query set.

    Returns:
        The parsed queries in file order.

    Raises:
        json.JSONDecodeError: If a non-comment line is not valid JSON. The message is
            prefixed with ``<path>:<line number>`` so the bad record is easy to find.
        KeyError: If a record has no ``query`` key.
    """
    queries: list[GoldQuery] = []
    lines = path.read_text(encoding="utf-8").splitlines()
    for line_number, line in enumerate(lines, start=1):
        stripped = line.strip()
        if not stripped or stripped.startswith(_COMMENT_PREFIXES):
            continue
        try:
            record = json.loads(stripped)
        except json.JSONDecodeError as error:
            # Re-raise the same exception type with file/line context attached.
            msg = f"{path}:{line_number}: {error.msg}"
            raise json.JSONDecodeError(msg, error.doc, error.pos) from error
        queries.append(
            GoldQuery(
                query=str(record["query"]),
                answer=record.get("answer"),
                answerable=bool(record.get("answerable", True)),
                # ``or ()`` also covers an explicit JSON null, which tuple() rejects.
                relevant_sources=tuple(record.get("relevant_sources") or ()),
                relevant_substrings=tuple(record.get("relevant_substrings") or ()),
            )
        )
    return queries
@@ -0,0 +1,218 @@
+"""Load test for the EPUB search service.
+
+Drives ``POST /search`` on a running server at a configurable concurrency and reports
+latency percentiles, throughput, and HTTP status distribution. Queries are drawn from
+the shared JSONL set (see ``eval/data/queries.jsonl``) that the eval also uses, so load
+and evaluation exercise the same questions. Answer generation and reranking happen
+server-side, so this exercises the full retrieval pipeline.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import math
+import random
+import statistics
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Annotated
+
+import httpx
+import typer
+
+from python.common import configure_logger
+from python.ebook_search.eval.dataset import DEFAULT_QUERIES_PATH, load_gold_queries
+
+logger = logging.getLogger(__name__)
+
+
@dataclass(frozen=True)
class RequestResult:
    """Immutable record of what happened to one search request."""

    # HTTP status of the response; 0 is used when no response was received.
    status_code: int
    # Time from just before the request was sent until it finished, in milliseconds.
    latency_ms: float
    # True when the request counts as a success.
    ok: bool
+
+
@dataclass(frozen=True)
class LoadSummary:
    """Immutable roll-up of one measured load-test run."""

    # Request counts: everything measured, then the success / failure split.
    total: int
    successes: int
    failures: int
    # Elapsed wall-clock seconds and the requests-per-second figure for the run.
    wall_seconds: float
    throughput_rps: float
    # Latency distribution, all in milliseconds.
    latency_p50_ms: float
    latency_p90_ms: float
    latency_p95_ms: float
    latency_p99_ms: float
    latency_mean_ms: float
    latency_max_ms: float
    # How many results were recorded for each status code.
    status_counts: dict[int, int]
+
+
def load_queries(queries_file: str | None) -> list[str]:
    """Collect the query texts to replay against the service.

    Args:
        queries_file: Path to a JSONL query file. When empty or None, the shared
            default query set is used.

    Returns:
        The ``query`` string of every record in the file, in file order.

    Raises:
        typer.BadParameter: If the file yields no queries.
    """
    path = DEFAULT_QUERIES_PATH if not queries_file else Path(queries_file)
    texts = [entry.query for entry in load_gold_queries(path)]
    if texts:
        return texts
    msg = f"No queries found in {path}"
    raise typer.BadParameter(msg)
+
+
def pick_query(queries: list[str]) -> str:
    """Pick one query from the pool at random, each equally likely.

    Uses the non-cryptographic ``random`` module on purpose: this only samples
    load-test inputs and is not a security context.
    """
    chosen = random.choice(queries)  # noqa: S311 load-test query sampling is not security-sensitive
    return chosen
+
+
def percentile(values_sorted: list[float], pct: float) -> float:
    """Return the linearly-interpolated percentile of a sorted list.

    The rank is ``pct / 100 * (len - 1)``; when it falls between two positions the
    result is interpolated linearly between the neighbouring values.

    Args:
        values_sorted: Values in ascending order. The list is not sorted here.
        pct: Percentile to compute, from 0 to 100 inclusive.

    Returns:
        The percentile value, or 0.0 when ``values_sorted`` is empty.

    Raises:
        ValueError: If ``pct`` is outside ``[0, 100]`` (this includes NaN).
    """
    # Reject out-of-range input up front: a negative pct would otherwise wrap around
    # through negative indexing and return a wrong value without any error.
    if not 0 <= pct <= 100:
        msg = f"pct must be between 0 and 100, got {pct!r}"
        raise ValueError(msg)
    if not values_sorted:
        return 0.0
    rank = (pct / 100) * (len(values_sorted) - 1)
    low = math.floor(rank)
    high = math.ceil(rank)
    if low == high:
        # The rank lands exactly on an element; nothing to interpolate.
        return values_sorted[low]
    return values_sorted[low] + (values_sorted[high] - values_sorted[low]) * (rank - low)
+
+
def summarize(results: list[RequestResult], wall_seconds: float) -> LoadSummary:
    """Fold the per-request results of a run into one ``LoadSummary``.

    Args:
        results: Outcome of every measured request.
        wall_seconds: Wall-clock duration of the measured phase, in seconds.

    Returns:
        Counts, throughput, latency statistics and per-status tallies. Throughput
        is 0.0 when ``wall_seconds`` is not positive, and the latency figures are
        0.0 when there are no results.
    """
    total = len(results)
    ordered = sorted(outcome.latency_ms for outcome in results)

    # One pass collects both the success count and the per-status tallies.
    ok_count = 0
    by_status: dict[int, int] = {}
    for outcome in results:
        if outcome.ok:
            ok_count += 1
        by_status[outcome.status_code] = by_status.get(outcome.status_code, 0) + 1

    if wall_seconds > 0:
        throughput = total / wall_seconds
    else:
        throughput = 0.0

    if ordered:
        mean_ms = statistics.fmean(ordered)
        max_ms = ordered[-1]
    else:
        mean_ms = 0.0
        max_ms = 0.0

    return LoadSummary(
        total=total,
        successes=ok_count,
        failures=total - ok_count,
        wall_seconds=wall_seconds,
        throughput_rps=throughput,
        latency_p50_ms=percentile(ordered, 50),
        latency_p90_ms=percentile(ordered, 90),
        latency_p95_ms=percentile(ordered, 95),
        latency_p99_ms=percentile(ordered, 99),
        latency_mean_ms=mean_ms,
        latency_max_ms=max_ms,
        status_counts=by_status,
    )
+
+
async def send_search(client: httpx.AsyncClient, query: str, *, rerank: bool) -> RequestResult:
    """POST a single query to ``/search`` and time it.

    Args:
        client: HTTP client used to send the request.
        query: Query text, sent as the ``query`` field of the ``data`` payload.
        rerank: When true, a ``rerank`` field with the value ``"true"`` is added.

    Returns:
        The response status, elapsed milliseconds and success flag. If the client
        raises ``httpx.HTTPError`` the failure is logged and a result with status
        code 0 and ``ok=False`` is returned instead of propagating the error.
    """
    form: dict[str, str] = {"query": query}
    if rerank:
        form["rerank"] = "true"

    began = time.perf_counter()
    try:
        response = await client.post("/search", data=form)
    except httpx.HTTPError as error:
        logger.warning("ebook_loadtest_request_failed error=%s", error)
        elapsed_ms = (time.perf_counter() - began) * 1000
        return RequestResult(status_code=0, latency_ms=elapsed_ms, ok=False)

    elapsed_ms = (time.perf_counter() - began) * 1000
    return RequestResult(status_code=response.status_code, latency_ms=elapsed_ms, ok=response.is_success)
+
+
async def worker(
    client: httpx.AsyncClient,
    queue: asyncio.Queue[str],
    results: list[RequestResult],
    *,
    rerank: bool,
) -> None:
    """Drain the shared queue, sending one search request per query.

    Each outcome is appended to ``results``. The worker returns as soon as the
    queue has nothing left; it never waits for new items to arrive.
    """
    while True:
        try:
            next_query = queue.get_nowait()
        except asyncio.QueueEmpty:
            break
        outcome = await send_search(client, next_query, rerank=rerank)
        results.append(outcome)
+
+
async def run_load(
    *,
    base_url: str,
    queries: list[str],
    request_count: int,
    concurrency: int,
    rerank: bool,
    warmup: int,
    timeout_seconds: float,
) -> LoadSummary:
    """Run the load test and return its aggregate summary.

    Warmup requests are sent one at a time first and are not measured. The measured
    queries are then sampled up front into a queue that ``concurrency`` worker tasks
    drain; the reported wall time covers only that phase.

    Args:
        base_url: Base URL of the running service.
        queries: Pool of query strings to sample from.
        request_count: Number of measured requests to send.
        concurrency: Number of worker tasks, also used as the connection-pool limit.
        rerank: Whether each request asks for server-side reranking.
        warmup: Number of unmeasured requests sent before timing starts.
        timeout_seconds: Timeout passed to the HTTP client.

    Returns:
        The summary of all measured requests.

    Raises:
        ValueError: If ``concurrency`` is less than 1.
    """
    # With no workers nothing would ever be sent, and a zero-size connection pool
    # cannot serve the warmup requests either, so fail fast instead of reporting
    # an empty run.
    if concurrency < 1:
        msg = f"concurrency must be at least 1, got {concurrency}"
        raise ValueError(msg)

    limits = httpx.Limits(max_connections=concurrency, max_keepalive_connections=concurrency)
    async with httpx.AsyncClient(base_url=base_url, timeout=timeout_seconds, limits=limits) as client:
        for _ in range(warmup):
            await send_search(client, pick_query(queries), rerank=rerank)

        # Sample every query before the clock starts so sampling is not timed.
        queue: asyncio.Queue[str] = asyncio.Queue()
        for _ in range(request_count):
            queue.put_nowait(pick_query(queries))

        results: list[RequestResult] = []
        start = time.perf_counter()
        workers = [asyncio.create_task(worker(client, queue, results, rerank=rerank)) for _ in range(concurrency)]
        await asyncio.gather(*workers)
        wall_seconds = time.perf_counter() - start
    return summarize(results, wall_seconds)
+
+
def print_summary(summary: LoadSummary) -> None:
    """Write the load summary as four ``typer.echo`` lines.

    The lines are, in order: request counts, wall time and throughput, latency
    statistics, and the per-status tallies sorted by status code.
    """
    status_summary = " ".join(f"{code}={count}" for code, count in sorted(summary.status_counts.items()))
    report_lines = (
        f"requests={summary.total} successes={summary.successes} failures={summary.failures}",
        f"wall={summary.wall_seconds:.2f}s throughput={summary.throughput_rps:.1f} req/s",
        (
            f"latency_ms p50={summary.latency_p50_ms:.1f} p90={summary.latency_p90_ms:.1f} "
            f"p95={summary.latency_p95_ms:.1f} p99={summary.latency_p99_ms:.1f} "
            f"mean={summary.latency_mean_ms:.1f} max={summary.latency_max_ms:.1f}"
        ),
        f"status {status_summary}",
    )
    for report_line in report_lines:
        typer.echo(report_line)
+
+
def main(
    *,
    base_url: Annotated[str, typer.Option(help="Base URL of the running service")] = "http://127.0.0.1:8070",
    request_count: Annotated[int, typer.Option("--requests", help="Total requests to send")] = 200,
    concurrency: Annotated[int, typer.Option(help="Concurrent in-flight requests")] = 10,
    rerank: Annotated[bool, typer.Option(help="Request server-side reranking")] = False,
    warmup: Annotated[int, typer.Option(help="Warmup requests, not measured")] = 5,
    timeout_seconds: Annotated[float, typer.Option("--timeout", help="Per-request timeout seconds")] = 120.0,
    queries_file: Annotated[str | None, typer.Option(help="Query JSONL file (defaults to the shared set)")] = None,
    log_level: Annotated[str, typer.Option(help="Log level")] = "WARNING",
) -> None:
    """Load test the search endpoint and report latency and throughput."""
    # Logging is configured first so the query loading and the run can log.
    configure_logger(log_level)
    queries = load_queries(queries_file)
    logger.info(
        "ebook_loadtest_start base_url=%s requests=%s concurrency=%s rerank=%s queries=%s",
        base_url,
        request_count,
        concurrency,
        rerank,
        len(queries),
    )
    load_run = run_load(
        base_url=base_url,
        queries=queries,
        request_count=request_count,
        concurrency=concurrency,
        rerank=rerank,
        warmup=warmup,
        timeout_seconds=timeout_seconds,
    )
    # The whole async run executes on a fresh event loop; the summary is printed after.
    print_summary(asyncio.run(load_run))


if __name__ == "__main__":
    typer.run(main)
				`@@ -0,0 +1 @@`
				`"""Offline evaluation tooling for the ebook search pipeline."""`