feat(ebook-search): add load-test CLI for the search service
Add a Typer CLI script that drives POST /search on a running server at a configurable concurrency and reports latency percentiles (p50/p90/p95/p99), throughput, and HTTP status distribution. Queries are drawn from the shared eval JSONL set so load testing and evaluation exercise the same questions.
This commit is contained in:
@@ -96,6 +96,8 @@ def search(
|
||||
timings=(*response.timings, runtime_step_from_start(answer_step_name, answer_start)),
|
||||
)
|
||||
|
||||
for step in response.timings:
|
||||
logger.info("ebook_search_timing step=%r runtime_ms=%.1f", step.name, step.duration_ms)
|
||||
logger.info(
|
||||
"ebook_search_request_complete results=%s rank_label=%s runtime_ms=%.1f",
|
||||
len(response.results),
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
"""Offline evaluation tooling for the ebook search pipeline."""
|
||||
@@ -0,0 +1,71 @@
|
||||
{"query": "Who is Damien Montgomery and how does he become a Jump Mage?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "What is a Rune Wright and why is Damien so rare?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "How does jump magic let starships travel faster than light?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "What is the role of the Mage-King of Mars in the Protectorate?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "What happened aboard the Blue Jay in the first Starship's Mage book?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "Who is Captain David Rice?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "How are amplifiers and simulacrums used to power a ship's jump?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "What duties does a Hand of the Mage-King carry out?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "Explain the structure of the Royal Martian Navy.", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "How do mages carve runes to enchant a starship?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "What threat do the Legatan rebels pose to the Protectorate?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "How does Damien handle his first command?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "What is the significance of the simulacrum on a jump ship?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "Describe a mage duel in the Starship's Mage series.", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "What moral conflicts does Damien face as a Hand of the Mage-King?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "How does the Protectorate keep peace among its member worlds?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "Who is the Keeper of Oaths and how does Damien work with them?", "answer": null, "answerable": true, "relevant_sources": ["Starship's Mage"]}
|
||||
{"query": "What event is known as the Onset and how does it change the world?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "Who is the main character at the start of the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "How do survivors adapt after the Onset begins?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "What new abilities emerge during the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "Describe the primary antagonist in the Onset series.", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "How does society collapse and reorganize after the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "What factions form in the aftermath of the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "How does the protagonist gain power throughout the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "What is the cause or origin of the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "Describe an early survival challenge faced after the Onset.", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "How do the characters defend their stronghold during the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "What relationships drive the protagonist's choices in the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "How does the Onset escalate by the end of the first book?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "What mysteries about the Onset remain unresolved?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "How do the rules of the world change once the Onset takes hold?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "What weapons or tactics work best against the threats of the Onset?", "answer": null, "answerable": true, "relevant_sources": ["The Onset"]}
|
||||
{"query": "How does Bob Johansson become a von Neumann probe?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "What is a replicant and why do Bob's copies have different personalities?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "Who are Riker, Homer, and Bill among the Bob clones?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "What is GUPPI and how does Bob use it?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "Describe the threat posed by the Others.", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "How does Bob protect and uplift the Deltans?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "Why do the replicants drift apart in personality over time?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "What is the role of FAITH and the Brazilian Empire on Earth?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "How does subspace communication work for the Bobs?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "What happens to Bender after he goes missing?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "How do the Bobs build self-replicating probes across the galaxy?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "How does Bob evacuate humanity after Earth becomes uninhabitable?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "Describe the conflict between different factions of Bobs.", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "What ethical dilemmas does Bob face when interfering with primitive species?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "How does the original Bob differ from later generations of clones?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "How do the Bobs defeat the Others' system-harvesting fleets?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
{"query": "What role does Howard play in the human colonies?", "answer": null, "answerable": true, "relevant_sources": ["We Are Legion (We Are Bob)"]}
|
||||
// queries not in the dataset
|
||||
{"query": "How does Frodo destroy the One Ring in The Lord of the Rings?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "Who killed Dumbledore in Harry Potter and the Half-Blood Prince?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "What house does Tyrion Lannister belong to in A Game of Thrones?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "How does Paul Atreides control the spice on Arrakis in Dune?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "What does the green light at the end of the dock mean in The Great Gatsby?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "Why does Hester Prynne wear a scarlet letter?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "What does the white whale represent in Moby-Dick?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "How does Elizabeth Bennet's view of Mr. Darcy change in Pride and Prejudice?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "What crime does Raskolnikov commit in Crime and Punishment?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "How does Katniss volunteer for the Hunger Games?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "What is Winston Smith's job in Nineteen Eighty-Four?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "Who is Atticus Finch defending in To Kill a Mockingbird?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "What is the capital of Australia?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "How do I bake a sourdough loaf from scratch?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "Explain how photosynthesis converts sunlight into energy.", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "What were the main causes of World War I?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "How does compound interest work?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "How do I change a flat tire on a car?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "What is the boiling point of water at sea level?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
{"query": "What is the recommended daily intake of vitamin D?", "answer": null, "answerable": false, "relevant_sources": []}
|
||||
@@ -0,0 +1,47 @@
|
||||
"""Shared query set loading for evaluation and load testing.
|
||||
|
||||
Each JSONL record has a ``query`` and an optional reference ``answer``. ``answerable``
|
||||
marks whether the query should be answerable from the library (false for out-of-corpus
|
||||
"garbage" queries used to test the refusal path). Relevance for retrieval metrics is
|
||||
labeled at source (book) granularity in ``relevant_sources``; source titles must match
|
||||
``ebook_source.title`` values for the indexed corpus.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
DEFAULT_QUERIES_PATH = Path(__file__).parent / "data" / "queries.jsonl"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class GoldQuery:
|
||||
"""One labeled query shared by the eval and load-test tools."""
|
||||
|
||||
query: str
|
||||
answer: str | None
|
||||
answerable: bool
|
||||
relevant_sources: tuple[str, ...]
|
||||
relevant_substrings: tuple[str, ...]
|
||||
|
||||
|
||||
def load_gold_queries(path: Path = DEFAULT_QUERIES_PATH) -> list[GoldQuery]:
|
||||
"""Load labeled queries from a JSONL file."""
|
||||
queries: list[GoldQuery] = []
|
||||
for line in path.read_text(encoding="utf-8").splitlines():
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
continue
|
||||
record = json.loads(stripped)
|
||||
queries.append(
|
||||
GoldQuery(
|
||||
query=str(record["query"]),
|
||||
answer=record.get("answer"),
|
||||
answerable=bool(record.get("answerable", True)),
|
||||
relevant_sources=tuple(record.get("relevant_sources", ())),
|
||||
relevant_substrings=tuple(record.get("relevant_substrings", ())),
|
||||
)
|
||||
)
|
||||
return queries
|
||||
@@ -0,0 +1,218 @@
|
||||
"""Load test for the EPUB search service.
|
||||
|
||||
Drives ``POST /search`` on a running server at a configurable concurrency and reports
|
||||
latency percentiles, throughput, and HTTP status distribution. Queries are drawn from
|
||||
the shared JSONL set (see ``eval/data/queries.jsonl``) that the eval also uses, so load
|
||||
and evaluation exercise the same questions. Answer generation and reranking happen
|
||||
server-side, so this exercises the full retrieval pipeline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import math
|
||||
import random
|
||||
import statistics
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
import httpx
|
||||
import typer
|
||||
|
||||
from python.common import configure_logger
|
||||
from python.ebook_search.eval.dataset import DEFAULT_QUERIES_PATH, load_gold_queries
|
||||
|
||||
# Module-level logger named after this module; main() passes --log-level to configure_logger().
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RequestResult:
    """Immutable record of one ``POST /search`` attempt."""

    # HTTP status code of the response; send_search stores 0 when the request raised instead.
    status_code: int
    # Elapsed time of the attempt, in milliseconds.
    latency_ms: float
    # True when the attempt counts as a success in the summary.
    ok: bool
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LoadSummary:
    """Immutable aggregate of one load-test run, as built by ``summarize``."""

    # Request tallies: failures is total minus successes.
    total: int
    successes: int
    failures: int
    # Wall-clock duration of the measured phase, and requests completed per second of it.
    wall_seconds: float
    throughput_rps: float
    # Latency statistics over all recorded requests, in milliseconds.
    latency_p50_ms: float
    latency_p90_ms: float
    latency_p95_ms: float
    latency_p99_ms: float
    latency_mean_ms: float
    latency_max_ms: float
    # Number of requests per recorded status code.
    status_counts: dict[int, int]
|
||||
|
||||
|
||||
def load_queries(queries_file: str | None) -> list[str]:
    """Read the query strings to replay against the service.

    Args:
        queries_file: Path to a custom JSONL file; a falsy value selects the shared set.

    Returns:
        The ``query`` text of every record in the file, in file order.

    Raises:
        typer.BadParameter: If the file yields no queries.
    """
    if queries_file:
        path = Path(queries_file)
    else:
        path = DEFAULT_QUERIES_PATH

    queries = [gold.query for gold in load_gold_queries(path)]
    if queries:
        return queries

    msg = f"No queries found in {path}"
    raise typer.BadParameter(msg)
|
||||
|
||||
|
||||
def pick_query(queries: list[str]) -> str:
    """Pick one query uniformly at random from the pool.

    The standard ``random`` module is sufficient here: sampling load-test queries
    is not a security context.
    """
    chosen = random.choice(queries)  # noqa: S311 load-test query sampling is not security-sensitive
    return chosen
|
||||
|
||||
|
||||
def percentile(values_sorted: list[float], pct: float) -> float:
    """Return the linearly-interpolated percentile of a sorted list.

    Args:
        values_sorted: Values in ascending order; the caller is responsible for sorting.
        pct: Percentile to compute, between 0 and 100 inclusive.

    Returns:
        The interpolated value at ``pct``, or ``0.0`` when ``values_sorted`` is empty.

    Raises:
        ValueError: If ``pct`` is outside ``[0, 100]`` (including NaN).
    """
    # A negative pct would index from the end of the list and silently return a
    # wrong value; a pct above 100 would run past the end. Reject both up front.
    if not 0 <= pct <= 100:
        msg = f"pct must be between 0 and 100, got {pct}"
        raise ValueError(msg)
    if not values_sorted:
        return 0.0
    # Fractional position of the percentile between the first and last index.
    rank = (pct / 100) * (len(values_sorted) - 1)
    low = math.floor(rank)
    high = math.ceil(rank)
    if low == high:
        return values_sorted[low]
    return values_sorted[low] + (values_sorted[high] - values_sorted[low]) * (rank - low)
|
||||
|
||||
|
||||
def summarize(results: list[RequestResult], wall_seconds: float) -> LoadSummary:
    """Fold the per-request outcomes of a run into one ``LoadSummary``.

    Args:
        results: Outcome of every measured request, in any order.
        wall_seconds: Wall-clock duration of the measured phase.

    Returns:
        Counts, throughput, latency statistics, and the status-code histogram.
        Latency fields are ``0.0`` when ``results`` is empty, and throughput is
        ``0.0`` when ``wall_seconds`` is not positive.
    """
    total = len(results)

    # One pass for both the success tally and the status-code histogram.
    ok_count = 0
    by_status: dict[int, int] = {}
    for outcome in results:
        if outcome.ok:
            ok_count += 1
        by_status[outcome.status_code] = by_status.get(outcome.status_code, 0) + 1

    ordered = sorted(outcome.latency_ms for outcome in results)
    if ordered:
        mean_ms = statistics.fmean(ordered)
        max_ms = ordered[-1]
    else:
        mean_ms = 0.0
        max_ms = 0.0

    throughput = total / wall_seconds if wall_seconds > 0 else 0.0

    return LoadSummary(
        total=total,
        successes=ok_count,
        failures=total - ok_count,
        wall_seconds=wall_seconds,
        throughput_rps=throughput,
        latency_p50_ms=percentile(ordered, 50),
        latency_p90_ms=percentile(ordered, 90),
        latency_p95_ms=percentile(ordered, 95),
        latency_p99_ms=percentile(ordered, 99),
        latency_mean_ms=mean_ms,
        latency_max_ms=max_ms,
        status_counts=by_status,
    )
|
||||
|
||||
|
||||
async def send_search(client: httpx.AsyncClient, query: str, *, rerank: bool) -> RequestResult:
    """Send one search request and record its status and latency.

    Args:
        client: Async HTTP client; ``/search`` is posted relative to its base URL.
        query: Query text, sent as the ``query`` form field.
        rerank: When true, a ``rerank=true`` form field is added to the request.

    Returns:
        The response status, elapsed milliseconds, and success flag. When the
        request raises ``httpx.HTTPError`` the status code is recorded as 0 and
        ``ok`` is false.
    """
    data = {"query": query, "rerank": "true"} if rerank else {"query": query}
    start = time.perf_counter()
    try:
        response = await client.post("/search", data=data)
    except httpx.HTTPError as error:
        # Stop the clock before logging so log-handler time is not counted as request latency.
        latency_ms = (time.perf_counter() - start) * 1000
        # %r keeps the exception class visible; some transport errors (e.g. timeouts)
        # can carry an empty message, which %s would render as nothing.
        logger.warning("ebook_loadtest_request_failed error=%r", error)
        return RequestResult(status_code=0, latency_ms=latency_ms, ok=False)
    latency_ms = (time.perf_counter() - start) * 1000
    return RequestResult(
        status_code=response.status_code,
        latency_ms=latency_ms,
        ok=response.is_success,
    )
|
||||
|
||||
|
||||
async def worker(
    client: httpx.AsyncClient,
    queue: asyncio.Queue[str],
    results: list[RequestResult],
    *,
    rerank: bool,
) -> None:
    """Drain the shared queue, sending one search request per queued query.

    Each outcome is appended to ``results``; the coroutine returns once the queue
    is empty.
    """
    # No await sits between the emptiness check and get_nowait(), so another
    # worker on the same event loop cannot take the item in between.
    while not queue.empty():
        next_query = queue.get_nowait()
        outcome = await send_search(client, next_query, rerank=rerank)
        results.append(outcome)
|
||||
|
||||
|
||||
async def run_load(
    *,
    base_url: str,
    queries: list[str],
    request_count: int,
    concurrency: int,
    rerank: bool,
    warmup: int,
    timeout_seconds: float,
) -> LoadSummary:
    """Execute a full load-test run against ``base_url`` and summarize it.

    Sends ``warmup`` unmeasured requests one at a time, then ``request_count``
    randomly chosen queries through ``concurrency`` worker tasks sharing one client.
    Only the worker phase is timed.
    """
    pool_limits = httpx.Limits(max_connections=concurrency, max_keepalive_connections=concurrency)
    async with httpx.AsyncClient(base_url=base_url, timeout=timeout_seconds, limits=pool_limits) as client:
        # Warmup outcomes are discarded.
        for _ in range(warmup):
            await send_search(client, pick_query(queries), rerank=rerank)

        # Queue every query up front so workers only ever pull, never wait.
        pending: asyncio.Queue[str] = asyncio.Queue()
        for _ in range(request_count):
            pending.put_nowait(pick_query(queries))

        collected: list[RequestResult] = []
        began = time.perf_counter()
        tasks = []
        for _ in range(concurrency):
            tasks.append(asyncio.create_task(worker(client, pending, collected, rerank=rerank)))
        await asyncio.gather(*tasks)
        elapsed = time.perf_counter() - began
        return summarize(collected, elapsed)
|
||||
|
||||
|
||||
def print_summary(summary: LoadSummary) -> None:
    """Write the summary to stdout as four ``key=value`` lines.

    The lines are, in order: request counts, wall time and throughput, latency
    statistics, and the per-status-code counts sorted by code.
    """
    counts_line = f"requests={summary.total} successes={summary.successes} failures={summary.failures}"
    typer.echo(counts_line)

    rate_line = f"wall={summary.wall_seconds:.2f}s throughput={summary.throughput_rps:.1f} req/s"
    typer.echo(rate_line)

    latency_line = (
        f"latency_ms p50={summary.latency_p50_ms:.1f} p90={summary.latency_p90_ms:.1f} "
        f"p95={summary.latency_p95_ms:.1f} p99={summary.latency_p99_ms:.1f} "
        f"mean={summary.latency_mean_ms:.1f} max={summary.latency_max_ms:.1f}"
    )
    typer.echo(latency_line)

    status_pairs = [f"{code}={count}" for code, count in sorted(summary.status_counts.items())]
    typer.echo("status " + " ".join(status_pairs))
|
||||
|
||||
|
||||
def main(
    *,
    base_url: Annotated[str, typer.Option(help="Base URL of the running service")] = "http://127.0.0.1:8070",
    # min=1: zero requests or zero workers would silently print an all-zero report.
    request_count: Annotated[int, typer.Option("--requests", min=1, help="Total requests to send")] = 200,
    concurrency: Annotated[int, typer.Option(min=1, help="Concurrent in-flight requests")] = 10,
    rerank: Annotated[bool, typer.Option(help="Request server-side reranking")] = False,
    # min=0: warmup may be disabled, but a negative count is a typo, not a request.
    warmup: Annotated[int, typer.Option(min=0, help="Warmup requests, not measured")] = 5,
    timeout_seconds: Annotated[float, typer.Option("--timeout", help="Per-request timeout seconds")] = 120.0,
    queries_file: Annotated[str | None, typer.Option(help="Query JSONL file (defaults to the shared set)")] = None,
    log_level: Annotated[str, typer.Option(help="Log level")] = "WARNING",
) -> None:
    """Load test the search endpoint and report latency and throughput."""
    # The docstring above is kept to one line on purpose: Typer shows it as --help text.
    configure_logger(log_level)
    # Load the queries before any traffic is sent; an empty set raises typer.BadParameter.
    queries = load_queries(queries_file)
    logger.info(
        "ebook_loadtest_start base_url=%s requests=%s concurrency=%s rerank=%s queries=%s",
        base_url,
        request_count,
        concurrency,
        rerank,
        len(queries),
    )
    summary = asyncio.run(
        run_load(
            base_url=base_url,
            queries=queries,
            request_count=request_count,
            concurrency=concurrency,
            rerank=rerank,
            warmup=warmup,
            timeout_seconds=timeout_seconds,
        )
    )
    print_summary(summary)
|
||||
print_summary(summary)
|
||||
|
||||
|
||||
# Script entry point: hand main() to Typer, which builds the CLI from its annotated parameters.
if __name__ == "__main__":
    typer.run(main)
|
||||
Reference in New Issue
Block a user