feat(ebook-search): add load-test CLI for the search service

Add a Typer CLI script that drives POST /search on a running server at a
configurable concurrency and reports latency percentiles (p50/p90/p95/p99),
throughput, and HTTP status distribution. Queries are drawn from the shared
eval JSONL set so load testing and evaluation exercise the same questions.
This commit is contained in:
2026-06-18 12:39:55 -04:00
parent dbc6b5b53b
commit 6ae1ff1f5c
7 changed files with 436 additions and 0 deletions
+79
View File
@@ -0,0 +1,79 @@
"""Tests for the load-test runner and its statistics helpers."""
from __future__ import annotations
import asyncio
from typing import TYPE_CHECKING
import pytest
from python.ebook_search.loadtest import RequestResult, load_queries, percentile, run_load, summarize
if TYPE_CHECKING:
from pytest_mock import MockerFixture
def test_load_queries_reads_shared_set() -> None:
    """Calling ``load_queries(None)`` yields 70 non-empty string queries.

    NOTE(review): ``None`` presumably selects the bundled shared eval set;
    the count of 70 is tied to that file's current contents.
    """
    loaded = load_queries(None)
    assert len(loaded) == 70
    # Every entry must be a string and must not be empty.
    assert all(isinstance(entry, str) and entry for entry in loaded)
def test_percentile_interpolates() -> None:
    """Check ``percentile`` between samples, at both extremes, and on no data."""
    sample = [10.0, 20.0, 30.0, 40.0]

    # Ranks that land between two samples are expected to be interpolated.
    for rank, expected in ((50, 25.0), (90, 37.0)):
        assert percentile(sample, rank) == pytest.approx(expected)

    # The 0th and 100th percentiles are exactly the smallest and largest values.
    assert percentile(sample, 0) == 10.0
    assert percentile(sample, 100) == 40.0

    # An empty sample reports 0.0.
    assert percentile([], 95) == 0.0
def test_summarize_counts_and_throughput() -> None:
    """Summarize three successes and one failure measured over two seconds."""
    # (status_code, latency_ms, ok) for each simulated request.
    rows = (
        (200, 10.0, True),
        (200, 20.0, True),
        (200, 30.0, True),
        (500, 40.0, False),
    )
    outcomes = [
        RequestResult(status_code=code, latency_ms=latency, ok=succeeded)
        for code, latency, succeeded in rows
    ]

    report = summarize(outcomes, wall_seconds=2.0)

    assert report.total == 4
    assert report.successes == 3
    assert report.failures == 1
    # 4 requests over 2.0 wall seconds.
    assert report.throughput_rps == pytest.approx(2.0)
    assert report.latency_max_ms == 40.0
    assert report.status_counts == {200: 3, 500: 1}
def test_summarize_handles_empty() -> None:
    """With no results and zero wall time, the summary fields are all zero."""
    report = summarize([], wall_seconds=0.0)

    assert report.total == 0
    assert report.throughput_rps == 0.0
    assert report.latency_p95_ms == 0.0
def test_run_load_aggregates_mocked_responses(mocker: MockerFixture) -> None:
    """Run ``run_load`` against a mocked ``httpx.AsyncClient`` and check totals.

    The patched client returns a 200 response for every ``post`` call, so all
    measured requests should count as successes.
    """
    ok_response = mocker.Mock(status_code=200, is_success=True)

    # The fake client acts as its own async context manager.
    fake_client = mocker.MagicMock()
    fake_client.post = mocker.AsyncMock(return_value=ok_response)
    fake_client.__aenter__.return_value = fake_client
    mocker.patch(
        "python.ebook_search.loadtest.httpx.AsyncClient",
        return_value=fake_client,
    )

    load_kwargs = {
        "base_url": "http://test",
        "queries": ["q1", "q2"],
        "request_count": 4,
        "concurrency": 2,
        "rerank": False,
        "warmup": 1,
        "timeout_seconds": 1.0,
    }
    report = asyncio.run(run_load(**load_kwargs))

    assert report.total == 4
    assert report.successes == 4
    assert report.failures == 0
    assert report.status_counts == {200: 4}
    # One unmeasured warmup request plus the four measured requests.
    assert fake_client.post.await_count == 5