6ae1ff1f5c
Add a Typer CLI script that drives POST /search on a running server at a configurable concurrency and reports latency percentiles (p50/p90/p95/p99), throughput, and HTTP status distribution. Queries are drawn from the shared eval JSONL set so load testing and evaluation exercise the same questions.
80 lines
2.5 KiB
Python
80 lines
2.5 KiB
Python
"""Tests for the load-test runner and its statistics helpers."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from typing import TYPE_CHECKING
|
|
|
|
import pytest
|
|
|
|
from python.ebook_search.loadtest import RequestResult, load_queries, percentile, run_load, summarize
|
|
|
|
if TYPE_CHECKING:
|
|
from pytest_mock import MockerFixture
|
|
|
|
|
|
def test_load_queries_reads_shared_set() -> None:
    """Check that ``load_queries(None)`` returns 70 entries, each a non-empty string.

    NOTE(review): ``None`` presumably selects the default shared query set; confirm in ``load_queries``.
    """
    loaded = load_queries(None)
    assert len(loaded) == 70
    # Every entry must be a string and must not be empty.
    for entry in loaded:
        assert isinstance(entry, str) and entry
|
|
|
|
|
|
def test_percentile_interpolates() -> None:
    """Check ``percentile`` on a four-sample list and on an empty list.

    The 50th and 90th percentiles fall between samples (25.0 and 37.0), the 0th and 100th
    equal the smallest and largest sample, and an empty input gives 0.0.
    """
    samples = [10.0, 20.0, 30.0, 40.0]

    # Ranks that land between two samples are compared approximately.
    between_samples = ((50, 25.0), (90, 37.0))
    for rank, expected in between_samples:
        assert percentile(samples, rank) == pytest.approx(expected)

    # The extremes are compared exactly.
    assert percentile(samples, 0) == 10.0
    assert percentile(samples, 100) == 40.0

    # No data at all.
    assert percentile([], 95) == 0.0
|
|
|
|
|
|
def test_summarize_counts_and_throughput() -> None:
    """Check ``summarize`` on three successful results and one failure over a 2-second wall time."""
    rows = (
        (200, 10.0, True),
        (200, 20.0, True),
        (200, 30.0, True),
        (500, 40.0, False),
    )
    outcomes = [
        RequestResult(status_code=code, latency_ms=latency, ok=succeeded)
        for code, latency, succeeded in rows
    ]

    report = summarize(outcomes, wall_seconds=2.0)

    assert report.total == 4
    assert report.successes == 3
    assert report.failures == 1
    # 4 requests over 2.0 seconds.
    assert report.throughput_rps == pytest.approx(2.0)
    assert report.latency_max_ms == 40.0
    assert report.status_counts == {200: 3, 500: 1}
|
|
|
|
|
|
def test_summarize_handles_empty() -> None:
    """Check that ``summarize`` returns zeroed fields for no results and a zero wall time."""
    report = summarize([], wall_seconds=0.0)

    assert report.total == 0
    assert report.throughput_rps == 0.0
    assert report.latency_p95_ms == 0.0
|
|
|
|
|
|
def test_run_load_aggregates_mocked_responses(mocker: MockerFixture) -> None:
    """Run ``run_load`` against a mocked ``httpx.AsyncClient`` and check the aggregated summary.

    Every mocked POST returns a successful 200 response. The test asks for 4 requests plus
    1 warmup, then checks the summary counts and the number of awaited POST calls.
    """
    fake_client = mocker.MagicMock()
    # Entering the client's async context yields the same mock.
    # NOTE(review): presumably run_load opens the client with `async with`; confirm in run_load.
    fake_client.__aenter__.return_value = fake_client
    ok_response = mocker.Mock(status_code=200, is_success=True)
    fake_client.post = mocker.AsyncMock(return_value=ok_response)
    mocker.patch("python.ebook_search.loadtest.httpx.AsyncClient", return_value=fake_client)

    pending_run = run_load(
        base_url="http://test",
        queries=["q1", "q2"],
        request_count=4,
        concurrency=2,
        rerank=False,
        warmup=1,
        timeout_seconds=1.0,
    )
    report = asyncio.run(pending_run)

    assert report.total == 4
    assert report.successes == 4
    assert report.failures == 0
    assert report.status_counts == {200: 4}
    # The warmup request is not in the summary, but it is still POSTed: 1 warmup + 4 measured.
    assert fake_client.post.await_count == 5
|