6ae1ff1f5c
Add a Typer CLI script that drives POST /search on a running server at a configurable concurrency and reports latency percentiles (p50/p90/p95/p99), throughput, and HTTP status distribution. Queries are drawn from the shared eval JSONL set so load testing and evaluation exercise the same questions.
19 lines
689 B
Python
19 lines
689 B
Python
"""Tests for the shared query/gold set loader."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from python.ebook_search.eval.dataset import load_gold_queries
|
|
|
|
|
|
def test_default_query_set_counts() -> None:
    """Check the size and shape of the default gold query set.

    The set returned by ``load_gold_queries()`` must contain 70 entries,
    50 of which are flagged answerable, leaving 20 that are not. Every
    entry must have a truthy ``query`` value.
    """
    gold_set = load_gold_queries()
    with_answer = [entry for entry in gold_set if entry.answerable]
    total = len(gold_set)
    answerable_total = len(with_answer)

    assert total == 70
    assert answerable_total == 50
    assert total - answerable_total == 20

    # No entry may come with an empty query.
    assert all(entry.query for entry in gold_set)

    # Answerable queries carry at least one source; garbage queries carry none.
    assert all(entry.relevant_sources for entry in with_answer)
    assert all(
        not entry.relevant_sources for entry in gold_set if not entry.answerable
    )
|