6ae1ff1f5c
Add a Typer CLI script that drives POST /search on a running server at a configurable concurrency and reports latency percentiles (p50/p90/p95/p99), throughput, and HTTP status distribution. Queries are drawn from the shared eval JSONL set so load testing and evaluation exercise the same questions.
48 lines
1.6 KiB
Python
48 lines
1.6 KiB
Python
"""Shared query set loading for evaluation and load testing.
|
|
|
|
Each JSONL record has a ``query`` and an optional reference ``answer``. ``answerable``
|
|
marks whether the query should be answerable from the library (false for out-of-corpus
|
|
"garbage" queries used to test the refusal path). Relevance for retrieval metrics is
|
|
labeled at source (book) granularity in ``relevant_sources``; source titles must match
|
|
``ebook_source.title`` values for the indexed corpus.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
# Default query file: ``data/queries.jsonl`` inside the directory that holds this
# module. Used as the default ``path`` argument of ``load_gold_queries``.
DEFAULT_QUERIES_PATH = Path(__file__).parent / "data" / "queries.jsonl"
|
|
|
|
|
|
@dataclass(frozen=True)
class GoldQuery:
    """A single labeled query record used by both the eval and load-test tools.

    Instances are immutable (the dataclass is frozen).

    Attributes:
        query: The query text.
        answer: Optional reference answer; ``None`` when the record has none.
        answerable: Whether the query should be answerable from the library
            (false for out-of-corpus queries that exercise the refusal path).
        relevant_sources: Source (book) titles labeled as relevant to the query.
        relevant_substrings: Substrings labeled for the query.
            NOTE(review): presumably text expected in relevant passages --
            confirm against the evaluation code that consumes this field.
    """

    # Field order is part of the positional constructor signature; keep it stable.
    query: str
    answer: str | None
    answerable: bool
    relevant_sources: tuple[str, ...]
    relevant_substrings: tuple[str, ...]
|
|
|
|
|
|
def load_gold_queries(path: Path = DEFAULT_QUERIES_PATH) -> list[GoldQuery]:
    """Load labeled queries from a JSONL file.

    Every non-blank line must hold one JSON object. Only ``query`` is required:
    ``answer`` defaults to ``None``, ``answerable`` to ``True``, and
    ``relevant_sources`` / ``relevant_substrings`` to empty tuples.

    Args:
        path: JSONL file to read; it is decoded as UTF-8.

    Returns:
        One ``GoldQuery`` per non-blank line, in file order.

    Raises:
        OSError: If ``path`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``query`` key.
    """
    queries: list[GoldQuery] = []
    # Iterate the file object instead of calling ``str.splitlines()`` on the
    # whole text: ``splitlines`` also breaks on U+0085, U+2028 and U+2029, which
    # may appear unescaped inside a JSON string and would cut a record in half.
    with path.open(encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            record = json.loads(stripped)
            queries.append(
                GoldQuery(
                    query=str(record["query"]),
                    answer=record.get("answer"),
                    answerable=bool(record.get("answerable", True)),
                    relevant_sources=_as_label_tuple(record.get("relevant_sources")),
                    relevant_substrings=_as_label_tuple(
                        record.get("relevant_substrings")
                    ),
                )
            )
    return queries


def _as_label_tuple(value: str | list[str] | None) -> tuple[str, ...]:
    """Normalize an optional JSON label field to a tuple of strings."""
    if value is None:
        # Key missing or explicit JSON ``null``: no labels.
        return ()
    if isinstance(value, str):
        # A bare string is one label; ``tuple("abc")`` would split it into characters.
        return (value,)
    return tuple(value)
|