Files
dotfiles/python/ebook_search/eval/dataset.py
T
Richie 6ae1ff1f5c feat(ebook-search): add load-test CLI for the search service
Add a Typer CLI script that drives POST /search on a running server at a
configurable concurrency and reports latency percentiles (p50/p90/p95/p99),
throughput, and HTTP status distribution. Queries are drawn from the shared
eval JSONL set so load testing and evaluation exercise the same questions.
2026-06-18 12:39:55 -04:00

48 lines
1.6 KiB
Python

"""Shared query set loading for evaluation and load testing.
Each JSONL record has a ``query`` and an optional reference ``answer``. ``answerable``
marks whether the query should be answerable from the library (false for out-of-corpus
"garbage" queries used to test the refusal path). Relevance for retrieval metrics is
labeled at source (book) granularity in ``relevant_sources``; source titles must match
``ebook_source.title`` values for the indexed corpus.
"""
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
# Default query set: ``data/queries.jsonl`` in the directory that contains this
# module. Used by ``load_gold_queries`` when no explicit path is given.
DEFAULT_QUERIES_PATH = Path(__file__).parent / "data" / "queries.jsonl"
@dataclass(frozen=True)
class GoldQuery:
"""One labeled query shared by the eval and load-test tools."""
query: str
answer: str | None
answerable: bool
relevant_sources: tuple[str, ...]
relevant_substrings: tuple[str, ...]
def _as_label_tuple(value: str | list[str] | tuple[str, ...] | None) -> tuple[str, ...]:
    """Normalize an optional JSON label field to a tuple of labels."""
    if value is None:
        # Field missing or explicitly ``null`` in the record: no labels.
        return ()
    if isinstance(value, str):
        # A bare string is one label; ``tuple("abc")`` would split it into
        # single characters and silently corrupt the labels.
        return (value,)
    return tuple(value)


def load_gold_queries(path: Path = DEFAULT_QUERIES_PATH) -> list[GoldQuery]:
    """Load labeled queries from a JSONL file.

    Each non-blank line is parsed as one JSON object. ``query`` is required;
    ``answer`` defaults to ``None``, ``answerable`` to ``True``, and
    ``relevant_sources`` / ``relevant_substrings`` to empty tuples (also when
    they are ``null``).

    Args:
        path: UTF-8 encoded JSONL file to read.

    Returns:
        One ``GoldQuery`` per non-blank line, in file order.

    Raises:
        OSError: If ``path`` cannot be opened or read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``query`` field.
    """
    queries: list[GoldQuery] = []
    # Iterate the file rather than ``str.splitlines()``: splitlines also breaks
    # on U+2028, U+0085, form feed, etc., which may legally appear unescaped
    # inside a JSON string and would cut a valid record in two.
    with path.open(encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                continue
            record = json.loads(stripped)
            queries.append(
                GoldQuery(
                    query=str(record["query"]),
                    answer=record.get("answer"),
                    answerable=bool(record.get("answerable", True)),
                    relevant_sources=_as_label_tuple(record.get("relevant_sources")),
                    relevant_substrings=_as_label_tuple(record.get("relevant_substrings")),
                )
            )
    return queries