dotfiles/python/ebook_search/eval/dataset.py

"""Shared query set loading for evaluation and load testing.

Each JSONL record has a ``query`` and an optional reference ``answer``. ``answerable``
marks whether the query should be answerable from the library (false for out-of-corpus
"garbage" queries used to test the refusal path). Relevance for retrieval metrics is
labeled at source (book) granularity in ``relevant_sources``; source titles must match
``ebook_source.title`` values for the indexed corpus.
"""

from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path

# Bundled query set, resolved relative to this module rather than the working directory.
DEFAULT_QUERIES_PATH = Path(__file__).parent.joinpath("data", "queries.jsonl")


@dataclass(frozen=True)
class GoldQuery:
    """One labeled query shared by the eval and load-test tools.

    Instances are immutable (frozen dataclass). The label fields are tuples
    rather than lists so that every field value is itself immutable.
    """

    # The query text.
    query: str
    # Optional reference answer; ``None`` when the record provides none.
    answer: str | None
    # Whether the query should be answerable from the library; false for
    # out-of-corpus queries that exercise the refusal path.
    answerable: bool
    # Titles of the sources (books) labeled relevant for retrieval metrics.
    relevant_sources: tuple[str, ...]
    # NOTE(review): presumably text snippets expected to appear in relevant
    # results -- the matching semantics are not defined in this module; confirm
    # against the consumer.
    relevant_substrings: tuple[str, ...]


def _as_label_tuple(value: str | list[str] | tuple[str, ...] | None) -> tuple[str, ...]:
    """Normalize a JSON label field to a tuple of strings."""
    if value is None:
        # An explicit JSON ``null`` is treated the same as an absent key.
        return ()
    if isinstance(value, str):
        # ``tuple("abc")`` would explode the label into single characters.
        return (value,)
    return tuple(value)


def load_gold_queries(path: Path = DEFAULT_QUERIES_PATH) -> list[GoldQuery]:
    """Load labeled queries from a JSONL file.

    Blank (whitespace-only) lines are skipped. Every other line must be a JSON
    object with a ``query`` key; ``answer``, ``answerable`` (default ``True``),
    ``relevant_sources`` and ``relevant_substrings`` (default empty) are
    optional. A label field given as a single string is treated as a
    one-element list.

    Args:
        path: JSONL file to read, decoded as UTF-8.

    Returns:
        The parsed queries, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If a non-blank line is not valid JSON; the
            message is prefixed with the file path and line number.
        KeyError: If a record has no ``query`` key.
    """
    queries: list[GoldQuery] = []
    # Iterate the file handle instead of ``str.splitlines()``: JSONL records are
    # delimited by newlines only, whereas ``splitlines`` also breaks on
    # characters such as U+2028/U+2029/U+0085 that may legally appear unescaped
    # inside a JSON string, which would cut a record in half.
    with path.open(encoding="utf-8") as handle:
        for lineno, line in enumerate(handle, start=1):
            stripped = line.strip()
            if not stripped:
                continue
            try:
                record = json.loads(stripped)
            except json.JSONDecodeError as err:
                # Same exception type, but say which record is broken.
                raise json.JSONDecodeError(
                    f"{path}:{lineno}: {err.msg}", err.doc, err.pos
                ) from err
            queries.append(
                GoldQuery(
                    query=str(record["query"]),
                    answer=record.get("answer"),
                    answerable=bool(record.get("answerable", True)),
                    relevant_sources=_as_label_tuple(record.get("relevant_sources")),
                    relevant_substrings=_as_label_tuple(record.get("relevant_substrings")),
                )
            )
    return queries