6ae1ff1f5c
Add a Typer CLI script that drives POST /search on a running server at a configurable concurrency and reports latency percentiles (p50/p90/p95/p99), throughput, and HTTP status distribution. Queries are drawn from the shared eval JSONL set so load testing and evaluation exercise the same questions.
117 lines
4.0 KiB
Python
117 lines
4.0 KiB
Python
"""Search routes for the EPUB search web UI."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from dataclasses import replace
|
|
from time import perf_counter
|
|
from typing import TYPE_CHECKING, Annotated
|
|
|
|
from fastapi import APIRouter, Form, Request
|
|
from fastapi.responses import HTMLResponse
|
|
|
|
from python.ebook_search.answer import answer_query
|
|
from python.ebook_search.api.dependencies import ( # noqa: TC001 FastAPI resolves these annotated dependencies at runtime
|
|
AppConfig,
|
|
AppEngine,
|
|
)
|
|
from python.ebook_search.api.web import templates
|
|
from python.ebook_search.guardrails import (
|
|
CitationReport,
|
|
is_confident,
|
|
retrieval_confidence,
|
|
validate_citations,
|
|
)
|
|
from python.ebook_search.search import SearchResponse, search_ebooks
|
|
from python.ebook_search.timing import runtime_step_from_start
|
|
|
|
if TYPE_CHECKING:
|
|
from python.ebook_search.config import EbookSearchConfig
|
|
|
|
# Module-level logger named after this module, shared by every function below.
logger = logging.getLogger(__name__)
|
|
|
|
# Router for this module's endpoints; the ``search`` handler registers itself on it.
router = APIRouter()
|
|
|
|
|
|
def build_answer(
    query: str,
    response: SearchResponse,
    config: EbookSearchConfig,
) -> tuple[str, bool, CitationReport | None]:
    """Produce the answer text that accompanies a set of search results.

    Returns an ``(answer, low_confidence, citation_report)`` tuple.

    ``answer`` is a fixed notice rather than generated text when answer
    generation is disabled in ``config``, when ``is_confident`` rejects the
    retrieved results, or when ``answer_query`` raises ``RuntimeError``.
    ``low_confidence`` is true only in the second of those cases.
    ``citation_report`` is set only when an answer was generated, citation
    validation is enabled, and at least one result exists; otherwise it is
    ``None``.
    """
    if not config.answer_enabled:
        logger.info("ebook_answer_skipped_disabled")
        return "Answer generation is disabled. Source chunks are shown below.", False, None

    chunks = response.results

    # Weak retrieval: skip generation entirely and flag the response as low confidence.
    if not is_confident(chunks, config):
        logger.info(
            "ebook_answer_low_confidence confidence=%.4f threshold=%.4f",
            retrieval_confidence(chunks),
            config.min_retrieval_confidence,
        )
        return (
            "Retrieval confidence is low for this query, so answer generation was skipped. "
            "Source chunks are shown below.",
            True,
            None,
        )

    try:
        generated = answer_query(query, chunks, config)
    except RuntimeError as error:
        # A failed generation degrades to a notice so the source chunks still render.
        logger.warning("ebook_answer_request_failed_falling_back error=%s", error)
        return "Answer generation failed. Source chunks are still shown below.", False, None

    # Validation needs both the feature switch and at least one chunk to cite.
    if not (config.validate_citations_enabled and chunks):
        return generated, False, None

    report = validate_citations(generated, len(chunks))
    has_citation_issue = report.invalid or not report.grounded
    if has_citation_issue:
        # Problems are logged only; the answer and its report are returned either way.
        logger.warning(
            "ebook_answer_citation_issue invalid=%s grounded=%s",
            report.invalid,
            report.grounded,
        )
    return generated, False, report
|
|
|
|
|
|
@router.post("/search", response_class=HTMLResponse)
def search(
    request: Request,
    config: AppConfig,
    engine: AppEngine,
    query: Annotated[str, Form()],
    rerank: Annotated[str | None, Form()] = None,
) -> HTMLResponse:
    """Run a search and render HTMX results.

    Calls ``search_ebooks`` for the submitted ``query``, builds the answer via
    ``build_answer``, appends the answer step to the response timings, logs
    every timing, and renders ``partials/results.html``.

    Reranking is requested only when the ``rerank`` form field is exactly
    ``"true"``. If ``search_ebooks`` raises, the exception is logged and
    ``partials/error.html`` is rendered with status 500 and the exception
    text as its message.
    """
    # The form field arrives as text; only the literal "true" switches reranking on.
    use_rerank = rerank == "true"
    try:
        found = search_ebooks(engine, query, config, rerank=use_rerank)
    except Exception as error:
        logger.exception("ebook_search_request_failed")
        failure_context = {"message": str(error)}
        return templates.TemplateResponse(request, "partials/error.html", failure_context, status_code=500)

    # Time the answer step separately so it shows up next to the search timings.
    started = perf_counter()
    answer, low_confidence, citation_report = build_answer(query, found, config)
    step_label = "Answer generation" if config.answer_enabled else "Answer skipped"
    answer_step = runtime_step_from_start(step_label, started)

    # Build a copy of the response carrying the extra timing entry.
    timed_response = replace(found, timings=(*found.timings, answer_step))

    for timing in timed_response.timings:
        logger.info("ebook_search_timing step=%r runtime_ms=%.1f", timing.name, timing.duration_ms)
    logger.info(
        "ebook_search_request_complete results=%s rank_label=%s runtime_ms=%.1f",
        len(timed_response.results),
        timed_response.rank_label,
        timed_response.total_runtime_ms,
    )

    context = {
        "answer": answer,
        "response": timed_response,
        "low_confidence": low_confidence,
        "citation_report": citation_report,
    }
    return templates.TemplateResponse(request, "partials/results.html", context)
|