"""vLLM-backed optional reranking."""

from __future__ import annotations

import logging
import math
from dataclasses import dataclass, replace
from typing import TYPE_CHECKING

from python.ebook_search.llm_interface import request_rerank

if TYPE_CHECKING:
    from python.ebook_search.config import RerankConfig
    from python.ebook_search.search import SearchResult

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class RerankResult:
    """A relevance score for one candidate chunk."""

    # Identifier of the candidate chunk the score belongs to.
    chunk_id: int
    # Relevance score; values produced by this module are clamped to 0.0-1.0.
    score: float


def rerank_chunks(query: str, candidates: list[SearchResult], config: RerankConfig) -> list[SearchResult]:
    """Rerank candidates with a vLLM rerank endpoint.

    Each returned result is a copy of the input candidate whose ``score`` is
    the combined value from ``final_rerank_score`` and whose ``rerank_score``
    is the clamped relevance score. Results are sorted by ``score``,
    highest first.

    Args:
        query: The search query the candidates are scored against.
        candidates: Retrieved chunks to reorder.
        config: Rerank settings; ``base_url`` and ``model`` are logged and the
            object is passed through to the rerank request.

    Returns:
        A new list of re-scored copies of ``candidates``; an empty list when
        ``candidates`` is empty (no request is made in that case).
    """
    if not candidates:
        return []

    logger.info(
        "ebook_rerank_request_start base_url=%s model=%s candidates=%s",
        config.base_url,
        config.model,
        len(candidates),
    )
    scores = score_candidates(query, candidates, config)
    # sorted() is stable, so candidates with equal final scores (for example
    # when every rerank score fell back to 0.0) keep their incoming order.
    results = sorted(
        (
            replace(
                result,
                score=final_rerank_score(result, scores[result.chunk_id].score, candidates),
                rerank_score=scores[result.chunk_id].score,
            )
            for result in candidates
        ),
        key=lambda result: result.score,
        reverse=True,
    )
    logger.info(
        "ebook_rerank_request_complete base_url=%s model=%s candidates=%s",
        config.base_url,
        config.model,
        len(results),
    )
    return results


def score_candidates(
    query: str,
    candidates: list[SearchResult],
    config: RerankConfig,
) -> dict[int, RerankResult]:
    """Score candidate chunks with the configured rerank API.

    Args:
        query: The search query sent to the rerank API.
        candidates: Chunks whose ``text`` is sent for scoring, in order.
        config: Rerank settings forwarded to ``request_rerank``.

    Returns:
        A mapping from chunk id to its rerank result. Every candidate has an
        entry; all entries are 0.0 when ``request_rerank`` returns ``None``.
    """
    body = request_rerank(query, [candidate.text for candidate in candidates], config)
    # NOTE(review): None presumably signals a failed or disabled request --
    # confirm against request_rerank. Either way, fall back to neutral scores.
    if body is None:
        return zero_rerank_scores(candidates)

    scores = parse_vllm_scores(body, candidates)
    for result in scores.values():
        logger.debug("ebook_rerank_candidate_scored chunk_id=%s score=%s", result.chunk_id, result.score)
    return scores


def parse_vllm_scores(body: object, candidates: list[SearchResult]) -> dict[int, RerankResult]:
    """Parse vLLM rerank scores into chunk-id keyed results.

    The response is expected to be a dict with a list under ``"results"`` (or
    ``"data"``), each item a dict carrying an ``"index"`` into ``candidates``
    and a ``"relevance_score"`` (or ``"score"``). Malformed items are skipped,
    so the affected candidates keep a score of 0.0.

    Args:
        body: The decoded rerank response; any type is tolerated.
        candidates: The candidates that were sent, in request order.

    Returns:
        A mapping from chunk id to its clamped rerank result, with an entry
        for every candidate.
    """
    if not isinstance(body, dict):
        logger.debug("ebook_rerank_response_not_object", extra={"response": body})
        return zero_rerank_scores(candidates)

    results = body.get("results") or body.get("data")
    if not isinstance(results, list):
        logger.debug("ebook_rerank_response_missing_results", extra={"response": body})
        return zero_rerank_scores(candidates)

    # Start from zeros so candidates missing from the response still get an entry.
    scores = zero_rerank_scores(candidates)
    for item in results:
        if not isinstance(item, dict):
            continue
        index = item.get("index")
        score = item.get("relevance_score", item.get("score"))
        # bool is a subclass of int; a JSON true/false is neither a valid
        # position nor a valid score, so reject it explicitly.
        if isinstance(index, bool) or not isinstance(index, int):
            continue
        if not 0 <= index < len(candidates):
            continue
        if isinstance(score, bool) or not isinstance(score, int | float):
            continue
        # NaN survives min()/max() clamping and would make the final sort
        # order undefined, so treat it like any other malformed score.
        if isinstance(score, float) and math.isnan(score):
            continue
        chunk_id = candidates[index].chunk_id
        # Clamp before converting: float() on a very large int raises
        # OverflowError, whereas the clamped value is always within 0..1.
        scores[chunk_id] = RerankResult(chunk_id=chunk_id, score=float(clamp_score(score)))
    return scores


def zero_rerank_scores(candidates: list[SearchResult]) -> dict[int, RerankResult]:
    """Return zero relevance scores for all candidate chunks."""
    return {candidate.chunk_id: RerankResult(chunk_id=candidate.chunk_id, score=0.0) for candidate in candidates}


def clamp_score(score: float) -> float:
    """Clamp a rerank score into the supported 0.0 to 1.0 range."""
    return min(max(score, 0.0), 1.0)


def final_rerank_score(result: SearchResult, rerank_score: float, candidates: list[SearchResult]) -> float:
    """Combine rerank relevance with normalized hybrid retrieval evidence.

    The combined score is the product of ``rerank_score`` and the candidate's
    min-max normalized hybrid score, so the candidate with the lowest hybrid
    score in a non-uniform set always ends up at 0.0.
    """
    return rerank_score * normalized_hybrid_score(result, candidates)


def _hybrid_score(candidate: SearchResult) -> float:
    """Return the candidate's fused score, or its plain score when unset."""
    return candidate.fused_score if candidate.fused_score is not None else candidate.score


def normalized_hybrid_score(result: SearchResult, candidates: list[SearchResult]) -> float:
    """Normalize a candidate hybrid score against the rerank candidate set.

    Uses min-max normalization over the hybrid scores of ``candidates``.

    Args:
        result: The candidate to normalize.
        candidates: The full candidate set that defines the score range.

    Returns:
        ``(score - low) / (high - low)``, or 1.0 when every candidate has the
        same hybrid score.

    Raises:
        ValueError: If ``candidates`` is empty.
    """
    hybrid_scores = [_hybrid_score(candidate) for candidate in candidates]
    low = min(hybrid_scores)
    high = max(hybrid_scores)
    # A uniform set carries no ranking signal; avoid dividing by zero.
    if high == low:
        return 1.0
    return (_hybrid_score(result) - low) / (high - low)