diff --git a/python/ebook_search/config.py b/python/ebook_search/config.py index dd1cd7b..6a1e736 100644 --- a/python/ebook_search/config.py +++ b/python/ebook_search/config.py @@ -42,7 +42,7 @@ class RerankConfig(BaseSettings): model_config = SettingsConfigDict(env_prefix="EBOOK_SEARCH_RERANK_", frozen=True, protected_namespaces=()) - enabled: bool = False + enabled: bool = True base_url: str = "http://192.168.90.25:8001" model: str = "qwen3-reranker-06b" candidates: int = 24 diff --git a/python/ebook_search/search.py b/python/ebook_search/search.py index db7b1eb..6e3e8d1 100644 --- a/python/ebook_search/search.py +++ b/python/ebook_search/search.py @@ -4,6 +4,7 @@ from __future__ import annotations import logging import re +from collections import defaultdict from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass, replace from typing import TYPE_CHECKING @@ -93,14 +94,11 @@ def search_ebooks( logger.info("ebook_search_start query_length=%s rerank=%s", len(query), rerank) timings: list[RuntimeStep] = [] - bm25_query, timing = timed_result("BM25 query preparation", retrieval_query_from_text, query) - timings.append(timing) retrieval, timing = timed_result( "Hybrid retrieval", parallel_retrieval, engine, query, - bm25_query, config, ) timings.extend(retrieval.timings) @@ -134,8 +132,7 @@ def search_ebooks( def parallel_retrieval( engine: Engine, - vector_query: str, - bm25_query: str, + query: str, config: EbookSearchConfig, ) -> RetrievalResponse: """Run vector and BM25 candidate retrieval concurrently with separate database sessions.""" @@ -145,14 +142,14 @@ def parallel_retrieval( "Embedding + vector search", vector_candidates, engine, - vector_query, + query, config, ) bm25_future = executor.submit( timed_result, "BM25 search", bm25_candidates, - bm25_query, + query, config, ) vector_results, vector_timing = vector_future.result() @@ -263,7 +260,8 @@ def bm25_candidates(query: str, config: EbookSearchConfig) -> list[SearchResult] logger.info("ebook_bm25_search_complete corpus=0 candidates=0") return [] - scored_records = score_bm25_corpus(query, corpus, limit=config.bm25_candidate_limit) + bm25_query = retrieval_query_from_text(query) + scored_records = score_bm25_corpus(bm25_query, corpus, limit=config.bm25_candidate_limit) results = [ replace(search_result_from_row(record), score=score, vector_score=None, bm25_score=score) for record, score in scored_records @@ -282,24 +280,23 @@ def bm25_candidates(query: str, config: EbookSearchConfig) -> list[SearchResult] def reciprocal_rank_fusion( vector_results: list[SearchResult], lexical_results: list[SearchResult], - *, - rank_constant: int = 60, + rank_constant: int, ) -> list[SearchResult]: """Fuse vector and lexical rankings with Reciprocal Rank Fusion.""" by_chunk: dict[int, SearchResult] = {} - scores: dict[int, float] = {} + scores: defaultdict[int, float] = defaultdict(float) vector_scores: dict[int, float] = {} bm25_scores: dict[int, float] = {} for rank, result in enumerate(vector_results, start=1): by_chunk.setdefault(result.chunk_id, result) vector_scores[result.chunk_id] = result.vector_score if result.vector_score is not None else result.score - scores[result.chunk_id] = scores.get(result.chunk_id, 0.0) + (1 / (rank_constant + rank)) + scores[result.chunk_id] += 1 / (rank_constant + rank) for rank, result in enumerate(lexical_results, start=1): by_chunk.setdefault(result.chunk_id, result) bm25_scores[result.chunk_id] = result.bm25_score if result.bm25_score is not None else result.score - scores[result.chunk_id] = scores.get(result.chunk_id, 0.0) + (1 / (rank_constant + rank)) + scores[result.chunk_id] += 1 / (rank_constant + rank) return sorted( (