book search engine #18
@@ -93,13 +93,14 @@ def search_ebooks(
|
|||||||
|
|
||||||
logger.info("ebook_search_start query_length=%s rerank=%s", len(query), rerank)
|
logger.info("ebook_search_start query_length=%s rerank=%s", len(query), rerank)
|
||||||
timings: list[RuntimeStep] = []
|
timings: list[RuntimeStep] = []
|
||||||
retrieval_query, timing = timed_result("Query preparation", retrieval_query_from_text, query)
|
bm25_query, timing = timed_result("BM25 query preparation", retrieval_query_from_text, query)
|
||||||
timings.append(timing)
|
timings.append(timing)
|
||||||
retrieval, timing = timed_result(
|
retrieval, timing = timed_result(
|
||||||
"Hybrid retrieval",
|
"Hybrid retrieval",
|
||||||
parallel_retrieval,
|
parallel_retrieval,
|
||||||
engine,
|
engine,
|
||||||
retrieval_query,
|
query,
|
||||||
|
bm25_query,
|
||||||
config,
|
config,
|
||||||
)
|
)
|
||||||
timings.extend(retrieval.timings)
|
timings.extend(retrieval.timings)
|
||||||
@@ -130,7 +131,12 @@ def search_ebooks(
|
|||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
def parallel_retrieval(engine: Engine, query: str, config: EbookSearchConfig) -> RetrievalResponse:
|
def parallel_retrieval(
|
||||||
|
engine: Engine,
|
||||||
|
vector_query: str,
|
||||||
|
bm25_query: str,
|
||||||
|
config: EbookSearchConfig,
|
||||||
|
) -> RetrievalResponse:
|
||||||
"""Run vector and BM25 candidate retrieval concurrently with separate database sessions."""
|
"""Run vector and BM25 candidate retrieval concurrently with separate database sessions."""
|
||||||
with ThreadPoolExecutor(max_workers=2, thread_name_prefix="ebook-search") as executor:
|
with ThreadPoolExecutor(max_workers=2, thread_name_prefix="ebook-search") as executor:
|
||||||
vector_future = executor.submit(
|
vector_future = executor.submit(
|
||||||
@@ -138,14 +144,14 @@ def parallel_retrieval(engine: Engine, query: str, config: EbookSearchConfig) ->
|
|||||||
"Embedding + vector search",
|
"Embedding + vector search",
|
||||||
vector_candidates,
|
vector_candidates,
|
||||||
engine,
|
engine,
|
||||||
query,
|
vector_query,
|
||||||
config,
|
config,
|
||||||
)
|
)
|
||||||
bm25_future = executor.submit(
|
bm25_future = executor.submit(
|
||||||
timed_result,
|
timed_result,
|
||||||
"BM25 search",
|
"BM25 search",
|
||||||
bm25_candidates,
|
bm25_candidates,
|
||||||
query,
|
bm25_query,
|
||||||
config,
|
config,
|
||||||
)
|
)
|
||||||
vector_results, vector_timing = vector_future.result()
|
vector_results, vector_timing = vector_future.result()
|
||||||
@@ -196,7 +202,7 @@ def apply_rerank(
|
|||||||
|
|
||||||
|
|
||||||
def vector_candidates(engine: Engine, query: str, config: EbookSearchConfig) -> list[SearchResult]:
|
def vector_candidates(engine: Engine, query: str, config: EbookSearchConfig) -> list[SearchResult]:
|
||||||
"""Return pgvector cosine candidates for a normalized query."""
|
"""Return pgvector cosine candidates for a natural-language query."""
|
||||||
with Session(engine) as session:
|
with Session(engine) as session:
|
||||||
model = session.scalar(select(EbookEmbeddingModel).where(EbookEmbeddingModel.name == config.embedding_model))
|
model = session.scalar(select(EbookEmbeddingModel).where(EbookEmbeddingModel.name == config.embedding_model))
|
||||||
if model is None:
|
if model is None:
|
||||||
|
|||||||
Reference in New Issue
Block a user