fix(ebook-search): skip comment lines in gold query loader and realign tests
treefmt / nix fmt (pull_request) Successful in 10s
pytest / pytest (pull_request) Successful in 31s
build_systems / build-brain (pull_request) Successful in 52s
build_systems / build-bob (pull_request) Successful in 52s
build_systems / build-jeeves (pull_request) Successful in 2m43s
build_systems / build-leviathan (pull_request) Successful in 59s
build_systems / build-rhapsody-in-green (pull_request) Successful in 1m5s
treefmt / nix fmt (pull_request) Successful in 10s
pytest / pytest (pull_request) Successful in 31s
build_systems / build-brain (pull_request) Successful in 52s
build_systems / build-bob (pull_request) Successful in 52s
build_systems / build-jeeves (pull_request) Successful in 2m43s
build_systems / build-leviathan (pull_request) Successful in 59s
build_systems / build-rhapsody-in-green (pull_request) Successful in 1m5s
load_gold_queries now skips `//` comment lines in addition to blank lines, so the
committed section separator in queries.jsonl no longer breaks dataset/load-test loading.
Update tests left stale by the search refactor (6bc3011):
- pass the now-required rank_constant to reciprocal_rank_fusion
- expect bm25_candidates to receive the full query, and remove the assertion
on the deleted "BM25 query preparation" timing step
- assert reranking is enabled by default
This commit is contained in:
@@ -28,11 +28,11 @@ class GoldQuery:
|
|||||||
|
|
||||||
|
|
||||||
def load_gold_queries(path: Path = DEFAULT_QUERIES_PATH) -> list[GoldQuery]:
|
def load_gold_queries(path: Path = DEFAULT_QUERIES_PATH) -> list[GoldQuery]:
|
||||||
"""Load labeled queries from a JSONL file."""
|
"""Load labeled queries from a JSONL file. Blank lines and ``//`` comment lines are skipped."""
|
||||||
queries: list[GoldQuery] = []
|
queries: list[GoldQuery] = []
|
||||||
for line in path.read_text(encoding="utf-8").splitlines():
|
for line in path.read_text(encoding="utf-8").splitlines():
|
||||||
stripped = line.strip()
|
stripped = line.strip()
|
||||||
if not stripped:
|
if not stripped or stripped.startswith("//"):
|
||||||
continue
|
continue
|
||||||
record = json.loads(stripped)
|
record = json.loads(stripped)
|
||||||
queries.append(
|
queries.append(
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ def test_reciprocal_rank_fusion_combines_vector_and_bm25_rankings() -> None:
|
|||||||
SearchResult(chunk_id=3, text="c", source_title="C", score=2.1, bm25_score=2.1),
|
SearchResult(chunk_id=3, text="c", source_title="C", score=2.1, bm25_score=2.1),
|
||||||
]
|
]
|
||||||
|
|
||||||
fused = reciprocal_rank_fusion(vector_results, lexical_results)
|
fused = reciprocal_rank_fusion(vector_results, lexical_results, rank_constant=60)
|
||||||
|
|
||||||
assert [result.chunk_id for result in fused] == [2, 1, 3]
|
assert [result.chunk_id for result in fused] == [2, 1, 3]
|
||||||
assert fused[0].rank_source == "Hybrid"
|
assert fused[0].rank_source == "Hybrid"
|
||||||
@@ -146,7 +146,7 @@ def test_reciprocal_rank_fusion_marks_hybrid_source() -> None:
|
|||||||
vector_results = [SearchResult(chunk_id=1, text="a", source_title="A")]
|
vector_results = [SearchResult(chunk_id=1, text="a", source_title="A")]
|
||||||
lexical_results = [SearchResult(chunk_id=2, text="b", source_title="B")]
|
lexical_results = [SearchResult(chunk_id=2, text="b", source_title="B")]
|
||||||
|
|
||||||
fused = reciprocal_rank_fusion(vector_results, lexical_results)
|
fused = reciprocal_rank_fusion(vector_results, lexical_results, rank_constant=60)
|
||||||
|
|
||||||
assert {result.rank_source for result in fused} == {"Hybrid"}
|
assert {result.rank_source for result in fused} == {"Hybrid"}
|
||||||
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ def test_search_ebooks_runs_vector_and_bm25_in_parallel(mocker: MockerFixture) -
|
|||||||
|
|
||||||
def fake_bm25_candidates(query, _config):
|
def fake_bm25_candidates(query, _config):
|
||||||
"""Return BM25 candidates after confirming vector search has started."""
|
"""Return BM25 candidates after confirming vector search has started."""
|
||||||
assert query == "parallel"
|
assert query == "what is parallel"
|
||||||
bm25_started.set()
|
bm25_started.set()
|
||||||
assert vector_started.wait(timeout=2)
|
assert vector_started.wait(timeout=2)
|
||||||
return [SearchResult(chunk_id=2, text="bm25", source_title="BM25", bm25_score=2.0)]
|
return [SearchResult(chunk_id=2, text="bm25", source_title="BM25", bm25_score=2.0)]
|
||||||
@@ -46,5 +46,4 @@ def test_search_ebooks_runs_vector_and_bm25_in_parallel(mocker: MockerFixture) -
|
|||||||
assert timings["Embedding + vector search"].counts_toward_total is False
|
assert timings["Embedding + vector search"].counts_toward_total is False
|
||||||
assert timings["BM25 search"].counts_toward_total is False
|
assert timings["BM25 search"].counts_toward_total is False
|
||||||
assert timings["Hybrid retrieval"].counts_toward_total is True
|
assert timings["Hybrid retrieval"].counts_toward_total is True
|
||||||
assert timings["BM25 query preparation"].counts_toward_total is True
|
|
||||||
assert received_engines == [engine]
|
assert received_engines == [engine]
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ def rerank_response(payload: dict[str, object] | None = None, *, content: bytes
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_config_defaults_keep_reranking_optional(mocker: MockerFixture) -> None:
|
def test_config_defaults_enable_reranking(mocker: MockerFixture) -> None:
|
||||||
mocker.patch.dict(environ, {}, clear=False)
|
mocker.patch.dict(environ, {}, clear=False)
|
||||||
environ.pop("EBOOK_SEARCH_RERANK_ENABLED", None)
|
environ.pop("EBOOK_SEARCH_RERANK_ENABLED", None)
|
||||||
environ.pop("EBOOK_SEARCH_RERANK_BASE_URL", None)
|
environ.pop("EBOOK_SEARCH_RERANK_BASE_URL", None)
|
||||||
@@ -43,7 +43,7 @@ def test_config_defaults_keep_reranking_optional(mocker: MockerFixture) -> None:
|
|||||||
|
|
||||||
config = load_rerank_config()
|
config = load_rerank_config()
|
||||||
|
|
||||||
assert config.enabled is False
|
assert config.enabled is True
|
||||||
assert config.base_url == "http://192.168.90.25:8001"
|
assert config.base_url == "http://192.168.90.25:8001"
|
||||||
assert config.model == "qwen3-reranker-06b"
|
assert config.model == "qwen3-reranker-06b"
|
||||||
assert config.candidates == 24
|
assert config.candidates == 24
|
||||||
|
|||||||
Reference in New Issue
Block a user