added proper cache invalidation to load_bm25_corpus

This commit is contained in:
2026-06-12 13:47:43 -04:00
parent c4e8a395d2
commit 6bb6f935b1
3 changed files with 30 additions and 10 deletions
+3 -9
View File
@@ -108,11 +108,10 @@ def refresh_bm25_corpus(
)
write_bm25_corpus(index_path, records, manifest)
logger.info(
"ebook_bm25_index_refreshed path=%s chunks=%s created_at=%s note=%s",
"ebook_bm25_index_refreshed path=%s chunks=%s created_at=%s",
index_path,
manifest.chunk_count,
manifest.created_at.isoformat(),
"restart_service_to_use_refreshed_bm25_cache",
)
return manifest
@@ -121,15 +120,10 @@ def refresh_bm25_corpus(
def load_bm25_corpus(config: EbookSearchConfig) -> BM25Corpus:
"""Load the BM25 corpus into memory once per process.
This cache intentionally does not notice later on-disk corpus refreshes. Restart the service after rebuilding the
BM25 corpus for searches to use the new index.
Background refresh tasks clear this cache after rebuilding the on-disk corpus.
"""
index_path = bm25_index_path(config)
logger.info(
"ebook_bm25_corpus_cache_load path=%s note=%s",
index_path,
"restart_service_after_bm25_refresh",
)
logger.info("ebook_bm25_corpus_cache_load path=%s", index_path)
manifest = read_bm25_manifest(index_path)
if manifest is None or not bm25_index_exists(index_path, manifest):
msg = f"BM25 corpus is not available: {index_path}"