Admin
+ +Embeddings
+| Model | +Dimensions | +Embedded | +Missing | +Total chunks | +
|---|---|---|---|---|
| {{ item.model_name }} | +{{ item.dimension }} | +{{ item.embedded_chunks }} | +{{ item.missing_chunks }} | +{{ item.total_chunks }} | +
diff --git a/python/ebook_search/api/__init__.py b/python/ebook_search/api/__init__.py new file mode 100644 index 0000000..297fdb0 --- /dev/null +++ b/python/ebook_search/api/__init__.py @@ -0,0 +1 @@ +"""Web and external API adapters for EPUB search.""" diff --git a/python/ebook_search/api/bm25_tasks.py b/python/ebook_search/api/bm25_tasks.py new file mode 100644 index 0000000..a211d45 --- /dev/null +++ b/python/ebook_search/api/bm25_tasks.py @@ -0,0 +1,58 @@ +"""Background BM25 refresh tasks for the web app.""" + +from __future__ import annotations + +import logging +from threading import Timer +from typing import TYPE_CHECKING + +from sqlalchemy.orm import Session + +from python.ebook_search.bm25_corpus import refresh_bm25_corpus + +if TYPE_CHECKING: + from fastapi import FastAPI + from sqlalchemy.engine import Engine + + from python.ebook_search.config import EbookSearchConfig + +logger = logging.getLogger(__name__) + + +def schedule_bm25_refresh(app: FastAPI) -> None: + """Schedule a delayed BM25 corpus refresh, replacing any pending refresh.""" + existing_timer = getattr(app.state, "bm25_refresh_timer", None) + if existing_timer is not None: + existing_timer.cancel() + + timer = Timer(app.state.config.bm25_refresh_delay_seconds, refresh_bm25_for_app, args=(app,)) + timer.daemon = True + timer.start() + app.state.bm25_refresh_timer = timer + logger.info( + "ebook_bm25_refresh_scheduled delay_seconds=%s", + app.state.config.bm25_refresh_delay_seconds, + ) + + +def cancel_bm25_refresh(app: FastAPI) -> None: + """Cancel any pending BM25 corpus refresh.""" + existing_timer = getattr(app.state, "bm25_refresh_timer", None) + if existing_timer is not None: + existing_timer.cancel() + app.state.bm25_refresh_timer = None + logger.info("ebook_bm25_refresh_cancelled") + + +def refresh_bm25_for_app(app: FastAPI) -> None: + """Refresh the BM25 corpus using the app engine and config.""" + try: + refresh_bm25_for_engine(app.state.engine, app.state.config) + except Exception: + logger.exception("ebook_bm25_refresh_failed") + + +def refresh_bm25_for_engine(engine: Engine, config: EbookSearchConfig) -> None: + """Refresh the BM25 corpus using a SQLAlchemy engine.""" + with Session(engine) as session: + refresh_bm25_corpus(session, config) diff --git a/python/ebook_search/api/main.py b/python/ebook_search/api/main.py new file mode 100644 index 0000000..9be6d99 --- /dev/null +++ b/python/ebook_search/api/main.py @@ -0,0 +1,75 @@ +"""FastAPI HTMX app for EPUB search.""" + +from __future__ import annotations + +import logging +from contextlib import asynccontextmanager +from typing import TYPE_CHECKING, Annotated + +import typer +import uvicorn +from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles +from sqlalchemy.orm import Session + +from python.common import configure_logger +from python.ebook_search.api.bm25_tasks import cancel_bm25_refresh +from python.ebook_search.api.routes import register_admin_routes, register_page_routes, register_search_routes +from python.ebook_search.api.web import STATIC_DIR +from python.ebook_search.bm25_corpus import ensure_bm25_corpus +from python.ebook_search.config import load_config +from python.orm.common import get_postgres_engine + +if TYPE_CHECKING: + from collections.abc import AsyncIterator + + +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI) -> AsyncIterator[None]: + """Manage application startup and shutdown resources.""" + logger.info("ebook_search_startup") + app.state.engine = get_postgres_engine(name="RICHIE") + with Session(app.state.engine) as session: + ensure_bm25_corpus(session, app.state.config) + try: + yield + finally: + logger.info("ebook_search_shutdown") + cancel_bm25_refresh(app) + app.state.engine.dispose() + + +def create_app() -> FastAPI: + """Create the EPUB search web app.""" + app = FastAPI(title="EPUB Search", lifespan=lifespan) + app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static") + app.state.config = load_config() + logger.info( + "ebook_search_config_loaded top_k=%s embedding_model=%s rerank_enabled=%s answer_enabled=%s library_paths=%s", + app.state.config.top_k, + app.state.config.embedding_model, + app.state.config.rerank.enabled, + app.state.config.answer_enabled, + len(app.state.config.library_paths), + ) + register_page_routes(app) + register_search_routes(app) + register_admin_routes(app) + return app + + +def serve( + host: Annotated[str, typer.Option("--host", "-h", help="Host to bind to")] = "127.0.0.1", + port: Annotated[int, typer.Option("--port", "-p", help="Port to bind to")] = 8070, + log_level: Annotated[str, typer.Option("--log-level", "-l", help="Log level")] = "INFO", +) -> None: + """Start the EPUB search server.""" + configure_logger(log_level) + uvicorn.run(create_app(), host=host, port=port) + + +if __name__ == "__main__": + typer.run(serve) diff --git a/python/ebook_search/api/routes/__init__.py b/python/ebook_search/api/routes/__init__.py new file mode 100644 index 0000000..a6e49ca --- /dev/null +++ b/python/ebook_search/api/routes/__init__.py @@ -0,0 +1,16 @@ +"""EPUB search web route modules.""" + +from python.ebook_search.api.routes import admin, page, search + +register_admin_routes = admin.register_admin_routes +register_page_routes = page.register_page_routes +register_search_routes = search.register_search_routes + +__all__ = [ + "admin", + "page", + "register_admin_routes", + "register_page_routes", + "register_search_routes", + "search", +] diff --git a/python/ebook_search/api/routes/admin.py b/python/ebook_search/api/routes/admin.py new file mode 100644 index 0000000..ff83239 --- /dev/null +++ b/python/ebook_search/api/routes/admin.py @@ -0,0 +1,116 @@ +"""Admin routes for the EPUB search web UI.""" + +from __future__ import annotations + +import logging +from dataclasses import replace +from typing import TYPE_CHECKING + +from fastapi import APIRouter, Request +from fastapi.responses import HTMLResponse +from sqlalchemy.orm import Session + +from python.ebook_search.api.bm25_tasks import schedule_bm25_refresh +from python.ebook_search.api.web import templates +from python.ebook_search.embeddings import embed_missing_chunks, embedding_model_stats +from python.ebook_search.ingest import ingest_configured_paths + +if TYPE_CHECKING: + from fastapi import FastAPI + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/admin") +EMBED_ALL_BATCH_SIZE = 32 + + +def register_admin_routes(app: FastAPI) -> None: + """Register admin routes on the app.""" + app.include_router(router) + + +@router.get("", response_class=HTMLResponse) +def admin(request: Request) -> HTMLResponse: + """Render the admin page.""" + with Session(request.app.state.engine) as session: + stats = embedding_model_stats(session) + logger.info("ebook_admin_page_loaded models=%s", len(stats)) + return templates.TemplateResponse(request, "admin.html", {"config": request.app.state.config, "stats": stats}) + + +@router.post("/scan", response_class=HTMLResponse) +def scan_library(request: Request) -> HTMLResponse: + """Scan configured library paths for EPUB changes.""" + try: + with Session(request.app.state.engine) as session: + count = ingest_configured_paths(session, request.app.state.config) + session.commit() + except Exception as error: + logger.exception("ebook_admin_scan_failed") + return templates.TemplateResponse(request, "partials/error.html", {"message": str(error)}, status_code=500) + + logger.info("ebook_admin_scan_complete changed_files=%s", count) + if count > 0: + schedule_bm25_refresh(request.app) + return templates.TemplateResponse(request, "partials/admin_status.html", {"message": f"Indexed {count} EPUBs"}) + + +@router.post("/embed-missing", response_class=HTMLResponse) +def embed_missing(request: Request) -> HTMLResponse: + """Embed chunks missing vectors for the configured model.""" + try: + with Session(request.app.state.engine) as session: + count = embed_missing_chunks(session, request.app.state.config) + session.commit() + except Exception as error: + logger.exception("ebook_admin_embed_missing_failed") + return templates.TemplateResponse(request, "partials/error.html", {"message": str(error)}, status_code=500) + + logger.info("ebook_admin_embed_missing_complete chunks=%s", count) + return templates.TemplateResponse( + request, + "partials/admin_status.html", + {"message": f"Embedded {count} chunks"}, + ) + + +@router.post("/embed-all", response_class=HTMLResponse) +def embed_all(request: Request) -> HTMLResponse: + """Embed all chunks missing vectors in fixed-size batches.""" + total = 0 + batches = 0 + config = replace(request.app.state.config, embedding_batch_size=EMBED_ALL_BATCH_SIZE) + try: + with Session(request.app.state.engine) as session: + while True: + count = embed_missing_chunks(session, config) + if count == 0: + break + session.commit() + total += count + batches += 1 + logger.info( + "ebook_admin_embed_all_batch_complete batch=%s chunks=%s total_chunks=%s", + batches, + count, + total, + ) + except Exception as error: + logger.exception( + "ebook_admin_embed_all_failed batches=%s chunks=%s", + batches, + total, + ) + return templates.TemplateResponse( + request, + "partials/error.html", + {"message": f"Embed all failed after {total} chunks in {batches} batches: {error}"}, + status_code=500, + ) + + logger.info("ebook_admin_embed_all_complete batches=%s chunks=%s", batches, total) + return templates.TemplateResponse( + request, + "partials/admin_status.html", + {"message": f"Embedded {total} chunks in {batches} batches of {EMBED_ALL_BATCH_SIZE}"}, + ) diff --git a/python/ebook_search/api/routes/page.py b/python/ebook_search/api/routes/page.py new file mode 100644 index 0000000..8e48867 --- /dev/null +++ b/python/ebook_search/api/routes/page.py @@ -0,0 +1,66 @@ +"""Page routes for the EPUB search web UI.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +from fastapi import APIRouter, Request +from fastapi.responses import HTMLResponse +from sqlalchemy import select +from sqlalchemy.orm import Session + +from python.ebook_search.api.web import templates +from python.orm.richie import EbookSource + +if TYPE_CHECKING: + from fastapi import FastAPI + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +def register_page_routes(app: FastAPI) -> None: + """Register page routes on the app.""" + app.include_router(router) + + +@router.get("/", response_class=HTMLResponse) +def index(request: Request) -> HTMLResponse: + """Render the search page.""" + return templates.TemplateResponse(request, "search.html", {"config": request.app.state.config}) + + +@router.get("/books", response_class=HTMLResponse) +def books(request: Request) -> HTMLResponse: + """Render the indexed books page.""" + with Session(request.app.state.engine) as session: + sources = list(session.scalars(select(EbookSource).order_by(EbookSource.title)).all()) + logger.info("ebook_books_page_loaded count=%s", len(sources)) + return templates.TemplateResponse(request, "books.html", {"sources": sources}) + + +@router.get("/books/{source_id}", response_class=HTMLResponse) +def book_detail(source_id: int, request: Request) -> HTMLResponse: + """Render details for one indexed book.""" + with Session(request.app.state.engine) as session: + source = session.get(EbookSource, source_id) + if source is not None: + chapter_count = len(source.chapters) + chunk_count = len(source.chunks) + else: + chapter_count = 0 + chunk_count = 0 + logger.info( + "ebook_book_detail_loaded source_id=%s found=%s chapters=%s chunks=%s", + source_id, + source is not None, + chapter_count, + chunk_count, + ) + return templates.TemplateResponse( + request, + "book_detail.html", + {"chapter_count": chapter_count, "chunk_count": chunk_count, "source": source}, + ) diff --git a/python/ebook_search/api/routes/search.py b/python/ebook_search/api/routes/search.py new file mode 100644 index 0000000..77ed022 --- /dev/null +++ b/python/ebook_search/api/routes/search.py @@ -0,0 +1,66 @@ +"""Search routes for the EPUB search web UI.""" + +from __future__ import annotations + +import logging +from dataclasses import replace +from time import perf_counter +from typing import TYPE_CHECKING, Annotated + +from fastapi import APIRouter, Form, Request +from fastapi.responses import HTMLResponse + +from python.ebook_search.answer import answer_query +from python.ebook_search.api.web import templates +from python.ebook_search.search import search_ebooks +from python.ebook_search.timing import runtime_step_from_start + +if TYPE_CHECKING: + from fastapi import FastAPI + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +def register_search_routes(app: FastAPI) -> None: + """Register search routes on the app.""" + app.include_router(router) + + +@router.post("/search", response_class=HTMLResponse) +def search( + request: Request, + query: Annotated[str, Form()], + rerank: Annotated[str | None, Form()] = None, +) -> HTMLResponse: + """Run a search and render HTMX results.""" + try: + response = search_ebooks(request.app.state.engine, query, request.app.state.config, rerank=rerank == "true") + except Exception as error: + logger.exception("ebook_search_request_failed") + return templates.TemplateResponse(request, "partials/error.html", {"message": str(error)}, status_code=500) + + answer_start = perf_counter() + if request.app.state.config.answer_enabled: + try: + answer = answer_query(query, response.results, request.app.state.config) + except RuntimeError as error: + logger.warning("ebook_answer_request_failed_falling_back error=%s", error) + answer = "Answer generation failed. Source chunks are still shown below." + else: + logger.info("ebook_answer_skipped_disabled") + answer = "Answer generation is disabled. Source chunks are shown below." + answer_step_name = "Answer generation" if request.app.state.config.answer_enabled else "Answer skipped" + response = replace( + response, + timings=(*response.timings, runtime_step_from_start(answer_step_name, answer_start)), + ) + + logger.info( + "ebook_search_request_complete results=%s rank_label=%s runtime_ms=%.1f", + len(response.results), + response.rank_label, + response.total_runtime_ms, + ) + return templates.TemplateResponse(request, "partials/results.html", {"answer": answer, "response": response}) diff --git a/python/ebook_search/api/static/style.css b/python/ebook_search/api/static/style.css new file mode 100644 index 0000000..c869d55 --- /dev/null +++ b/python/ebook_search/api/static/style.css @@ -0,0 +1,140 @@ +body { + margin: 0; + background: #f7f7f4; + color: #202124; + font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; +} + +main { + max-width: 960px; + margin: 0 auto; + padding: 24px; +} + +nav { + display: flex; + gap: 12px; + align-items: center; + margin-bottom: 20px; +} + +nav form { + margin: 0; +} + +.actions { + display: flex; + flex-wrap: wrap; + gap: 12px; + margin-bottom: 24px; +} + +textarea { + display: block; + width: 100%; + margin: 8px 0 12px; +} + +button { + padding: 8px 14px; +} + +.check { + display: inline-flex; + gap: 8px; + align-items: center; + margin-right: 12px; +} + +.rank-label { + margin-top: 24px; + font-weight: 700; +} + +.results { + padding-left: 24px; +} + +.meta, +.scores, +.status { + color: #626a73; +} + +.scores { + display: flex; + flex-wrap: wrap; + gap: 8px; + margin: 12px 0; +} + +.scores div { + display: inline-flex; + gap: 4px; + align-items: baseline; +} + +.scores dt { + font-weight: 700; +} + +.scores dd { + margin: 0; +} + +.runtime { + margin-top: 16px; +} + +.timing-chart { + display: grid; + gap: 8px; + padding: 0; + list-style: none; +} + +.timing-chart li { + display: grid; + grid-template-columns: minmax(150px, 1fr) minmax(160px, 2fr) auto auto; + gap: 8px; + align-items: center; +} + +.timing-bar { + height: 10px; + overflow: hidden; + background: #e5e5df; +} + +.timing-bar span { + display: block; + height: 100%; + background: #3767c8; +} + +.timing-value, +.timing-remaining { + color: #626a73; + font-variant-numeric: tabular-nums; +} + +table { + width: 100%; + border-collapse: collapse; +} + +th, +td { + padding: 8px; + border-bottom: 1px solid #d8d8d2; + text-align: left; +} + +th { + font-weight: 700; +} + +.error { + color: #9f1d20; + font-weight: 700; +} diff --git a/python/ebook_search/api/templates/admin.html b/python/ebook_search/api/templates/admin.html new file mode 100644 index 0000000..12e588e --- /dev/null +++ b/python/ebook_search/api/templates/admin.html @@ -0,0 +1,57 @@ + + +
+ + +| Model | +Dimensions | +Embedded | +Missing | +Total chunks | +
|---|---|---|---|---|
| {{ item.model_name }} | +{{ item.dimension }} | +{{ item.embedded_chunks }} | +{{ item.missing_chunks }} | +{{ item.total_chunks }} | +
No EPUBs indexed.
+ {% endif %} +{{ message }}
diff --git a/python/ebook_search/api/templates/partials/error.html b/python/ebook_search/api/templates/partials/error.html new file mode 100644 index 0000000..9657121 --- /dev/null +++ b/python/ebook_search/api/templates/partials/error.html @@ -0,0 +1 @@ +{{ message }}
diff --git a/python/ebook_search/api/templates/partials/results.html b/python/ebook_search/api/templates/partials/results.html new file mode 100644 index 0000000..bc29eec --- /dev/null +++ b/python/ebook_search/api/templates/partials/results.html @@ -0,0 +1,74 @@ +{{ answer }}
+{{ result.text }}
+No results.
+{% endif %} diff --git a/python/ebook_search/api/templates/search.html b/python/ebook_search/api/templates/search.html new file mode 100644 index 0000000..df566c5 --- /dev/null +++ b/python/ebook_search/api/templates/search.html @@ -0,0 +1,30 @@ + + + + + +