From 73177ef399ac932d3bb947c801c49d6ca91b95dc Mon Sep 17 00:00:00 2001 From: Richie Cahill Date: Fri, 12 Jun 2026 03:10:19 -0400 Subject: [PATCH] build api and frontend --- python/ebook_search/api/__init__.py | 1 + python/ebook_search/api/bm25_tasks.py | 58 ++++++++ python/ebook_search/api/main.py | 75 ++++++++++ python/ebook_search/api/routes/__init__.py | 16 ++ python/ebook_search/api/routes/admin.py | 116 +++++++++++++++ python/ebook_search/api/routes/page.py | 66 +++++++++ python/ebook_search/api/routes/search.py | 66 +++++++++ python/ebook_search/api/static/style.css | 140 ++++++++++++++++++ python/ebook_search/api/templates/admin.html | 57 +++++++ .../api/templates/book_detail.html | 32 ++++ python/ebook_search/api/templates/books.html | 31 ++++ .../api/templates/partials/admin_status.html | 1 + .../api/templates/partials/error.html | 1 + .../api/templates/partials/results.html | 74 +++++++++ python/ebook_search/api/templates/search.html | 30 ++++ python/ebook_search/api/web.py | 13 ++ 16 files changed, 777 insertions(+) create mode 100644 python/ebook_search/api/__init__.py create mode 100644 python/ebook_search/api/bm25_tasks.py create mode 100644 python/ebook_search/api/main.py create mode 100644 python/ebook_search/api/routes/__init__.py create mode 100644 python/ebook_search/api/routes/admin.py create mode 100644 python/ebook_search/api/routes/page.py create mode 100644 python/ebook_search/api/routes/search.py create mode 100644 python/ebook_search/api/static/style.css create mode 100644 python/ebook_search/api/templates/admin.html create mode 100644 python/ebook_search/api/templates/book_detail.html create mode 100644 python/ebook_search/api/templates/books.html create mode 100644 python/ebook_search/api/templates/partials/admin_status.html create mode 100644 python/ebook_search/api/templates/partials/error.html create mode 100644 python/ebook_search/api/templates/partials/results.html create mode 100644 python/ebook_search/api/templates/search.html 
create mode 100644 python/ebook_search/api/web.py
diff --git a/python/ebook_search/api/__init__.py b/python/ebook_search/api/__init__.py
new file mode 100644
index 0000000..297fdb0
--- /dev/null
+++ b/python/ebook_search/api/__init__.py
@@ -0,0 +1 @@
+"""Web and external API adapters for EPUB search."""
diff --git a/python/ebook_search/api/bm25_tasks.py b/python/ebook_search/api/bm25_tasks.py
new file mode 100644
index 0000000..a211d45
--- /dev/null
+++ b/python/ebook_search/api/bm25_tasks.py
@@ -0,0 +1,58 @@
+"""Background BM25 refresh tasks for the web app."""
+
+from __future__ import annotations
+
+import logging
+from threading import Timer
+from typing import TYPE_CHECKING
+
+from sqlalchemy.orm import Session
+
+from python.ebook_search.bm25_corpus import refresh_bm25_corpus
+
+if TYPE_CHECKING:
+    from fastapi import FastAPI
+    from sqlalchemy.engine import Engine
+
+    from python.ebook_search.config import EbookSearchConfig
+
+logger = logging.getLogger(__name__)
+
+
+def schedule_bm25_refresh(app: FastAPI) -> None:
+    """Start a delayed BM25 corpus refresh timer, cancelling any timer already stored on the app."""
+    # A newer request supersedes a refresh that has not fired yet.
+    pending = getattr(app.state, "bm25_refresh_timer", None)
+    if pending is not None:
+        pending.cancel()
+
+    # Read the delay once so the timer and the log line report the same value.
+    delay_seconds = app.state.config.bm25_refresh_delay_seconds
+    timer = Timer(delay_seconds, refresh_bm25_for_app, args=(app,))
+    timer.daemon = True  # a daemon thread does not keep the interpreter alive at exit
+    timer.start()
+    app.state.bm25_refresh_timer = timer
+    logger.info("ebook_bm25_refresh_scheduled delay_seconds=%s", delay_seconds)
+
+
+def cancel_bm25_refresh(app: FastAPI) -> None:
+    """Cancel the BM25 refresh timer stored on the app, if there is one."""
+    pending = getattr(app.state, "bm25_refresh_timer", None)
+    if pending is not None:
+        pending.cancel()
+        app.state.bm25_refresh_timer = None
+        logger.info("ebook_bm25_refresh_cancelled")
+
+
+def refresh_bm25_for_app(app: FastAPI) -> None:
+    """Refresh the BM25 corpus with the app's engine and config; log failures instead of raising."""
+    try:
+        refresh_bm25_for_engine(app.state.engine, app.state.config)
+    except Exception:
+        logger.exception("ebook_bm25_refresh_failed")
+
+
+def refresh_bm25_for_engine(engine: Engine, config: EbookSearchConfig) -> None:
+    """Open a session on *engine* and refresh the BM25 corpus with *config*."""
+    with Session(engine) as session:
+        refresh_bm25_corpus(session, config)
diff --git a/python/ebook_search/api/main.py b/python/ebook_search/api/main.py
new file mode 100644
index 0000000..9be6d99
--- /dev/null
+++ b/python/ebook_search/api/main.py
@@ -0,0 +1,75 @@
+"""FastAPI HTMX app for EPUB search."""
+
+from __future__ import annotations
+
+import logging
+from contextlib import asynccontextmanager
+from typing import TYPE_CHECKING, Annotated
+
+import typer
+import uvicorn
+from fastapi import FastAPI
+from fastapi.staticfiles import StaticFiles
+from sqlalchemy.orm import Session
+
+from python.common import configure_logger
+from python.ebook_search.api.bm25_tasks import cancel_bm25_refresh
+from python.ebook_search.api.routes import register_admin_routes, register_page_routes, register_search_routes
+from python.ebook_search.api.web import STATIC_DIR
+from python.ebook_search.bm25_corpus import ensure_bm25_corpus
+from python.ebook_search.config import load_config
+from python.orm.common import get_postgres_engine
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator
+
+
+logger = logging.getLogger(__name__)
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI) -> AsyncIterator[None]:
+    """Create the engine and ensure the BM25 corpus; always dispose the engine, even if startup fails."""
+    logger.info("ebook_search_startup")
+    app.state.engine = get_postgres_engine(name="RICHIE")
+    try:
+        with Session(app.state.engine) as session:
+            ensure_bm25_corpus(session, app.state.config)
+        yield
+    finally:
+        logger.info("ebook_search_shutdown")
+        cancel_bm25_refresh(app)
+        app.state.engine.dispose()
+
+
+def create_app() -> FastAPI:
+    """Build the app: mount static files, load config onto app.state, and register all routers."""
+    app = FastAPI(title="EPUB Search", lifespan=lifespan)
+    app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
+    app.state.config = load_config()
+    logger.info(
+        "ebook_search_config_loaded top_k=%s embedding_model=%s rerank_enabled=%s answer_enabled=%s library_paths=%s",
+        app.state.config.top_k,
+        app.state.config.embedding_model,
+        app.state.config.rerank.enabled,
+        app.state.config.answer_enabled,
+        len(app.state.config.library_paths),
+    )
+    register_page_routes(app)
+    register_search_routes(app)
+    register_admin_routes(app)
+    return app
+
+
+def serve(
+    host: Annotated[str, typer.Option("--host", "-h", help="Host to bind to")] = "127.0.0.1",
+    port: Annotated[int, typer.Option("--port", "-p", help="Port to bind to")] = 8070,
+    log_level: Annotated[str, typer.Option("--log-level", "-l", help="Log level")] = "INFO",
+) -> None:
+    """Configure logging at *log_level* and run the app under uvicorn on *host*:*port*."""
+    configure_logger(log_level)
+    uvicorn.run(create_app(), host=host, port=port)
+
+
+if __name__ == "__main__":
+    typer.run(serve)
diff --git a/python/ebook_search/api/routes/__init__.py b/python/ebook_search/api/routes/__init__.py
new file mode 100644
index 0000000..a6e49ca
--- /dev/null
+++ b/python/ebook_search/api/routes/__init__.py
@@ -0,0 +1,16 @@
+"""EPUB search web route modules."""
+
+from python.ebook_search.api.routes import admin, page, search
+
+register_admin_routes = admin.register_admin_routes
+register_page_routes = page.register_page_routes
+register_search_routes = search.register_search_routes
+
+__all__ = [
+    "admin",
+    "page",
+    "register_admin_routes",
+    "register_page_routes",
+    "register_search_routes",
+    "search",
+]
diff --git a/python/ebook_search/api/routes/admin.py b/python/ebook_search/api/routes/admin.py
new file mode 100644
index 0000000..ff83239
--- /dev/null
+++ b/python/ebook_search/api/routes/admin.py
@@ -0,0 +1,116 @@
+"""Admin routes for the EPUB search web UI."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import replace
+from typing import TYPE_CHECKING
+
+from
fastapi import APIRouter, Request
+from fastapi.responses import HTMLResponse
+from sqlalchemy.orm import Session
+
+from python.ebook_search.api.bm25_tasks import schedule_bm25_refresh
+from python.ebook_search.api.web import templates
+from python.ebook_search.embeddings import embed_missing_chunks, embedding_model_stats
+from python.ebook_search.ingest import ingest_configured_paths
+
+if TYPE_CHECKING:
+    from fastapi import FastAPI
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/admin")
+EMBED_ALL_BATCH_SIZE = 32
+
+
+def register_admin_routes(app: FastAPI) -> None:
+    """Attach the admin router to *app*."""
+    app.include_router(router)
+
+
+def _render_error(request: Request, message: str) -> HTMLResponse:
+    """Render the error partial for *message* with HTTP status 500."""
+    return templates.TemplateResponse(request, "partials/error.html", {"message": message}, status_code=500)
+
+
+def _render_status(request: Request, message: str) -> HTMLResponse:
+    """Render the admin status partial for *message*."""
+    return templates.TemplateResponse(request, "partials/admin_status.html", {"message": message})
+
+
+@router.get("", response_class=HTMLResponse)
+def admin(request: Request) -> HTMLResponse:
+    """Show the admin page with the app config and per-model embedding statistics."""
+    with Session(request.app.state.engine) as session:
+        stats = embedding_model_stats(session)
+    logger.info("ebook_admin_page_loaded models=%s", len(stats))
+    context = {"config": request.app.state.config, "stats": stats}
+    return templates.TemplateResponse(request, "admin.html", context)
+
+
+@router.post("/scan", response_class=HTMLResponse)
+def scan_library(request: Request) -> HTMLResponse:
+    """Ingest the configured library paths and schedule a BM25 refresh when anything changed."""
+    try:
+        with Session(request.app.state.engine) as session:
+            count = ingest_configured_paths(session, request.app.state.config)
+            session.commit()
+    except Exception as error:
+        logger.exception("ebook_admin_scan_failed")
+        return _render_error(request, str(error))
+
+    logger.info("ebook_admin_scan_complete changed_files=%s", count)
+    if count > 0:
+        schedule_bm25_refresh(request.app)
+    return _render_status(request, f"Indexed {count} EPUBs")
+
+
+@router.post("/embed-missing", response_class=HTMLResponse)
+def embed_missing(request: Request) -> HTMLResponse:
+    """Run one embed_missing_chunks pass with the app config and report the chunk count."""
+    try:
+        with Session(request.app.state.engine) as session:
+            count = embed_missing_chunks(session, request.app.state.config)
+            session.commit()
+    except Exception as error:
+        logger.exception("ebook_admin_embed_missing_failed")
+        return _render_error(request, str(error))
+
+    logger.info("ebook_admin_embed_missing_complete chunks=%s", count)
+    return _render_status(request, f"Embedded {count} chunks")
+
+
+@router.post("/embed-all", response_class=HTMLResponse)
+def embed_all(request: Request) -> HTMLResponse:
+    """Call embed_missing_chunks in EMBED_ALL_BATCH_SIZE batches until it reports zero chunks."""
+    total = 0
+    batches = 0
+    # Only the batch size is overridden; every other setting comes from the app config.
+    config = replace(request.app.state.config, embedding_batch_size=EMBED_ALL_BATCH_SIZE)
+    try:
+        with Session(request.app.state.engine) as session:
+            while True:
+                count = embed_missing_chunks(session, config)
+                if count == 0:
+                    break
+                # Committed per batch, before the next batch is attempted.
+                session.commit()
+                total += count
+                batches += 1
+                logger.info(
+                    "ebook_admin_embed_all_batch_complete batch=%s chunks=%s total_chunks=%s",
+                    batches,
+                    count,
+                    total,
+                )
+    except Exception as error:
+        logger.exception(
+            "ebook_admin_embed_all_failed batches=%s chunks=%s",
+            batches,
+            total,
+        )
+        return _render_error(request, f"Embed all failed after {total} chunks in {batches} batches: {error}")
+
+    logger.info("ebook_admin_embed_all_complete batches=%s chunks=%s", batches, total)
+    return _render_status(request, f"Embedded {total} chunks in {batches} batches of {EMBED_ALL_BATCH_SIZE}")
diff --git a/python/ebook_search/api/routes/page.py b/python/ebook_search/api/routes/page.py
new file mode 100644
index 0000000..8e48867
--- /dev/null
+++ b/python/ebook_search/api/routes/page.py
@@ -0,0 +1,66 @@
+"""Page routes for the EPUB search web UI."""
+
+from __future__ import
annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from fastapi import APIRouter, Request
+from fastapi.responses import HTMLResponse
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from python.ebook_search.api.web import templates
+from python.orm.richie import EbookSource
+
+if TYPE_CHECKING:
+    from fastapi import FastAPI
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+
+def register_page_routes(app: FastAPI) -> None:
+    """Attach the page router to *app*."""
+    app.include_router(router)
+
+
+@router.get("/", response_class=HTMLResponse)
+def index(request: Request) -> HTMLResponse:
+    """Render the search page with the app config in the template context."""
+    return templates.TemplateResponse(request, "search.html", {"config": request.app.state.config})
+
+
+@router.get("/books", response_class=HTMLResponse)
+def books(request: Request) -> HTMLResponse:
+    """Render every EbookSource row, ordered by title."""
+    with Session(request.app.state.engine) as session:
+        sources = list(session.scalars(select(EbookSource).order_by(EbookSource.title)).all())
+    logger.info("ebook_books_page_loaded count=%s", len(sources))
+    return templates.TemplateResponse(request, "books.html", {"sources": sources})
+
+
+@router.get("/books/{source_id}", response_class=HTMLResponse)
+def book_detail(source_id: int, request: Request) -> HTMLResponse:
+    """Render one book with its chapter and chunk counts; respond 404 when *source_id* is unknown."""
+    with Session(request.app.state.engine) as session:
+        source = session.get(EbookSource, source_id)
+        # Counted while the session is open; presumably the relationships load lazily - confirm on the model.
+        chapter_count = len(source.chapters) if source is not None else 0
+        chunk_count = len(source.chunks) if source is not None else 0
+    logger.info(
+        "ebook_book_detail_loaded source_id=%s found=%s chapters=%s chunks=%s",
+        source_id,
+        source is not None,
+        chapter_count,
+        chunk_count,
+    )
+    # The template shows "Book not found" for a missing source, so the status should say so too.
+    status_code = 200 if source is not None else 404
+    return templates.TemplateResponse(
+        request,
+        "book_detail.html",
+        {"chapter_count": chapter_count, "chunk_count": chunk_count, "source": source},
+        status_code=status_code,
+    )
diff --git
a/python/ebook_search/api/routes/search.py b/python/ebook_search/api/routes/search.py
new file mode 100644
index 0000000..77ed022
--- /dev/null
+++ b/python/ebook_search/api/routes/search.py
@@ -0,0 +1,66 @@
+"""Search routes for the EPUB search web UI."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import replace
+from time import perf_counter
+from typing import TYPE_CHECKING, Annotated
+
+from fastapi import APIRouter, Form, Request
+from fastapi.responses import HTMLResponse
+
+from python.ebook_search.answer import answer_query
+from python.ebook_search.api.web import templates
+from python.ebook_search.search import search_ebooks
+from python.ebook_search.timing import runtime_step_from_start
+
+if TYPE_CHECKING:
+    from fastapi import FastAPI
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+
+def register_search_routes(app: FastAPI) -> None:
+    """Attach the search router to *app*."""
+    app.include_router(router)
+
+
+@router.post("/search", response_class=HTMLResponse)
+def search(
+    request: Request,
+    query: Annotated[str, Form()],
+    rerank: Annotated[str | None, Form()] = None,
+) -> HTMLResponse:
+    """Search for *query*, optionally generate an answer, and render the results partial."""
+    use_rerank = rerank == "true"
+    try:
+        response = search_ebooks(request.app.state.engine, query, request.app.state.config, rerank=use_rerank)
+    except Exception as exc:
+        logger.exception("ebook_search_request_failed")
+        return templates.TemplateResponse(request, "partials/error.html", {"message": str(exc)}, status_code=500)
+
+    answer_start = perf_counter()
+    if request.app.state.config.answer_enabled:
+        answer_step_name = "Answer generation"
+        try:
+            answer = answer_query(query, response.results, request.app.state.config)
+        except RuntimeError as exc:
+            logger.warning("ebook_answer_request_failed_falling_back error=%s", exc)
+            answer = "Answer generation failed. Source chunks are still shown below."
+    else:
+        answer_step_name = "Answer skipped"
+        logger.info("ebook_answer_skipped_disabled")
+        answer = "Answer generation is disabled. Source chunks are shown below."
+    response = replace(response, timings=(*response.timings, runtime_step_from_start(answer_step_name, answer_start)))
+
+    logger.info(
+        "ebook_search_request_complete results=%s rank_label=%s runtime_ms=%.1f",
+        len(response.results),
+        response.rank_label,
+        response.total_runtime_ms,
+    )
+    context = {"answer": answer, "response": response}
+    return templates.TemplateResponse(request, "partials/results.html", context)
diff --git a/python/ebook_search/api/static/style.css b/python/ebook_search/api/static/style.css new file mode 100644 index 0000000..c869d55 --- /dev/null +++ b/python/ebook_search/api/static/style.css @@ -0,0 +1,140 @@ +body { + margin: 0; + background: #f7f7f4; + color: #202124; + font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; +} + +main { + max-width: 960px; + margin: 0 auto; + padding: 24px; +} + +nav { + display: flex; + gap: 12px; + align-items: center; + margin-bottom: 20px; +} + +nav form { + margin: 0; +} + +.actions { + display: flex; + flex-wrap: wrap; + gap: 12px; + margin-bottom: 24px; +} + +textarea { + display: block; + width: 100%; + margin: 8px 0 12px; +} + +button { + padding: 8px 14px; +} + +.check { + display: inline-flex; + gap: 8px; + align-items: center; + margin-right: 12px; +} + +.rank-label { + margin-top: 24px; + font-weight: 700; +} + +.results { + padding-left: 24px; +} + +.meta, +.scores, +.status { + color: #626a73; +} + +.scores { + display: flex; + flex-wrap: wrap; + gap: 8px; + margin: 12px 0; +} + +.scores div { + display: inline-flex; + gap: 4px; + align-items: baseline; +} + +.scores dt { + font-weight: 700; +} + +.scores dd { + margin: 0; +} + +.runtime { + margin-top: 16px; +} + +.timing-chart { + display: grid; + gap: 8px; + padding: 0; + list-style: none; +} + +.timing-chart li { + display: grid; + grid-template-columns: minmax(150px, 1fr) 
minmax(160px, 2fr) auto auto; + gap: 8px; + align-items: center; +} + +.timing-bar { + height: 10px; + overflow: hidden; + background: #e5e5df; +} + +.timing-bar span { + display: block; + height: 100%; + background: #3767c8; +} + +.timing-value, +.timing-remaining { + color: #626a73; + font-variant-numeric: tabular-nums; +} + +table { + width: 100%; + border-collapse: collapse; +} + +th, +td { + padding: 8px; + border-bottom: 1px solid #d8d8d2; + text-align: left; +} + +th { + font-weight: 700; +} + +.error { + color: #9f1d20; + font-weight: 700; +} diff --git a/python/ebook_search/api/templates/admin.html b/python/ebook_search/api/templates/admin.html new file mode 100644 index 0000000..12e588e --- /dev/null +++ b/python/ebook_search/api/templates/admin.html @@ -0,0 +1,57 @@ + + + + + + EPUB Admin + + + + +
+ +

Admin

+
+
+
+ +
+
+ +
+
+ +
+
+
+

Embeddings

+ + + + + + + + + + + + {% for item in stats %} + + + + + + + + {% endfor %} + +
ModelDimensionsEmbeddedMissingTotal chunks
{{ item.model_name }}{{ item.dimension }}{{ item.embedded_chunks }}{{ item.missing_chunks }}{{ item.total_chunks }}
+
+
+ + diff --git a/python/ebook_search/api/templates/book_detail.html b/python/ebook_search/api/templates/book_detail.html new file mode 100644 index 0000000..735aeaf --- /dev/null +++ b/python/ebook_search/api/templates/book_detail.html @@ -0,0 +1,32 @@ + + + + + + {% if source %}{{ source.title }}{% else %}Book not found{% endif %} + + + +
+ + {% if source %} +

{{ source.title }}

+

{{ source.author or "Unknown author" }}

+
+
File
+
{{ source.file_path }}
+
Chapters
+
{{ chapter_count }}
+
Chunks
+
{{ chunk_count }}
+
+ {% else %} +

Book not found

+ {% endif %} +
+ + diff --git a/python/ebook_search/api/templates/books.html b/python/ebook_search/api/templates/books.html new file mode 100644 index 0000000..c7bc487 --- /dev/null +++ b/python/ebook_search/api/templates/books.html @@ -0,0 +1,31 @@ + + + + + + EPUB Books + + + +
+ +

Books

+ {% if sources %} +
    + {% for source in sources %} +
  1. +

    {{ source.title }}

    +

    {{ source.author or "Unknown author" }}

    +
  2. + {% endfor %} +
+ {% else %} +

No EPUBs indexed.

+ {% endif %} +
+ + diff --git a/python/ebook_search/api/templates/partials/admin_status.html b/python/ebook_search/api/templates/partials/admin_status.html new file mode 100644 index 0000000..f8fa12f --- /dev/null +++ b/python/ebook_search/api/templates/partials/admin_status.html @@ -0,0 +1 @@ +

{{ message }}

diff --git a/python/ebook_search/api/templates/partials/error.html b/python/ebook_search/api/templates/partials/error.html new file mode 100644 index 0000000..9657121 --- /dev/null +++ b/python/ebook_search/api/templates/partials/error.html @@ -0,0 +1 @@ +

{{ message }}

diff --git a/python/ebook_search/api/templates/partials/results.html b/python/ebook_search/api/templates/partials/results.html new file mode 100644 index 0000000..bc29eec --- /dev/null +++ b/python/ebook_search/api/templates/partials/results.html @@ -0,0 +1,74 @@ +
{{ response.rank_label }}
+{% if response.timings %} +
+

Runtime

+

Total {{ "%.1f"|format(response.total_runtime_ms) }} ms

+
    + {% set total = response.total_runtime_ms %} + {% set ns = namespace(remaining=total) %} + {% for step in response.timings %} + {% set width = (step.duration_ms / total * 100) if total else 0 %} + {% if step.counts_toward_total %} + {% set ns.remaining = ns.remaining - step.duration_ms %} + {% endif %} +
  1. + {{ step.name }} + + {{ "%.1f"|format(step.duration_ms) }} ms + {{ "%.1f"|format([ns.remaining, 0]|max) }} ms left +
  2. + {% endfor %} +
+
+{% endif %} +
+

Answer

+

{{ answer }}

+
+{% if response.results %} +
    + {% for result in response.results %} +
  1. +

    {{ result.source_title }}

    +

    + {% if result.source_author %}{{ result.source_author }}{% endif %} + {% if result.chapter_title %} · {{ result.chapter_title }}{% endif %} + {% if result.page_label %} · page {{ result.page_label }}{% endif %} +

    +

    {{ result.text }}

    +
    +
    +
    final
    +
    {{ "%.3f"|format(result.score) }}
    +
    + {% if result.rerank_score is not none %} +
    +
    rerank
    +
    {{ "%.3f"|format(result.rerank_score) }}
    +
    + {% endif %} + {% if result.vector_score is not none %} +
    +
    vector cosine
    +
    {{ "%.3f"|format(result.vector_score) }}
    +
    + {% endif %} + {% if result.bm25_score is not none %} +
    +
    BM25
    +
    {{ "%.6f"|format(result.bm25_score) }}
    +
    + {% endif %} + {% if result.fused_score is not none %} +
    +
    RRF
    +
    {{ "%.3f"|format(result.fused_score) }}
    +
    + {% endif %} +
    +
  2. + {% endfor %} +
+{% else %} +

No results.

+{% endif %} diff --git a/python/ebook_search/api/templates/search.html b/python/ebook_search/api/templates/search.html new file mode 100644 index 0000000..df566c5 --- /dev/null +++ b/python/ebook_search/api/templates/search.html @@ -0,0 +1,30 @@ + + + + + + EPUB Search + + + + +
+ +

EPUB Search

+
+ + + + +
+
+
+ + diff --git a/python/ebook_search/api/web.py b/python/ebook_search/api/web.py new file mode 100644 index 0000000..85f6128 --- /dev/null +++ b/python/ebook_search/api/web.py @@ -0,0 +1,13 @@ +"""Shared web UI resources for EPUB search.""" + +from __future__ import annotations + +from pathlib import Path + +from fastapi.templating import Jinja2Templates + +PACKAGE_DIR = Path(__file__).resolve().parent +TEMPLATE_DIR = PACKAGE_DIR / "templates" +STATIC_DIR = PACKAGE_DIR / "static" + +templates = Jinja2Templates(directory=TEMPLATE_DIR)