build api and frountend

This commit is contained in:
2026-06-12 03:10:19 -04:00
parent 051b5c7dc0
commit 5d8a758b89
16 changed files with 777 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
"""Web and external API adapters for EPUB search."""
+58
View File
@@ -0,0 +1,58 @@
"""Background BM25 refresh tasks for the web app."""
from __future__ import annotations
import logging
from threading import Timer
from typing import TYPE_CHECKING
from sqlalchemy.orm import Session
from python.ebook_search.bm25_corpus import refresh_bm25_corpus
if TYPE_CHECKING:
from fastapi import FastAPI
from sqlalchemy.engine import Engine
from python.ebook_search.config import EbookSearchConfig
logger = logging.getLogger(__name__)
def schedule_bm25_refresh(app: FastAPI) -> None:
"""Schedule a delayed BM25 corpus refresh, replacing any pending refresh."""
existing_timer = getattr(app.state, "bm25_refresh_timer", None)
if existing_timer is not None:
existing_timer.cancel()
timer = Timer(app.state.config.bm25_refresh_delay_seconds, refresh_bm25_for_app, args=(app,))
timer.daemon = True
timer.start()
app.state.bm25_refresh_timer = timer
logger.info(
"ebook_bm25_refresh_scheduled delay_seconds=%s",
app.state.config.bm25_refresh_delay_seconds,
)
def cancel_bm25_refresh(app: FastAPI) -> None:
"""Cancel any pending BM25 corpus refresh."""
existing_timer = getattr(app.state, "bm25_refresh_timer", None)
if existing_timer is not None:
existing_timer.cancel()
app.state.bm25_refresh_timer = None
logger.info("ebook_bm25_refresh_cancelled")
def refresh_bm25_for_app(app: FastAPI) -> None:
"""Refresh the BM25 corpus using the app engine and config."""
try:
refresh_bm25_for_engine(app.state.engine, app.state.config)
except Exception:
logger.exception("ebook_bm25_refresh_failed")
def refresh_bm25_for_engine(engine: Engine, config: EbookSearchConfig) -> None:
"""Refresh the BM25 corpus using a SQLAlchemy engine."""
with Session(engine) as session:
refresh_bm25_corpus(session, config)
+75
View File
@@ -0,0 +1,75 @@
"""FastAPI HTMX app for EPUB search."""
from __future__ import annotations
import logging
from contextlib import asynccontextmanager
from typing import TYPE_CHECKING, Annotated
import typer
import uvicorn
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from sqlalchemy.orm import Session
from python.common import configure_logger
from python.ebook_search.api.bm25_tasks import cancel_bm25_refresh
from python.ebook_search.api.routes import register_admin_routes, register_page_routes, register_search_routes
from python.ebook_search.api.web import STATIC_DIR
from python.ebook_search.bm25_corpus import ensure_bm25_corpus
from python.ebook_search.config import load_config
from python.orm.common import get_postgres_engine
if TYPE_CHECKING:
from collections.abc import AsyncIterator
logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
"""Manage application startup and shutdown resources."""
logger.info("ebook_search_startup")
app.state.engine = get_postgres_engine(name="RICHIE")
with Session(app.state.engine) as session:
ensure_bm25_corpus(session, app.state.config)
try:
yield
finally:
logger.info("ebook_search_shutdown")
cancel_bm25_refresh(app)
app.state.engine.dispose()
def create_app() -> FastAPI:
"""Create the EPUB search web app."""
app = FastAPI(title="EPUB Search", lifespan=lifespan)
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
app.state.config = load_config()
logger.info(
"ebook_search_config_loaded top_k=%s embedding_model=%s rerank_enabled=%s answer_enabled=%s library_paths=%s",
app.state.config.top_k,
app.state.config.embedding_model,
app.state.config.rerank.enabled,
app.state.config.answer_enabled,
len(app.state.config.library_paths),
)
register_page_routes(app)
register_search_routes(app)
register_admin_routes(app)
return app
def serve(
host: Annotated[str, typer.Option("--host", "-h", help="Host to bind to")] = "127.0.0.1",
port: Annotated[int, typer.Option("--port", "-p", help="Port to bind to")] = 8070,
log_level: Annotated[str, typer.Option("--log-level", "-l", help="Log level")] = "INFO",
) -> None:
"""Start the EPUB search server."""
configure_logger(log_level)
uvicorn.run(create_app(), host=host, port=port)
if __name__ == "__main__":
typer.run(serve)
@@ -0,0 +1,16 @@
"""EPUB search web route modules."""
from python.ebook_search.api.routes import admin, page, search
register_admin_routes = admin.register_admin_routes
register_page_routes = page.register_page_routes
register_search_routes = search.register_search_routes
__all__ = [
"admin",
"page",
"register_admin_routes",
"register_page_routes",
"register_search_routes",
"search",
]
+116
View File
@@ -0,0 +1,116 @@
"""Admin routes for the EPUB search web UI."""
from __future__ import annotations
import logging
from dataclasses import replace
from typing import TYPE_CHECKING
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from sqlalchemy.orm import Session
from python.ebook_search.api.bm25_tasks import schedule_bm25_refresh
from python.ebook_search.api.web import templates
from python.ebook_search.embeddings import embed_missing_chunks, embedding_model_stats
from python.ebook_search.ingest import ingest_configured_paths
if TYPE_CHECKING:
from fastapi import FastAPI
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/admin")
EMBED_ALL_BATCH_SIZE = 32
def register_admin_routes(app: FastAPI) -> None:
"""Register admin routes on the app."""
app.include_router(router)
@router.get("", response_class=HTMLResponse)
def admin(request: Request) -> HTMLResponse:
"""Render the admin page."""
with Session(request.app.state.engine) as session:
stats = embedding_model_stats(session)
logger.info("ebook_admin_page_loaded models=%s", len(stats))
return templates.TemplateResponse(request, "admin.html", {"config": request.app.state.config, "stats": stats})
@router.post("/scan", response_class=HTMLResponse)
def scan_library(request: Request) -> HTMLResponse:
"""Scan configured library paths for EPUB changes."""
try:
with Session(request.app.state.engine) as session:
count = ingest_configured_paths(session, request.app.state.config)
session.commit()
except Exception as error:
logger.exception("ebook_admin_scan_failed")
return templates.TemplateResponse(request, "partials/error.html", {"message": str(error)}, status_code=500)
logger.info("ebook_admin_scan_complete changed_files=%s", count)
if count > 0:
schedule_bm25_refresh(request.app)
return templates.TemplateResponse(request, "partials/admin_status.html", {"message": f"Indexed {count} EPUBs"})
@router.post("/embed-missing", response_class=HTMLResponse)
def embed_missing(request: Request) -> HTMLResponse:
"""Embed chunks missing vectors for the configured model."""
try:
with Session(request.app.state.engine) as session:
count = embed_missing_chunks(session, request.app.state.config)
session.commit()
except Exception as error:
logger.exception("ebook_admin_embed_missing_failed")
return templates.TemplateResponse(request, "partials/error.html", {"message": str(error)}, status_code=500)
logger.info("ebook_admin_embed_missing_complete chunks=%s", count)
return templates.TemplateResponse(
request,
"partials/admin_status.html",
{"message": f"Embedded {count} chunks"},
)
@router.post("/embed-all", response_class=HTMLResponse)
def embed_all(request: Request) -> HTMLResponse:
"""Embed all chunks missing vectors in fixed-size batches."""
total = 0
batches = 0
config = replace(request.app.state.config, embedding_batch_size=EMBED_ALL_BATCH_SIZE)
try:
with Session(request.app.state.engine) as session:
while True:
count = embed_missing_chunks(session, config)
if count == 0:
break
session.commit()
total += count
batches += 1
logger.info(
"ebook_admin_embed_all_batch_complete batch=%s chunks=%s total_chunks=%s",
batches,
count,
total,
)
except Exception as error:
logger.exception(
"ebook_admin_embed_all_failed batches=%s chunks=%s",
batches,
total,
)
return templates.TemplateResponse(
request,
"partials/error.html",
{"message": f"Embed all failed after {total} chunks in {batches} batches: {error}"},
status_code=500,
)
logger.info("ebook_admin_embed_all_complete batches=%s chunks=%s", batches, total)
return templates.TemplateResponse(
request,
"partials/admin_status.html",
{"message": f"Embedded {total} chunks in {batches} batches of {EMBED_ALL_BATCH_SIZE}"},
)
+66
View File
@@ -0,0 +1,66 @@
"""Page routes for the EPUB search web UI."""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from sqlalchemy import select
from sqlalchemy.orm import Session
from python.ebook_search.api.web import templates
from python.orm.richie import EbookSource
if TYPE_CHECKING:
from fastapi import FastAPI
logger = logging.getLogger(__name__)
router = APIRouter()
def register_page_routes(app: FastAPI) -> None:
"""Register page routes on the app."""
app.include_router(router)
@router.get("/", response_class=HTMLResponse)
def index(request: Request) -> HTMLResponse:
"""Render the search page."""
return templates.TemplateResponse(request, "search.html", {"config": request.app.state.config})
@router.get("/books", response_class=HTMLResponse)
def books(request: Request) -> HTMLResponse:
"""Render the indexed books page."""
with Session(request.app.state.engine) as session:
sources = list(session.scalars(select(EbookSource).order_by(EbookSource.title)).all())
logger.info("ebook_books_page_loaded count=%s", len(sources))
return templates.TemplateResponse(request, "books.html", {"sources": sources})
@router.get("/books/{source_id}", response_class=HTMLResponse)
def book_detail(source_id: int, request: Request) -> HTMLResponse:
"""Render details for one indexed book."""
with Session(request.app.state.engine) as session:
source = session.get(EbookSource, source_id)
if source is not None:
chapter_count = len(source.chapters)
chunk_count = len(source.chunks)
else:
chapter_count = 0
chunk_count = 0
logger.info(
"ebook_book_detail_loaded source_id=%s found=%s chapters=%s chunks=%s",
source_id,
source is not None,
chapter_count,
chunk_count,
)
return templates.TemplateResponse(
request,
"book_detail.html",
{"chapter_count": chapter_count, "chunk_count": chunk_count, "source": source},
)
+66
View File
@@ -0,0 +1,66 @@
"""Search routes for the EPUB search web UI."""
from __future__ import annotations
import logging
from dataclasses import replace
from time import perf_counter
from typing import TYPE_CHECKING, Annotated
from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse
from python.ebook_search.answer import answer_query
from python.ebook_search.api.web import templates
from python.ebook_search.search import search_ebooks
from python.ebook_search.timing import runtime_step_from_start
if TYPE_CHECKING:
from fastapi import FastAPI
logger = logging.getLogger(__name__)
router = APIRouter()
def register_search_routes(app: FastAPI) -> None:
"""Register search routes on the app."""
app.include_router(router)
@router.post("/search", response_class=HTMLResponse)
def search(
request: Request,
query: Annotated[str, Form()],
rerank: Annotated[str | None, Form()] = None,
) -> HTMLResponse:
"""Run a search and render HTMX results."""
try:
response = search_ebooks(request.app.state.engine, query, request.app.state.config, rerank=rerank == "true")
except Exception as error:
logger.exception("ebook_search_request_failed")
return templates.TemplateResponse(request, "partials/error.html", {"message": str(error)}, status_code=500)
answer_start = perf_counter()
if request.app.state.config.answer_enabled:
try:
answer = answer_query(query, response.results, request.app.state.config)
except RuntimeError as error:
logger.warning("ebook_answer_request_failed_falling_back error=%s", error)
answer = "Answer generation failed. Source chunks are still shown below."
else:
logger.info("ebook_answer_skipped_disabled")
answer = "Answer generation is disabled. Source chunks are shown below."
answer_step_name = "Answer generation" if request.app.state.config.answer_enabled else "Answer skipped"
response = replace(
response,
timings=(*response.timings, runtime_step_from_start(answer_step_name, answer_start)),
)
logger.info(
"ebook_search_request_complete results=%s rank_label=%s runtime_ms=%.1f",
len(response.results),
response.rank_label,
response.total_runtime_ms,
)
return templates.TemplateResponse(request, "partials/results.html", {"answer": answer, "response": response})
+140
View File
@@ -0,0 +1,140 @@
body {
margin: 0;
background: #f7f7f4;
color: #202124;
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
}
main {
max-width: 960px;
margin: 0 auto;
padding: 24px;
}
nav {
display: flex;
gap: 12px;
align-items: center;
margin-bottom: 20px;
}
nav form {
margin: 0;
}
.actions {
display: flex;
flex-wrap: wrap;
gap: 12px;
margin-bottom: 24px;
}
textarea {
display: block;
width: 100%;
margin: 8px 0 12px;
}
button {
padding: 8px 14px;
}
.check {
display: inline-flex;
gap: 8px;
align-items: center;
margin-right: 12px;
}
.rank-label {
margin-top: 24px;
font-weight: 700;
}
.results {
padding-left: 24px;
}
.meta,
.scores,
.status {
color: #626a73;
}
.scores {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin: 12px 0;
}
.scores div {
display: inline-flex;
gap: 4px;
align-items: baseline;
}
.scores dt {
font-weight: 700;
}
.scores dd {
margin: 0;
}
.runtime {
margin-top: 16px;
}
.timing-chart {
display: grid;
gap: 8px;
padding: 0;
list-style: none;
}
.timing-chart li {
display: grid;
grid-template-columns: minmax(150px, 1fr) minmax(160px, 2fr) auto auto;
gap: 8px;
align-items: center;
}
.timing-bar {
height: 10px;
overflow: hidden;
background: #e5e5df;
}
.timing-bar span {
display: block;
height: 100%;
background: #3767c8;
}
.timing-value,
.timing-remaining {
color: #626a73;
font-variant-numeric: tabular-nums;
}
table {
width: 100%;
border-collapse: collapse;
}
th,
td {
padding: 8px;
border-bottom: 1px solid #d8d8d2;
text-align: left;
}
th {
font-weight: 700;
}
.error {
color: #9f1d20;
font-weight: 700;
}
@@ -0,0 +1,57 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>EPUB Admin</title>
<script src="https://unpkg.com/htmx.org@2.0.4"></script>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<main>
<nav>
<a href="/">Search</a>
<a href="/books">Books</a>
<a href="/admin">Admin</a>
</nav>
<h1>Admin</h1>
<section id="admin-status"></section>
<section class="actions">
<form hx-post="/admin/scan" hx-target="#admin-status" hx-swap="innerHTML">
<button type="submit">Scan</button>
</form>
<form hx-post="/admin/embed-missing" hx-target="#admin-status" hx-swap="innerHTML">
<button type="submit">Embed</button>
</form>
<form hx-post="/admin/embed-all" hx-target="#admin-status" hx-swap="innerHTML">
<button type="submit">Embed all</button>
</form>
</section>
<section>
<h2>Embeddings</h2>
<table>
<thead>
<tr>
<th>Model</th>
<th>Dimensions</th>
<th>Embedded</th>
<th>Missing</th>
<th>Total chunks</th>
</tr>
</thead>
<tbody>
{% for item in stats %}
<tr>
<td>{{ item.model_name }}</td>
<td>{{ item.dimension }}</td>
<td>{{ item.embedded_chunks }}</td>
<td>{{ item.missing_chunks }}</td>
<td>{{ item.total_chunks }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</section>
</main>
</body>
</html>
@@ -0,0 +1,32 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% if source %}{{ source.title }}{% else %}Book not found{% endif %}</title>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<main>
<nav>
<a href="/">Search</a>
<a href="/books">Books</a>
<a href="/admin">Admin</a>
</nav>
{% if source %}
<h1>{{ source.title }}</h1>
<p class="meta">{{ source.author or "Unknown author" }}</p>
<dl>
<dt>File</dt>
<dd>{{ source.file_path }}</dd>
<dt>Chapters</dt>
<dd>{{ chapter_count }}</dd>
<dt>Chunks</dt>
<dd>{{ chunk_count }}</dd>
</dl>
{% else %}
<h1>Book not found</h1>
{% endif %}
</main>
</body>
</html>
@@ -0,0 +1,31 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>EPUB Books</title>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<main>
<nav>
<a href="/">Search</a>
<a href="/books">Books</a>
<a href="/admin">Admin</a>
</nav>
<h1>Books</h1>
{% if sources %}
<ol class="results">
{% for source in sources %}
<li>
<h2><a href="/books/{{ source.id }}">{{ source.title }}</a></h2>
<p class="meta">{{ source.author or "Unknown author" }}</p>
</li>
{% endfor %}
</ol>
{% else %}
<p>No EPUBs indexed.</p>
{% endif %}
</main>
</body>
</html>
@@ -0,0 +1 @@
<p class="status">{{ message }}</p>
@@ -0,0 +1 @@
<p class="error">{{ message }}</p>
@@ -0,0 +1,74 @@
<div class="rank-label">{{ response.rank_label }}</div>
{% if response.timings %}
<section class="runtime">
<h2>Runtime</h2>
<p class="meta">Total {{ "%.1f"|format(response.total_runtime_ms) }} ms</p>
<ol class="timing-chart">
{% set total = response.total_runtime_ms %}
{% set ns = namespace(remaining=total) %}
{% for step in response.timings %}
{% set width = (step.duration_ms / total * 100) if total else 0 %}
{% if step.counts_toward_total %}
{% set ns.remaining = ns.remaining - step.duration_ms %}
{% endif %}
<li>
<span class="timing-label">{{ step.name }}</span>
<span class="timing-bar"><span style="width: {{ "%.2f"|format(width) }}%"></span></span>
<span class="timing-value">{{ "%.1f"|format(step.duration_ms) }} ms</span>
<span class="timing-remaining">{{ "%.1f"|format([ns.remaining, 0]|max) }} ms left</span>
</li>
{% endfor %}
</ol>
</section>
{% endif %}
<section class="answer">
<h2>Answer</h2>
<p>{{ answer }}</p>
</section>
{% if response.results %}
<ol class="results">
{% for result in response.results %}
<li>
<h2>{{ result.source_title }}</h2>
<p class="meta">
{% if result.source_author %}{{ result.source_author }}{% endif %}
{% if result.chapter_title %} · {{ result.chapter_title }}{% endif %}
{% if result.page_label %} · page {{ result.page_label }}{% endif %}
</p>
<p>{{ result.text }}</p>
<dl class="scores">
<div>
<dt>final</dt>
<dd>{{ "%.3f"|format(result.score) }}</dd>
</div>
{% if result.rerank_score is not none %}
<div>
<dt>rerank</dt>
<dd>{{ "%.3f"|format(result.rerank_score) }}</dd>
</div>
{% endif %}
{% if result.vector_score is not none %}
<div>
<dt>vector cosine</dt>
<dd>{{ "%.3f"|format(result.vector_score) }}</dd>
</div>
{% endif %}
{% if result.bm25_score is not none %}
<div>
<dt>BM25</dt>
<dd>{{ "%.6f"|format(result.bm25_score) }}</dd>
</div>
{% endif %}
{% if result.fused_score is not none %}
<div>
<dt>RRF</dt>
<dd>{{ "%.3f"|format(result.fused_score) }}</dd>
</div>
{% endif %}
</dl>
</li>
{% endfor %}
</ol>
{% else %}
<p>No results.</p>
{% endif %}
@@ -0,0 +1,30 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>EPUB Search</title>
<script src="https://unpkg.com/htmx.org@2.0.4"></script>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<main>
<nav>
<a href="/">Search</a>
<a href="/books">Books</a>
<a href="/admin">Admin</a>
</nav>
<h1>EPUB Search</h1>
<form hx-post="/search" hx-target="#results" hx-swap="innerHTML">
<label for="query">Search</label>
<textarea id="query" name="query" rows="4" required></textarea>
<label class="check">
<input type="checkbox" name="rerank" value="true" {% if config.rerank.enabled %}checked{% endif %}>
Rerank
</label>
<button type="submit">Search</button>
</form>
<section id="results"></section>
</main>
</body>
</html>
+13
View File
@@ -0,0 +1,13 @@
"""Shared web UI resources for EPUB search."""
from __future__ import annotations
from pathlib import Path
from fastapi.templating import Jinja2Templates
PACKAGE_DIR = Path(__file__).resolve().parent
TEMPLATE_DIR = PACKAGE_DIR / "templates"
STATIC_DIR = PACKAGE_DIR / "static"
templates = Jinja2Templates(directory=TEMPLATE_DIR)