From f5368879c9e10ee8b92008d3b730e37e24c0101c Mon Sep 17 00:00:00 2001
From: Richie Cahill <Richie@tmmworkshop.com>
Date: Fri, 12 Jun 2026 03:08:21 -0400
Subject: [PATCH] made llm_interface.py

---
 python/ebook_search/embeddings.py    |   2 +-
 python/ebook_search/llm_interface.py | 143 +++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 1 deletion(-)
 create mode 100644 python/ebook_search/llm_interface.py

diff --git a/python/ebook_search/embeddings.py b/python/ebook_search/embeddings.py
index be428f3..f542e2b 100644
--- a/python/ebook_search/embeddings.py
+++ b/python/ebook_search/embeddings.py
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING
 from sqlalchemy import func, select
 from sqlalchemy.dialects.postgresql import insert
 
-from python.ebook_search.api.embedding_client import request_embeddings
+from python.ebook_search.llm_interface import request_embeddings
 from python.orm.richie import (
     EbookChunk,
     EbookChunkEmbedding1024,
diff --git a/python/ebook_search/llm_interface.py b/python/ebook_search/llm_interface.py
new file mode 100644
index 0000000..8cfa121
--- /dev/null
+++ b/python/ebook_search/llm_interface.py
@@ -0,0 +1,143 @@
+"""LLM provider HTTP adapters."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+import httpx
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from python.ebook_search.config import EbookSearchConfig, RerankConfig
+
+logger = logging.getLogger(__name__)
+
+
+def auth_headers(api_key: str) -> dict[str, str]:
+    """Build a Bearer ``Authorization`` header; return ``{}`` when no usable key is configured."""
+    if not api_key or api_key == "not-needed":  # an empty key would send a malformed bare "Bearer "
+        return {}
+    return {"Authorization": f"Bearer {api_key}"}
+
+
+def request_embeddings(texts: Sequence[str], config: EbookSearchConfig) -> list[list[float]]:
+    """Embed ``texts`` through the configured OpenAI-compatible ``/embeddings`` endpoint.
+
+    Failures are logged with their traceback and re-raised as ``RuntimeError`` chained to the cause.
+    """
+    base_url = config.embedding_base_url
+    model = config.embedding_model
+    try:
+        reply = httpx.post(
+            f"{base_url.rstrip('/')}/embeddings",
+            headers=auth_headers(config.embedding_api_key),
+            json={"model": model, "input": list(texts)},
+            timeout=60,
+        )
+        reply.raise_for_status()
+        return embedding_vectors_from_response(reply.json())
+    except (httpx.HTTPError, ValueError, KeyError, TypeError) as error:
+        logger.exception("ebook_embed_request_failed base_url=%s model=%s count=%s", base_url, model, len(texts))
+        msg = f"Embedding request failed. base_url={base_url} model={model}"
+        raise RuntimeError(msg) from error
+
+
+def embedding_vectors_from_response(body: object) -> list[list[float]]:
+    """Return one float vector per ``data`` entry of an OpenAI-compatible embedding response."""
+    # Wrong container types raise TypeError; absent "data"/"embedding" keys raise KeyError.
+    if not isinstance(body, dict):
+        msg = "Embedding response is not an object"
+        raise TypeError(msg)
+    entries = body["data"]
+    if not isinstance(entries, list):
+        msg = "Embedding response data is not a list"
+        raise TypeError(msg)
+
+    def to_vector(entry: object) -> list[float]:
+        if not isinstance(entry, dict):
+            msg = "Embedding item is not an object"
+            raise TypeError(msg)
+        raw = entry["embedding"]
+        if not isinstance(raw, list):
+            msg = "Embedding value is not a list"
+            raise TypeError(msg)
+        return list(map(float, raw))
+
+    return [to_vector(entry) for entry in entries]
+
+
+def request_rerank(
+    query: str,
+    documents: Sequence[str],
+    config: RerankConfig,
+) -> object | None:
+    """Ask the configured vLLM endpoint to score ``documents`` against ``query``.
+
+    Returns the decoded JSON body, or ``None`` when the body is not valid JSON.
+    Errors raised by ``httpx.post`` and ``raise_for_status`` propagate unchanged.
+    """
+    body = {"model": config.model, "query": query, "documents": list(documents)}
+    endpoint = f"{config.base_url.rstrip('/')}/rerank"
+    reply = httpx.post(endpoint, json=body, timeout=config.timeout_seconds)
+    reply.raise_for_status()
+
+    try:
+        decoded = reply.json()
+    except ValueError:
+        # An undecodable body is reported as "no result" instead of an error.
+        logger.debug("ebook_rerank_response_invalid_json", extra={"response": reply.text})
+        return None
+    return decoded
+
+
+def request_chat_completion(config: EbookSearchConfig, messages: Sequence[dict[str, str]]) -> str:
+    """Send ``messages`` to the configured OpenAI-compatible chat endpoint and return the reply text.
+
+    The request pins ``temperature`` to 0. HTTP failures and malformed
+    responses are re-raised as ``RuntimeError`` chained to the original exception.
+    """
+    base_url = config.vllm_base_url
+    model = config.chat_model
+    try:
+        reply = httpx.post(
+            f"{base_url.rstrip('/')}/chat/completions",
+            headers=auth_headers(config.vllm_api_key),
+            json={"model": model, "messages": list(messages), "temperature": 0},
+            timeout=60,
+        )
+        reply.raise_for_status()
+        return chat_content_from_response(reply.json())
+    except (httpx.HTTPError, ValueError, KeyError, TypeError) as error:
+        # Collapse every failure mode into one exception type for callers.
+        msg = f"Chat request failed. base_url={base_url} model={model}"
+        raise RuntimeError(msg) from error
+
+
+def chat_content_from_response(body: object) -> str:
+    """Extract the first choice's message text from an OpenAI-compatible chat response.
+
+    Absent or ``null`` content yields ``""``; wrongly typed parts raise ``TypeError``/``ValueError``.
+    """
+    if not isinstance(body, dict):
+        msg = "Chat response is not an object"
+        raise TypeError(msg)
+    choices = body["choices"]
+    if not isinstance(choices, list) or not choices:
+        msg = "Chat response has no choices"
+        raise ValueError(msg)
+    first = choices[0]
+    if not isinstance(first, dict):
+        msg = "Chat choice is not an object"
+        raise TypeError(msg)
+    message = first["message"]
+    if not isinstance(message, dict):
+        msg = "Chat message is not an object"
+        raise TypeError(msg)
+    # Test against None, not truthiness, so falsy non-strings (0, False, []) are rejected too.
+    content = message.get("content")
+    if content is not None and not isinstance(content, str):
+        msg = "Chat content is not text"
+        raise TypeError(msg)
+    return content or ""