From f5368879c9e10ee8b92008d3b730e37e24c0101c Mon Sep 17 00:00:00 2001
From: Richie Cahill
Date: Fri, 12 Jun 2026 03:08:21 -0400
Subject: [PATCH] made llm_interface.py

---
 python/ebook_search/embeddings.py    |   2 +-
 python/ebook_search/llm_interface.py | 248 +++++++++++++++++++++++++++
 2 files changed, 249 insertions(+), 1 deletion(-)
 create mode 100644 python/ebook_search/llm_interface.py

diff --git a/python/ebook_search/embeddings.py b/python/ebook_search/embeddings.py
index be428f3..f542e2b 100644
--- a/python/ebook_search/embeddings.py
+++ b/python/ebook_search/embeddings.py
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING
 
 from sqlalchemy import func, select
 from sqlalchemy.dialects.postgresql import insert
-from python.ebook_search.api.embedding_client import request_embeddings
+from python.ebook_search.llm_interface import request_embeddings
 from python.orm.richie import (
     EbookChunk,
     EbookChunkEmbedding1024,
diff --git a/python/ebook_search/llm_interface.py b/python/ebook_search/llm_interface.py
new file mode 100644
index 0000000..8cfa121
--- /dev/null
+++ b/python/ebook_search/llm_interface.py
@@ -0,0 +1,248 @@
+"""LLM provider HTTP adapters."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+import httpx
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from python.ebook_search.config import EbookSearchConfig, RerankConfig
+
+logger = logging.getLogger(__name__)
+
+
+def auth_headers(api_key: str) -> dict[str, str]:
+    """Build authorization headers when an API key is configured.
+
+    Args:
+        api_key: Bearer token, or an empty string / the ``"not-needed"``
+            placeholder when the endpoint is unauthenticated.
+
+    Returns:
+        A mapping holding a single ``Authorization`` header, or an empty
+        mapping when no usable key is configured.
+    """
+    # An empty key would otherwise be sent as a bare "Bearer " credential.
+    if not api_key or api_key == "not-needed":
+        return {}
+    return {"Authorization": f"Bearer {api_key}"}
+
+
+def request_embeddings(texts: Sequence[str], config: EbookSearchConfig) -> list[list[float]]:
+    """Request embeddings from the configured OpenAI-compatible endpoint.
+
+    Args:
+        texts: Strings to embed; sent as one batch in the given order.
+        config: Supplies ``embedding_base_url``, ``embedding_api_key`` and
+            ``embedding_model``.
+
+    Returns:
+        One vector per input text. An empty ``texts`` returns an empty list
+        without contacting the endpoint.
+
+    Raises:
+        RuntimeError: If the HTTP call fails, the body is not valid JSON or
+            not shaped as expected, or the number of vectors differs from the
+            number of inputs. The original error is chained as the cause.
+    """
+    inputs = list(texts)
+    if not inputs:
+        return []
+
+    try:
+        response = httpx.post(
+            f"{config.embedding_base_url.rstrip('/')}/embeddings",
+            headers=auth_headers(config.embedding_api_key),
+            json={"model": config.embedding_model, "input": inputs},
+            timeout=60,
+        )
+        response.raise_for_status()
+        # A reply with a different number of vectors cannot be matched back
+        # to the inputs, so it is rejected instead of being returned.
+        return embedding_vectors_from_response(response.json(), expected_count=len(inputs))
+    except (httpx.HTTPError, ValueError, KeyError, TypeError) as error:
+        logger.exception(
+            "ebook_embed_request_failed base_url=%s model=%s count=%s",
+            config.embedding_base_url,
+            config.embedding_model,
+            len(inputs),
+        )
+        msg = f"Embedding request failed. base_url={config.embedding_base_url} model={config.embedding_model}"
+        raise RuntimeError(msg) from error
+
+
+def embedding_vectors_from_response(
+    body: object,
+    expected_count: int | None = None,
+) -> list[list[float]]:
+    """Extract embedding vectors from an OpenAI-compatible embedding response.
+
+    Args:
+        body: Decoded JSON body; expected to be an object whose ``data`` list
+            holds objects that each carry an ``embedding`` list.
+        expected_count: When given, the exact number of vectors the response
+            must contain.
+
+    Returns:
+        The embeddings in response order, with every component converted to
+        ``float``.
+
+    Raises:
+        TypeError: If the body, ``data``, an item, or an embedding has the
+            wrong type.
+        KeyError: If ``data`` or ``embedding`` is missing.
+        ValueError: If a component cannot be converted to ``float``, or the
+            vector count differs from ``expected_count``.
+    """
+    if not isinstance(body, dict):
+        msg = "Embedding response is not an object"
+        raise TypeError(msg)
+
+    data = body["data"]
+    if not isinstance(data, list):
+        msg = "Embedding response data is not a list"
+        raise TypeError(msg)
+
+    vectors: list[list[float]] = []
+    for item in data:
+        if not isinstance(item, dict):
+            msg = "Embedding item is not an object"
+            raise TypeError(msg)
+        embedding = item["embedding"]
+        if not isinstance(embedding, list):
+            msg = "Embedding value is not a list"
+            raise TypeError(msg)
+        vectors.append([float(value) for value in embedding])
+
+    if expected_count is not None and len(vectors) != expected_count:
+        msg = f"Expected {expected_count} embeddings, got {len(vectors)}"
+        raise ValueError(msg)
+    return vectors
+
+
+def request_rerank(
+    query: str,
+    documents: Sequence[str],
+    config: RerankConfig,
+) -> object | None:
+    """Request rerank scores from the configured vLLM endpoint.
+
+    Unlike the embedding and chat helpers, this call sends no authorization
+    header and lets HTTP errors propagate unwrapped.
+
+    Args:
+        query: Text sent as the ``query`` field.
+        documents: Candidate texts, sent in the given order.
+        config: Supplies ``model``, ``base_url`` and ``timeout_seconds``.
+
+    Returns:
+        The decoded JSON body, or ``None`` when the body is not valid JSON.
+
+    Raises:
+        httpx.HTTPError: If the request fails or the status is an error.
+    """
+    payload = {
+        "model": config.model,
+        "query": query,
+        "documents": list(documents),
+    }
+    response = httpx.post(
+        f"{config.base_url.rstrip('/')}/rerank",
+        json=payload,
+        timeout=config.timeout_seconds,
+    )
+    response.raise_for_status()
+    try:
+        return response.json()
+    except ValueError:
+        logger.debug("ebook_rerank_response_invalid_json", extra={"response": response.text})
+        return None
+
+
+def request_chat_completion(
+    config: EbookSearchConfig,
+    messages: Sequence[dict[str, str]],
+) -> str:
+    """Request a chat completion from the configured OpenAI-compatible endpoint.
+
+    Args:
+        config: Supplies ``vllm_base_url``, ``vllm_api_key`` and ``chat_model``.
+        messages: Chat messages forwarded as the ``messages`` field.
+
+    Returns:
+        The text content of the first choice; empty when it has no content.
+
+    Raises:
+        RuntimeError: If the HTTP call fails or the response is not valid JSON
+            or not shaped as expected. The original error is chained as the
+            cause.
+    """
+    try:
+        response = httpx.post(
+            f"{config.vllm_base_url.rstrip('/')}/chat/completions",
+            headers=auth_headers(config.vllm_api_key),
+            json={
+                "model": config.chat_model,
+                "messages": list(messages),
+                "temperature": 0,
+            },
+            timeout=60,
+        )
+        response.raise_for_status()
+        return chat_content_from_response(response.json())
+    except (httpx.HTTPError, ValueError, KeyError, TypeError) as error:
+        # Logged like the embedding failure path so both show up in the logs.
+        logger.exception(
+            "ebook_chat_request_failed base_url=%s model=%s",
+            config.vllm_base_url,
+            config.chat_model,
+        )
+        msg = f"Chat request failed. base_url={config.vllm_base_url} model={config.chat_model}"
+        raise RuntimeError(msg) from error
+
+
+def chat_content_from_response(body: object) -> str:
+    """Extract text content from an OpenAI-compatible chat response.
+
+    Args:
+        body: Decoded JSON body; expected to be an object whose ``choices``
+            list starts with an object holding a ``message`` object.
+
+    Returns:
+        The first choice's ``content``; missing or falsy content becomes an
+        empty string.
+
+    Raises:
+        TypeError: If the body, first choice, message, or content has the
+            wrong type.
+        KeyError: If ``choices`` or ``message`` is missing.
+        ValueError: If ``choices`` is not a non-empty list.
+    """
+    if not isinstance(body, dict):
+        msg = "Chat response is not an object"
+        raise TypeError(msg)
+
+    choices = body["choices"]
+    if not isinstance(choices, list) or not choices:
+        msg = "Chat response has no choices"
+        raise ValueError(msg)
+
+    first = choices[0]
+    if not isinstance(first, dict):
+        msg = "Chat choice is not an object"
+        raise TypeError(msg)
+
+    message = first["message"]
+    if not isinstance(message, dict):
+        msg = "Chat message is not an object"
+        raise TypeError(msg)
+
+    content = message.get("content") or ""
+    if not isinstance(content, str):
+        msg = "Chat content is not text"
+        raise TypeError(msg)
+    return content