"""Flexible LLM client for ollama backends.""" from __future__ import annotations import base64 import logging from typing import Any import httpx logger = logging.getLogger(__name__) class LLMClient: """Talk to an ollama instance. Args: model: Ollama model name. host: Ollama host. port: Ollama port. temperature: Sampling temperature. """ def __init__(self, model: str, host: str, port: int = 11434, *, temperature: float = 0.1) -> None: self.model = model self.temperature = temperature self._client = httpx.Client(base_url=f"http://{host}:{port}", timeout=120) def chat(self, prompt: str, *, system: str = "") -> str: """Send a text prompt and return the response.""" messages: list[dict[str, Any]] = [] if system: messages.append({"role": "system", "content": system}) messages.append({"role": "user", "content": prompt}) return self._generate(messages) def chat_with_image(self, prompt: str, image_data: bytes, *, system: str = "") -> str: """Send a prompt with an image and return the response. Requires a vision-capable model (e.g. qwen3-vl). """ encoded = base64.b64encode(image_data).decode() messages: list[dict[str, Any]] = [] if system: messages.append({"role": "system", "content": system}) messages.append({"role": "user", "content": prompt, "images": [encoded]}) return self._generate(messages) def _generate(self, messages: list[dict[str, Any]]) -> str: """Call the ollama chat API.""" payload = { "model": self.model, "messages": messages, "stream": False, "options": {"temperature": self.temperature}, } logger.info(f"LLM request to {self.model}") response = self._client.post("/api/chat", json=payload) response.raise_for_status() data = response.json() return data["message"]["content"] def list_models(self) -> list[str]: """List available models on the ollama instance.""" response = self._client.get("/api/tags") response.raise_for_status() return [m["name"] for m in response.json().get("models", [])] def close(self) -> None: """Close the HTTP client.""" self._client.close()