start
This commit is contained in:
68
prompt_bench/vllm_client.py
Normal file
68
prompt_bench/vllm_client.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""OpenAI-compatible client for vLLM's API."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Self
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
READY_POLL_INTERVAL = 2.0
|
||||
|
||||
|
||||
class VLLMClient:
|
||||
"""Talk to a vLLM server via its OpenAI-compatible API.
|
||||
|
||||
Args:
|
||||
host: vLLM host.
|
||||
port: vLLM port.
|
||||
timeout: Per-request timeout in seconds.
|
||||
"""
|
||||
|
||||
def __init__(self, *, host: str = "localhost", port: int = 8000, timeout: int = 300) -> None:
|
||||
"""Create a client connected to a vLLM server."""
|
||||
self._client = httpx.Client(base_url=f"http://{host}:{port}", timeout=timeout)
|
||||
|
||||
def wait_ready(self, max_wait: int) -> None:
|
||||
"""Poll /v1/models until the server is ready or timeout."""
|
||||
deadline = time.monotonic() + max_wait
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
response = self._client.get("/v1/models")
|
||||
if response.is_success:
|
||||
logger.info("vLLM server is ready")
|
||||
return
|
||||
except httpx.TransportError:
|
||||
pass
|
||||
time.sleep(READY_POLL_INTERVAL)
|
||||
msg = f"vLLM server not ready after {max_wait}s"
|
||||
raise TimeoutError(msg)
|
||||
|
||||
def complete(self, prompt: str, model: str, *, temperature: float = 0.0, max_tokens: int = 4096) -> str:
|
||||
"""Send a prompt to /v1/completions and return the response text."""
|
||||
payload = {
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"temperature": temperature,
|
||||
"max_tokens": max_tokens,
|
||||
}
|
||||
logger.info("Sending prompt to %s (%d chars)", model, len(prompt))
|
||||
response = self._client.post("/v1/completions", json=payload)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data["choices"][0]["text"]
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the HTTP client."""
|
||||
self._client.close()
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
"""Enter the context manager."""
|
||||
return self
|
||||
|
||||
def __exit__(self, *args: object) -> None:
|
||||
"""Close the HTTP client on exit."""
|
||||
self.close()
|
||||
Reference in New Issue
Block a user