added containers dir

This commit is contained in:
2026-04-10 20:48:24 -04:00
parent 4a10a80ba0
commit 259e952afc
6 changed files with 32 additions and 70 deletions

View File

@@ -34,7 +34,6 @@ DEFAULT_COUNT = 100
SEED = 42 SEED = 42
def load_bills(csv_path: Path, count: int) -> list[tuple[str, str]]: def load_bills(csv_path: Path, count: int) -> list[tuple[str, str]]:
"""Return up to `count` (bill_id, text_content) tuples with non-empty text.""" """Return up to `count` (bill_id, text_content) tuples with non-empty text."""
csv.field_size_limit(sys.maxsize) csv.field_size_limit(sys.maxsize)

View File

@@ -0,0 +1 @@
"""Prompt benchmarking system for evaluating LLMs via vLLM."""

View File

@@ -9,13 +9,13 @@ from typing import Annotated
import typer import typer
from python.prompt_bench.vllm_container import check_gpu_free from python.prompt_bench.containers.lib import check_gpu_free
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
CONTAINER_NAME = "bill-finetune" CONTAINER_NAME = "bill-finetune"
FINETUNE_IMAGE = "bill-finetune:latest" FINETUNE_IMAGE = "bill-finetune:latest"
DOCKERFILE_PATH = "python/prompt_bench/Dockerfile.finetune" DOCKERFILE_PATH = "/home/richie/dotfiles/python/prompt_bench/Dockerfile.finetune"
DEFAULT_HF_CACHE = Path("/zfs/models/hf") DEFAULT_HF_CACHE = Path("/zfs/models/hf")
@@ -38,13 +38,6 @@ def start_finetune(
dataset_path: Path, dataset_path: Path,
output_dir: Path, output_dir: Path,
hf_cache: Path = DEFAULT_HF_CACHE, hf_cache: Path = DEFAULT_HF_CACHE,
validation_split: float = 0.1,
epochs: int = 3,
batch_size: int = 2,
learning_rate: float = 2e-4,
lora_rank: int = 32,
max_seq_length: int = 4096,
save_gguf: bool = False,
) -> None: ) -> None:
"""Run the fine-tuning container. """Run the fine-tuning container.
@@ -53,12 +46,6 @@ def start_finetune(
output_dir: Host path where the trained model will be saved. output_dir: Host path where the trained model will be saved.
hf_cache: Host path to HuggingFace model cache (bind-mounted to avoid re-downloading). hf_cache: Host path to HuggingFace model cache (bind-mounted to avoid re-downloading).
validation_split: Fraction of data held out for validation. validation_split: Fraction of data held out for validation.
epochs: Number of training epochs.
batch_size: Per-device training batch size.
learning_rate: Learning rate for the optimizer.
lora_rank: LoRA adapter rank.
max_seq_length: Maximum sequence length for training.
save_gguf: Whether to also export a GGUF quantized model.
""" """
dataset_path = dataset_path.resolve() dataset_path = dataset_path.resolve()
output_dir = output_dir.resolve() output_dir = output_dir.resolve()
@@ -91,30 +78,11 @@ def start_finetune(
"/workspace/dataset.jsonl", "/workspace/dataset.jsonl",
"--output-dir", "--output-dir",
"/workspace/output/qwen-bill-summarizer", "/workspace/output/qwen-bill-summarizer",
"--val-split",
str(validation_split),
"--epochs",
str(epochs),
"--batch-size",
str(batch_size),
"--lr",
str(learning_rate),
"--lora-rank",
str(lora_rank),
"--max-seq-length",
str(max_seq_length),
] ]
if save_gguf:
command.append("--save-gguf")
logger.info("Starting fine-tuning container") logger.info("Starting fine-tuning container")
logger.info(" Dataset: %s", dataset_path) logger.info(" Dataset: %s", dataset_path)
logger.info(" Val split: %.0f%%", validation_split * 100)
logger.info(" Output: %s", output_dir) logger.info(" Output: %s", output_dir)
logger.info(" Epochs: %d", epochs)
logger.info(" Batch size: %d", batch_size)
logger.info(" LoRA rank: %d", lora_rank)
result = subprocess.run(command, text=True, check=False) result = subprocess.run(command, text=True, check=False)
if result.returncode != 0: if result.returncode != 0:
@@ -154,18 +122,13 @@ def build() -> None:
@app.command() @app.command()
def run( def run(
dataset: Annotated[Path, typer.Option(help="Fine-tuning JSONL")] = Path("output/finetune_dataset.jsonl"), dataset: Annotated[Path, typer.Option(help="Fine-tuning JSONL")] = Path(
"/home/richie/dotfiles/data/finetune_dataset.jsonl"
),
output_dir: Annotated[Path, typer.Option(help="Where to save the trained model")] = Path( output_dir: Annotated[Path, typer.Option(help="Where to save the trained model")] = Path(
"output/qwen-bill-summarizer", "/home/richie/dotfiles/data/output/qwen-bill-summarizer",
), ),
hf_cache: Annotated[Path, typer.Option(help="Host path to HuggingFace model cache")] = DEFAULT_HF_CACHE, hf_cache: Annotated[Path, typer.Option(help="Host path to HuggingFace model cache")] = DEFAULT_HF_CACHE,
validation_split: Annotated[float, typer.Option("--val-split", help="Fraction held out for validation")] = 0.1,
epochs: Annotated[int, typer.Option(help="Training epochs")] = 3,
batch_size: Annotated[int, typer.Option(help="Per-device batch size")] = 2,
learning_rate: Annotated[float, typer.Option("--lr", help="Learning rate")] = 2e-4,
lora_rank: Annotated[int, typer.Option(help="LoRA rank")] = 32,
max_seq_length: Annotated[int, typer.Option(help="Max sequence length")] = 4096,
save_gguf: Annotated[bool, typer.Option("--save-gguf/--no-save-gguf", help="Also save GGUF")] = False,
log_level: Annotated[str, typer.Option(help="Log level")] = "INFO", log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
) -> None: ) -> None:
"""Run fine-tuning inside a Docker container.""" """Run fine-tuning inside a Docker container."""
@@ -175,16 +138,8 @@ def run(
dataset_path=dataset, dataset_path=dataset,
output_dir=output_dir, output_dir=output_dir,
hf_cache=hf_cache, hf_cache=hf_cache,
validation_split=validation_split,
epochs=epochs,
batch_size=batch_size,
learning_rate=learning_rate,
lora_rank=lora_rank,
max_seq_length=max_seq_length,
save_gguf=save_gguf,
) )
@app.command() @app.command()
def stop() -> None: def stop() -> None:
"""Stop and remove the fine-tuning container.""" """Stop and remove the fine-tuning container."""

View File

@@ -0,0 +1,23 @@
from __future__ import annotations

import logging
import subprocess

logger = logging.getLogger(__name__)


def check_gpu_free() -> None:
    """Warn if GPU-heavy processes (e.g. Ollama) are running.

    Best-effort check: queries `nvidia-smi` for active compute apps and logs
    warnings. Never raises, so callers can invoke it unconditionally before
    starting a GPU workload — including on hosts without an NVIDIA driver.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-compute-apps=pid,process_name", "--format=csv,noheader"],
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        # nvidia-smi binary not present (no NVIDIA driver on this host);
        # there is nothing to check, so warn and bail out instead of crashing.
        logger.warning("nvidia-smi not found; skipping GPU process check")
        return
    if result.returncode != 0:
        # nvidia-smi exists but failed (e.g. driver/library mismatch).
        logger.warning("Could not query GPU processes: %s", result.stderr.strip())
        return
    processes = result.stdout.strip()
    if processes:
        # Non-empty CSV output means something already holds the GPU.
        logger.warning("GPU processes detected:\n%s", processes)
        logger.warning("Consider stopping Ollama (sudo systemctl stop ollama) before benchmarking")

View File

@@ -68,20 +68,3 @@ def stop_vllm() -> None:
check=False, check=False,
) )
logger.info("vLLM container stopped and removed") logger.info("vLLM container stopped and removed")
def check_gpu_free() -> None:
"""Warn if GPU-heavy processes (e.g. Ollama) are running."""
result = subprocess.run(
["nvidia-smi", "--query-compute-apps=pid,process_name", "--format=csv,noheader"],
capture_output=True,
text=True,
check=False,
)
if result.returncode != 0:
logger.warning("Could not query GPU processes: %s", result.stderr.strip())
return
processes = result.stdout.strip()
if processes:
logger.warning("GPU processes detected:\n%s", processes)
logger.warning("Consider stopping Ollama (sudo systemctl stop ollama) before benchmarking")

View File

@@ -12,7 +12,8 @@ from typing import Annotated
import typer import typer
from python.prompt_bench.vllm_container import check_gpu_free, start_vllm, stop_vllm from python.prompt_bench.containers.lib import check_gpu_free
from python.prompt_bench.containers.vllm import start_vllm, stop_vllm
from python.prompt_bench.downloader import is_model_present from python.prompt_bench.downloader import is_model_present
from python.prompt_bench.models import BenchmarkConfig from python.prompt_bench.models import BenchmarkConfig
from python.prompt_bench.vllm_client import VLLMClient from python.prompt_bench.vllm_client import VLLMClient