From 259e952afcd801689466482b1e58ab4963596021 Mon Sep 17 00:00:00 2001 From: Richie Cahill Date: Fri, 10 Apr 2026 20:48:24 -0400 Subject: [PATCH] added containers dir --- python/prompt_bench/compresion_test.py | 1 - python/prompt_bench/containers/__init__.py | 1 + .../finetune.py} | 57 ++----------------- python/prompt_bench/containers/lib.py | 23 ++++++++ .../{vllm_container.py => containers/vllm.py} | 17 ------ python/prompt_bench/main.py | 3 +- 6 files changed, 32 insertions(+), 70 deletions(-) create mode 100644 python/prompt_bench/containers/__init__.py rename python/prompt_bench/{finetune_container.py => containers/finetune.py} (68%) create mode 100644 python/prompt_bench/containers/lib.py rename python/prompt_bench/{vllm_container.py => containers/vllm.py} (76%) diff --git a/python/prompt_bench/compresion_test.py b/python/prompt_bench/compresion_test.py index 8d1b40f..801933e 100644 --- a/python/prompt_bench/compresion_test.py +++ b/python/prompt_bench/compresion_test.py @@ -34,7 +34,6 @@ DEFAULT_COUNT = 100 SEED = 42 - def load_bills(csv_path: Path, count: int) -> list[tuple[str, str]]: """Return up to `count` (bill_id, text_content) tuples with non-empty text.""" csv.field_size_limit(sys.maxsize) diff --git a/python/prompt_bench/containers/__init__.py b/python/prompt_bench/containers/__init__.py new file mode 100644 index 0000000..dc58a44 --- /dev/null +++ b/python/prompt_bench/containers/__init__.py @@ -0,0 +1 @@ +"""Prompt benchmarking system for evaluating LLMs via vLLM.""" diff --git a/python/prompt_bench/finetune_container.py b/python/prompt_bench/containers/finetune.py similarity index 68% rename from python/prompt_bench/finetune_container.py rename to python/prompt_bench/containers/finetune.py index 42f5444..cc20ae6 100644 --- a/python/prompt_bench/finetune_container.py +++ b/python/prompt_bench/containers/finetune.py @@ -9,13 +9,13 @@ from typing import Annotated import typer -from python.prompt_bench.vllm_container import check_gpu_free +from python.prompt_bench.containers.lib import check_gpu_free logger = logging.getLogger(__name__) CONTAINER_NAME = "bill-finetune" FINETUNE_IMAGE = "bill-finetune:latest" -DOCKERFILE_PATH = "python/prompt_bench/Dockerfile.finetune" +DOCKERFILE_PATH = "/home/richie/dotfiles/python/prompt_bench/Dockerfile.finetune" DEFAULT_HF_CACHE = Path("/zfs/models/hf") @@ -38,13 +38,6 @@ def start_finetune( dataset_path: Path, output_dir: Path, hf_cache: Path = DEFAULT_HF_CACHE, - validation_split: float = 0.1, - epochs: int = 3, - batch_size: int = 2, - learning_rate: float = 2e-4, - lora_rank: int = 32, - max_seq_length: int = 4096, - save_gguf: bool = False, ) -> None: """Run the fine-tuning container. @@ -53,12 +46,6 @@ def start_finetune( output_dir: Host path where the trained model will be saved. hf_cache: Host path to HuggingFace model cache (bind-mounted to avoid re-downloading). validation_split: Fraction of data held out for validation. - epochs: Number of training epochs. - batch_size: Per-device training batch size. - learning_rate: Learning rate for the optimizer. - lora_rank: LoRA adapter rank. - max_seq_length: Maximum sequence length for training. - save_gguf: Whether to also export a GGUF quantized model. """ dataset_path = dataset_path.resolve() output_dir = output_dir.resolve() @@ -91,30 +78,11 @@ def start_finetune( "/workspace/dataset.jsonl", "--output-dir", "/workspace/output/qwen-bill-summarizer", - "--val-split", - str(validation_split), - "--epochs", - str(epochs), - "--batch-size", - str(batch_size), - "--lr", - str(learning_rate), - "--lora-rank", - str(lora_rank), - "--max-seq-length", - str(max_seq_length), ] - if save_gguf: - command.append("--save-gguf") - logger.info("Starting fine-tuning container") logger.info(" Dataset: %s", dataset_path) - logger.info(" Val split: %.0f%%", validation_split * 100) logger.info(" Output: %s", output_dir) - logger.info(" Epochs: %d", epochs) - logger.info(" Batch size: %d", batch_size) - logger.info(" LoRA rank: %d", lora_rank) result = subprocess.run(command, text=True, check=False) if result.returncode != 0: @@ -154,18 +122,13 @@ def build() -> None: @app.command() def run( - dataset: Annotated[Path, typer.Option(help="Fine-tuning JSONL")] = Path("output/finetune_dataset.jsonl"), + dataset: Annotated[Path, typer.Option(help="Fine-tuning JSONL")] = Path( + "/home/richie/dotfiles/data/finetune_dataset.jsonl" + ), output_dir: Annotated[Path, typer.Option(help="Where to save the trained model")] = Path( - "output/qwen-bill-summarizer", + "/home/richie/dotfiles/data/output/qwen-bill-summarizer", ), hf_cache: Annotated[Path, typer.Option(help="Host path to HuggingFace model cache")] = DEFAULT_HF_CACHE, - validation_split: Annotated[float, typer.Option("--val-split", help="Fraction held out for validation")] = 0.1, - epochs: Annotated[int, typer.Option(help="Training epochs")] = 3, - batch_size: Annotated[int, typer.Option(help="Per-device batch size")] = 2, - learning_rate: Annotated[float, typer.Option("--lr", help="Learning rate")] = 2e-4, - lora_rank: Annotated[int, typer.Option(help="LoRA rank")] = 32, - max_seq_length: Annotated[int, typer.Option(help="Max sequence length")] = 4096, - save_gguf: Annotated[bool, typer.Option("--save-gguf/--no-save-gguf", help="Also save GGUF")] = False, log_level: Annotated[str, typer.Option(help="Log level")] = "INFO", ) -> None: """Run fine-tuning inside a Docker container.""" @@ -175,16 +138,8 @@ def run( dataset_path=dataset, output_dir=output_dir, hf_cache=hf_cache, - validation_split=validation_split, - epochs=epochs, - batch_size=batch_size, - learning_rate=learning_rate, - lora_rank=lora_rank, - max_seq_length=max_seq_length, - save_gguf=save_gguf, ) - @app.command() def stop() -> None: """Stop and remove the fine-tuning container.""" diff --git a/python/prompt_bench/containers/lib.py b/python/prompt_bench/containers/lib.py new file mode 100644 index 0000000..b1ba5c4 --- /dev/null +++ b/python/prompt_bench/containers/lib.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import logging +import subprocess + +logger = logging.getLogger(__name__) + + +def check_gpu_free() -> None: + """Warn if GPU-heavy processes (e.g. Ollama) are running.""" + result = subprocess.run( + ["nvidia-smi", "--query-compute-apps=pid,process_name", "--format=csv,noheader"], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + logger.warning("Could not query GPU processes: %s", result.stderr.strip()) + return + processes = result.stdout.strip() + if processes: + logger.warning("GPU processes detected:\n%s", processes) + logger.warning("Consider stopping Ollama (sudo systemctl stop ollama) before benchmarking") diff --git a/python/prompt_bench/vllm_container.py b/python/prompt_bench/containers/vllm.py similarity index 76% rename from python/prompt_bench/vllm_container.py rename to python/prompt_bench/containers/vllm.py index f29ce7d..33e7e31 100644 --- a/python/prompt_bench/vllm_container.py +++ b/python/prompt_bench/containers/vllm.py @@ -68,20 +68,3 @@ def stop_vllm() -> None: check=False, ) logger.info("vLLM container stopped and removed") - - -def check_gpu_free() -> None: - """Warn if GPU-heavy processes (e.g. Ollama) are running.""" - result = subprocess.run( - ["nvidia-smi", "--query-compute-apps=pid,process_name", "--format=csv,noheader"], - capture_output=True, - text=True, - check=False, - ) - if result.returncode != 0: - logger.warning("Could not query GPU processes: %s", result.stderr.strip()) - return - processes = result.stdout.strip() - if processes: - logger.warning("GPU processes detected:\n%s", processes) - logger.warning("Consider stopping Ollama (sudo systemctl stop ollama) before benchmarking") diff --git a/python/prompt_bench/main.py b/python/prompt_bench/main.py index cb2e7d5..50bd04e 100644 --- a/python/prompt_bench/main.py +++ b/python/prompt_bench/main.py @@ -12,7 +12,8 @@ from typing import Annotated import typer -from python.prompt_bench.vllm_container import check_gpu_free, start_vllm, stop_vllm +from python.prompt_bench.containers.lib import check_gpu_free +from python.prompt_bench.containers.vllm import start_vllm, stop_vllm from python.prompt_bench.downloader import is_model_present from python.prompt_bench.models import BenchmarkConfig from python.prompt_bench.vllm_client import VLLMClient