moved containers dir and created docker_files dir

2026-04-28 22:36:13 -04:00
parent 3056c19f69
commit 09f7f0187f
8 changed files with 286 additions and 27 deletions
@@ -1,25 +0,0 @@
-# Unsloth fine-tuning container for Qwen 3.5 4B on RTX 3090.
-#
-# Build (the COPY source paths below are relative to the build context "."):
-#   docker build -f python/prompt_bench/Dockerfile.finetune -t bill-finetune .
-#
-# Run (arguments after the image name are appended to ENTRYPOINT):
-#   docker run --rm --device=nvidia.com/gpu=all --ipc=host \
-#     -v $(pwd)/output:/workspace/output \
-#     -v $(pwd)/output/finetune_dataset.jsonl:/workspace/dataset.jsonl:ro \
-#     -v /zfs/models/hf:/models \
-#     bill-finetune \
-#     --dataset /workspace/dataset.jsonl \
-#     --output-dir /workspace/output/qwen-bill-summarizer
-
-FROM ghcr.io/unslothai/unsloth:latest
-
-RUN pip install --no-cache-dir typer
-
-WORKDIR /workspace
-COPY python/prompt_bench/finetune.py python/prompt_bench/finetune.py
-COPY config/prompts/summarization_prompts.toml config/prompts/summarization_prompts.toml
-COPY python/prompt_bench/__init__.py python/prompt_bench/__init__.py
-COPY python/__init__.py python/__init__.py
-# NOTE(review): sources are copied under python/, but the module path below starts with pipelines — confirm it is importable.
-ENTRYPOINT ["python", "-m", "pipelines.prompt_bench.finetune"]
@@ -1 +0,0 @@
-"""Prompt benchmarking system for evaluating LLMs via vLLM."""
@@ -1,179 +0,0 @@
-"""Docker container lifecycle management for Unsloth fine-tuning."""
-
-from __future__ import annotations
-
-import logging
-import subprocess
-from pathlib import Path
-from typing import Annotated
-
-import typer
-
-from pipelines.tools.containers.lib import check_gpu_free
-
-logger = logging.getLogger(__name__)
-
-CONTAINER_NAME = "bill-finetune"  # container name used by docker run/stop/rm/logs
-FINETUNE_IMAGE = "bill-finetune:latest"  # tag used when building and running the image
-REPO_DIR = Path(__file__).resolve().parents[4]  # assumed to be the repo root — TODO confirm depth
-DEFAULT_HF_CACHE = Path("/zfs/models/hf")  # default host-side HuggingFace cache directory
-
-
-def build_image() -> None:
-    """Build the fine-tuning Docker image and tag it as ``FINETUNE_IMAGE``.
-
-    The repository root is passed as the build context, so the Dockerfile's
-    COPY paths resolve no matter which directory the command is run from.
-    Docker's output is not captured; it goes to the parent's stdout/stderr.
-
-    Raises:
-        RuntimeError: If ``docker build`` exits with a non-zero status.
-    """
-    dockerfile = REPO_DIR / "python/prompt_bench/Dockerfile.finetune"
-    # The last argument is the build context: the repo root, not the cwd.
-    command = ["docker", "build", "-f", str(dockerfile), "-t", FINETUNE_IMAGE]
-    command.append(str(REPO_DIR))
-    logger.info("Building fine-tuning image: %s", FINETUNE_IMAGE)
-    result = subprocess.run(command, text=True, check=False)
-    if result.returncode != 0:
-        message = "Failed to build fine-tuning image"
-        raise RuntimeError(message)
-    logger.info("Image built: %s", FINETUNE_IMAGE)
-
-
-def start_finetune(
-    *,
-    dataset_path: Path,
-    output_dir: Path,
-    hf_cache: Path = DEFAULT_HF_CACHE,
-) -> None:
-    """Launch the fine-tuning container and block until it exits.
-
-    Any container left over from an earlier run is stopped and removed first.
-    The dataset is bind-mounted read-only; the output and cache directories
-    are created on the host when missing.
-
-    Args:
-        dataset_path: Host path to the fine-tuning JSONL dataset.
-        output_dir: Host directory that receives the trained model.
-        hf_cache: Host HuggingFace cache, mounted at /root/.cache/huggingface.
-
-    Raises:
-        FileNotFoundError: If ``dataset_path`` is not an existing file.
-        RuntimeError: If the container exits with a non-zero status.
-    """
-    host_dataset = dataset_path.resolve()
-    host_output = output_dir.resolve()
-    if not host_dataset.is_file():
-        message = f"Dataset not found: {host_dataset}"
-        raise FileNotFoundError(message)
-
-    host_output.mkdir(parents=True, exist_ok=True)
-    stop_finetune()
-
-    host_cache = hf_cache.resolve()
-    host_cache.mkdir(parents=True, exist_ok=True)
-
-    # Paths as seen from inside the container.
-    container_dataset = "/workspace/dataset.jsonl"
-    container_output = "/workspace/output/qwen-bill-summarizer"
-    mounts = [
-        f"{host_cache}:/root/.cache/huggingface",
-        f"{host_output}:{container_output}",
-        f"{host_dataset}:{container_dataset}:ro",
-    ]
-    command = ["docker", "run", "--name", CONTAINER_NAME]
-    command += ["--device=nvidia.com/gpu=all", "--ipc=host"]
-    for mount in mounts:
-        command += ["-v", mount]
-    command += [FINETUNE_IMAGE, "--dataset", container_dataset, "--output-dir", container_output]
-
-    logger.info("Starting fine-tuning container")
-    logger.info("  Dataset:    %s", host_dataset)
-    logger.info("  Output:     %s", host_output)
-
-    exit_code = subprocess.run(command, text=True, check=False).returncode
-    if exit_code != 0:
-        message = f"Fine-tuning container exited with code {exit_code}"
-        raise RuntimeError(message)
-    logger.info("Fine-tuning complete. Model saved to %s", host_output)
-
-
-def stop_finetune() -> None:
-    """Stop the fine-tuning container, then force-remove it (best effort)."""
-    logger.info("Stopping fine-tuning container")
-    # Output is captured and exit codes are ignored: a missing container is fine.
-    for action in (["stop"], ["rm", "-f"]):
-        docker_command = ["docker", *action, CONTAINER_NAME]
-        subprocess.run(docker_command, capture_output=True, check=False)
-
-
-def logs_finetune() -> str | None:
-    """Return the last 50 log lines of the fine-tuning container.
-
-    Stdout and stderr are concatenated. Returns None when ``docker logs``
-    exits non-zero, e.g. because no container with that name exists.
-    """
-    command = ["docker", "logs", "--tail", "50", CONTAINER_NAME]
-    completed = subprocess.run(command, capture_output=True, text=True, check=False)
-    if completed.returncode == 0:
-        return completed.stdout + completed.stderr
-    return None
-
-
-app = typer.Typer(help="Fine-tuning container management.")
-
-
-@app.command()
-def build() -> None:
-    """Build the fine-tuning Docker image."""
-    build_image()  # raises RuntimeError when docker build fails
-
-
-@app.command()
-def run(
-    dataset: Annotated[Path, typer.Option(help="Fine-tuning JSONL")] = REPO_DIR
-    / "data/finetune_dataset.jsonl",
-    output_dir: Annotated[
-        Path, typer.Option(help="Where to save the trained model")
-    ] = REPO_DIR / "data/output/qwen-bill-summarizer",
-    hf_cache: Annotated[
-        Path, typer.Option(help="Host path to HuggingFace model cache")
-    ] = DEFAULT_HF_CACHE,
-    log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
-) -> None:
-    """Run fine-tuning inside a Docker container."""
-    # logging only recognises upper-case level names; accept "info" as well as "INFO".
-    log_format = "%(asctime)s %(levelname)s %(name)s: %(message)s"
-    logging.basicConfig(level=log_level.upper(), format=log_format)
-    check_gpu_free()
-    start_finetune(
-        dataset_path=dataset,
-        output_dir=output_dir,
-        hf_cache=hf_cache,
-    )
-
-
-@app.command()
-def stop() -> None:
-    """Stop and remove the fine-tuning container."""
-    stop_finetune()  # best effort: docker failures are ignored
-
-
-@app.command()
-def logs() -> None:
-    """Show recent logs from the fine-tuning container."""
-    if (recent := logs_finetune()) is not None:
-        typer.echo(recent)
-        return
-    typer.echo("No running fine-tuning container found.")
-    raise typer.Exit(code=1)
-
-
-def cli() -> None:
-    """Typer entry point."""
-    app()  # dispatches to the build/run/stop/logs commands registered above
-
-
-if __name__ == "__main__":
-    cli()
@@ -1,23 +0,0 @@
-from __future__ import annotations
-
-import logging
-import subprocess
-
-logger = logging.getLogger(__name__)
-
-
-def check_gpu_free() -> None:
-    """Warn if GPU compute processes (e.g. Ollama) are running; advisory only."""
-    query = ["nvidia-smi", "--query-compute-apps=pid,process_name", "--format=csv,noheader"]
-    try:
-        result = subprocess.run(query, capture_output=True, text=True, check=False)
-    except OSError as error:  # e.g. FileNotFoundError: nvidia-smi is not on PATH
-        logger.warning("Could not query GPU processes: %s", error)
-        return
-    if result.returncode != 0:
-        logger.warning("Could not query GPU processes: %s", result.stderr.strip())
-        return
-    # Empty output means no compute process is currently using the GPU.
-    if processes := result.stdout.strip():
-        logger.warning("GPU processes detected:\n%s", processes)
-        logger.warning("Consider stopping Ollama (sudo systemctl stop ollama) before benchmarking")
@@ -1,70 +0,0 @@
-"""Docker container lifecycle management for vLLM."""
-
-from __future__ import annotations
-
-import logging
-import subprocess
-
-logger = logging.getLogger(__name__)
-
-CONTAINER_NAME = "vllm-bench"  # container name used by docker run/stop/rm
-VLLM_IMAGE = "vllm/vllm-openai:v0.19.0"  # pinned image tag passed to docker run
-
-
-def start_vllm(
-    *,
-    model: str,
-    port: int,
-    model_dir: str,
-    gpu_memory_utilization: float,
-    max_model_len: int = 4096,
-) -> None:
-    """Start a detached vLLM container serving the given model.
-
-    Any existing container named ``CONTAINER_NAME`` is stopped and removed first.
-
-    Args:
-        model: HuggingFace model directory name (relative to model_dir).
-        port: Host port to bind.
-        model_dir: Host path containing HuggingFace model directories.
-        gpu_memory_utilization: Fraction of GPU memory to use (0-1).
-        max_model_len: Value passed to vLLM's ``--max-model-len`` flag.
-
-    Raises:
-        RuntimeError: If ``docker run`` exits with a non-zero status.
-    """
-    docker_options = [
-        "-d",
-        "--name",
-        CONTAINER_NAME,
-        "--device=nvidia.com/gpu=all",
-        "--ipc=host",
-        "-v",
-        f"{model_dir}:/models",
-        "-p",
-        f"{port}:8000",  # host port -> container port 8000
-    ]
-    server_args = ["--model", f"/models/{model}", "--served-model-name", model]
-    server_args += ["--gpu-memory-utilization", str(gpu_memory_utilization)]
-    server_args += ["--max-model-len", str(max_model_len)]
-    command = ["docker", "run", *docker_options, VLLM_IMAGE, *server_args]
-    logger.info("Starting vLLM container with model: %s", model)
-    stop_vllm()
-    result = subprocess.run(command, capture_output=True, text=True, check=False)
-    if result.returncode != 0:
-        msg = f"Failed to start vLLM container: {result.stderr.strip()}"
-        raise RuntimeError(msg)
-    logger.info("vLLM container started: %s", result.stdout.strip()[:12])
-
-
-def stop_vllm() -> None:
-    """Stop and remove the vLLM benchmark container (best effort).
-
-    Every docker call ignores failures. The final forced disconnect from the
-    ``bridge`` network presumably clears a stale endpoint — TODO confirm.
-    """
-    logger.info("Stopping vLLM container")
-    cleanup_steps = (["stop"], ["rm", "-f"], ["network", "disconnect", "-f", "bridge"])
-    for step in cleanup_steps:
-        subprocess.run(["docker", *step, CONTAINER_NAME], capture_output=True, check=False)
-    logger.info("vLLM container stopped and removed")
@@ -1 +0,0 @@
-"""Prompt benchmarking system for evaluating LLMs via vLLM."""