From 721526022ba0598295138baa3c7bd0ee015c9d24 Mon Sep 17 00:00:00 2001
From: Richie Cahill
Date: Fri, 10 Apr 2026 12:56:57 -0400
Subject: [PATCH] created working finetuning pipeline

---
 python/prompt_bench/Dockerfile.finetune   |  25 +++
 python/prompt_bench/finetune.py           | 190 ++++++++++++++++++++
 python/prompt_bench/finetune_container.py | 210 ++++++++++++++++++++++
 python/prompt_bench/train.sh              |  45 +++++
 4 files changed, 470 insertions(+)
 create mode 100644 python/prompt_bench/Dockerfile.finetune
 create mode 100644 python/prompt_bench/finetune.py
 create mode 100644 python/prompt_bench/finetune_container.py
 create mode 100644 python/prompt_bench/train.sh

diff --git a/python/prompt_bench/Dockerfile.finetune b/python/prompt_bench/Dockerfile.finetune
new file mode 100644
index 0000000..bed68dc
--- /dev/null
+++ b/python/prompt_bench/Dockerfile.finetune
@@ -0,0 +1,25 @@
+# Unsloth fine-tuning container for Qwen3 4B on RTX 3090.
+#
+# Build:
+#   docker build -f python/prompt_bench/Dockerfile.finetune -t bill-finetune .
+#
+# Run:
+#   docker run --rm --device=nvidia.com/gpu=all --ipc=host \
+#     -v $(pwd)/output:/workspace/output \
+#     -v $(pwd)/output/finetune_dataset.jsonl:/workspace/dataset.jsonl:ro \
+#     -v /zfs/models/hf:/root/.cache/huggingface \
+#     bill-finetune \
+#     --dataset /workspace/dataset.jsonl \
+#     --output-dir /workspace/output/qwen-bill-summarizer
+
+FROM ghcr.io/unslothai/unsloth:latest
+
+RUN pip install --no-cache-dir typer
+
+WORKDIR /workspace
+COPY python/prompt_bench/finetune.py python/prompt_bench/finetune.py
+COPY python/prompt_bench/summarization_prompts.py python/prompt_bench/summarization_prompts.py
+COPY python/prompt_bench/__init__.py python/prompt_bench/__init__.py
+COPY python/__init__.py python/__init__.py
+
+ENTRYPOINT ["python", "-m", "python.prompt_bench.finetune"]
diff --git a/python/prompt_bench/finetune.py b/python/prompt_bench/finetune.py
new file mode 100644
index 0000000..30ae3d3
--- /dev/null
+++ b/python/prompt_bench/finetune.py
@@ -0,0 +1,190 @@
+"""Fine-tune Qwen3 4B on bill summarization data using Unsloth.
+
+Loads a ChatML-style JSONL dataset (system/user/assistant messages),
+applies QLoRA with 4-bit quantization, and saves the merged model
+in HuggingFace format. Designed for a single RTX 3090 (24GB).
+
+Usage:
+    python -m python.prompt_bench.finetune \
+        --dataset output/finetune_dataset.jsonl \
+        --output-dir output/qwen-bill-summarizer
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+from typing import Annotated
+
+import typer
+from unsloth import FastLanguageModel  # NOTE(review): keep before transformers/trl -- unsloth conventionally patches them on import; confirm
+from datasets import Dataset
+from transformers import TrainingArguments
+from trl import SFTTrainer
+
+logger = logging.getLogger(__name__)
+
+BASE_MODEL = "unsloth/Qwen3-4B-Base-unsloth-bnb-4bit"  # pre-quantized (bnb 4-bit) Qwen3 4B base
+
+# LoRA hyperparameters
+LORA_RANK = 32
+LORA_ALPHA = 32  # alpha == default rank, i.e. LoRA scaling alpha/r = 1
+LORA_DROPOUT = 0.0
+LORA_TARGETS = [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "gate_proj",
+    "up_proj",
+    "down_proj",
+]
+
+# Training hyperparameters tuned for ~2k examples on a 3090
+LEARNING_RATE = 2e-4
+EPOCHS = 3
+BATCH_SIZE = 2
+GRADIENT_ACCUMULATION = 8  # effective batch = 16
+MAX_SEQ_LENGTH = 4096
+WARMUP_RATIO = 0.05
+WEIGHT_DECAY = 0.01
+LOGGING_STEPS = 10
+SAVE_STEPS = 100  # also reused as eval_steps below so eval aligns with checkpoints
+
+
+def _messages_to_chatml(messages: list[dict]) -> str:
+    r"""Convert a message list to Qwen ChatML format.
+
+    Produces:
+        <|im_start|>system\n...\n<|im_end|>
+        <|im_start|>user\n...\n<|im_end|>
+        <|im_start|>assistant\n...\n<|im_end|>
+    """
+    parts = []
+    for message in messages:
+        role = message["role"]
+        content = message["content"]
+        parts.append(f"<|im_start|>{role}\n{content}<|im_end|>")
+    return "\n".join(parts)
+
+
+def load_dataset_from_jsonl(path: Path) -> Dataset:
+    """Load a ChatML JSONL file into a HuggingFace Dataset.
+
+    Each line must have {"messages": [{"role": ..., "content": ...}, ...]}.
+    Pre-formats into a `text` column with the Qwen ChatML template applied,
+    which SFTTrainer consumes directly.
+    """
+    records = []
+    with path.open(encoding="utf-8") as handle:
+        for raw_line in handle:
+            stripped = raw_line.strip()
+            if stripped:  # skip blank lines
+                entry = json.loads(stripped)
+                records.append({"text": _messages_to_chatml(entry["messages"])})
+    logger.info("Loaded %d examples from %s", len(records), path)
+    return Dataset.from_list(records)
+
+
+def main(
+    dataset_path: Annotated[Path, typer.Option("--dataset", help="Fine-tuning JSONL")] = Path(
+        "output/finetune_dataset.jsonl",
+    ),
+    validation_split: Annotated[float, typer.Option("--val-split", help="Fraction held out for validation")] = 0.1,
+    output_dir: Annotated[Path, typer.Option("--output-dir", help="Where to save the merged model")] = Path(
+        "output/qwen-bill-summarizer",
+    ),
+    base_model: Annotated[str, typer.Option("--base-model", help="Unsloth model ID")] = BASE_MODEL,
+    epochs: Annotated[int, typer.Option("--epochs", help="Training epochs")] = EPOCHS,
+    batch_size: Annotated[int, typer.Option("--batch-size", help="Per-device batch size")] = BATCH_SIZE,
+    learning_rate: Annotated[float, typer.Option("--lr", help="Learning rate")] = LEARNING_RATE,
+    lora_rank: Annotated[int, typer.Option("--lora-rank", help="LoRA rank")] = LORA_RANK,
+    max_seq_length: Annotated[int, typer.Option("--max-seq-length", help="Max sequence length")] = MAX_SEQ_LENGTH,
+    save_gguf: Annotated[bool, typer.Option("--save-gguf/--no-save-gguf", help="Also save GGUF")] = False,
+) -> None:
+    """Fine-tune Qwen3 4B on bill summarization with Unsloth + QLoRA."""
+    logging.basicConfig(level="INFO", format="%(asctime)s %(levelname)s %(name)s: %(message)s")
+
+    if not dataset_path.is_file():
+        message = f"Dataset not found: {dataset_path}"
+        raise typer.BadParameter(message)
+
+    logger.info("Loading base model: %s", base_model)
+    model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name=base_model,
+        max_seq_length=max_seq_length,
+        load_in_4bit=True,  # QLoRA: adapters train against a 4-bit quantized base
+        dtype=None,  # auto-detect compute dtype -- presumably bf16 on Ampere; confirm
+    )
+
+    logger.info("Applying LoRA (rank=%d, alpha=%d)", lora_rank, LORA_ALPHA)
+    model = FastLanguageModel.get_peft_model(
+        model,
+        r=lora_rank,
+        lora_alpha=LORA_ALPHA,
+        lora_dropout=LORA_DROPOUT,
+        target_modules=LORA_TARGETS,
+        bias="none",
+        use_gradient_checkpointing="unsloth",
+        random_state=42,
+    )
+
+    full_dataset = load_dataset_from_jsonl(dataset_path)
+    split = full_dataset.train_test_split(test_size=validation_split, seed=42)
+    train_dataset = split["train"]
+    validation_dataset = split["test"]
+    logger.info("Split: %d train, %d validation", len(train_dataset), len(validation_dataset))
+    training_args = TrainingArguments(
+        output_dir=str(output_dir / "checkpoints"),
+        num_train_epochs=epochs,
+        per_device_train_batch_size=batch_size,
+        gradient_accumulation_steps=GRADIENT_ACCUMULATION,
+        learning_rate=learning_rate,
+        warmup_ratio=WARMUP_RATIO,
+        weight_decay=WEIGHT_DECAY,
+        lr_scheduler_type="cosine",
+        logging_steps=LOGGING_STEPS,
+        save_steps=SAVE_STEPS,
+        save_total_limit=3,
+        eval_strategy="steps",
+        eval_steps=SAVE_STEPS,  # eval at every save so load_best_model_at_end can compare checkpoints
+        load_best_model_at_end=True,
+        bf16=True,
+        optim="adamw_8bit",  # 8-bit optimizer states to fit a 24GB card
+        seed=42,
+        report_to="none",
+    )
+
+    trainer = SFTTrainer(
+        model=model,
+        tokenizer=tokenizer,
+        train_dataset=train_dataset,
+        eval_dataset=validation_dataset,
+        args=training_args,
+        max_seq_length=max_seq_length,  # NOTE(review): tokenizer=/max_seq_length= kwargs assume Unsloth's patched TRL; newer vanilla TRL moved these to SFTConfig -- confirm image version
+        packing=True,
+    )
+
+    logger.info("Starting training: %d train, %d val, %d epochs", len(train_dataset), len(validation_dataset), epochs)
+    trainer.train()
+
+    merged_path = str(output_dir / "merged")
+    logger.info("Saving merged model to %s", merged_path)
+    model.save_pretrained_merged(merged_path, tokenizer, save_method="merged_16bit")
+
+    if save_gguf:
+        gguf_path = str(output_dir / "gguf")
+        logger.info("Saving GGUF to %s", gguf_path)
+        model.save_pretrained_gguf(gguf_path, tokenizer, quantization_method="q4_k_m")
+
+    logger.info("Done! Model saved to %s", output_dir)
+
+
+def cli() -> None:
+    """Typer entry point."""
+    typer.run(main)
+
+
+if __name__ == "__main__":
+    cli()
diff --git a/python/prompt_bench/finetune_container.py b/python/prompt_bench/finetune_container.py
new file mode 100644
index 0000000..5b8cb8e
--- /dev/null
+++ b/python/prompt_bench/finetune_container.py
@@ -0,0 +1,210 @@
+"""Docker container lifecycle management for Unsloth fine-tuning."""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+from python.prompt_bench.container import check_gpu_free
+
+logger = logging.getLogger(__name__)
+
+CONTAINER_NAME = "bill-finetune"
+FINETUNE_IMAGE = "bill-finetune:latest"
+DOCKERFILE_PATH = "python/prompt_bench/Dockerfile.finetune"
+DEFAULT_HF_CACHE = Path("/zfs/models/hf")
+
+
+def build_image() -> None:
+    """Build the fine-tuning Docker image."""
+    logger.info("Building fine-tuning image: %s", FINETUNE_IMAGE)
+    result = subprocess.run(
+        ["docker", "build", "-f", DOCKERFILE_PATH, "-t", FINETUNE_IMAGE, "."],
+        text=True,
+        check=False,  # inspect returncode ourselves for a cleaner error
+    )
+    if result.returncode != 0:
+        message = "Failed to build fine-tuning image"
+        raise RuntimeError(message)
+    logger.info("Image built: %s", FINETUNE_IMAGE)
+
+
+def start_finetune(
+    *,
+    dataset_path: Path,
+    output_dir: Path,
+    hf_cache: Path = DEFAULT_HF_CACHE,
+    validation_split: float = 0.1,
+    epochs: int = 3,
+    batch_size: int = 2,
+    learning_rate: float = 2e-4,
+    lora_rank: int = 32,
+    max_seq_length: int = 4096,
+    save_gguf: bool = False,
+) -> None:
+    """Run the fine-tuning container.
+
+    Args:
+        dataset_path: Host path to the fine-tuning JSONL dataset.
+        output_dir: Host path where the trained model will be saved.
+        hf_cache: Host path to HuggingFace model cache (bind-mounted to avoid re-downloading).
+        validation_split: Fraction of data held out for validation.
+        epochs: Number of training epochs.
+        batch_size: Per-device training batch size.
+        learning_rate: Learning rate for the optimizer.
+        lora_rank: LoRA adapter rank.
+        max_seq_length: Maximum sequence length for training.
+        save_gguf: Whether to also export a GGUF quantized model.
+    """
+    dataset_path = dataset_path.resolve()
+    output_dir = output_dir.resolve()
+
+    if not dataset_path.is_file():
+        message = f"Dataset not found: {dataset_path}"
+        raise FileNotFoundError(message)
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+    stop_finetune()  # remove any stale container holding the name
+
+    hf_cache = hf_cache.resolve()
+    hf_cache.mkdir(parents=True, exist_ok=True)
+
+    command = [
+        "docker",
+        "run",
+        "--name",
+        CONTAINER_NAME,
+        "--device=nvidia.com/gpu=all",
+        "--ipc=host",
+        "-v",
+        f"{hf_cache}:/root/.cache/huggingface",  # reuse downloaded base weights across runs
+        "-v",
+        f"{output_dir}:/workspace/output/qwen-bill-summarizer",
+        "-v",
+        f"{dataset_path}:/workspace/dataset.jsonl:ro",
+        FINETUNE_IMAGE,
+        "--dataset",
+        "/workspace/dataset.jsonl",
+        "--output-dir",
+        "/workspace/output/qwen-bill-summarizer",
+        "--val-split",
+        str(validation_split),
+        "--epochs",
+        str(epochs),
+        "--batch-size",
+        str(batch_size),
+        "--lr",
+        str(learning_rate),
+        "--lora-rank",
+        str(lora_rank),
+        "--max-seq-length",
+        str(max_seq_length),
+    ]
+
+    if save_gguf:
+        command.append("--save-gguf")
+
+    logger.info("Starting fine-tuning container")
+    logger.info("  Dataset: %s", dataset_path)
+    logger.info("  Val split: %.0f%%", validation_split * 100)
+    logger.info("  Output: %s", output_dir)
+    logger.info("  Epochs: %d", epochs)
+    logger.info("  Batch size: %d", batch_size)
+    logger.info("  LoRA rank: %d", lora_rank)
+
+    result = subprocess.run(command, text=True, check=False)  # blocks until training finishes
+    if result.returncode != 0:
+        message = f"Fine-tuning container exited with code {result.returncode}"
+        raise RuntimeError(message)
+    logger.info("Fine-tuning complete. Model saved to %s", output_dir)
+
+
+def stop_finetune() -> None:
+    """Stop and remove the fine-tuning container."""
+    logger.info("Stopping fine-tuning container")
+    subprocess.run(["docker", "stop", CONTAINER_NAME], capture_output=True, check=False)
+    subprocess.run(["docker", "rm", "-f", CONTAINER_NAME], capture_output=True, check=False)
+
+
+def logs_finetune() -> str | None:
+    """Return recent logs from the fine-tuning container, or None if not running."""
+    result = subprocess.run(
+        ["docker", "logs", "--tail", "50", CONTAINER_NAME],
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    if result.returncode != 0:
+        return None
+    return result.stdout + result.stderr
+
+
+app = typer.Typer(help="Fine-tuning container management.")
+
+
+@app.command()
+def build() -> None:
+    """Build the fine-tuning Docker image."""
+    build_image()
+
+
+@app.command()
+def run(
+    dataset: Annotated[Path, typer.Option(help="Fine-tuning JSONL")] = Path("output/finetune_dataset.jsonl"),
+    output_dir: Annotated[Path, typer.Option(help="Where to save the trained model")] = Path(
+        "output/qwen-bill-summarizer",
+    ),
+    hf_cache: Annotated[Path, typer.Option(help="Host path to HuggingFace model cache")] = DEFAULT_HF_CACHE,
+    validation_split: Annotated[float, typer.Option("--val-split", help="Fraction held out for validation")] = 0.1,
+    epochs: Annotated[int, typer.Option(help="Training epochs")] = 3,
+    batch_size: Annotated[int, typer.Option(help="Per-device batch size")] = 2,
+    learning_rate: Annotated[float, typer.Option("--lr", help="Learning rate")] = 2e-4,
+    lora_rank: Annotated[int, typer.Option(help="LoRA rank")] = 32,
+    max_seq_length: Annotated[int, typer.Option(help="Max sequence length")] = 4096,
+    save_gguf: Annotated[bool, typer.Option("--save-gguf/--no-save-gguf", help="Also save GGUF")] = False,
+    log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
+) -> None:
+    """Run fine-tuning inside a Docker container."""
+    logging.basicConfig(level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
+    check_gpu_free()
+    start_finetune(
+        dataset_path=dataset,
+        output_dir=output_dir,
+        hf_cache=hf_cache,
+        validation_split=validation_split,
+        epochs=epochs,
+        batch_size=batch_size,
+        learning_rate=learning_rate,
+        lora_rank=lora_rank,
+        max_seq_length=max_seq_length,
+        save_gguf=save_gguf,
+    )
+
+
+@app.command()
+def stop() -> None:
+    """Stop and remove the fine-tuning container."""
+    stop_finetune()
+
+
+@app.command()
+def logs() -> None:
+    """Show recent logs from the fine-tuning container."""
+    output = logs_finetune()
+    if output is None:
+        typer.echo("No running fine-tuning container found.")
+        raise typer.Exit(code=1)
+    typer.echo(output)
+
+
+def cli() -> None:
+    """Typer entry point."""
+    app()
+
+
+if __name__ == "__main__":
+    cli()
diff --git a/python/prompt_bench/train.sh b/python/prompt_bench/train.sh
new file mode 100644
index 0000000..504cb30
--- /dev/null
+++ b/python/prompt_bench/train.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Fine-tune Qwen3 4B on bill summarization data.
+#
+# Prerequisites:
+#   1. Build the dataset: python -m python.prompt_bench.build_finetune_dataset
+#   2. Build the image:   docker build -f python/prompt_bench/Dockerfile.finetune -t bill-finetune .
+#
+# Usage:
+#   bash python/prompt_bench/train.sh [extra flags passed to finetune.py]
+#
+# Examples:
+#   bash python/prompt_bench/train.sh
+#   bash python/prompt_bench/train.sh --epochs 5 --lr 1e-4
+#   bash python/prompt_bench/train.sh --val-split 0.15 --save-gguf
+
+set -euo pipefail
+
+IMAGE="bill-finetune"
+DATASET="$(pwd)/output/finetune_dataset.jsonl"
+OUTPUT_DIR="$(pwd)/output/qwen-bill-summarizer"
+
+if [ ! -f "$DATASET" ]; then
+  echo "Error: Dataset not found at $DATASET"
+  echo "Run: python -m python.prompt_bench.build_finetune_dataset"
+  exit 1
+fi
+
+mkdir -p "$OUTPUT_DIR"
+
+echo "Starting fine-tuning..."
+echo "  Dataset: $DATASET"
+echo "  Output: $OUTPUT_DIR"
+echo "  Extra args: $*"
+# Mount the HF hub cache (override path via HF_CACHE) so base-model weights are not re-downloaded each run; matches finetune_container.py's mount.
+docker run --rm \
+  --device=nvidia.com/gpu=all \
+  --ipc=host -v "${HF_CACHE:-/zfs/models/hf}":/root/.cache/huggingface \
+  -v "$OUTPUT_DIR":/workspace/output/qwen-bill-summarizer \
+  -v "$DATASET":/workspace/dataset.jsonl:ro \
+  "$IMAGE" \
+  --dataset /workspace/dataset.jsonl \
+  --output-dir /workspace/output/qwen-bill-summarizer \
+  "$@"
+
+echo "Done! Model saved to $OUTPUT_DIR"