created working fine-tuning pipeline

This commit is contained in:
2026-04-10 12:56:57 -04:00
parent 921a397b1c
commit 721526022b
4 changed files with 470 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
# Unsloth fine-tuning container for Qwen 3.5 4B on RTX 3090.
#
# The image wraps python/prompt_bench/finetune.py; every argument given after
# the image name on `docker run` is forwarded to that script's CLI.
#
# Build:
# docker build -f python/prompt_bench/Dockerfile.finetune -t bill-finetune .
#
# Run:
# docker run --rm --device=nvidia.com/gpu=all --ipc=host \
# -v $(pwd)/output:/workspace/output \
# -v $(pwd)/output/finetune_dataset.jsonl:/workspace/dataset.jsonl:ro \
# -v /zfs/models/hf:/models \
# bill-finetune \
# --dataset /workspace/dataset.jsonl \
# --output-dir /workspace/output/qwen-bill-summarizer
#
# NOTE(review): nothing in this Dockerfile points HuggingFace at /models (no
# HF_HOME is set here), while the Python container manager mounts the cache at
# /root/.cache/huggingface. Confirm which mount target the base image reads.

# NOTE(review): the `latest` tag is unpinned, so a rebuild may pull a different
# Unsloth release than the one this pipeline was validated with.
FROM ghcr.io/unslothai/unsloth:latest
# finetune.py builds its command-line interface with typer.
RUN pip install --no-cache-dir typer
WORKDIR /workspace
# Copy only the modules the entrypoint needs, keeping the python/prompt_bench
# package layout (including both __init__.py files) so that
# `python -m python.prompt_bench.finetune` resolves from /workspace.
COPY python/prompt_bench/finetune.py python/prompt_bench/finetune.py
# NOTE(review): finetune.py as committed does not import summarization_prompts;
# confirm this file is needed inside the image.
COPY python/prompt_bench/summarization_prompts.py python/prompt_bench/summarization_prompts.py
COPY python/prompt_bench/__init__.py python/prompt_bench/__init__.py
COPY python/__init__.py python/__init__.py
ENTRYPOINT ["python", "-m", "python.prompt_bench.finetune"]

View File

@@ -0,0 +1,190 @@
"""Fine-tune Qwen 3.5 4B on bill summarization data using Unsloth.
Loads a ChatML-style JSONL dataset (system/user/assistant messages),
applies QLoRA with 4-bit quantization, and saves the merged model
in HuggingFace format. Designed for a single RTX 3090 (24GB).
Usage:
python -m python.prompt_bench.finetune \
--dataset output/finetune_dataset.jsonl \
--output-dir output/qwen-bill-summarizer
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Annotated
import typer
from unsloth import FastLanguageModel
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
# Module logger; main() configures handlers and level via logging.basicConfig.
logger = logging.getLogger(__name__)
# Default model ID handed to FastLanguageModel.from_pretrained (overridable with --base-model).
# NOTE(review): the docstrings in this module say "Qwen 3.5 4B" while this ID
# reads Qwen3-4B-Base — confirm which model is intended.
BASE_MODEL = "unsloth/Qwen3-4B-Base-unsloth-bnb-4bit"
# LoRA hyperparameters
# LORA_RANK is only the default for --lora-rank; LORA_ALPHA and LORA_DROPOUT are
# always used as-is, so alpha does not follow a rank chosen on the command line.
LORA_RANK = 32
LORA_ALPHA = 32
LORA_DROPOUT = 0.0
# Module names passed as `target_modules` when the LoRA adapter is attached.
LORA_TARGETS = [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj",
]
# Training hyperparameters tuned for ~2k examples on a 3090
# LEARNING_RATE, EPOCHS, BATCH_SIZE and MAX_SEQ_LENGTH are CLI defaults; the
# remaining values are passed to TrainingArguments unchanged.
LEARNING_RATE = 2e-4
EPOCHS = 3
BATCH_SIZE = 2
GRADIENT_ACCUMULATION = 8 # effective batch = 16 at the default BATCH_SIZE of 2
MAX_SEQ_LENGTH = 4096
WARMUP_RATIO = 0.05
WEIGHT_DECAY = 0.01
LOGGING_STEPS = 10
# Used for both `save_steps` and `eval_steps` in main().
SAVE_STEPS = 100
def _messages_to_chatml(messages: list[dict]) -> str:
r"""Convert a message list to Qwen ChatML format.
Produces:
<|im_start|>system\n...\n<|im_end|>
<|im_start|>user\n...\n<|im_end|>
<|im_start|>assistant\n...\n<|im_end|>
"""
parts = []
for message in messages:
role = message["role"]
content = message["content"]
parts.append(f"<|im_start|>{role}\n{content}<|im_end|>")
return "\n".join(parts)
def load_dataset_from_jsonl(path: Path) -> Dataset:
    """Load a ChatML JSONL file into a HuggingFace Dataset.

    Each non-blank line must be a JSON object of the form
    ``{"messages": [{"role": ..., "content": ...}, ...]}``. Every record is
    rendered with ``_messages_to_chatml`` into a single ``text`` column.
    Blank lines are skipped.

    Args:
        path: JSONL file to read (decoded as UTF-8).

    Returns:
        A Dataset with one ``text`` row per non-blank input line.

    Raises:
        json.JSONDecodeError: If a line is not valid JSON.
        KeyError: If a record or message lacks a required key.
        TypeError: If a record or message is not a mapping.
    """
    records = []
    with path.open(encoding="utf-8") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            stripped = raw_line.strip()
            if not stripped:
                continue
            try:
                entry = json.loads(stripped)
                text = _messages_to_chatml(entry["messages"])
            except (json.JSONDecodeError, KeyError, TypeError):
                # Point at the offending line, then let the original exception
                # propagate so callers see the same exception types as before.
                logger.error("Malformed record at %s line %d", path, line_number)
                raise
            records.append({"text": text})
    logger.info("Loaded %d examples from %s", len(records), path)
    return Dataset.from_list(records)
def main(
    dataset_path: Annotated[Path, typer.Option("--dataset", help="Fine-tuning JSONL")] = Path(
        "output/finetune_dataset.jsonl",
    ),
    validation_split: Annotated[float, typer.Option("--val-split", help="Fraction held out for validation")] = 0.1,
    output_dir: Annotated[Path, typer.Option("--output-dir", help="Where to save the merged model")] = Path(
        "output/qwen-bill-summarizer",
    ),
    base_model: Annotated[str, typer.Option("--base-model", help="Unsloth model ID")] = BASE_MODEL,
    epochs: Annotated[int, typer.Option("--epochs", help="Training epochs")] = EPOCHS,
    batch_size: Annotated[int, typer.Option("--batch-size", help="Per-device batch size")] = BATCH_SIZE,
    learning_rate: Annotated[float, typer.Option("--lr", help="Learning rate")] = LEARNING_RATE,
    lora_rank: Annotated[int, typer.Option("--lora-rank", help="LoRA rank")] = LORA_RANK,
    max_seq_length: Annotated[int, typer.Option("--max-seq-length", help="Max sequence length")] = MAX_SEQ_LENGTH,
    save_gguf: Annotated[bool, typer.Option("--save-gguf/--no-save-gguf", help="Also save GGUF")] = False,
) -> None:
    """Fine-tune Qwen 3.5 4B on bill summarization with Unsloth + QLoRA."""
    # The docstring above is kept to one line on purpose: Typer shows it as the
    # command's --help text. Pipeline, in order:
    #   1. validate the CLI inputs and load/split the dataset (cheap, fail fast),
    #   2. load the 4-bit base model and attach the LoRA adapter,
    #   3. train with SFTTrainer, evaluating every SAVE_STEPS steps,
    #   4. save merged 16-bit weights to <output_dir>/merged and, with
    #      --save-gguf, a q4_k_m GGUF export to <output_dir>/gguf.
    # Raises typer.BadParameter for a missing or empty dataset or an
    # out-of-range --val-split.
    logging.basicConfig(level="INFO", format="%(asctime)s %(levelname)s %(name)s: %(message)s")

    if not dataset_path.is_file():
        message = f"Dataset not found: {dataset_path}"
        raise typer.BadParameter(message)
    # train_test_split is given a float test_size, which is a fraction; reject
    # values outside (0, 1) here with a clear message instead of failing later.
    if not 0.0 < validation_split < 1.0:
        message = f"--val-split must be strictly between 0 and 1, got {validation_split}"
        raise typer.BadParameter(message)

    # Parse and split the dataset before loading the model so that a malformed
    # or empty file is reported before any model download or GPU work begins.
    full_dataset = load_dataset_from_jsonl(dataset_path)
    if len(full_dataset) == 0:
        message = f"Dataset contains no examples: {dataset_path}"
        raise typer.BadParameter(message)
    split = full_dataset.train_test_split(test_size=validation_split, seed=42)
    train_dataset = split["train"]
    validation_dataset = split["test"]
    logger.info("Split: %d train, %d validation", len(train_dataset), len(validation_dataset))

    logger.info("Loading base model: %s", base_model)
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=base_model,
        max_seq_length=max_seq_length,
        load_in_4bit=True,
        dtype=None,
    )

    # LORA_ALPHA is fixed, so it does not follow a rank chosen with --lora-rank.
    logger.info("Applying LoRA (rank=%d, alpha=%d)", lora_rank, LORA_ALPHA)
    model = FastLanguageModel.get_peft_model(
        model,
        r=lora_rank,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        target_modules=LORA_TARGETS,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=42,
    )

    training_args = TrainingArguments(
        output_dir=str(output_dir / "checkpoints"),
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION,
        learning_rate=learning_rate,
        warmup_ratio=WARMUP_RATIO,
        weight_decay=WEIGHT_DECAY,
        lr_scheduler_type="cosine",
        logging_steps=LOGGING_STEPS,
        # Checkpointing and evaluation share one interval (SAVE_STEPS).
        save_steps=SAVE_STEPS,
        save_total_limit=3,
        eval_strategy="steps",
        eval_steps=SAVE_STEPS,
        load_best_model_at_end=True,
        bf16=True,
        optim="adamw_8bit",
        seed=42,
        report_to="none",
    )
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=train_dataset,
        eval_dataset=validation_dataset,
        args=training_args,
        max_seq_length=max_seq_length,
        packing=True,
    )

    logger.info("Starting training: %d train, %d val, %d epochs", len(train_dataset), len(validation_dataset), epochs)
    trainer.train()

    merged_path = str(output_dir / "merged")
    logger.info("Saving merged model to %s", merged_path)
    model.save_pretrained_merged(merged_path, tokenizer, save_method="merged_16bit")

    if save_gguf:
        gguf_path = str(output_dir / "gguf")
        logger.info("Saving GGUF to %s", gguf_path)
        model.save_pretrained_gguf(gguf_path, tokenizer, quantization_method="q4_k_m")

    logger.info("Done! Model saved to %s", output_dir)
def cli() -> None:
    """Typer entry point.

    Hands ``main`` to ``typer.run`` so its annotated parameters become the
    command-line options.
    """
    typer.run(main)


# Allows `python -m python.prompt_bench.finetune` (the container entrypoint form).
if __name__ == "__main__":
    cli()

View File

@@ -0,0 +1,210 @@
"""Docker container lifecycle management for Unsloth fine-tuning."""
from __future__ import annotations
import logging
import subprocess
from pathlib import Path
from typing import Annotated
import typer
from python.prompt_bench.container import check_gpu_free
# Module logger; the `run` command configures logging via logging.basicConfig.
logger = logging.getLogger(__name__)
# Fixed container name: lets stop/logs address the container without tracking an ID.
CONTAINER_NAME = "bill-finetune"
# Tag applied by build_image() and started by start_finetune().
FINETUNE_IMAGE = "bill-finetune:latest"
# Relative path; build_image() uses "." as the build context, so commands are
# presumably meant to be run from the repository root — TODO confirm.
DOCKERFILE_PATH = "python/prompt_bench/Dockerfile.finetune"
# Default host directory bind-mounted at /root/.cache/huggingface in the container.
DEFAULT_HF_CACHE = Path("/zfs/models/hf")
def build_image() -> None:
    """Build the fine-tuning image with ``docker build``.

    Uses ``DOCKERFILE_PATH`` as the Dockerfile, the current directory as the
    build context, and tags the result ``FINETUNE_IMAGE``. Docker's output is
    not captured, so it streams straight to the caller's terminal.

    Raises:
        RuntimeError: If ``docker build`` exits with a non-zero status.
    """
    logger.info("Building fine-tuning image: %s", FINETUNE_IMAGE)
    build_command = ["docker", "build", "-f", DOCKERFILE_PATH, "-t", FINETUNE_IMAGE, "."]
    completed = subprocess.run(build_command, text=True, check=False)
    if completed.returncode != 0:
        message = "Failed to build fine-tuning image"
        raise RuntimeError(message)
    logger.info("Image built: %s", FINETUNE_IMAGE)
def start_finetune(
    *,
    dataset_path: Path,
    output_dir: Path,
    hf_cache: Path = DEFAULT_HF_CACHE,
    validation_split: float = 0.1,
    epochs: int = 3,
    batch_size: int = 2,
    learning_rate: float = 2e-4,
    lora_rank: int = 32,
    max_seq_length: int = 4096,
    save_gguf: bool = False,
) -> None:
    """Launch the fine-tuning container and wait for it to finish.

    Any existing container named ``CONTAINER_NAME`` is stopped and removed
    first. The dataset, output directory and HuggingFace cache are bind-mounted
    into the container, and the hyperparameters are forwarded as CLI flags to
    the image's entrypoint.

    Args:
        dataset_path: Host path of the fine-tuning JSONL file (mounted read-only).
        output_dir: Host directory that receives the trained model; created if missing.
        hf_cache: Host HuggingFace cache directory; created if missing.
        validation_split: Fraction of the data held out for validation.
        epochs: Number of training epochs.
        batch_size: Per-device training batch size.
        learning_rate: Optimizer learning rate.
        lora_rank: LoRA adapter rank.
        max_seq_length: Maximum training sequence length.
        save_gguf: When true, also request a GGUF export.

    Raises:
        FileNotFoundError: If ``dataset_path`` is not an existing file.
        RuntimeError: If the container exits with a non-zero status.
    """
    dataset_path = dataset_path.resolve()
    output_dir = output_dir.resolve()
    if not dataset_path.is_file():
        message = f"Dataset not found: {dataset_path}"
        raise FileNotFoundError(message)
    output_dir.mkdir(parents=True, exist_ok=True)

    # The container name is fixed, so a leftover container must go first.
    stop_finetune()

    hf_cache = hf_cache.resolve()
    hf_cache.mkdir(parents=True, exist_ok=True)

    # Paths as seen from inside the container.
    container_dataset = "/workspace/dataset.jsonl"
    container_output = "/workspace/output/qwen-bill-summarizer"

    command = [
        "docker",
        "run",
        "--name",
        CONTAINER_NAME,
        "--device=nvidia.com/gpu=all",
        "--ipc=host",
    ]
    bind_mounts = (
        f"{hf_cache}:/root/.cache/huggingface",
        f"{output_dir}:{container_output}",
        f"{dataset_path}:{container_dataset}:ro",
    )
    for mount in bind_mounts:
        command += ["-v", mount]
    command.append(FINETUNE_IMAGE)

    # Everything after the image name goes to the entrypoint's CLI.
    trainer_options = {
        "--dataset": container_dataset,
        "--output-dir": container_output,
        "--val-split": validation_split,
        "--epochs": epochs,
        "--batch-size": batch_size,
        "--lr": learning_rate,
        "--lora-rank": lora_rank,
        "--max-seq-length": max_seq_length,
    }
    for flag, value in trainer_options.items():
        command += [flag, str(value)]
    if save_gguf:
        command.append("--save-gguf")

    logger.info("Starting fine-tuning container")
    logger.info(" Dataset: %s", dataset_path)
    logger.info(" Val split: %.0f%%", validation_split * 100)
    logger.info(" Output: %s", output_dir)
    logger.info(" Epochs: %d", epochs)
    logger.info(" Batch size: %d", batch_size)
    logger.info(" LoRA rank: %d", lora_rank)

    completed = subprocess.run(command, text=True, check=False)
    if completed.returncode != 0:
        message = f"Fine-tuning container exited with code {completed.returncode}"
        raise RuntimeError(message)
    logger.info("Fine-tuning complete. Model saved to %s", output_dir)
def stop_finetune() -> None:
    """Stop the fine-tuning container, then force-remove it.

    Both docker commands run best-effort: their output is captured and
    discarded and their exit status is ignored, so calling this when no such
    container exists is harmless.
    """
    logger.info("Stopping fine-tuning container")
    for docker_args in (["stop", CONTAINER_NAME], ["rm", "-f", CONTAINER_NAME]):
        subprocess.run(["docker", *docker_args], capture_output=True, check=False)
def logs_finetune() -> str | None:
    """Fetch the tail of the fine-tuning container's logs.

    Runs ``docker logs --tail 50`` against ``CONTAINER_NAME``.

    Returns:
        The captured stdout followed by the captured stderr, or ``None`` when
        ``docker logs`` exits with a non-zero status.
    """
    completed = subprocess.run(
        ["docker", "logs", "--tail", "50", CONTAINER_NAME],
        capture_output=True,
        text=True,
        check=False,
    )
    return completed.stdout + completed.stderr if completed.returncode == 0 else None
# Typer application; the @app.command() functions below register as its sub-commands.
app = typer.Typer(help="Fine-tuning container management.")
@app.command()
def build() -> None:
    """Build the fine-tuning Docker image."""
    # Thin CLI wrapper: all work, including the RuntimeError raised on a
    # failed build, lives in build_image().
    build_image()
@app.command()
def run(
    dataset: Annotated[Path, typer.Option(help="Fine-tuning JSONL")] = Path("output/finetune_dataset.jsonl"),
    output_dir: Annotated[Path, typer.Option(help="Where to save the trained model")] = Path(
        "output/qwen-bill-summarizer",
    ),
    hf_cache: Annotated[Path, typer.Option(help="Host path to HuggingFace model cache")] = DEFAULT_HF_CACHE,
    validation_split: Annotated[float, typer.Option("--val-split", help="Fraction held out for validation")] = 0.1,
    epochs: Annotated[int, typer.Option(help="Training epochs")] = 3,
    batch_size: Annotated[int, typer.Option(help="Per-device batch size")] = 2,
    learning_rate: Annotated[float, typer.Option("--lr", help="Learning rate")] = 2e-4,
    lora_rank: Annotated[int, typer.Option(help="LoRA rank")] = 32,
    max_seq_length: Annotated[int, typer.Option(help="Max sequence length")] = 4096,
    save_gguf: Annotated[bool, typer.Option("--save-gguf/--no-save-gguf", help="Also save GGUF")] = False,
    log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
) -> None:
    """Run fine-tuning inside a Docker container."""
    # The docstring above doubles as the command's --help text, so details live
    # here: configure logging, run the GPU pre-flight check, then forward every
    # option unchanged to start_finetune().
    #
    # logging only recognises upper-case level names, so normalise the option;
    # otherwise `--log-level info` fails with "Unknown level".
    logging.basicConfig(level=log_level.upper(), format="%(asctime)s %(levelname)s %(name)s: %(message)s")
    # NOTE(review): check_gpu_free is imported from python.prompt_bench.container;
    # presumably it aborts when the GPU is already in use — confirm there.
    check_gpu_free()
    start_finetune(
        dataset_path=dataset,
        output_dir=output_dir,
        hf_cache=hf_cache,
        validation_split=validation_split,
        epochs=epochs,
        batch_size=batch_size,
        learning_rate=learning_rate,
        lora_rank=lora_rank,
        max_seq_length=max_seq_length,
        save_gguf=save_gguf,
    )
@app.command()
def stop() -> None:
    """Stop and remove the fine-tuning container."""
    # Thin CLI wrapper around stop_finetune(), which ignores docker's exit
    # status, so this command does not fail when no container exists.
    stop_finetune()
@app.command()
def logs() -> None:
    """Show recent logs from the fine-tuning container."""
    recent_output = logs_finetune()
    if recent_output is not None:
        typer.echo(recent_output)
        return
    # logs_finetune() returns None when `docker logs` failed; report it and
    # exit with status 1.
    typer.echo("No running fine-tuning container found.")
    raise typer.Exit(code=1)
def cli() -> None:
    """Typer entry point.

    Invokes the module-level ``app``, which dispatches to the build, run,
    stop and logs sub-commands.
    """
    app()


# Allows running this module directly as a script or with `python -m`.
if __name__ == "__main__":
    cli()

View File

@@ -0,0 +1,45 @@
#!/usr/bin/env bash
# Fine-tune Qwen 3.5 4B on bill summarization data.
#
# Prerequisites:
#   1. Build the dataset: python -m python.prompt_bench.build_finetune_dataset
#   2. Build the image:   docker build -f python/prompt_bench/Dockerfile.finetune -t bill-finetune .
#
# Usage:
#   bash python/prompt_bench/train.sh [extra flags passed to finetune.py]
#
# Examples:
#   bash python/prompt_bench/train.sh
#   bash python/prompt_bench/train.sh --epochs 5 --lr 1e-4
#   bash python/prompt_bench/train.sh --val-split 0.15 --save-gguf
#
# Environment:
#   HF_CACHE  Host HuggingFace cache directory (default: /zfs/models/hf).
#             When the directory exists it is bind-mounted into the container
#             so the base model is not downloaded again on every run.
set -euo pipefail

IMAGE="bill-finetune"
DATASET="$(pwd)/output/finetune_dataset.jsonl"
OUTPUT_DIR="$(pwd)/output/qwen-bill-summarizer"
HF_CACHE="${HF_CACHE:-/zfs/models/hf}"

if [ ! -f "$DATASET" ]; then
    # Diagnostics go to stderr so they are not mistaken for normal output.
    echo "Error: Dataset not found at $DATASET" >&2
    echo "Run: python -m python.prompt_bench.build_finetune_dataset" >&2
    exit 1
fi
mkdir -p "$OUTPUT_DIR"

# The container is started with --rm, so anything downloaded inside it is lost
# when it exits. Reuse the host cache when available, at the same in-container
# path the Python container manager uses. The mount is skipped when the
# directory is missing, which keeps the previous behaviour on other hosts.
CACHE_MOUNT=()
if [ -d "$HF_CACHE" ]; then
    CACHE_MOUNT=(-v "$HF_CACHE":/root/.cache/huggingface)
fi

echo "Starting fine-tuning..."
echo " Dataset: $DATASET"
echo " Output: $OUTPUT_DIR"
echo " Extra args: $*"

# ${arr[@]+...} expands to nothing for an empty array without tripping
# `set -u` on older bash versions.
docker run --rm \
    --device=nvidia.com/gpu=all \
    --ipc=host \
    ${CACHE_MOUNT[@]+"${CACHE_MOUNT[@]}"} \
    -v "$OUTPUT_DIR":/workspace/output/qwen-bill-summarizer \
    -v "$DATASET":/workspace/dataset.jsonl:ro \
    "$IMAGE" \
    --dataset /workspace/dataset.jsonl \
    --output-dir /workspace/output/qwen-bill-summarizer \
    "$@"

echo "Done! Model saved to $OUTPUT_DIR"