#!/usr/bin/env bash
# Fine-tune Qwen 3.5 4B on bill summarization data.
#
# Runs the "bill-finetune" container image with the dataset mounted read-only
# and the output directory mounted read-write. Both host paths are resolved
# against the current working directory, so invoke this script from the
# directory that contains output/ (the examples below assume the repo root).
#
# Prerequisites:
#   1. Build the dataset: python -m python.prompt_bench.build_finetune_dataset
#   2. Build the image:   docker build -f python/prompt_bench/Dockerfile.finetune -t bill-finetune .
#
# Usage:
#   bash python/prompt_bench/train.sh [extra flags passed to finetune.py]
#
# Examples:
#   bash python/prompt_bench/train.sh
#   bash python/prompt_bench/train.sh --epochs 5 --lr 1e-4
#   bash python/prompt_bench/train.sh --val-split 0.15 --save-gguf
#
# Exit status:
#   1    the dataset file does not exist
#   127  docker is not available on PATH
#   Any other non-zero status comes from the failing command (set -e).

set -euo pipefail

# Host-side locations, anchored at the directory the script is launched from.
HOST_ROOT="$(pwd)"
readonly HOST_ROOT
readonly IMAGE="bill-finetune"
readonly DATASET="${HOST_ROOT}/output/finetune_dataset.jsonl"
readonly OUTPUT_DIR="${HOST_ROOT}/output/qwen-bill-summarizer"

# Container-side mount points. Named once so the -v mounts and the
# --dataset / --output-dir arguments below cannot drift apart.
readonly CONTAINER_DATASET="/workspace/dataset.jsonl"
readonly CONTAINER_OUTPUT="/workspace/output/qwen-bill-summarizer"

# Fail early, before printing the start banner, with the same status (127)
# the shell would report for a missing command.
if ! command -v docker >/dev/null 2>&1; then
  echo "Error: docker not found on PATH" >&2
  exit 127
fi

# Diagnostics go to stderr so they are not mixed into captured stdout.
if [[ ! -f "$DATASET" ]]; then
  echo "Error: Dataset not found at $DATASET" >&2
  echo "Run: python -m python.prompt_bench.build_finetune_dataset" >&2
  exit 1
fi

# Create the bind-mount source up front so it exists before docker run.
mkdir -p "$OUTPUT_DIR"

echo "Starting fine-tuning..."
echo " Dataset: $DATASET"
echo " Output: $OUTPUT_DIR"
echo " Extra args: $*"

# --device=nvidia.com/gpu=all : requests every NVIDIA GPU by CDI device name.
# --ipc=host                  : uses the host IPC namespace (presumably for
#                               the trainer's shared-memory needs; confirm).
# Everything after "$IMAGE" is handed to the image's entrypoint, followed by
# the caller's extra flags, each preserved as a separate word by "$@".
docker run --rm \
  --device=nvidia.com/gpu=all \
  --ipc=host \
  -v "$OUTPUT_DIR":"$CONTAINER_OUTPUT" \
  -v "$DATASET":"$CONTAINER_DATASET":ro \
  "$IMAGE" \
  --dataset "$CONTAINER_DATASET" \
  --output-dir "$CONTAINER_OUTPUT" \
  "$@"

# Only reached when docker run exited 0 (set -e aborts otherwise).
echo "Done! Model saved to $OUTPUT_DIR"