Move prompt_bench from the `python` package to the `pipelines` package

This commit is contained in:
2026-04-14 18:18:31 -04:00
parent 2abd61d3b1
commit b8d64a5b19
39 changed files with 139 additions and 50 deletions

View File

@@ -22,4 +22,4 @@ COPY config/prompts/summarization_prompts.toml config/prompts/summarization_prom
COPY python/prompt_bench/__init__.py python/prompt_bench/__init__.py COPY python/prompt_bench/__init__.py python/prompt_bench/__init__.py
COPY python/__init__.py python/__init__.py COPY python/__init__.py python/__init__.py
ENTRYPOINT ["python", "-m", "python.prompt_bench.finetune"] ENTRYPOINT ["python", "-m", "pipelines.prompt_bench.finetune"]

View File

@@ -23,9 +23,14 @@ import httpx
import typer import typer
from tiktoken import Encoding, get_encoding from tiktoken import Encoding, get_encoding
from python.prompt_bench.bill_token_compression import compress_bill_text from pipelines.prompt_bench.bill_token_compression import compress_bill_text
_PROMPTS_PATH = Path(__file__).resolve().parents[2] / "config" / "prompts" / "summarization_prompts.toml" _PROMPTS_PATH = (
Path(__file__).resolve().parents[2]
/ "config"
/ "prompts"
/ "summarization_prompts.toml"
)
_PROMPTS = tomllib.loads(_PROMPTS_PATH.read_text())["summarization"] _PROMPTS = tomllib.loads(_PROMPTS_PATH.read_text())["summarization"]
SUMMARIZATION_SYSTEM_PROMPT: str = _PROMPTS["system_prompt"] SUMMARIZATION_SYSTEM_PROMPT: str = _PROMPTS["system_prompt"]
SUMMARIZATION_USER_TEMPLATE: str = _PROMPTS["user_template"] SUMMARIZATION_USER_TEMPLATE: str = _PROMPTS["user_template"]
@@ -72,7 +77,12 @@ def build_request(custom_id: str, model: str, bill_text: str) -> dict:
"model": model, "model": model,
"messages": [ "messages": [
{"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT}, {"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
{"role": "user", "content": SUMMARIZATION_USER_TEMPLATE.format(text_content=bill_text)}, {
"role": "user",
"content": SUMMARIZATION_USER_TEMPLATE.format(
text_content=bill_text
),
},
], ],
}, },
} }
@@ -123,7 +133,9 @@ def prepare_requests(
"compressed_chars": len(compressed_text), "compressed_chars": len(compressed_text),
"raw_tokens": raw_token_count, "raw_tokens": raw_token_count,
"compressed_tokens": compressed_token_count, "compressed_tokens": compressed_token_count,
"token_ratio": (compressed_token_count / raw_token_count) if raw_token_count else None, "token_ratio": (compressed_token_count / raw_token_count)
if raw_token_count
else None,
}, },
) )
safe_id = safe_filename(bill_id) safe_id = safe_filename(bill_id)
@@ -136,7 +148,14 @@ def write_token_csv(path: Path, token_rows: list[dict]) -> tuple[int, int]:
with path.open("w", newline="", encoding="utf-8") as handle: with path.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter( writer = csv.DictWriter(
handle, handle,
fieldnames=["bill_id", "raw_chars", "compressed_chars", "raw_tokens", "compressed_tokens", "token_ratio"], fieldnames=[
"bill_id",
"raw_chars",
"compressed_chars",
"raw_tokens",
"compressed_tokens",
"token_ratio",
],
) )
writer.writeheader() writer.writeheader()
writer.writerows(token_rows) writer.writerows(token_rows)
@@ -161,8 +180,12 @@ def create_batch(client: httpx.Client, input_file_id: str, description: str) ->
def main( def main(
csv_path: Annotated[Path, typer.Option("--csv", help="Bills CSV path")] = Path("bills.csv"), csv_path: Annotated[Path, typer.Option("--csv", help="Bills CSV path")] = Path(
output_dir: Annotated[Path, typer.Option("--output-dir", help="Where to write JSONL + metadata")] = Path( "bills.csv"
),
output_dir: Annotated[
Path, typer.Option("--output-dir", help="Where to write JSONL + metadata")
] = Path(
"output/openai_batch", "output/openai_batch",
), ),
model: Annotated[str, typer.Option(help="OpenAI model id")] = "gpt-5-mini", model: Annotated[str, typer.Option(help="OpenAI model id")] = "gpt-5-mini",
@@ -170,7 +193,9 @@ def main(
log_level: Annotated[str, typer.Option(help="Log level")] = "INFO", log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
) -> None: ) -> None:
"""Submit an OpenAI Batch job of compressed bill summaries.""" """Submit an OpenAI Batch job of compressed bill summaries."""
logging.basicConfig(level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s") logging.basicConfig(
level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s"
)
api_key = getenv("CLOSEDAI_TOKEN") or getenv("OPENAI_API_KEY") api_key = getenv("CLOSEDAI_TOKEN") or getenv("OPENAI_API_KEY")
if not api_key: if not api_key:
@@ -191,7 +216,9 @@ def main(
request_lines, token_rows = prepare_requests(bills, model=model, encoder=encoder) request_lines, token_rows = prepare_requests(bills, model=model, encoder=encoder)
token_csv_path = output_dir / "token_counts.csv" token_csv_path = output_dir / "token_counts.csv"
raw_tokens_total, compressed_tokens_total = write_token_csv(token_csv_path, token_rows) raw_tokens_total, compressed_tokens_total = write_token_csv(
token_csv_path, token_rows
)
logger.info( logger.info(
"Token counts: raw=%d compressed=%d ratio=%.3f -> %s", "Token counts: raw=%d compressed=%d ratio=%.3f -> %s",
raw_tokens_total, raw_tokens_total,
@@ -211,7 +238,11 @@ def main(
logger.info("Uploaded: %s", file_id) logger.info("Uploaded: %s", file_id)
logger.info("Creating batch") logger.info("Creating batch")
batch = create_batch(client, file_id, f"compressed bill summaries x{len(request_lines)} ({model})") batch = create_batch(
client,
file_id,
f"compressed bill summaries x{len(request_lines)} ({model})",
)
logger.info("Batch created: %s", batch["id"]) logger.info("Batch created: %s", batch["id"])
metadata = { metadata = {

View File

@@ -24,9 +24,14 @@ from typing import Annotated
import httpx import httpx
import typer import typer
from python.prompt_bench.bill_token_compression import compress_bill_text from pipelines.prompt_bench.bill_token_compression import compress_bill_text
_PROMPTS_PATH = Path(__file__).resolve().parents[2] / "config" / "prompts" / "summarization_prompts.toml" _PROMPTS_PATH = (
Path(__file__).resolve().parents[2]
/ "config"
/ "prompts"
/ "summarization_prompts.toml"
)
_PROMPTS = tomllib.loads(_PROMPTS_PATH.read_text())["summarization"] _PROMPTS = tomllib.loads(_PROMPTS_PATH.read_text())["summarization"]
SUMMARIZATION_SYSTEM_PROMPT: str = _PROMPTS["system_prompt"] SUMMARIZATION_SYSTEM_PROMPT: str = _PROMPTS["system_prompt"]
SUMMARIZATION_USER_TEMPLATE: str = _PROMPTS["user_template"] SUMMARIZATION_USER_TEMPLATE: str = _PROMPTS["user_template"]
@@ -62,7 +67,10 @@ def build_messages(bill_text: str) -> list[dict]:
"""Return the system + user message pair for a bill.""" """Return the system + user message pair for a bill."""
return [ return [
{"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT}, {"role": "system", "content": SUMMARIZATION_SYSTEM_PROMPT},
{"role": "user", "content": SUMMARIZATION_USER_TEMPLATE.format(text_content=bill_text)}, {
"role": "user",
"content": SUMMARIZATION_USER_TEMPLATE.format(text_content=bill_text),
},
] ]
@@ -132,17 +140,25 @@ def run_one_request(
def main( def main(
csv_path: Annotated[Path, typer.Option("--csv", help="Bills CSV path")] = Path("bills.csv"), csv_path: Annotated[Path, typer.Option("--csv", help="Bills CSV path")] = Path(
output_dir: Annotated[Path, typer.Option("--output-dir", help="Where to write per-request JSON")] = Path( "bills.csv"
),
output_dir: Annotated[
Path, typer.Option("--output-dir", help="Where to write per-request JSON")
] = Path(
"output/openai_runs", "output/openai_runs",
), ),
model: Annotated[str, typer.Option(help="OpenAI model id")] = DEFAULT_MODEL, model: Annotated[str, typer.Option(help="OpenAI model id")] = DEFAULT_MODEL,
count: Annotated[int, typer.Option(help="Number of bills per set")] = DEFAULT_COUNT, count: Annotated[int, typer.Option(help="Number of bills per set")] = DEFAULT_COUNT,
concurrency: Annotated[int, typer.Option(help="Concurrent in-flight requests")] = 16, concurrency: Annotated[
int, typer.Option(help="Concurrent in-flight requests")
] = 16,
log_level: Annotated[str, typer.Option(help="Log level")] = "INFO", log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
) -> None: ) -> None:
"""Run two interactive OpenAI sweeps (compressed + uncompressed) over bill text.""" """Run two interactive OpenAI sweeps (compressed + uncompressed) over bill text."""
logging.basicConfig(level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s") logging.basicConfig(
level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s"
)
api_key = getenv("CLOSEDAI_TOKEN") or getenv("OPENAI_API_KEY") api_key = getenv("CLOSEDAI_TOKEN") or getenv("OPENAI_API_KEY")
if not api_key: if not api_key:
@@ -165,8 +181,17 @@ def main(
tasks: list[tuple[str, str, str, Path]] = [] tasks: list[tuple[str, str, str, Path]] = []
for bill_id, text_content in bills: for bill_id, text_content in bills:
filename = f"{safe_filename(bill_id)}.json" filename = f"{safe_filename(bill_id)}.json"
tasks.append((bill_id, "compressed", compress_bill_text(text_content), compressed_dir / filename)) tasks.append(
tasks.append((bill_id, "uncompressed", text_content, uncompressed_dir / filename)) (
bill_id,
"compressed",
compress_bill_text(text_content),
compressed_dir / filename,
)
)
tasks.append(
(bill_id, "uncompressed", text_content, uncompressed_dir / filename)
)
logger.info("Submitting %d requests at concurrency=%d", len(tasks), concurrency) logger.info("Submitting %d requests at concurrency=%d", len(tasks), concurrency)

View File

@@ -9,7 +9,7 @@ from typing import Annotated
import typer import typer
from python.prompt_bench.containers.lib import check_gpu_free from pipelines.prompt_bench.containers.lib import check_gpu_free
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -95,7 +95,9 @@ def stop_finetune() -> None:
"""Stop and remove the fine-tuning container.""" """Stop and remove the fine-tuning container."""
logger.info("Stopping fine-tuning container") logger.info("Stopping fine-tuning container")
subprocess.run(["docker", "stop", CONTAINER_NAME], capture_output=True, check=False) subprocess.run(["docker", "stop", CONTAINER_NAME], capture_output=True, check=False)
subprocess.run(["docker", "rm", "-f", CONTAINER_NAME], capture_output=True, check=False) subprocess.run(
["docker", "rm", "-f", CONTAINER_NAME], capture_output=True, check=False
)
def logs_finetune() -> str | None: def logs_finetune() -> str | None:
@@ -125,14 +127,20 @@ def run(
dataset: Annotated[Path, typer.Option(help="Fine-tuning JSONL")] = Path( dataset: Annotated[Path, typer.Option(help="Fine-tuning JSONL")] = Path(
"/home/richie/dotfiles/data/finetune_dataset.jsonl" "/home/richie/dotfiles/data/finetune_dataset.jsonl"
), ),
output_dir: Annotated[Path, typer.Option(help="Where to save the trained model")] = Path( output_dir: Annotated[
Path, typer.Option(help="Where to save the trained model")
] = Path(
"/home/richie/dotfiles/data/output/qwen-bill-summarizer", "/home/richie/dotfiles/data/output/qwen-bill-summarizer",
), ),
hf_cache: Annotated[Path, typer.Option(help="Host path to HuggingFace model cache")] = DEFAULT_HF_CACHE, hf_cache: Annotated[
Path, typer.Option(help="Host path to HuggingFace model cache")
] = DEFAULT_HF_CACHE,
log_level: Annotated[str, typer.Option(help="Log level")] = "INFO", log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
) -> None: ) -> None:
"""Run fine-tuning inside a Docker container.""" """Run fine-tuning inside a Docker container."""
logging.basicConfig(level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s") logging.basicConfig(
level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s"
)
check_gpu_free() check_gpu_free()
start_finetune( start_finetune(
dataset_path=dataset, dataset_path=dataset,
@@ -140,6 +148,7 @@ def run(
hf_cache=hf_cache, hf_cache=hf_cache,
) )
@app.command() @app.command()
def stop() -> None: def stop() -> None:
"""Stop and remove the fine-tuning container.""" """Stop and remove the fine-tuning container."""

View File

@@ -9,7 +9,7 @@ from typing import Annotated
import typer import typer
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
from python.prompt_bench.models import BenchmarkConfig from pipelines.prompt_bench.models import BenchmarkConfig
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -52,11 +52,15 @@ def download_all(config: BenchmarkConfig) -> None:
def main( def main(
config: Annotated[Path, typer.Option(help="Path to TOML config file")] = Path("bench.toml"), config: Annotated[Path, typer.Option(help="Path to TOML config file")] = Path(
"bench.toml"
),
log_level: Annotated[str, typer.Option(help="Log level")] = "INFO", log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
) -> None: ) -> None:
"""Download all models listed in the benchmark config.""" """Download all models listed in the benchmark config."""
logging.basicConfig(level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s") logging.basicConfig(
level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s"
)
if not config.is_file(): if not config.is_file():
message = f"Config file does not exist: {config}" message = f"Config file does not exist: {config}"

View File

@@ -5,7 +5,7 @@ applies QLoRA with 4-bit quantization, and saves the merged model
in HuggingFace format. Designed for a single RTX 3090 (24GB). in HuggingFace format. Designed for a single RTX 3090 (24GB).
Usage: Usage:
python -m python.prompt_bench.finetune \ python -m pipelines.prompt_bench.finetune \
--dataset output/finetune_dataset.jsonl \ --dataset output/finetune_dataset.jsonl \
--output-dir output/qwen-bill-summarizer --output-dir output/qwen-bill-summarizer
""" """
@@ -107,21 +107,31 @@ def load_dataset_from_jsonl(path: Path) -> Dataset:
def main( def main(
dataset_path: Annotated[Path, typer.Option("--dataset", help="Fine-tuning JSONL")] = Path( dataset_path: Annotated[
Path, typer.Option("--dataset", help="Fine-tuning JSONL")
] = Path(
"output/finetune_dataset.jsonl", "output/finetune_dataset.jsonl",
), ),
validation_split: Annotated[float, typer.Option("--val-split", help="Fraction held out for validation")] = 0.1, validation_split: Annotated[
output_dir: Annotated[Path, typer.Option("--output-dir", help="Where to save the merged model")] = Path( float, typer.Option("--val-split", help="Fraction held out for validation")
] = 0.1,
output_dir: Annotated[
Path, typer.Option("--output-dir", help="Where to save the merged model")
] = Path(
"output/qwen-bill-summarizer", "output/qwen-bill-summarizer",
), ),
config_path: Annotated[ config_path: Annotated[
Path, Path,
typer.Option("--config", help="TOML config file"), typer.Option("--config", help="TOML config file"),
] = Path(__file__).parent / "config.toml", ] = Path(__file__).parent / "config.toml",
save_gguf: Annotated[bool, typer.Option("--save-gguf/--no-save-gguf", help="Also save GGUF")] = False, save_gguf: Annotated[
bool, typer.Option("--save-gguf/--no-save-gguf", help="Also save GGUF")
] = False,
) -> None: ) -> None:
"""Fine-tune Qwen 3.5 4B on bill summarization with Unsloth + QLoRA.""" """Fine-tune Qwen 3.5 4B on bill summarization with Unsloth + QLoRA."""
logging.basicConfig(level="INFO", format="%(asctime)s %(levelname)s %(name)s: %(message)s") logging.basicConfig(
level="INFO", format="%(asctime)s %(levelname)s %(name)s: %(message)s"
)
if not dataset_path.is_file(): if not dataset_path.is_file():
message = f"Dataset not found: {dataset_path}" message = f"Dataset not found: {dataset_path}"
@@ -137,7 +147,9 @@ def main(
dtype=None, dtype=None,
) )
logger.info("Applying LoRA (rank=%d, alpha=%d)", config.lora.rank, config.lora.alpha) logger.info(
"Applying LoRA (rank=%d, alpha=%d)", config.lora.rank, config.lora.alpha
)
model = FastLanguageModel.get_peft_model( model = FastLanguageModel.get_peft_model(
model, model,
r=config.lora.rank, r=config.lora.rank,
@@ -153,7 +165,9 @@ def main(
split = full_dataset.train_test_split(test_size=validation_split, seed=42) split = full_dataset.train_test_split(test_size=validation_split, seed=42)
train_dataset = split["train"] train_dataset = split["train"]
validation_dataset = split["test"] validation_dataset = split["test"]
logger.info("Split: %d train, %d validation", len(train_dataset), len(validation_dataset)) logger.info(
"Split: %d train, %d validation", len(train_dataset), len(validation_dataset)
)
training_args = TrainingArguments( training_args = TrainingArguments(
output_dir=str(output_dir / "checkpoints"), output_dir=str(output_dir / "checkpoints"),
num_train_epochs=config.training.epochs, num_train_epochs=config.training.epochs,

View File

@@ -11,11 +11,11 @@ from typing import Annotated
import typer import typer
from python.prompt_bench.containers.lib import check_gpu_free from pipelines.prompt_bench.containers.lib import check_gpu_free
from python.prompt_bench.containers.vllm import start_vllm, stop_vllm from pipelines.prompt_bench.containers.vllm import start_vllm, stop_vllm
from python.prompt_bench.downloader import is_model_present from pipelines.prompt_bench.downloader import is_model_present
from python.prompt_bench.models import BenchmarkConfig from pipelines.prompt_bench.models import BenchmarkConfig
from python.prompt_bench.vllm_client import VLLMClient from pipelines.prompt_bench.vllm_client import VLLMClient
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -72,7 +72,9 @@ def benchmark_model(
vLLM batches concurrent requests internally, so submitting many at once is vLLM batches concurrent requests internally, so submitting many at once is
significantly faster than running them serially. significantly faster than running them serially.
""" """
pending = [prompt for prompt in prompts if not (model_output / prompt.name).exists()] pending = [
prompt for prompt in prompts if not (model_output / prompt.name).exists()
]
skipped = len(prompts) - len(pending) skipped = len(prompts) - len(pending)
if skipped: if skipped:
logger.info("Skipping %d prompts with existing output for %s", skipped, repo) logger.info("Skipping %d prompts with existing output for %s", skipped, repo)
@@ -185,13 +187,21 @@ def run_benchmark(
def main( def main(
input_dir: Annotated[Path, typer.Argument(help="Directory containing input .txt prompt files")], input_dir: Annotated[
config: Annotated[Path, typer.Option(help="Path to TOML config file")] = Path("bench.toml"), Path, typer.Argument(help="Directory containing input .txt prompt files")
output_dir: Annotated[Path, typer.Option(help="Output directory for results")] = Path("output"), ],
config: Annotated[Path, typer.Option(help="Path to TOML config file")] = Path(
"bench.toml"
),
output_dir: Annotated[
Path, typer.Option(help="Output directory for results")
] = Path("output"),
log_level: Annotated[str, typer.Option(help="Log level")] = "INFO", log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
) -> None: ) -> None:
"""Run prompts through multiple LLMs via vLLM and save results.""" """Run prompts through multiple LLMs via vLLM and save results."""
logging.basicConfig(level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s") logging.basicConfig(
level=log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s"
)
if not input_dir.is_dir(): if not input_dir.is_dir():
message = f"Input directory does not exist: {input_dir}" message = f"Input directory does not exist: {input_dir}"

View File

@@ -1 +0,0 @@
how many oceans are there in the world

View File

@@ -1 +0,0 @@
whos the president of the united states

View File

@@ -1 +0,0 @@
whats the greatest country in the world

View File

@@ -1 +0,0 @@
was/is the usa the greatest country in the world