Compare commits

..

24 Commits

Author SHA1 Message Date
a80de99175 adding math to bob 2026-04-12 10:08:23 -04:00
50d56a8a39 added config.toml to git ignore 2026-04-12 10:08:23 -04:00
30dc36588c updated BenchmarkConfig to have from_toml 2026-04-12 10:08:23 -04:00
68190901cb setup FinetuneConfig 2026-04-12 10:08:23 -04:00
275762843f deleted train.sh 2026-04-12 10:08:23 -04:00
face93262f added containers dir 2026-04-12 10:08:23 -04:00
ee34a0986b converted to summarization_prompts 2026-04-12 10:08:23 -04:00
e8b20bc7df moved renamed container.py to vllm_container.py 2026-04-12 10:08:23 -04:00
6c459985fa created working finetuning pipeline 2026-04-12 10:08:23 -04:00
20a204612f added data dir for training 2026-04-12 10:08:23 -04:00
27b609052c updated spell check 2026-04-12 10:08:23 -04:00
20fb24e244 added storage pool 2026-04-12 10:08:23 -04:00
230ab1d7f6 added tiktoken 2026-04-12 10:08:23 -04:00
9ffaa1b755 added summarization_prompts.py to store the prompts 2026-04-12 10:08:23 -04:00
c6b4ed4814 added tools dir for one-off scripts I used 2026-04-12 10:08:23 -04:00
88ceeb55a1 added batch_bill_summarizer.py
batch bill summarizer sends a batch API call to GPT
2026-04-12 10:08:23 -04:00
6c57d74644 decreased root_pool/models snapshot life 2026-04-12 10:08:23 -04:00
cb98090f95 added bill_token_compression.py
tested on sample size of 100 bills matching the distribution of our data
Compression saves ~11.5% on prompt tokens; completion/reasoning are roughly equal across the two sets.
set	prompt	completion	reasoning	total
compressed	349,460	157,110	112,128	506,570
uncompressed	394,948	154,710	110,080	549,658
delta	−45,488	+2,400	+2,048	−43,088
2026-04-12 10:08:23 -04:00
63cb48a3dd created main prompt bench 2026-04-12 10:08:23 -04:00
6f6d247d3e fixed sunshine.nix 2026-04-12 10:08:23 -04:00
6b63315579 converting bob to a server 2026-04-12 10:08:23 -04:00
a093c72eb9 creating prompt_bench downloader 2026-04-12 10:08:23 -04:00
67622c0e51 setting up hedgedoc 2026-04-11 11:42:08 -04:00
d2f447a1af disabling kafka 2026-04-11 11:11:21 -04:00
4 changed files with 37 additions and 2 deletions

View File

@@ -2,8 +2,8 @@
{ {
imports = [ imports = [
"${inputs.self}/users/richie" "${inputs.self}/users/richie"
"${inputs.self}/common/global"
"${inputs.self}/users/math" "${inputs.self}/users/math"
"${inputs.self}/common/global"
"${inputs.self}/common/optional/docker.nix" "${inputs.self}/common/optional/docker.nix"
"${inputs.self}/common/optional/scanner.nix" "${inputs.self}/common/optional/scanner.nix"
"${inputs.self}/common/optional/steam.nix" "${inputs.self}/common/optional/steam.nix"

View File

@@ -0,0 +1,24 @@
# NixOS module that enables HedgeDoc with a PostgreSQL database, opens its
# TCP port in the firewall, and ties the service's startup to PostgreSQL.
let
  # Port HedgeDoc listens on; reused below for the firewall rule so the two
  # values cannot drift apart.
  httpPort = 3000;

  # Unit that the hedgedoc service both requires and is ordered after.
  databaseUnits = [ "postgresql.service" ];
in
{
  services.hedgedoc.enable = true;

  services.hedgedoc.settings = {
    # Listen address and port.
    host = "0.0.0.0";
    port = httpPort;

    # Address used in generated URLs; the port is appended and SSL is off.
    domain = "192.168.90.40";
    urlAddPort = true;
    protocolUseSSL = false;

    # Database connection. `host` is a filesystem path rather than a
    # hostname — presumably the PostgreSQL Unix-socket directory; confirm.
    db.dialect = "postgres";
    db.database = "hedgedoc";
    db.username = "hedgedoc";
    db.host = "/run/postgresql";
  };

  networking.firewall.allowedTCPPorts = [ httpPort ];

  systemd.services.hedgedoc = {
    requires = databaseUnits;
    after = databaseUnits;
  };
}

View File

@@ -3,7 +3,7 @@ let
in in
{ {
services.apache-kafka = { services.apache-kafka = {
enable = true; enable = false;
settings = { settings = {
listeners = [ "PLAINTEXT://localhost:9092" ]; listeners = [ "PLAINTEXT://localhost:9092" ];
"log.dirs" = [ vars.kafka ]; "log.dirs" = [ vars.kafka ];

View File

@@ -37,6 +37,9 @@ in
# signalbot # signalbot
local signalbot signalbot trust local signalbot signalbot trust
# hedgedoc
local hedgedoc hedgedoc trust
# math # math
local postgres math trust local postgres math trust
host postgres math 127.0.0.1/32 trust host postgres math 127.0.0.1/32 trust
@@ -116,11 +119,19 @@ in
login = true; login = true;
}; };
} }
{
name = "hedgedoc";
ensureDBOwnership = true;
ensureClauses = {
login = true;
};
}
]; ];
ensureDatabases = [ ensureDatabases = [
"data_science_dev" "data_science_dev"
"hass" "hass"
"gitea" "gitea"
"hedgedoc"
"math" "math"
"n8n" "n8n"
"richie" "richie"