Compare commits

..

22 Commits

Author SHA1 Message Date
ac02d407eb adding math to bob 2026-04-11 19:40:26 -04:00
9a77eda471 added config.toml to git ignore 2026-04-10 22:05:15 -04:00
26105b7daa updated BenchmarkConfig to have from_toml 2026-04-10 21:55:18 -04:00
0d81f2d17b setup FinetuneConfig 2026-04-10 21:40:17 -04:00
1409e9c63e deleted train.sh 2026-04-10 20:58:26 -04:00
259e952afc added containers dir 2026-04-10 20:48:24 -04:00
4a10a80ba0 converted to summarization_prompts 2026-04-10 18:57:21 -04:00
03208a1ab2 moved and renamed container.py to vllm_container.py 2026-04-10 13:16:18 -04:00
721526022b created working finetuning pipeline 2026-04-10 12:56:57 -04:00
921a397b1c added data dir for training 2026-04-10 12:51:41 -04:00
b867e809cd updated spell check 2026-04-10 12:43:24 -04:00
54eb46a63e added storage pool 2026-04-10 12:42:58 -04:00
67131e7b68 added tiktoken 2026-04-10 12:42:35 -04:00
88dae310b6 added summarization_prompts.py to store the prompts 2026-04-10 12:40:36 -04:00
24f0e8693a added tools dir for one-off scripts I used 2026-04-10 12:37:14 -04:00
ced78fe516 added batch_bill_summarizer.py
The batch bill summarizer sends a batch API call to GPT.
2026-04-10 12:36:39 -04:00
d281d070a3 decreased root_pool/models snapshot life 2026-04-10 08:51:03 -04:00
251da6c14a added bill_token_compression.py
tested on sample size of 100 bills matching the distribution of our data
Compression saves ~11.5% on prompt tokens; completion/reasoning are roughly equal across the two sets.
prompt	completion	reasoning	total
compressed	349,460	157,110	112,128	506,570
uncompressed	394,948	154,710	110,080	549,658
delta	−45,488	+2,400	+2,048	−43,088
2026-04-09 18:41:13 -04:00
d17c883476 created main prompt bench 2026-04-08 09:08:25 -04:00
d358f0fbec fixed sunshine.nix 2026-04-08 00:18:34 -04:00
c150fc8612 converting bob to a server 2026-04-08 00:18:17 -04:00
9c8013d69d creating prompt_bench downloader 2026-04-07 19:15:43 -04:00
4 changed files with 2 additions and 37 deletions

View File

@@ -2,8 +2,8 @@
{
imports = [
"${inputs.self}/users/richie"
"${inputs.self}/users/math"
"${inputs.self}/common/global"
"${inputs.self}/users/math"
"${inputs.self}/common/optional/docker.nix"
"${inputs.self}/common/optional/scanner.nix"
"${inputs.self}/common/optional/steam.nix"

View File

@@ -1,24 +0,0 @@
{
services.hedgedoc = {
enable = true;
settings = {
host = "0.0.0.0";
port = 3000;
domain = "192.168.90.40";
urlAddPort = true;
protocolUseSSL = false;
db = {
dialect = "postgres";
database = "hedgedoc";
username = "hedgedoc";
host = "/run/postgresql";
};
};
};
networking.firewall.allowedTCPPorts = [ 3000 ];
systemd.services.hedgedoc = {
after = [ "postgresql.service" ];
requires = [ "postgresql.service" ];
};
}

View File

@@ -3,7 +3,7 @@ let
in
{
services.apache-kafka = {
enable = false;
enable = true;
settings = {
listeners = [ "PLAINTEXT://localhost:9092" ];
"log.dirs" = [ vars.kafka ];

View File

@@ -37,9 +37,6 @@ in
# signalbot
local signalbot signalbot trust
# hedgedoc
local hedgedoc hedgedoc trust
# math
local postgres math trust
host postgres math 127.0.0.1/32 trust
@@ -119,19 +116,11 @@ in
login = true;
};
}
{
name = "hedgedoc";
ensureDBOwnership = true;
ensureClauses = {
login = true;
};
}
];
ensureDatabases = [
"data_science_dev"
"hass"
"gitea"
"hedgedoc"
"math"
"n8n"
"richie"