Compare commits

..

1 Commits

Author SHA1 Message Date
Richie dc210d7380 adding xf86_input_wacom 2025-11-02 09:54:38 -05:00
299 changed files with 1497 additions and 28749 deletions
+2 -2
View File
@@ -23,6 +23,6 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Build default package
run: "nixos-rebuild build --accept-flake-config --flake ./#${{ matrix.system }}"
run: "nixos-rebuild build --flake ./#${{ matrix.system }}"
- name: copy to nix-cache
run: nix copy --accept-flake-config --to unix:///host-nix/var/nix/daemon-socket/socket .#nixosConfigurations.${{ matrix.system }}.config.system.build.toplevel
run: nix copy --to ssh://jeeves .#nixosConfigurations.${{ matrix.system }}.config.system.build.toplevel
+13 -7
View File
@@ -6,18 +6,24 @@ on:
jobs:
merge:
runs-on: self-hosted
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: merge_flake_lock_update
run: >-
nix develop .#devShells.x86_64-linux.default -c
python -m python.gitea_flake_lock merge
--repo "${{ github.repository }}"
run: |
pr_number=$(gh pr list --state open --author RichieCahill --label flake_lock_update --json number --jq '.[0].number')
echo "pr_number=$pr_number" >> $GITHUB_ENV
if [ -n "$pr_number" ]; then
gh pr merge "$pr_number" --rebase
else
echo "No open PR found with label flake_lock_update"
fi
env:
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
GITEA_URL: https://gitea.tmmworkshop.com
GITHUB_TOKEN: ${{ secrets.GH_TOKEN_FOR_UPDATES }}
+2 -2
View File
@@ -1,13 +1,13 @@
name: pytest
on:
workflow_dispatch:
push:
branches:
- main
pull_request:
branches:
- main
merge_group:
jobs:
pytest:
@@ -16,4 +16,4 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Run tests
run: nix develop .#devShells.x86_64-linux.default -c pytest tests
run: pytest tests
+11 -14
View File
@@ -6,21 +6,18 @@ on:
jobs:
lockfile:
runs-on: self-hosted
permissions:
actions: write
contents: write
pull-requests: write
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Nix
uses: DeterminateSystems/nix-installer-action@main
- name: Update flake.lock
run: nix flake update
- name: Create or update flake.lock PR
env:
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
GITEA_URL: https://gitea.tmmworkshop.com
run: >-
nix develop .#devShells.x86_64-linux.default -c
python -m python.gitea_flake_lock update
--repo "${{ github.repository }}"
uses: DeterminateSystems/update-flake-lock@main
with:
token: ${{ secrets.GH_TOKEN_FOR_UPDATES }}
pr-title: "Update flake.lock"
pr-labels: |
dependencies
automated
flake_lock_update
-8
View File
@@ -165,11 +165,3 @@ test.*
# syncthing
.stfolder
# Frontend build output
frontend/dist/
frontend/node_modules/
# data from testing llms
data/*
.ebook_search_bm25
+8 -23
View File
@@ -40,6 +40,7 @@
"cgroupdriver",
"charliermarsh",
"Checkpointing",
"cloudflared",
"codellama",
"codezombiech",
"compactmode",
@@ -76,11 +77,11 @@
"esphome",
"extest",
"fadvise",
"fastfetch",
"fastforwardteam",
"FASTFOX",
"ffmpegthumbnailer",
"filebot",
"filebrowser",
"fileroller",
"findbar",
"Fira",
@@ -97,7 +98,6 @@
"getch",
"getmaxyx",
"ghdeploy",
"gitea",
"globalprivacycontrol",
"gparted",
"gtts",
@@ -116,9 +116,7 @@
"httpchk",
"hurlenko",
"hwloc",
"ical",
"ignorelist",
"improv",
"INITDB",
"iocharset",
"ioit",
@@ -128,8 +126,6 @@
"jnoortheen",
"jsbc",
"kagi",
"keyformat",
"keylocation",
"kuma",
"lazer",
"levelname",
@@ -166,6 +162,7 @@
"mypy",
"ncdu",
"nemo",
"neofetch",
"nerdfonts",
"netdev",
"netdevs",
@@ -203,7 +200,6 @@
"peerconnection",
"PESKYFOX",
"PGID",
"pgvector",
"pipewire",
"pkgs",
"plugdev",
@@ -229,10 +225,12 @@
"pylint",
"pymetno",
"pymodbus",
"pyopenweathermap",
"pyownet",
"pytest",
"qalculate",
"qbit",
"qbittorrent",
"qbittorrentvpn",
"qbitvpn",
"quicksuggest",
"radarr",
"readahead",
@@ -254,10 +252,8 @@
"schemeless",
"scrollback",
"SECUREFOX",
"sessionmaker",
"sessionstore",
"shellcheck",
"signalbot",
"signon",
"Signons",
"skia",
@@ -267,7 +263,6 @@
"socialtracking",
"sonarr",
"sponsorblock",
"sqlalchemy",
"sqltools",
"ssdp",
"SSHOPTS",
@@ -289,14 +284,11 @@
"topstories",
"treefmt",
"twimg",
"typedmonarchmoney",
"typer",
"uaccess",
"ubiquiti",
"ublock",
"uiprotect",
"uitour",
"unifi",
"unrar",
"unsubmitted",
"uptimekuma",
@@ -307,8 +299,6 @@
"useragent",
"usernamehw",
"userprefs",
"vaninventory",
"vdev",
"vfat",
"victron",
"virt",
@@ -335,10 +325,5 @@
"zoxide",
"zram",
"zstd"
],
"python-envs.defaultEnvManager": "ms-python.python:system",
"python-envs.pythonProjects": [],
"python.testing.pytestArgs": ["tests"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
]
}
-12
View File
@@ -1,12 +0,0 @@
## Dev environment tips
- use treefmt to format all files
- make python code ruff compliant
- use pytest to test python code
- always use the minimum amount of complexity
- if judgment calls are easy to reverse make them. if not ask me first
- Match existing code style.
- Use builtin helpers getenv() over os.environ.get.
- Prefer single-purpose functions over “do everything” helpers.
- Avoid compatibility branches like PG_USER and POSTGRESQL_URL unless requested.
- Keep helpers only if reused or they simplify the code otherwise inline.
+3 -16
View File
@@ -23,10 +23,7 @@
boot = {
tmp.useTmpfs = true;
kernelPackages = lib.mkDefault pkgs.linuxPackages_6_12;
zfs = {
package = lib.mkDefault pkgs.zfs_2_4;
forceImportRoot = lib.mkDefault false;
};
zfs.package = lib.mkDefault pkgs.zfs_2_3;
};
hardware.enableRedistributableFirmware = true;
@@ -40,24 +37,14 @@
nixpkgs = {
overlays = builtins.attrValues outputs.overlays;
config = {
allowUnfree = true;
permittedInsecurePackages = [
"openssl-1.1.1w" # This is for discord-canary
];
};
config.allowUnfree = true;
};
services = {
dbus.implementation = "dbus";
# firmware update
fwupd.enable = true;
snapshot_manager = {
enable = lib.mkDefault true;
PYTHONPATH = "${inputs.self}/";
};
snapshot_manager.enable = lib.mkDefault true;
zfs = {
trim.enable = lib.mkDefault true;
-3
View File
@@ -33,9 +33,6 @@ in
];
warn-dirty = false;
flake-registry = ""; # disable global flake registries
connect-timeout = 10;
download-buffer-size = 536870912;
fallback = true;
};
# Add each flake input as a registry and nix_path
+4 -10
View File
@@ -1,4 +1,5 @@
{
inputs,
pkgs,
lib,
config,
@@ -16,12 +17,6 @@ in
default = ./snapshot_config.toml;
description = "Path to the snapshot_manager TOML config.";
};
PYTHONPATH = lib.mkOption {
type = lib.types.str;
description = ''
the PYTHONPATH to use for the snapshot_manager service.
'';
};
EnvironmentFile = lib.mkOption {
type = lib.types.nullOr (lib.types.coercedTo lib.types.path toString lib.types.str);
default = null;
@@ -40,12 +35,11 @@ in
requires = [ "zfs-import.target" ];
after = [ "zfs-import.target" ];
path = [ pkgs.zfs ];
environment = {
PYTHONPATH = cfg.PYTHONPATH;
};
serviceConfig = {
Type = "oneshot";
ExecStart = "${pkgs.my_python}/bin/python -m python.tools.snapshot_manager ${lib.escapeShellArg cfg.path}";
ExecStart = "${
inputs.system_tools.packages.${pkgs.system}.default
}/bin/snapshot_manager ${lib.escapeShellArg cfg.path}";
}
// lib.optionalAttrs (cfg.EnvironmentFile != null) {
EnvironmentFile = cfg.EnvironmentFile;
-2
View File
@@ -37,8 +37,6 @@
TcpKeepAlive = "no";
X11Forwarding = lib.mkDefault false;
KexAlgorithms = [
"sntrup761x25519-sha512@openssh.com"
"mlkem768x25519-sha256"
"curve25519-sha256@libssh.org"
"diffie-hellman-group-exchange-sha256"
];
-6
View File
@@ -1,6 +0,0 @@
{
nix.settings = {
trusted-substituters = [ "http://192.168.95.35:5000" ];
substituters = [ "http://192.168.95.35:5000/?priority=1&want-mass-query=true" ];
};
}
+2 -2
View File
@@ -1,8 +1,8 @@
{ pkgs, ... }:
{
boot = {
kernelPackages = pkgs.linuxPackages_6_18;
zfs.package = pkgs.zfs_2_4;
kernelPackages = pkgs.linuxPackages_6_16;
zfs.package = pkgs.zfs_2_3;
};
hardware.bluetooth = {
-256
View File
@@ -1,256 +0,0 @@
{
config,
lib,
pkgs,
...
}:
let
monitoringInterface = "ztwfunumly";
nodeTextfileDir = "/var/lib/prometheus-node-exporter-textfile";
mkProcessNameTemplate =
perPid: template: if perPid then "${template}:{{.PID}}:{{.StartTime}}" else template;
mkProcessMatchers = perPid: [
{
name = mkProcessNameTemplate perPid "{{.Username}}:{{.Matches.Module}}";
cmdline = [ "^/nix/store[^ ]*/bin/python[^ ]* -m (?P<Module>[^ ]+)" ];
}
{
name = mkProcessNameTemplate perPid "{{.Username}}:{{.Matches.Wrapped}}";
cmdline = [
"^/nix/store[^ ]*/bin/python[^ ]* /nix/store[^ ]*/bin/\\.?(?P<Wrapped>[^ /]+?)(?:-wrapped)?(?:\\s|$)"
];
}
{
name = mkProcessNameTemplate perPid "{{.Username}}:{{.Matches.Wrapped}}";
cmdline = [
"^/nix/store[^ ]*/bin/node /nix/store[^ ]*-(?P<Wrapped>[A-Za-z0-9._+-]+)-[0-9][^ /]*/"
];
}
{
name = mkProcessNameTemplate perPid "{{.Username}}:{{.Matches.Wrapped}}";
cmdline = [ "^/nix/store[^ ]*/(?:bin/|lib/[^ ]*/)?\\.?(?P<Wrapped>[^ /]+?)(?:-wrapped)?(?:\\s|$)" ];
}
{
name = mkProcessNameTemplate perPid "{{.Username}}:{{.ExeBase}}";
cmdline = [ ".+" ];
}
];
perPidConfig = pkgs.writeText "process-exporter-per-pid.yaml" (
builtins.toJSON {
process_names = mkProcessMatchers true;
}
);
zpoolLatencyScript = pkgs.writeShellScript "zpool-latency-exporter" ''
set -euo pipefail
out_dir=${lib.escapeShellArg nodeTextfileDir}
host=${lib.escapeShellArg config.networking.hostName}
tmp_file="$(mktemp "$out_dir/zpool.prom.XXXXXX")"
trap 'rm -f "$tmp_file"' EXIT
pools="$(zpool list -H -o name | paste -sd, -)"
cat >"$tmp_file" <<'EOF'
# HELP zpool_iostat_total_wait_read_ns Average total read wait time reported by zpool iostat.
# TYPE zpool_iostat_total_wait_read_ns gauge
# HELP zpool_iostat_total_wait_write_ns Average total write wait time reported by zpool iostat.
# TYPE zpool_iostat_total_wait_write_ns gauge
# HELP zpool_iostat_disk_wait_read_ns Average disk read wait time reported by zpool iostat.
# TYPE zpool_iostat_disk_wait_read_ns gauge
# HELP zpool_iostat_disk_wait_write_ns Average disk write wait time reported by zpool iostat.
# TYPE zpool_iostat_disk_wait_write_ns gauge
# HELP zpool_iostat_syncq_wait_read_ns Average synchronous queue read wait time reported by zpool iostat.
# TYPE zpool_iostat_syncq_wait_read_ns gauge
# HELP zpool_iostat_syncq_wait_write_ns Average synchronous queue write wait time reported by zpool iostat.
# TYPE zpool_iostat_syncq_wait_write_ns gauge
# HELP zpool_iostat_asyncq_wait_read_ns Average asynchronous queue read wait time reported by zpool iostat.
# TYPE zpool_iostat_asyncq_wait_read_ns gauge
# HELP zpool_iostat_asyncq_wait_write_ns Average asynchronous queue write wait time reported by zpool iostat.
# TYPE zpool_iostat_asyncq_wait_write_ns gauge
EOF
zpool iostat -Hplvy -y 1 1 | awk -F '\t' -v host="$host" -v pools="$pools" '
function esc(str, out) {
out = str
gsub(/\\/, "\\\\", out)
gsub(/"/, "\\\"", out)
return out
}
function emit(metric, pool, vdev, value) {
if (value == "" || value == "-") {
return
}
printf "%s{host=\"%s\",pool=\"%s\",vdev=\"%s\"} %s\n",
metric,
esc(host),
esc(pool),
esc(vdev),
value
}
BEGIN {
split(pools, pool_names, ",")
for (idx in pool_names) {
if (pool_names[idx] != "") {
known_pools[pool_names[idx]] = 1
}
}
}
NF == 0 {
next
}
{
row_name = $1
if (row_name in known_pools) {
current_pool = row_name
current_vdev = "_pool"
} else if (current_pool == "") {
next
} else {
current_vdev = row_name
}
emit("zpool_iostat_total_wait_read_ns", current_pool, current_vdev, $8)
emit("zpool_iostat_total_wait_write_ns", current_pool, current_vdev, $9)
emit("zpool_iostat_disk_wait_read_ns", current_pool, current_vdev, $10)
emit("zpool_iostat_disk_wait_write_ns", current_pool, current_vdev, $11)
emit("zpool_iostat_syncq_wait_read_ns", current_pool, current_vdev, $12)
emit("zpool_iostat_syncq_wait_write_ns", current_pool, current_vdev, $13)
emit("zpool_iostat_asyncq_wait_read_ns", current_pool, current_vdev, $14)
emit("zpool_iostat_asyncq_wait_write_ns", current_pool, current_vdev, $15)
}
' >>"$tmp_file"
mv "$tmp_file" "$out_dir/zpool.prom"
trap - EXIT
'';
in
{
networking.firewall.interfaces.${monitoringInterface}.allowedTCPPorts = [
9100
9134
9256
9257
9633
];
services.prometheus.exporters = {
node = {
enable = true;
enabledCollectors = [
"pressure"
"processes"
"systemd"
];
extraFlags = [ "--collector.textfile.directory=${nodeTextfileDir}" ];
};
process = {
enable = true;
user = "root";
group = "root";
settings.process_names = mkProcessMatchers false;
extraFlags = [
"-gather-smaps=false"
"-remove-empty-groups=true"
"-threads=false"
];
};
smartctl.enable = true;
zfs.enable = true;
};
programs.atop = {
enable = true;
atopService.enable = true;
atopRotateTimer.enable = true;
atopacctService.enable = true;
settings.interval = 30;
};
systemd = {
services = {
prometheus-process-pid-exporter = {
description = "Prometheus process exporter with per-PID naming";
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
ExecStart = ''
${pkgs.prometheus-process-exporter}/bin/process-exporter \
--web.listen-address 0.0.0.0:9257 \
--config.path ${perPidConfig} \
-children=false \
-gather-smaps=false \
-remove-empty-groups=true \
-threads=false
'';
User = "root";
Group = "root";
Restart = "always";
WorkingDirectory = "/tmp";
CapabilityBoundingSet = [ "" ];
DeviceAllow = [ "" ];
LockPersonality = true;
MemoryDenyWriteExecute = true;
NoNewPrivileges = true;
PrivateDevices = true;
PrivateTmp = true;
ProtectClock = true;
ProtectControlGroups = true;
ProtectHome = true;
ProtectHostname = true;
ProtectKernelLogs = true;
ProtectKernelModules = true;
ProtectKernelTunables = true;
ProtectSystem = "strict";
RemoveIPC = true;
RestrictAddressFamilies = [
"AF_INET"
"AF_INET6"
];
RestrictNamespaces = true;
RestrictRealtime = true;
RestrictSUIDSGID = true;
SystemCallArchitectures = "native";
UMask = "0077";
};
};
zpool-latency-exporter = {
description = "Exports ZFS latency metrics for node_exporter textfile collection";
after = [ "zfs-import.target" ];
requires = [ "zfs-import.target" ];
path = [
config.boot.zfs.package
pkgs.coreutils
pkgs.gawk
];
serviceConfig = {
Type = "oneshot";
ExecStart = zpoolLatencyScript;
};
};
};
timers.zpool-latency-exporter = {
wantedBy = [ "timers.target" ];
timerConfig = {
OnBootSec = "2m";
OnUnitActiveSec = "60s";
Unit = "zpool-latency-exporter.service";
};
};
tmpfiles.rules = [ "d ${nodeTextfileDir} 0755 root root - -" ];
};
}
+1 -1
View File
@@ -12,7 +12,7 @@
brain.id = "SSCGIPI-IV3VYKB-TRNIJE3-COV4T2H-CDBER7F-I2CGHYA-NWOEUDU-3T5QAAN"; # cspell:disable-line
ipad.id = "KI76T3X-SFUGV2L-VSNYTKR-TSIUV5L-SHWD3HE-GQRGRCN-GY4UFMD-CW6Z6AX"; # cspell:disable-line
jeeves.id = "ICRHXZW-ECYJCUZ-I4CZ64R-3XRK7CG-LL2HAAK-FGOHD22-BQA4AI6-5OAL6AG"; # cspell:disable-line
phone.id = "JPVQKQW-CFXOJXT-Q5G5F3H-QIDHDRE-GKHPTQB-GXZUQSP-U7FR7F7-INP3AAH"; # cspell:disable-line
phone.id = "TBRULKD-7DZPGGZ-F6LLB7J-MSO54AY-7KLPBIN-QOFK6PX-W2HBEWI-PHM2CQI"; # cspell:disable-line
rhapsody-in-green.id = "ASL3KC4-3XEN6PA-7BQBRKE-A7JXLI6-DJT43BY-Q4WPOER-7UALUAZ-VTPQ6Q4"; # cspell:disable-line
};
};
+1 -1
View File
@@ -4,7 +4,7 @@
flags = [ "--accept-flake-config" ];
randomizedDelaySec = "1h";
persistent = true;
flake = "git+https://gitea.tmmworkshop.com/richie/dotfiles?ref=main";
flake = "github:RichieCahill/dotfiles";
allowReboot = true;
dates = "Sat *-*-* 06:00:00";
};
-76
View File
@@ -1,76 +0,0 @@
# ZFS failed root import recovery
## Fast path
If the machine fails to boot because ZFS refuses to import `root_pool`:
### GRUB
1. At the bootloader menu, select the normal NixOS entry.
2. Press `e`.
3. Find the line that starts with `linux`.
4. Append this to the end of that line:
```text
zfs_force=1
```
5. Boot once with `Ctrl+x` or `F10`.
### systemd-boot
1. At the bootloader menu, highlight the normal NixOS entry.
2. Press `e`.
3. Append this to the end of the options line:
```text
zfs_force=1
```
4. Press `Enter` to boot once.
## After boot
Run:
```bash
sudo zpool status
sudo zpool import
journalctl -b | rg "ZFS|zfs|import|root_pool"
```
## Expected result
`sudo zpool status` should show `root_pool` as `ONLINE`.
## Reboot test
Run:
```bash
sudo reboot
```
Do not add `zfs_force=1` the second time.
## If it still fails
Boot once more with:
```text
zfs_force=1
```
Then run:
```bash
sudo zpool status -v
sudo zpool history | tail -n 50
journalctl -b | rg "ZFS|zfs|import|root_pool"
```
## Notes
- Root pool name is `root_pool`.
- This is a one-time recovery path after disk moves, controller changes, dirty exports, or interrupted imports.
- Some hosts also need the LUKS unlock USB key inserted before boot.
+129
View File
@@ -0,0 +1,129 @@
esphome:
name: batteries
friendly_name: batteries
esp32:
board: esp32dev
framework:
type: arduino
logger:
api:
encryption:
key: !secret api_key
external_components:
- source: github://syssi/esphome-jk-bms@main
ota:
- platform: esphome
password: !secret ota_password
wifi:
ssid: !secret wifi_ssid
password: !secret wifi_password
captive_portal:
esp32_ble_tracker:
scan_parameters:
interval: 1100ms
window: 1100ms
active: true
ble_client:
- mac_address: "C8:47:80:29:0F:DB"
id: jk_ble0
- mac_address: "C8:47:80:37:9D:DD"
id: jk_ble1
jk_bms_ble:
- ble_client_id: jk_ble0
protocol_version: JK02_32S
throttle: 1s
id: jk_bms0
- ble_client_id: jk_ble1
protocol_version: JK02_32S
throttle: 1s
id: jk_bms1
sensor:
# BMS1 sensors
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms0
total_voltage:
name: "JK0 Total Voltage"
current:
name: "JK0 Current"
state_of_charge:
name: "JK0 SoC"
power:
name: "JK0 Power"
temperature_sensor_1:
name: "JK0 Temp 1"
temperature_sensor_2:
name: "JK0 Temp 2"
balancing:
name: "JK0 balancing"
charging_cycles:
name: "JK0 charging cycles"
total_runtime:
name: "JK0 total runtime"
balancing_current:
name: "JK0 balancing current"
# BMS2 sensors
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms1
total_voltage:
name: "JK1 Total Voltage"
current:
name: "JK1 Current"
state_of_charge:
name: "JK1 SoC"
power:
name: "Jk1 Power"
temperature_sensor_1:
name: "JK1 Temp 1"
temperature_sensor_2:
name: "Jk1 Temp 2"
balancing:
name: "JK1 balancing"
charging_cycles:
name: "JK1 charging cycles"
total_runtime:
name: "JK1 total runtime"
balancing_current:
name: "JK1 balancing current"
text_sensor:
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms0
errors:
name: "JK0 Errors"
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms1
errors:
name: "JK1 Errors"
switch:
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms0
charging:
name: "JK0 Charging"
discharging:
name: "JK0 Discharging"
balancer:
name: "JK0 Balancing"
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms1
charging:
name: "JK1 Charging"
discharging:
name: "JK1 Discharging"
balancer:
name: "JK1 Balancing"
-132
View File
@@ -1,132 +0,0 @@
esphome:
name: batteries
friendly_name: batteries
esp32:
board: esp32dev
framework:
type: arduino
logger:
api:
encryption:
key: !secret api_key
external_components:
- source: github://syssi/esphome-jk-bms@main
ota:
- platform: esphome
password: !secret ota_password
wifi:
ssid: !secret wifi_ssid
password: !secret wifi_password
fast_connect: on
captive_portal:
esp32_ble_tracker:
scan_parameters:
interval: 1100ms
window: 1100ms
active: true
ble_client:
- mac_address: "C8:47:80:29:0F:DB"
id: jk_ble0
jk_bms_ble:
- ble_client_id: jk_ble0
protocol_version: JK02_32S
throttle: 1s
id: jk_bms0
button:
- platform: jk_bms_ble
retrieve_settings:
name: "JK0 retrieve settings"
retrieve_device_info:
name: "JK0 retrieve device info"
sensor:
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms0
total_voltage:
name: "JK0 Total Voltage"
state_of_charge:
name: "JK0 SoC"
charging_power:
name: "JK0 charging power"
discharging_power:
name: "JK0 discharging power"
temperature_sensor_1:
name: "JK0 Temp 1"
temperature_sensor_2:
name: "JK0 Temp 2"
balancing:
name: "JK0 balancing"
total_runtime:
name: "JK0 total runtime"
balancing_current:
name: "JK0 balancing current"
delta_cell_voltage:
name: "JK0 cell delta voltage"
average_cell_voltage:
name: "JK0 cell average voltage"
cell_voltage_1:
name: "JK0 cell voltage 1"
cell_voltage_2:
name: "JK0 cell voltage 2"
cell_voltage_3:
name: "JK0 cell voltage 3"
cell_voltage_4:
name: "JK0 cell voltage 4"
cell_voltage_5:
name: "JK0 cell voltage 5"
cell_voltage_6:
name: "JK0 cell voltage 6"
cell_voltage_7:
name: "JK0 cell voltage 7"
cell_voltage_8:
name: "JK0 cell voltage 8"
cell_resistance_1:
name: "JK0 cell resistance 1"
cell_resistance_2:
name: "JK0 cell resistance 2"
cell_resistance_3:
name: "JK0 cell resistance 3"
cell_resistance_4:
name: "JK0 cell resistance 4"
cell_resistance_5:
name: "JK0 cell resistance 5"
cell_resistance_6:
name: "JK0 cell resistance 6"
cell_resistance_7:
name: "JK0 cell resistance 7"
cell_resistance_8:
name: "JK0 cell resistance 8"
total_charging_cycle_capacity:
name: "JK0 total charging cycle capacity"
text_sensor:
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms0
errors:
name: "JK0 Errors"
switch:
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms0
charging:
name: "JK0 Charging"
discharging:
name: "JK0 Discharging"
balancer:
name: "JK0 Balancing"
- platform: ble_client
ble_client_id: jk_ble0
name: "JK0 enable bluetooth connection"
id: ble_client_switch0
-132
View File
@@ -1,132 +0,0 @@
esphome:
name: battery1
friendly_name: battery1
esp32:
board: esp32dev
framework:
type: arduino
logger:
api:
encryption:
key: !secret api_key
external_components:
- source: github://syssi/esphome-jk-bms@main
ota:
- platform: esphome
password: !secret ota_password
wifi:
ssid: !secret wifi_ssid
password: !secret wifi_password
fast_connect: on
captive_portal:
esp32_ble_tracker:
scan_parameters:
interval: 1100ms
window: 1100ms
active: true
ble_client:
- mac_address: "C8:47:80:37:9D:DD"
id: jk_ble1
jk_bms_ble:
- ble_client_id: jk_ble1
protocol_version: JK02_32S
throttle: 1s
id: jk_bms1
button:
- platform: jk_bms_ble
retrieve_settings:
name: "JK1 retrieve settings"
retrieve_device_info:
name: "JK1 retrieve device info"
sensor:
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms1
total_voltage:
name: "JK1 Total Voltage"
state_of_charge:
name: "JK1 SoC"
charging_power:
name: "JK1 charging power"
discharging_power:
name: "JK1 discharging power"
temperature_sensor_1:
name: "JK1 Temp 1"
temperature_sensor_2:
name: "JK1 Temp 2"
balancing:
name: "JK1 balancing"
total_runtime:
name: "JK1 total runtime"
balancing_current:
name: "JK1 balancing current"
delta_cell_voltage:
name: "JK1 cell delta voltage"
average_cell_voltage:
name: "JK1 cell average voltage"
cell_voltage_1:
name: "JK1 cell voltage 1"
cell_voltage_2:
name: "JK1 cell voltage 2"
cell_voltage_3:
name: "JK1 cell voltage 3"
cell_voltage_4:
name: "JK1 cell voltage 4"
cell_voltage_5:
name: "JK1 cell voltage 5"
cell_voltage_6:
name: "JK1 cell voltage 6"
cell_voltage_7:
name: "JK1 cell voltage 7"
cell_voltage_8:
name: "JK1 cell voltage 8"
cell_resistance_1:
name: "JK1 cell resistance 1"
cell_resistance_2:
name: "JK1 cell resistance 2"
cell_resistance_3:
name: "JK1 cell resistance 3"
cell_resistance_4:
name: "JK1 cell resistance 4"
cell_resistance_5:
name: "JK1 cell resistance 5"
cell_resistance_6:
name: "JK1 cell resistance 6"
cell_resistance_7:
name: "JK1 cell resistance 7"
cell_resistance_8:
name: "JK1 cell resistance 8"
total_charging_cycle_capacity:
name: "JK1 total charging cycle capacity"
text_sensor:
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms1
errors:
name: "JK1 Errors"
switch:
- platform: jk_bms_ble
jk_bms_ble_id: jk_bms1
charging:
name: "JK1 Charging"
discharging:
name: "JK1 Discharging"
balancer:
name: "JK1 Balancing"
- platform: ble_client
ble_client_id: jk_ble1
name: "JK1 enable bluetooth connection"
id: ble_client_switch0
-48
View File
@@ -1,48 +0,0 @@
esphome:
name: "environment"
friendly_name: "environment"
esp32:
board: esp32dev
framework:
type: arduino
i2c:
sda: GPIO21
scl: GPIO22
scan: True
id: bus_a
sensor:
- platform: aht10
i2c_id: bus_a
address: 0x38
variant: AHT20
temperature:
name: "environment Temperature"
id: aht10_temperature
humidity:
name: "environment Humidity"
id: aht10_humidity
update_interval: 5s
web_server:
port: 80
logger:
level: DEBUG
api:
encryption:
key: !secret api_key
ota:
- platform: esphome
password: !secret ota_password
wifi:
ssid: !secret wifi_ssid
password: !secret wifi_password
fast_connect: on
captive_portal:
Generated
+118 -35
View File
@@ -8,11 +8,11 @@
},
"locked": {
"dir": "pkgs/firefox-addons",
"lastModified": 1781150628,
"narHash": "sha256-b4mp8l3qWuSCyYYo9HSngDtcB3PpecYiOXjULrjwwlw=",
"lastModified": 1760673822,
"narHash": "sha256-h+liPhhMw1yYvkDGLHzQJQShQs+yLjNgjfAyZX+sRrM=",
"owner": "rycee",
"repo": "nur-expressions",
"rev": "753319310f4673a2dabbfab87482187b40bf9bac",
"rev": "5cca27f1bb30a26140d0cf60ab34daa45b4fa11f",
"type": "gitlab"
},
"original": {
@@ -29,11 +29,11 @@
]
},
"locked": {
"lastModified": 1781189114,
"narHash": "sha256-5inaamLgUMWy+MOBE9ChF9QAF1o/74LFuHkI0W/9rqc=",
"lastModified": 1760662441,
"narHash": "sha256-mlDqR1Ntgs9uYYEAUR1IhamKBO0lxoNS4zGLzEZaY0A=",
"owner": "nix-community",
"repo": "home-manager",
"rev": "486595d2cf49cfcd649b58a284fa11ac0e34da22",
"rev": "722792af097dff5790f1a66d271a47759f477755",
"type": "github"
},
"original": {
@@ -43,15 +43,12 @@
}
},
"nixos-hardware": {
"inputs": {
"nixpkgs": "nixpkgs"
},
"locked": {
"lastModified": 1781168557,
"narHash": "sha256-LOnLQ2tpYF9gqIDDr3+j3DbpJJr/QCH6zPRT2GzEUOE=",
"lastModified": 1760106635,
"narHash": "sha256-2GoxVaKWTHBxRoeUYSjv0AfSOx4qw5CWSFz2b+VolKU=",
"owner": "nixos",
"repo": "nixos-hardware",
"rev": "6358ff76821101c178e3ab4919a62799bfe3652e",
"rev": "9ed85f8afebf2b7478f25db0a98d0e782c0ed903",
"type": "github"
},
"original": {
@@ -63,24 +60,27 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1767892417,
"narHash": "sha256-8bW3q88CEg2u4hSP66Vf4lpbLonHz7hqDNBMcCY7E9U=",
"rev": "3497aa5c9457a9d88d71fa93a4a8368816fbeeba",
"type": "tarball",
"url": "https://releases.nixos.org/nixos/unstable/nixos-26.05pre924538.3497aa5c9457/nixexprs.tar.xz"
"lastModified": 1760524057,
"narHash": "sha256-EVAqOteLBFmd7pKkb0+FIUyzTF61VKi7YmvP1tw4nEw=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "544961dfcce86422ba200ed9a0b00dd4b1486ec5",
"type": "github"
},
"original": {
"type": "tarball",
"url": "https://channels.nixos.org/nixos-unstable/nixexprs.tar.xz"
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs-master": {
"locked": {
"lastModified": 1781229721,
"narHash": "sha256-ORvqDbb/LYxiJljGIejapjkc/kJbVote2N1WSb9W45I=",
"lastModified": 1760751316,
"narHash": "sha256-1296zQfPiLZNrLKzX1t+kunadeI/mH82hKze3voduEI=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "173d0ad7a974f8543a9ab01d2271b2e290341b33",
"rev": "d85429339c0bcf0428084fe1306c970aed364417",
"type": "github"
},
"original": {
@@ -106,19 +106,53 @@
"type": "github"
}
},
"nixpkgs_2": {
"pyproject-build-systems": {
"inputs": {
"nixpkgs": [
"system_tools",
"nixpkgs"
],
"pyproject-nix": [
"system_tools",
"pyproject-nix"
],
"uv2nix": [
"system_tools",
"uv2nix"
]
},
"locked": {
"lastModified": 1781074563,
"narHash": "sha256-md8WlXOlfnIeHeOScMTTHFyf2d6iaTwPl2apR5EQ3P4=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "9ae611a455b90cf061d8f332b977e387bda8e1ca",
"lastModified": 1744599653,
"narHash": "sha256-nysSwVVjG4hKoOjhjvE6U5lIKA8sEr1d1QzEfZsannU=",
"owner": "pyproject-nix",
"repo": "build-system-pkgs",
"rev": "7dba6dbc73120e15b558754c26024f6c93015dd7",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"owner": "pyproject-nix",
"repo": "build-system-pkgs",
"type": "github"
}
},
"pyproject-nix": {
"inputs": {
"nixpkgs": [
"system_tools",
"nixpkgs"
]
},
"locked": {
"lastModified": 1746540146,
"narHash": "sha256-QxdHGNpbicIrw5t6U3x+ZxeY/7IEJ6lYbvsjXmcxFIM=",
"owner": "pyproject-nix",
"repo": "pyproject.nix",
"rev": "e09c10c24ebb955125fda449939bfba664c467fd",
"type": "github"
},
"original": {
"owner": "pyproject-nix",
"repo": "pyproject.nix",
"type": "github"
}
},
@@ -127,10 +161,11 @@
"firefox-addons": "firefox-addons",
"home-manager": "home-manager",
"nixos-hardware": "nixos-hardware",
"nixpkgs": "nixpkgs_2",
"nixpkgs": "nixpkgs",
"nixpkgs-master": "nixpkgs-master",
"nixpkgs-stable": "nixpkgs-stable",
"sops-nix": "sops-nix",
"system_tools": "system_tools",
"systems": "systems"
}
},
@@ -141,11 +176,11 @@
]
},
"locked": {
"lastModified": 1780547341,
"narHash": "sha256-Gq8KNx5A7hBB3uGJaj6eQfLDIz5YdLu92gqBcvHvoUo=",
"lastModified": 1760393368,
"narHash": "sha256-8mN3kqyqa2PKY0wwZ2UmMEYMcxvNTwLaOrrDsw6Qi4E=",
"owner": "Mic92",
"repo": "sops-nix",
"rev": "9ed65852b6257fbeae4355bc24ecfea307ca759a",
"rev": "ab8d56e85b8be14cff9d93735951e30c3e86a437",
"type": "github"
},
"original": {
@@ -154,6 +189,29 @@
"type": "github"
}
},
"system_tools": {
"inputs": {
"nixpkgs": [
"nixpkgs"
],
"pyproject-build-systems": "pyproject-build-systems",
"pyproject-nix": "pyproject-nix",
"uv2nix": "uv2nix"
},
"locked": {
"lastModified": 1760751967,
"narHash": "sha256-u/uciy9kpM/CBZKl05iAZRaOTwUHiuI0L/qbkk2mLUg=",
"owner": "RichieCahill",
"repo": "system_tools",
"rev": "a125c3e5c01cecbc3f2a842ffb1abb1210c35706",
"type": "github"
},
"original": {
"owner": "RichieCahill",
"repo": "system_tools",
"type": "github"
}
},
"systems": {
"locked": {
"lastModified": 1689347949,
@@ -168,6 +226,31 @@
"repo": "default-linux",
"type": "github"
}
},
"uv2nix": {
"inputs": {
"nixpkgs": [
"system_tools",
"nixpkgs"
],
"pyproject-nix": [
"system_tools",
"pyproject-nix"
]
},
"locked": {
"lastModified": 1747441483,
"narHash": "sha256-W8BFXk5R0TuJcjIhcGoMpSOaIufGXpizK0pm+uTqynA=",
"owner": "pyproject-nix",
"repo": "uv2nix",
"rev": "582024dc64663e9f88d467c2f7f7b20d278349de",
"type": "github"
},
"original": {
"owner": "pyproject-nix",
"repo": "uv2nix",
"type": "github"
}
}
},
"root": "root",
+5 -1
View File
@@ -31,6 +31,11 @@
inputs.nixpkgs.follows = "nixpkgs";
};
system_tools = {
url = "github:RichieCahill/system_tools";
inputs.nixpkgs.follows = "nixpkgs";
};
sops-nix = {
url = "github:Mic92/sops-nix";
inputs.nixpkgs.follows = "nixpkgs";
@@ -54,7 +59,6 @@
system:
import nixpkgs {
inherit system;
overlays = builtins.attrValues outputs.overlays;
config.allowUnfree = true;
}
);
+6 -47
View File
@@ -3,75 +3,34 @@
# When applied, the stable nixpkgs set (declared in the flake inputs) will be accessible through 'pkgs.stable'
stable = final: _prev: {
stable = import inputs.nixpkgs-stable {
system = final.stdenv.hostPlatform.system;
system = final.system;
config.allowUnfree = true;
};
};
# When applied, the master nixpkgs set (declared in the flake inputs) will be accessible through 'pkgs.master'
master = final: _prev: {
master = import inputs.nixpkgs-master {
system = final.stdenv.hostPlatform.system;
system = final.system;
config.allowUnfree = true;
};
};
python-env = final: _prev: {
my_python = final.python314.withPackages (
ps:
let
bm25s = ps.buildPythonPackage rec {
pname = "bm25s";
version = "0.3.9";
pyproject = true;
src = final.fetchPypi {
inherit pname version;
hash = "sha256-iVxnnZUrfeg1XttfPhpiCh4vKU0dQrkZvwghzOLi9Zc=";
};
build-system = [ ps.setuptools ];
dependencies = with ps; [
numpy
scipy
];
pythonImportsCheck = [ "bm25s" ];
};
in
with ps;
[
alembic
my_python = final.python313.withPackages (
ps: with ps; [
apprise
apscheduler
beautifulsoup4
ebooklib
fastapi
fastapi-cli
httpx
mypy
numpy
orjson
pgvector
polars
psycopg
pydantic
pyfakefs
pytest
pytest-cov
pytest-mock
pytest-xdist
python-multipart
requests
ruff
scalene
sqlalchemy
sqlalchemy
bm25s
tenacity
textual
tiktoken
tinytuya
typer
websockets
types-requests
]
);
};
+4 -45
View File
@@ -7,26 +7,7 @@ requires-python = "~=3.13.0"
readme = "README.md"
license = "MIT"
# these dependencies are a best effort and aren't guaranteed to work
# for up-to-date dependencies, see overlays/default.nix
dependencies = [
"alembic",
"apprise",
"apscheduler",
"httpx",
"python-multipart",
"polars",
"psycopg[binary]",
"pydantic",
"pyyaml",
"sqlalchemy",
"typer",
"websockets",
]
[project.scripts]
database = "python.database_cli:app"
van-inventory = "python.van_inventory.main:serve"
whisper-transcribe = "python.tools.whisper.transcribe:main"
dependencies = ["apprise", "apscheduler", "polars", "requests", "typer"]
[dependency-groups]
dev = [
@@ -37,6 +18,7 @@ dev = [
"pytest-xdist",
"pytest",
"ruff",
"types-requests",
]
[tool.ruff]
@@ -51,42 +33,20 @@ lint.ignore = [
"COM812", # (TEMP) conflicts when used with the formatter
"ISC001", # (TEMP) conflicts when used with the formatter
"S603", # (PERM) This is known to cause a false positive
"S607", # (PERM) This is becoming a consistent annoyance
]
[tool.ruff.lint.per-file-ignores]
"tests/**" = [
"ANN", # (perm) type annotations not needed in tests
"D", # (perm) docstrings not needed in tests
"PLR2004", # (perm) magic values are fine in test assertions
"S101", # (perm) pytest needs asserts
"S101", # (perm) pytest needs asserts
]
"python/stuff/**" = [
"python/random/**" = [
"T201", # (perm) I don't care about print statements dir
]
"python/testing/**" = [
"T201", # (perm) I don't care about print statements dir
"ERA001", # (perm) I don't care about print statements dir
]
"python/splendor/**" = [
"S311", # (perm) there is no security issue here
"T201", # (perm) I don't care about print statements dir
"PLR2004", # (temps) need to think about this
]
"python/orm/**" = [
"TC003", # (perm) this creates issues because sqlalchemy uses these at runtime
]
"python/congress_tracker/**" = [
"TC003", # (perm) this creates issues because sqlalchemy uses these at runtime
]
"python/alembic/**" = [
"INP001", # (perm) this creates LSP issues for alembic
]
"python/signal_bot/**" = [
"D107", # (perm) class docstrings cover __init__
]
[tool.ruff.lint.pydocstyle]
convention = "google"
@@ -110,5 +70,4 @@ exclude_lines = [
[tool.pytest.ini_options]
addopts = "-n auto -ra"
testpaths = ["tests"]
# --cov=system_tools --cov-report=term-missing --cov-report=xml --cov-report=html --cov-branch
@@ -1,50 +0,0 @@
"""adding FailedIngestion.
Revision ID: 2f43120e3ffc
Revises: f99be864fe69
Create Date: 2026-03-24 23:46:17.277897
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import DataScienceDevBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "2f43120e3ffc"
down_revision: str | None = "f99be864fe69"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = DataScienceDevBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"failed_ingestion",
sa.Column("raw_line", sa.Text(), nullable=False),
sa.Column("error", sa.Text(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_failed_ingestion")),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("failed_ingestion", schema=schema)
# ### end Alembic commands ###
@@ -1,72 +0,0 @@
"""Attach all partition tables to the posts parent table.
Alembic autogenerate creates partition tables as standalone tables but does not
emit the ALTER TABLE ... ATTACH PARTITION statements needed for PostgreSQL to
route inserts to the correct partition.
Revision ID: a1b2c3d4e5f6
Revises: 605b1794838f
Create Date: 2026-03-25 10:00:00.000000
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from alembic import op
from sqlalchemy import text
from python.orm import DataScienceDevBase
from python.orm.data_science_dev.posts.partitions import (
PARTITION_END_YEAR,
PARTITION_START_YEAR,
iso_weeks_in_year,
week_bounds,
)
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "a1b2c3d4e5f6"
down_revision: str | None = "605b1794838f"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = DataScienceDevBase.schema_name
ALREADY_ATTACHED_QUERY = text("""
SELECT inhrelid::regclass::text
FROM pg_inherits
WHERE inhparent = :parent::regclass
""")
def upgrade() -> None:
"""Attach all weekly partition tables to the posts parent table."""
connection = op.get_bind()
already_attached = {row[0] for row in connection.execute(ALREADY_ATTACHED_QUERY, {"parent": f"{schema}.posts"})}
for year in range(PARTITION_START_YEAR, PARTITION_END_YEAR + 1):
for week in range(1, iso_weeks_in_year(year) + 1):
table_name = f"posts_{year}_{week:02d}"
qualified_name = f"{schema}.{table_name}"
if qualified_name in already_attached:
continue
start, end = week_bounds(year, week)
start_str = start.strftime("%Y-%m-%d %H:%M:%S")
end_str = end.strftime("%Y-%m-%d %H:%M:%S")
op.execute(
f"ALTER TABLE {schema}.posts "
f"ATTACH PARTITION {qualified_name} "
f"FOR VALUES FROM ('{start_str}') TO ('{end_str}')"
)
def downgrade() -> None:
"""Detach all weekly partition tables from the posts parent table."""
for year in range(PARTITION_START_YEAR, PARTITION_END_YEAR + 1):
for week in range(1, iso_weeks_in_year(year) + 1):
table_name = f"posts_{year}_{week:02d}"
op.execute(f"ALTER TABLE {schema}.posts DETACH PARTITION {schema}.{table_name}")
@@ -1,153 +0,0 @@
"""adding congress data.
Revision ID: 83bfc8af92d8
Revises: a1b2c3d4e5f6
Create Date: 2026-03-27 10:43:02.324510
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import DataScienceDevBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "83bfc8af92d8"
down_revision: str | None = "a1b2c3d4e5f6"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = DataScienceDevBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"bill",
sa.Column("congress", sa.Integer(), nullable=False),
sa.Column("bill_type", sa.String(), nullable=False),
sa.Column("number", sa.Integer(), nullable=False),
sa.Column("title", sa.String(), nullable=True),
sa.Column("title_short", sa.String(), nullable=True),
sa.Column("official_title", sa.String(), nullable=True),
sa.Column("status", sa.String(), nullable=True),
sa.Column("status_at", sa.Date(), nullable=True),
sa.Column("sponsor_bioguide_id", sa.String(), nullable=True),
sa.Column("subjects_top_term", sa.String(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_bill")),
sa.UniqueConstraint("congress", "bill_type", "number", name="uq_bill_congress_type_number"),
schema=schema,
)
op.create_index("ix_bill_congress", "bill", ["congress"], unique=False, schema=schema)
op.create_table(
"legislator",
sa.Column("bioguide_id", sa.Text(), nullable=False),
sa.Column("thomas_id", sa.String(), nullable=True),
sa.Column("lis_id", sa.String(), nullable=True),
sa.Column("govtrack_id", sa.Integer(), nullable=True),
sa.Column("opensecrets_id", sa.String(), nullable=True),
sa.Column("fec_ids", sa.String(), nullable=True),
sa.Column("first_name", sa.String(), nullable=False),
sa.Column("last_name", sa.String(), nullable=False),
sa.Column("official_full_name", sa.String(), nullable=True),
sa.Column("nickname", sa.String(), nullable=True),
sa.Column("birthday", sa.Date(), nullable=True),
sa.Column("gender", sa.String(), nullable=True),
sa.Column("current_party", sa.String(), nullable=True),
sa.Column("current_state", sa.String(), nullable=True),
sa.Column("current_district", sa.Integer(), nullable=True),
sa.Column("current_chamber", sa.String(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_legislator")),
schema=schema,
)
op.create_index(op.f("ix_legislator_bioguide_id"), "legislator", ["bioguide_id"], unique=True, schema=schema)
op.create_table(
"bill_text",
sa.Column("bill_id", sa.Integer(), nullable=False),
sa.Column("version_code", sa.String(), nullable=False),
sa.Column("version_name", sa.String(), nullable=True),
sa.Column("text_content", sa.String(), nullable=True),
sa.Column("date", sa.Date(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["bill_id"], [f"{schema}.bill.id"], name=op.f("fk_bill_text_bill_id_bill"), ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_bill_text")),
sa.UniqueConstraint("bill_id", "version_code", name="uq_bill_text_bill_id_version_code"),
schema=schema,
)
op.create_table(
"vote",
sa.Column("congress", sa.Integer(), nullable=False),
sa.Column("chamber", sa.String(), nullable=False),
sa.Column("session", sa.Integer(), nullable=False),
sa.Column("number", sa.Integer(), nullable=False),
sa.Column("vote_type", sa.String(), nullable=True),
sa.Column("question", sa.String(), nullable=True),
sa.Column("result", sa.String(), nullable=True),
sa.Column("result_text", sa.String(), nullable=True),
sa.Column("vote_date", sa.Date(), nullable=False),
sa.Column("yea_count", sa.Integer(), nullable=True),
sa.Column("nay_count", sa.Integer(), nullable=True),
sa.Column("not_voting_count", sa.Integer(), nullable=True),
sa.Column("present_count", sa.Integer(), nullable=True),
sa.Column("bill_id", sa.Integer(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(["bill_id"], [f"{schema}.bill.id"], name=op.f("fk_vote_bill_id_bill")),
sa.PrimaryKeyConstraint("id", name=op.f("pk_vote")),
sa.UniqueConstraint("congress", "chamber", "session", "number", name="uq_vote_congress_chamber_session_number"),
schema=schema,
)
op.create_index("ix_vote_congress_chamber", "vote", ["congress", "chamber"], unique=False, schema=schema)
op.create_index("ix_vote_date", "vote", ["vote_date"], unique=False, schema=schema)
op.create_table(
"vote_record",
sa.Column("vote_id", sa.Integer(), nullable=False),
sa.Column("legislator_id", sa.Integer(), nullable=False),
sa.Column("position", sa.String(), nullable=False),
sa.ForeignKeyConstraint(
["legislator_id"],
[f"{schema}.legislator.id"],
name=op.f("fk_vote_record_legislator_id_legislator"),
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["vote_id"], [f"{schema}.vote.id"], name=op.f("fk_vote_record_vote_id_vote"), ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("vote_id", "legislator_id", name=op.f("pk_vote_record")),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("vote_record", schema=schema)
op.drop_index("ix_vote_date", table_name="vote", schema=schema)
op.drop_index("ix_vote_congress_chamber", table_name="vote", schema=schema)
op.drop_table("vote", schema=schema)
op.drop_table("bill_text", schema=schema)
op.drop_index(op.f("ix_legislator_bioguide_id"), table_name="legislator", schema=schema)
op.drop_table("legislator", schema=schema)
op.drop_index("ix_bill_congress", table_name="bill", schema=schema)
op.drop_table("bill", schema=schema)
# ### end Alembic commands ###
@@ -1,58 +0,0 @@
"""adding LegislatorSocialMedia.
Revision ID: 5cd7eee3549d
Revises: 83bfc8af92d8
Create Date: 2026-03-29 11:53:44.224799
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import DataScienceDevBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "5cd7eee3549d"
down_revision: str | None = "83bfc8af92d8"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = DataScienceDevBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"legislator_social_media",
sa.Column("legislator_id", sa.Integer(), nullable=False),
sa.Column("platform", sa.String(), nullable=False),
sa.Column("account_name", sa.String(), nullable=False),
sa.Column("url", sa.String(), nullable=True),
sa.Column("source", sa.String(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["legislator_id"],
[f"{schema}.legislator.id"],
name=op.f("fk_legislator_social_media_legislator_id_legislator"),
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_legislator_social_media")),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("legislator_social_media", schema=schema)
# ### end Alembic commands ###
-122
View File
@@ -1,122 +0,0 @@
"""Alembic."""
from __future__ import annotations
import logging
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal
from alembic import context
from alembic.script import write_hooks
from sqlalchemy.schema import CreateSchema
from python.common import bash_wrapper
from python.orm.common import get_postgres_engine
if TYPE_CHECKING:
from collections.abc import MutableMapping
from sqlalchemy.orm import DeclarativeBase
config = context.config
base_class: type[DeclarativeBase] = config.attributes.get("base")
if base_class is None:
error = "No base class provided. Use the database CLI to run alembic commands."
raise RuntimeError(error)
target_metadata = base_class.metadata
logging.basicConfig(
level="DEBUG",
datefmt="%Y-%m-%dT%H:%M:%S%z",
format="%(asctime)s %(levelname)s %(filename)s:%(lineno)d - %(message)s",
handlers=[logging.StreamHandler(sys.stdout)],
)
@write_hooks.register("dynamic_schema")
def dynamic_schema(filename: str, _options: dict[Any, Any]) -> None:
"""Dynamic schema."""
original_file = Path(filename).read_text()
schema_name = base_class.schema_name
dynamic_schema_file_part1 = original_file.replace(f"schema='{schema_name}'", "schema=schema")
dynamic_schema_file = dynamic_schema_file_part1.replace(f"'{schema_name}.", "f'{schema}.")
Path(filename).write_text(dynamic_schema_file)
@write_hooks.register("import_postgresql")
def import_postgresql(filename: str, _options: dict[Any, Any]) -> None:
"""Add postgresql dialect import when postgresql types are used."""
content = Path(filename).read_text()
if "postgresql." in content and "from sqlalchemy.dialects import postgresql" not in content:
content = content.replace(
"import sqlalchemy as sa\n",
"import sqlalchemy as sa\nfrom sqlalchemy.dialects import postgresql\n",
)
Path(filename).write_text(content)
@write_hooks.register("ruff")
def ruff_check_and_format(filename: str, _options: dict[Any, Any]) -> None:
"""Docstring for ruff_check_and_format."""
bash_wrapper(f"ruff check --fix {filename}")
bash_wrapper(f"ruff format {filename}")
def include_name(
name: str | None,
type_: Literal["schema", "table", "column", "index", "unique_constraint", "foreign_key_constraint"],
_parent_names: MutableMapping[Literal["schema_name", "table_name", "schema_qualified_table_name"], str | None],
) -> bool:
"""Filter tables to be included in the migration.
Args:
name (str): The name of the table.
type_ (str): The type of the table.
_parent_names (MutableMapping): The names of the parent tables.
Returns:
bool: True if the table should be included, False otherwise.
"""
if type_ == "schema":
# allows a database with multiple schemas to have separate alembic revisions
return name == target_metadata.schema
return True
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
env_prefix = config.attributes.get("env_prefix", "POSTGRES")
connectable = get_postgres_engine(name=env_prefix)
with connectable.connect() as connection:
schema = base_class.schema_name
if not connectable.dialect.has_schema(connection, schema):
answer = input(f"Schema {schema!r} does not exist. Create it? [y/N] ")
if answer.lower() != "y":
error = f"Schema {schema!r} does not exist. Exiting."
raise SystemExit(error)
connection.execute(CreateSchema(schema))
connection.commit()
context.configure(
connection=connection,
target_metadata=target_metadata,
include_schemas=True,
version_table_schema=schema,
include_name=include_name,
)
with context.begin_transaction():
context.run_migrations()
connection.commit()
run_migrations_online()
@@ -1,113 +0,0 @@
"""created contact api.
Revision ID: edd7dd61a3d2
Revises:
Create Date: 2026-01-11 15:45:59.909266
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "edd7dd61a3d2"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"contact",
sa.Column("name", sa.String(), nullable=False),
sa.Column("age", sa.Integer(), nullable=True),
sa.Column("bio", sa.String(), nullable=True),
sa.Column("current_job", sa.String(), nullable=True),
sa.Column("gender", sa.String(), nullable=True),
sa.Column("goals", sa.String(), nullable=True),
sa.Column("legal_name", sa.String(), nullable=True),
sa.Column("profile_pic", sa.String(), nullable=True),
sa.Column("safe_conversation_starters", sa.String(), nullable=True),
sa.Column("self_sufficiency_score", sa.Integer(), nullable=True),
sa.Column("social_structure_style", sa.String(), nullable=True),
sa.Column("ssn", sa.String(), nullable=True),
sa.Column("suffix", sa.String(), nullable=True),
sa.Column("timezone", sa.String(), nullable=True),
sa.Column("topics_to_avoid", sa.String(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_contact")),
schema=schema,
)
op.create_table(
"need",
sa.Column("name", sa.String(), nullable=False),
sa.Column("description", sa.String(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_need")),
schema=schema,
)
op.create_table(
"contact_need",
sa.Column("contact_id", sa.Integer(), nullable=False),
sa.Column("need_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["contact_id"],
[f"{schema}.contact.id"],
name=op.f("fk_contact_need_contact_id_contact"),
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["need_id"], [f"{schema}.need.id"], name=op.f("fk_contact_need_need_id_need"), ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("contact_id", "need_id", name=op.f("pk_contact_need")),
schema=schema,
)
op.create_table(
"contact_relationship",
sa.Column("contact_id", sa.Integer(), nullable=False),
sa.Column("related_contact_id", sa.Integer(), nullable=False),
sa.Column("relationship_type", sa.String(length=100), nullable=False),
sa.Column("closeness_weight", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["contact_id"],
[f"{schema}.contact.id"],
name=op.f("fk_contact_relationship_contact_id_contact"),
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["related_contact_id"],
[f"{schema}.contact.id"],
name=op.f("fk_contact_relationship_related_contact_id_contact"),
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("contact_id", "related_contact_id", name=op.f("pk_contact_relationship")),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("contact_relationship", schema=schema)
op.drop_table("contact_need", schema=schema)
op.drop_table("need", schema=schema)
op.drop_table("contact", schema=schema)
# ### end Alembic commands ###
@@ -1,135 +0,0 @@
"""add congress tracker tables.
Revision ID: 3f71565e38de
Revises: edd7dd61a3d2
Create Date: 2026-02-12 16:36:09.457303
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "3f71565e38de"
down_revision: str | None = "edd7dd61a3d2"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"bill",
sa.Column("congress", sa.Integer(), nullable=False),
sa.Column("bill_type", sa.String(), nullable=False),
sa.Column("number", sa.Integer(), nullable=False),
sa.Column("title", sa.String(), nullable=True),
sa.Column("title_short", sa.String(), nullable=True),
sa.Column("official_title", sa.String(), nullable=True),
sa.Column("status", sa.String(), nullable=True),
sa.Column("status_at", sa.Date(), nullable=True),
sa.Column("sponsor_bioguide_id", sa.String(), nullable=True),
sa.Column("subjects_top_term", sa.String(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_bill")),
sa.UniqueConstraint("congress", "bill_type", "number", name="uq_bill_congress_type_number"),
schema=schema,
)
op.create_index("ix_bill_congress", "bill", ["congress"], unique=False, schema=schema)
op.create_table(
"legislator",
sa.Column("bioguide_id", sa.Text(), nullable=False),
sa.Column("thomas_id", sa.String(), nullable=True),
sa.Column("lis_id", sa.String(), nullable=True),
sa.Column("govtrack_id", sa.Integer(), nullable=True),
sa.Column("opensecrets_id", sa.String(), nullable=True),
sa.Column("fec_ids", sa.String(), nullable=True),
sa.Column("first_name", sa.String(), nullable=False),
sa.Column("last_name", sa.String(), nullable=False),
sa.Column("official_full_name", sa.String(), nullable=True),
sa.Column("nickname", sa.String(), nullable=True),
sa.Column("birthday", sa.Date(), nullable=True),
sa.Column("gender", sa.String(), nullable=True),
sa.Column("current_party", sa.String(), nullable=True),
sa.Column("current_state", sa.String(), nullable=True),
sa.Column("current_district", sa.Integer(), nullable=True),
sa.Column("current_chamber", sa.String(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_legislator")),
schema=schema,
)
op.create_index(op.f("ix_legislator_bioguide_id"), "legislator", ["bioguide_id"], unique=True, schema=schema)
op.create_table(
"vote",
sa.Column("congress", sa.Integer(), nullable=False),
sa.Column("chamber", sa.String(), nullable=False),
sa.Column("session", sa.Integer(), nullable=False),
sa.Column("number", sa.Integer(), nullable=False),
sa.Column("vote_type", sa.String(), nullable=True),
sa.Column("question", sa.String(), nullable=True),
sa.Column("result", sa.String(), nullable=True),
sa.Column("result_text", sa.String(), nullable=True),
sa.Column("vote_date", sa.Date(), nullable=False),
sa.Column("yea_count", sa.Integer(), nullable=True),
sa.Column("nay_count", sa.Integer(), nullable=True),
sa.Column("not_voting_count", sa.Integer(), nullable=True),
sa.Column("present_count", sa.Integer(), nullable=True),
sa.Column("bill_id", sa.Integer(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(["bill_id"], [f"{schema}.bill.id"], name=op.f("fk_vote_bill_id_bill")),
sa.PrimaryKeyConstraint("id", name=op.f("pk_vote")),
sa.UniqueConstraint("congress", "chamber", "session", "number", name="uq_vote_congress_chamber_session_number"),
schema=schema,
)
op.create_index("ix_vote_congress_chamber", "vote", ["congress", "chamber"], unique=False, schema=schema)
op.create_index("ix_vote_date", "vote", ["vote_date"], unique=False, schema=schema)
op.create_table(
"vote_record",
sa.Column("vote_id", sa.Integer(), nullable=False),
sa.Column("legislator_id", sa.Integer(), nullable=False),
sa.Column("position", sa.String(), nullable=False),
sa.ForeignKeyConstraint(
["legislator_id"],
[f"{schema}.legislator.id"],
name=op.f("fk_vote_record_legislator_id_legislator"),
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["vote_id"], [f"{schema}.vote.id"], name=op.f("fk_vote_record_vote_id_vote"), ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("vote_id", "legislator_id", name=op.f("pk_vote_record")),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("vote_record", schema=schema)
op.drop_index("ix_vote_date", table_name="vote", schema=schema)
op.drop_index("ix_vote_congress_chamber", table_name="vote", schema=schema)
op.drop_table("vote", schema=schema)
op.drop_index(op.f("ix_legislator_bioguide_id"), table_name="legislator", schema=schema)
op.drop_table("legislator", schema=schema)
op.drop_index("ix_bill_congress", table_name="bill", schema=schema)
op.drop_table("bill", schema=schema)
# ### end Alembic commands ###
@@ -1,58 +0,0 @@
"""adding SignalDevice for DeviceRegistry for signal bot.
Revision ID: 4c410c16e39c
Revises: 3f71565e38de
Create Date: 2026-03-09 14:51:24.228976
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "4c410c16e39c"
down_revision: str | None = "3f71565e38de"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"signal_device",
sa.Column("phone_number", sa.String(length=50), nullable=False),
sa.Column("safety_number", sa.String(), nullable=False),
sa.Column(
"trust_level",
postgresql.ENUM("VERIFIED", "UNVERIFIED", "BLOCKED", name="trust_level", schema=schema),
nullable=False,
),
sa.Column("last_seen", sa.DateTime(timezone=True), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_signal_device")),
sa.UniqueConstraint("phone_number", name=op.f("uq_signal_device_phone_number")),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("signal_device", schema=schema)
# ### end Alembic commands ###
@@ -1,41 +0,0 @@
"""fixed safety number logic.
Revision ID: 99fec682516c
Revises: 4c410c16e39c
Create Date: 2026-03-09 16:25:25.085806
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "99fec682516c"
down_revision: str | None = "4c410c16e39c"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.alter_column("signal_device", "safety_number", existing_type=sa.VARCHAR(), nullable=True, schema=schema)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.alter_column("signal_device", "safety_number", existing_type=sa.VARCHAR(), nullable=False, schema=schema)
# ### end Alembic commands ###
@@ -1,54 +0,0 @@
"""add dead_letter_message table.
Revision ID: a1b2c3d4e5f6
Revises: 99fec682516c
Create Date: 2026-03-10 12:00:00.000000
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "a1b2c3d4e5f6"
down_revision: str | None = "99fec682516c"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
op.create_table(
"dead_letter_message",
sa.Column("source", sa.String(), nullable=False),
sa.Column("message", sa.Text(), nullable=False),
sa.Column("received_at", sa.DateTime(timezone=True), nullable=False),
sa.Column(
"status",
postgresql.ENUM("UNPROCESSED", "PROCESSED", name="message_status", schema=schema),
nullable=False,
),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_dead_letter_message")),
schema=schema,
)
def downgrade() -> None:
"""Downgrade."""
op.drop_table("dead_letter_message", schema=schema)
op.execute(sa.text(f"DROP TYPE IF EXISTS {schema}.message_status"))
@@ -1,66 +0,0 @@
"""adding roles to signal devices.
Revision ID: 2ef7ba690159
Revises: a1b2c3d4e5f6
Create Date: 2026-03-16 19:22:38.020350
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "2ef7ba690159"
down_revision: str | None = "a1b2c3d4e5f6"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"role",
sa.Column("name", sa.String(length=50), nullable=False),
sa.Column("id", sa.SmallInteger(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_role")),
sa.UniqueConstraint("name", name=op.f("uq_role_name")),
schema=schema,
)
op.create_table(
"device_role",
sa.Column("device_id", sa.Integer(), nullable=False),
sa.Column("role_id", sa.SmallInteger(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["device_id"], [f"{schema}.signal_device.id"], name=op.f("fk_device_role_device_id_signal_device")
),
sa.ForeignKeyConstraint(["role_id"], [f"{schema}.role.id"], name=op.f("fk_device_role_role_id_role")),
sa.PrimaryKeyConstraint("id", name=op.f("pk_device_role")),
sa.UniqueConstraint("device_id", "role_id", name="uq_device_role_device_role"),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("device_role", schema=schema)
op.drop_table("role", schema=schema)
# ### end Alembic commands ###
@@ -1,171 +0,0 @@
"""seprating signal_bot database.
Revision ID: 6b275323f435
Revises: 2ef7ba690159
Create Date: 2026-03-18 08:34:28.785885
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "6b275323f435"
down_revision: str | None = "2ef7ba690159"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("device_role", schema=schema)
op.drop_table("signal_device", schema=schema)
op.drop_table("role", schema=schema)
op.drop_table("dead_letter_message", schema=schema)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"dead_letter_message",
sa.Column("source", sa.VARCHAR(), autoincrement=False, nullable=False),
sa.Column("message", sa.TEXT(), autoincrement=False, nullable=False),
sa.Column("received_at", postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False),
sa.Column(
"status",
postgresql.ENUM("UNPROCESSED", "PROCESSED", name="message_status", schema=schema),
autoincrement=False,
nullable=False,
),
sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False),
sa.Column(
"created",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.Column(
"updated",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_dead_letter_message")),
schema=schema,
)
op.create_table(
"role",
sa.Column("name", sa.VARCHAR(length=50), autoincrement=False, nullable=False),
sa.Column(
"id",
sa.SMALLINT(),
server_default=sa.text(f"nextval('{schema}.role_id_seq'::regclass)"),
autoincrement=True,
nullable=False,
),
sa.Column(
"created",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.Column(
"updated",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_role")),
sa.UniqueConstraint(
"name", name=op.f("uq_role_name"), postgresql_include=[], postgresql_nulls_not_distinct=False
),
schema=schema,
)
op.create_table(
"signal_device",
sa.Column("phone_number", sa.VARCHAR(length=50), autoincrement=False, nullable=False),
sa.Column("safety_number", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column(
"trust_level",
postgresql.ENUM("VERIFIED", "UNVERIFIED", "BLOCKED", name="trust_level", schema=schema),
autoincrement=False,
nullable=False,
),
sa.Column("last_seen", postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False),
sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False),
sa.Column(
"created",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.Column(
"updated",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_signal_device")),
sa.UniqueConstraint(
"phone_number",
name=op.f("uq_signal_device_phone_number"),
postgresql_include=[],
postgresql_nulls_not_distinct=False,
),
schema=schema,
)
op.create_table(
"device_role",
sa.Column("device_id", sa.INTEGER(), autoincrement=False, nullable=False),
sa.Column("role_id", sa.SMALLINT(), autoincrement=False, nullable=False),
sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False),
sa.Column(
"created",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.Column(
"updated",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.ForeignKeyConstraint(
["device_id"], [f"{schema}.signal_device.id"], name=op.f("fk_device_role_device_id_signal_device")
),
sa.ForeignKeyConstraint(["role_id"], [f"{schema}.role.id"], name=op.f("fk_device_role_role_id_role")),
sa.PrimaryKeyConstraint("id", name=op.f("pk_device_role")),
sa.UniqueConstraint(
"device_id",
"role_id",
name=op.f("uq_device_role_device_role"),
postgresql_include=[],
postgresql_nulls_not_distinct=False,
),
schema=schema,
)
# ### end Alembic commands ###
@@ -1,187 +0,0 @@
"""removed ds table from richie DB.
Revision ID: c8a794340928
Revises: 6b275323f435
Create Date: 2026-03-29 15:29:23.643146
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "c8a794340928"
down_revision: str | None = "6b275323f435"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("vote_record", schema=schema)
op.drop_index(op.f("ix_vote_congress_chamber"), table_name="vote", schema=schema)
op.drop_index(op.f("ix_vote_date"), table_name="vote", schema=schema)
op.drop_index(op.f("ix_legislator_bioguide_id"), table_name="legislator", schema=schema)
op.drop_table("legislator", schema=schema)
op.drop_table("vote", schema=schema)
op.drop_index(op.f("ix_bill_congress"), table_name="bill", schema=schema)
op.drop_table("bill", schema=schema)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"vote",
sa.Column("congress", sa.INTEGER(), autoincrement=False, nullable=False),
sa.Column("chamber", sa.VARCHAR(), autoincrement=False, nullable=False),
sa.Column("session", sa.INTEGER(), autoincrement=False, nullable=False),
sa.Column("number", sa.INTEGER(), autoincrement=False, nullable=False),
sa.Column("vote_type", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("question", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("result", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("result_text", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("vote_date", sa.DATE(), autoincrement=False, nullable=False),
sa.Column("yea_count", sa.INTEGER(), autoincrement=False, nullable=True),
sa.Column("nay_count", sa.INTEGER(), autoincrement=False, nullable=True),
sa.Column("not_voting_count", sa.INTEGER(), autoincrement=False, nullable=True),
sa.Column("present_count", sa.INTEGER(), autoincrement=False, nullable=True),
sa.Column("bill_id", sa.INTEGER(), autoincrement=False, nullable=True),
sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False),
sa.Column(
"created",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.Column(
"updated",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.ForeignKeyConstraint(["bill_id"], [f"{schema}.bill.id"], name=op.f("fk_vote_bill_id_bill")),
sa.PrimaryKeyConstraint("id", name=op.f("pk_vote")),
sa.UniqueConstraint(
"congress",
"chamber",
"session",
"number",
name=op.f("uq_vote_congress_chamber_session_number"),
postgresql_include=[],
postgresql_nulls_not_distinct=False,
),
schema=schema,
)
op.create_index(op.f("ix_vote_date"), "vote", ["vote_date"], unique=False, schema=schema)
op.create_index(op.f("ix_vote_congress_chamber"), "vote", ["congress", "chamber"], unique=False, schema=schema)
op.create_table(
"vote_record",
sa.Column("vote_id", sa.INTEGER(), autoincrement=False, nullable=False),
sa.Column("legislator_id", sa.INTEGER(), autoincrement=False, nullable=False),
sa.Column("position", sa.VARCHAR(), autoincrement=False, nullable=False),
sa.ForeignKeyConstraint(
["legislator_id"],
[f"{schema}.legislator.id"],
name=op.f("fk_vote_record_legislator_id_legislator"),
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["vote_id"], [f"{schema}.vote.id"], name=op.f("fk_vote_record_vote_id_vote"), ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("vote_id", "legislator_id", name=op.f("pk_vote_record")),
schema=schema,
)
op.create_table(
"legislator",
sa.Column("bioguide_id", sa.TEXT(), autoincrement=False, nullable=False),
sa.Column("thomas_id", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("lis_id", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("govtrack_id", sa.INTEGER(), autoincrement=False, nullable=True),
sa.Column("opensecrets_id", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("fec_ids", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("first_name", sa.VARCHAR(), autoincrement=False, nullable=False),
sa.Column("last_name", sa.VARCHAR(), autoincrement=False, nullable=False),
sa.Column("official_full_name", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("nickname", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("birthday", sa.DATE(), autoincrement=False, nullable=True),
sa.Column("gender", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("current_party", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("current_state", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("current_district", sa.INTEGER(), autoincrement=False, nullable=True),
sa.Column("current_chamber", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False),
sa.Column(
"created",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.Column(
"updated",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_legislator")),
schema=schema,
)
op.create_index(op.f("ix_legislator_bioguide_id"), "legislator", ["bioguide_id"], unique=True, schema=schema)
op.create_table(
"bill",
sa.Column("congress", sa.INTEGER(), autoincrement=False, nullable=False),
sa.Column("bill_type", sa.VARCHAR(), autoincrement=False, nullable=False),
sa.Column("number", sa.INTEGER(), autoincrement=False, nullable=False),
sa.Column("title", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("title_short", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("official_title", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("status", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("status_at", sa.DATE(), autoincrement=False, nullable=True),
sa.Column("sponsor_bioguide_id", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("subjects_top_term", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False),
sa.Column(
"created",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.Column(
"updated",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_bill")),
sa.UniqueConstraint(
"congress",
"bill_type",
"number",
name=op.f("uq_bill_congress_type_number"),
postgresql_include=[],
postgresql_nulls_not_distinct=False,
),
schema=schema,
)
op.create_index(op.f("ix_bill_congress"), "bill", ["congress"], unique=False, schema=schema)
# ### end Alembic commands ###
@@ -1,93 +0,0 @@
"""adding audiobook libreary metadata.
Revision ID: d7864d1ffc17
Revises: c8a794340928
Create Date: 2026-06-03 20:24:09.200837
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "d7864d1ffc17"
down_revision: str | None = "c8a794340928"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"audiobook_author",
sa.Column("name", sa.String(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_audiobook_author")),
sa.UniqueConstraint("name", name=op.f("uq_audiobook_author_name")),
schema=schema,
)
op.create_table(
"audiobook_series",
sa.Column("name", sa.String(), nullable=False),
sa.Column("author_id", sa.Integer(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["author_id"],
[f"{schema}.audiobook_author.id"],
name=op.f("fk_audiobook_series_author_id_audiobook_author"),
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_audiobook_series")),
sa.UniqueConstraint("author_id", "name", name=op.f("uq_audiobook_series_author_id")),
schema=schema,
)
op.create_table(
"audiobook",
sa.Column("title", sa.String(), nullable=False),
sa.Column("author_id", sa.Integer(), nullable=False),
sa.Column("series_id", sa.Integer(), nullable=True),
sa.Column("series_index", sa.Integer(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["author_id"],
[f"{schema}.audiobook_author.id"],
name=op.f("fk_audiobook_author_id_audiobook_author"),
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["series_id"],
[f"{schema}.audiobook_series.id"],
name=op.f("fk_audiobook_series_id_audiobook_series"),
ondelete="SET NULL",
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_audiobook")),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("audiobook", schema=schema)
op.drop_table("audiobook_series", schema=schema)
op.drop_table("audiobook_author", schema=schema)
# ### end Alembic commands ###
@@ -1,200 +0,0 @@
"""add ebook search tables.
Revision ID: 2db132cace1a
Revises: b3c60cc5beb5
Create Date: 2026-06-10 22:10:54.379159
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import pgvector
import sqlalchemy as sa
from alembic import op
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "2db132cace1a"
down_revision: str | None = "b3c60cc5beb5"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"ebook_embedding_model",
sa.Column("name", sa.String(), nullable=False),
sa.Column("dimension", sa.Integer(), nullable=False),
sa.Column("is_default", sa.Boolean(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_embedding_model")),
sa.UniqueConstraint("name", name=op.f("uq_ebook_embedding_model_name")),
schema=schema,
)
op.create_table(
"ebook_source",
sa.Column("title", sa.String(), nullable=False),
sa.Column("author", sa.String(), nullable=True),
sa.Column("language", sa.String(), nullable=True),
sa.Column("publisher", sa.String(), nullable=True),
sa.Column("identifier", sa.String(), nullable=True),
sa.Column("file_path", sa.String(), nullable=False),
sa.Column("file_sha256", sa.String(length=64), nullable=False),
sa.Column("file_mtime", sa.DateTime(timezone=True), nullable=False),
sa.Column("file_size", sa.BigInteger(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_source")),
sa.UniqueConstraint("file_path", name=op.f("uq_ebook_source_file_path")),
sa.UniqueConstraint("file_sha256", name=op.f("uq_ebook_source_file_sha256")),
schema=schema,
)
op.create_table(
"ebook_chapter",
sa.Column("source_id", sa.Integer(), nullable=False),
sa.Column("spine_index", sa.Integer(), nullable=False),
sa.Column("title", sa.String(), nullable=True),
sa.Column("href", sa.String(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["source_id"],
[f"{schema}.ebook_source.id"],
name=op.f("fk_ebook_chapter_source_id_ebook_source"),
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_chapter")),
sa.UniqueConstraint("source_id", "spine_index", name=op.f("uq_ebook_chapter_source_id")),
schema=schema,
)
op.create_table(
"ebook_chunk",
sa.Column("source_id", sa.Integer(), nullable=False),
sa.Column("chapter_id", sa.Integer(), nullable=True),
sa.Column("chunk_index", sa.Integer(), nullable=False),
sa.Column("text", sa.String(), nullable=False),
sa.Column("token_start", sa.Integer(), nullable=False),
sa.Column("token_count", sa.Integer(), nullable=False),
sa.Column("page_label", sa.String(), nullable=True),
sa.Column("content_sha256", sa.String(length=64), nullable=False),
sa.Column("search_text", sa.String(), nullable=False),
sa.Column("id", sa.BigInteger(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["chapter_id"],
[f"{schema}.ebook_chapter.id"],
name=op.f("fk_ebook_chunk_chapter_id_ebook_chapter"),
ondelete="SET NULL",
),
sa.ForeignKeyConstraint(
["source_id"],
[f"{schema}.ebook_source.id"],
name=op.f("fk_ebook_chunk_source_id_ebook_source"),
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_chunk")),
sa.UniqueConstraint("source_id", "chunk_index", name="uq_ebook_chunk_source_id_chunk_index"),
sa.UniqueConstraint("source_id", "content_sha256", name="uq_ebook_chunk_source_id_content_sha256"),
schema=schema,
)
op.create_table(
"ebook_chunk_embedding_1024",
sa.Column("chunk_id", sa.BigInteger(), nullable=False),
sa.Column("model_id", sa.Integer(), nullable=False),
sa.Column("embedding", pgvector.sqlalchemy.vector.VECTOR(dim=1024), nullable=False),
sa.Column("id", sa.BigInteger(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["chunk_id"],
[f"{schema}.ebook_chunk.id"],
name=op.f("fk_ebook_chunk_embedding_1024_chunk_id_ebook_chunk"),
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["model_id"],
[f"{schema}.ebook_embedding_model.id"],
name=op.f("fk_ebook_chunk_embedding_1024_model_id_ebook_embedding_model"),
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_chunk_embedding_1024")),
sa.UniqueConstraint("chunk_id", "model_id", name=op.f("uq_ebook_chunk_embedding_1024_chunk_id")),
schema=schema,
)
op.create_table(
"ebook_chunk_embedding_2560",
sa.Column("chunk_id", sa.BigInteger(), nullable=False),
sa.Column("model_id", sa.Integer(), nullable=False),
sa.Column("embedding", pgvector.sqlalchemy.vector.VECTOR(dim=2560), nullable=False),
sa.Column("id", sa.BigInteger(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["chunk_id"],
[f"{schema}.ebook_chunk.id"],
name=op.f("fk_ebook_chunk_embedding_2560_chunk_id_ebook_chunk"),
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["model_id"],
[f"{schema}.ebook_embedding_model.id"],
name=op.f("fk_ebook_chunk_embedding_2560_model_id_ebook_embedding_model"),
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_chunk_embedding_2560")),
sa.UniqueConstraint("chunk_id", "model_id", name=op.f("uq_ebook_chunk_embedding_2560_chunk_id")),
schema=schema,
)
op.create_table(
"ebook_chunk_embedding_4096",
sa.Column("chunk_id", sa.BigInteger(), nullable=False),
sa.Column("model_id", sa.Integer(), nullable=False),
sa.Column("embedding", pgvector.sqlalchemy.vector.VECTOR(dim=4096), nullable=False),
sa.Column("id", sa.BigInteger(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["chunk_id"],
[f"{schema}.ebook_chunk.id"],
name=op.f("fk_ebook_chunk_embedding_4096_chunk_id_ebook_chunk"),
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["model_id"],
[f"{schema}.ebook_embedding_model.id"],
name=op.f("fk_ebook_chunk_embedding_4096_model_id_ebook_embedding_model"),
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_chunk_embedding_4096")),
sa.UniqueConstraint("chunk_id", "model_id", name=op.f("uq_ebook_chunk_embedding_4096_chunk_id")),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("ebook_chunk_embedding_4096", schema=schema)
op.drop_table("ebook_chunk_embedding_2560", schema=schema)
op.drop_table("ebook_chunk_embedding_1024", schema=schema)
op.drop_table("ebook_chunk", schema=schema)
op.drop_table("ebook_chapter", schema=schema)
op.drop_table("ebook_source", schema=schema)
op.drop_table("ebook_embedding_model", schema=schema)
# ### end Alembic commands ###
@@ -1,63 +0,0 @@
"""updated series_index to float and added UniqueConstraint to audiobook and audiobook_author.
Revision ID: b3c60cc5beb5
Revises: d7864d1ffc17
Create Date: 2026-06-10 20:02:43.073725
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import RichieBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "b3c60cc5beb5"
down_revision: str | None = "d7864d1ffc17"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = RichieBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.alter_column(
"audiobook",
"series_index",
existing_type=sa.INTEGER(),
type_=sa.Float(),
existing_nullable=False,
schema=schema,
)
op.create_unique_constraint(
op.f("uq_audiobook_author_id"),
"audiobook",
["author_id", "series_id", "title"],
schema=schema,
postgresql_nulls_not_distinct=True,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(op.f("uq_audiobook_author_id"), "audiobook", schema=schema, type_="unique")
op.alter_column(
"audiobook",
"series_index",
existing_type=sa.Float(),
type_=sa.INTEGER(),
existing_nullable=False,
schema=schema,
)
# ### end Alembic commands ###
-36
View File
@@ -1,36 +0,0 @@
"""${message}.
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import ${config.attributes["base"].__name__}
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: str | None = ${repr(down_revision)}
branch_labels: str | Sequence[str] | None = ${repr(branch_labels)}
depends_on: str | Sequence[str] | None = ${repr(depends_on)}
schema=${config.attributes["base"].__name__}.schema_name
def upgrade() -> None:
"""Upgrade."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade."""
${downgrades if downgrades else "pass"}
@@ -1,100 +0,0 @@
"""seprating signal_bot database.
Revision ID: 6eaf696e07a5
Revises:
Create Date: 2026-03-17 21:35:37.612672
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
from python.orm import SignalBotBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "6eaf696e07a5"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = SignalBotBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"dead_letter_message",
sa.Column("source", sa.String(), nullable=False),
sa.Column("message", sa.Text(), nullable=False),
sa.Column("received_at", sa.DateTime(timezone=True), nullable=False),
sa.Column(
"status", postgresql.ENUM("UNPROCESSED", "PROCESSED", name="message_status", schema=schema), nullable=False
),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_dead_letter_message")),
schema=schema,
)
op.create_table(
"role",
sa.Column("name", sa.String(length=50), nullable=False),
sa.Column("id", sa.SmallInteger(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_role")),
sa.UniqueConstraint("name", name=op.f("uq_role_name")),
schema=schema,
)
op.create_table(
"signal_device",
sa.Column("phone_number", sa.String(length=50), nullable=False),
sa.Column("safety_number", sa.String(), nullable=True),
sa.Column(
"trust_level",
postgresql.ENUM("VERIFIED", "UNVERIFIED", "BLOCKED", name="trust_level", schema=schema),
nullable=False,
),
sa.Column("last_seen", sa.DateTime(timezone=True), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_signal_device")),
sa.UniqueConstraint("phone_number", name=op.f("uq_signal_device_phone_number")),
schema=schema,
)
op.create_table(
"device_role",
sa.Column("device_id", sa.Integer(), nullable=False),
sa.Column("role_id", sa.SmallInteger(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(
["device_id"], [f"{schema}.signal_device.id"], name=op.f("fk_device_role_device_id_signal_device")
),
sa.ForeignKeyConstraint(["role_id"], [f"{schema}.role.id"], name=op.f("fk_device_role_role_id_role")),
sa.PrimaryKeyConstraint("id", name=op.f("pk_device_role")),
sa.UniqueConstraint("device_id", "role_id", name="uq_device_role_device_role"),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("device_role", schema=schema)
op.drop_table("signal_device", schema=schema)
op.drop_table("role", schema=schema)
op.drop_table("dead_letter_message", schema=schema)
# ### end Alembic commands ###
@@ -1,72 +0,0 @@
"""test.
Revision ID: 66bdd532bcab
Revises: 6eaf696e07a5
Create Date: 2026-03-18 19:21:14.561568
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
from python.orm import SignalBotBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "66bdd532bcab"
down_revision: str | None = "6eaf696e07a5"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = SignalBotBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.alter_column(
"dead_letter_message",
"status",
existing_type=postgresql.ENUM("UNPROCESSED", "PROCESSED", name="message_status", schema=schema),
type_=sa.Enum("UNPROCESSED", "PROCESSED", name="message_status", native_enum=False),
existing_nullable=False,
schema=schema,
)
op.alter_column(
"signal_device",
"trust_level",
existing_type=postgresql.ENUM("VERIFIED", "UNVERIFIED", "BLOCKED", name="trust_level", schema=schema),
type_=sa.Enum("VERIFIED", "UNVERIFIED", "BLOCKED", name="trust_level", native_enum=False),
existing_nullable=False,
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.alter_column(
"signal_device",
"trust_level",
existing_type=sa.Enum("VERIFIED", "UNVERIFIED", "BLOCKED", name="trust_level", native_enum=False),
type_=postgresql.ENUM("VERIFIED", "UNVERIFIED", "BLOCKED", name="trust_level", schema=schema),
existing_nullable=False,
schema=schema,
)
op.alter_column(
"dead_letter_message",
"status",
existing_type=sa.Enum("UNPROCESSED", "PROCESSED", name="message_status", native_enum=False),
type_=postgresql.ENUM("UNPROCESSED", "PROCESSED", name="message_status", schema=schema),
existing_nullable=False,
schema=schema,
)
# ### end Alembic commands ###
@@ -1,80 +0,0 @@
"""starting van invintory.
Revision ID: 15e733499804
Revises:
Create Date: 2026-03-08 00:18:20.759720
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from python.orm import VanInventoryBase
if TYPE_CHECKING:
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "15e733499804"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
schema = VanInventoryBase.schema_name
def upgrade() -> None:
"""Upgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"items",
sa.Column("name", sa.String(), nullable=False),
sa.Column("quantity", sa.Float(), nullable=False),
sa.Column("unit", sa.String(), nullable=False),
sa.Column("category", sa.String(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_items")),
sa.UniqueConstraint("name", name=op.f("uq_items_name")),
schema=schema,
)
op.create_table(
"meals",
sa.Column("name", sa.String(), nullable=False),
sa.Column("instructions", sa.String(), nullable=True),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.PrimaryKeyConstraint("id", name=op.f("pk_meals")),
sa.UniqueConstraint("name", name=op.f("uq_meals_name")),
schema=schema,
)
op.create_table(
"meal_ingredients",
sa.Column("meal_id", sa.Integer(), nullable=False),
sa.Column("item_id", sa.Integer(), nullable=False),
sa.Column("quantity_needed", sa.Float(), nullable=False),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
sa.ForeignKeyConstraint(["item_id"], [f"{schema}.items.id"], name=op.f("fk_meal_ingredients_item_id_items")),
sa.ForeignKeyConstraint(["meal_id"], [f"{schema}.meals.id"], name=op.f("fk_meal_ingredients_meal_id_meals")),
sa.PrimaryKeyConstraint("id", name=op.f("pk_meal_ingredients")),
sa.UniqueConstraint("meal_id", "item_id", name=op.f("uq_meal_ingredients_meal_id")),
schema=schema,
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("meal_ingredients", schema=schema)
op.drop_table("meals", schema=schema)
op.drop_table("items", schema=schema)
# ### end Alembic commands ###
-1
View File
@@ -1 +0,0 @@
"""FastAPI applications."""
-52
View File
@@ -1,52 +0,0 @@
"""FastAPI interface for Contact database."""
import logging
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from typing import Annotated
import typer
import uvicorn
from fastapi import FastAPI
from python.api.routers import contact_router, views_router
from python.common import configure_logger
from python.fastapi_tools import ZstdMiddleware
from python.orm.common import get_postgres_engine
logger = logging.getLogger(__name__)
def create_app() -> FastAPI:
"""Create and configure the FastAPI application."""
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
"""Manage application lifespan."""
app.state.engine = get_postgres_engine()
yield
app.state.engine.dispose()
app = FastAPI(title="Contact Database API", lifespan=lifespan)
app.add_middleware(ZstdMiddleware)
app.include_router(contact_router)
app.include_router(views_router)
return app
def serve(
host: Annotated[str, typer.Option("--host", "-h", help="Host to bind to")],
port: Annotated[int, typer.Option("--port", "-p", help="Port to bind to")] = 8000,
log_level: Annotated[str, typer.Option("--log-level", "-l", help="Log level")] = "INFO",
) -> None:
"""Start the Contact API server."""
configure_logger(log_level)
app = create_app()
uvicorn.run(app, host=host, port=port)
if __name__ == "__main__":
typer.run(serve)
-6
View File
@@ -1,6 +0,0 @@
"""API routers."""
from python.api.routers.contact import router as contact_router
from python.api.routers.views import router as views_router
__all__ = ["contact_router", "views_router"]
-481
View File
@@ -1,481 +0,0 @@
"""Contact API router."""
from pathlib import Path
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from python.fastapi_tools.db import DbSession
from python.orm.richie.contact import Contact, ContactRelationship, Need, RelationshipType
TEMPLATES_DIR = Path(__file__).parent.parent / "templates"
templates = Jinja2Templates(directory=TEMPLATES_DIR)
def _is_htmx(request: Request) -> bool:
"""Check if the request is from HTMX."""
return request.headers.get("HX-Request") == "true"
class NeedBase(BaseModel):
"""Base schema for Need."""
name: str
description: str | None = None
class NeedCreate(NeedBase):
"""Schema for creating a Need."""
class NeedResponse(NeedBase):
"""Schema for Need response."""
id: int
model_config = {"from_attributes": True}
class ContactRelationshipCreate(BaseModel):
"""Schema for creating a contact relationship."""
related_contact_id: int
relationship_type: RelationshipType
closeness_weight: int | None = None
class ContactRelationshipUpdate(BaseModel):
"""Schema for updating a contact relationship."""
relationship_type: RelationshipType | None = None
closeness_weight: int | None = None
class ContactRelationshipResponse(BaseModel):
"""Schema for contact relationship response."""
contact_id: int
related_contact_id: int
relationship_type: str
closeness_weight: int
model_config = {"from_attributes": True}
class RelationshipTypeInfo(BaseModel):
"""Information about a relationship type."""
value: str
display_name: str
default_weight: int
class GraphNode(BaseModel):
"""Node in the relationship graph."""
id: int
name: str
current_job: str | None = None
class GraphEdge(BaseModel):
"""Edge in the relationship graph."""
source: int
target: int
relationship_type: str
closeness_weight: int
class GraphData(BaseModel):
"""Complete graph data for visualization."""
nodes: list[GraphNode]
edges: list[GraphEdge]
class ContactBase(BaseModel):
"""Base schema for Contact."""
name: str
age: int | None = None
bio: str | None = None
current_job: str | None = None
gender: str | None = None
goals: str | None = None
legal_name: str | None = None
profile_pic: str | None = None
safe_conversation_starters: str | None = None
self_sufficiency_score: int | None = None
social_structure_style: str | None = None
ssn: str | None = None
suffix: str | None = None
timezone: str | None = None
topics_to_avoid: str | None = None
class ContactCreate(ContactBase):
"""Schema for creating a Contact."""
need_ids: list[int] = []
class ContactUpdate(BaseModel):
"""Schema for updating a Contact."""
name: str | None = None
age: int | None = None
bio: str | None = None
current_job: str | None = None
gender: str | None = None
goals: str | None = None
legal_name: str | None = None
profile_pic: str | None = None
safe_conversation_starters: str | None = None
self_sufficiency_score: int | None = None
social_structure_style: str | None = None
ssn: str | None = None
suffix: str | None = None
timezone: str | None = None
topics_to_avoid: str | None = None
need_ids: list[int] | None = None
class ContactResponse(ContactBase):
"""Schema for Contact response with relationships."""
id: int
needs: list[NeedResponse] = []
related_to: list[ContactRelationshipResponse] = []
related_from: list[ContactRelationshipResponse] = []
model_config = {"from_attributes": True}
class ContactListResponse(ContactBase):
"""Schema for Contact list response."""
id: int
model_config = {"from_attributes": True}
router = APIRouter(prefix="/api", tags=["contacts"])
@router.post("/needs", response_model=NeedResponse)
def create_need(need: NeedCreate, db: DbSession) -> Need:
"""Create a new need."""
db_need = Need(name=need.name, description=need.description)
db.add(db_need)
db.commit()
db.refresh(db_need)
return db_need
@router.get("/needs", response_model=list[NeedResponse])
def list_needs(db: DbSession) -> list[Need]:
"""List all needs."""
return list(db.scalars(select(Need)).all())
@router.get("/needs/{need_id}", response_model=NeedResponse)
def get_need(need_id: int, db: DbSession) -> Need:
"""Get a need by ID."""
need = db.get(Need, need_id)
if not need:
raise HTTPException(status_code=404, detail="Need not found")
return need
@router.delete("/needs/{need_id}", response_model=None)
def delete_need(need_id: int, request: Request, db: DbSession) -> dict[str, bool] | HTMLResponse:
"""Delete a need by ID."""
need = db.get(Need, need_id)
if not need:
raise HTTPException(status_code=404, detail="Need not found")
db.delete(need)
db.commit()
if _is_htmx(request):
return HTMLResponse("")
return {"deleted": True}
@router.post("/contacts", response_model=ContactResponse)
def create_contact(contact: ContactCreate, db: DbSession) -> Contact:
"""Create a new contact."""
need_ids = contact.need_ids
contact_data = contact.model_dump(exclude={"need_ids"})
db_contact = Contact(**contact_data)
if need_ids:
needs = list(db.scalars(select(Need).where(Need.id.in_(need_ids))).all())
db_contact.needs = needs
db.add(db_contact)
db.commit()
db.refresh(db_contact)
return db_contact
@router.get("/contacts", response_model=list[ContactListResponse])
def list_contacts(
db: DbSession,
skip: int = 0,
limit: int = 100,
) -> list[Contact]:
"""List all contacts with pagination."""
return list(db.scalars(select(Contact).offset(skip).limit(limit)).all())
@router.get("/contacts/{contact_id}", response_model=ContactResponse)
def get_contact(contact_id: int, db: DbSession) -> Contact:
"""Get a contact by ID with all relationships."""
contact = db.scalar(
select(Contact)
.where(Contact.id == contact_id)
.options(
selectinload(Contact.needs),
selectinload(Contact.related_to),
selectinload(Contact.related_from),
)
)
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
return contact
@router.patch("/contacts/{contact_id}", response_model=ContactResponse)
def update_contact(
contact_id: int,
contact: ContactUpdate,
db: DbSession,
) -> Contact:
"""Update a contact by ID."""
db_contact = db.get(Contact, contact_id)
if not db_contact:
raise HTTPException(status_code=404, detail="Contact not found")
update_data = contact.model_dump(exclude_unset=True)
need_ids = update_data.pop("need_ids", None)
for key, value in update_data.items():
setattr(db_contact, key, value)
if need_ids is not None:
needs = list(db.scalars(select(Need).where(Need.id.in_(need_ids))).all())
db_contact.needs = needs
db.commit()
db.refresh(db_contact)
return db_contact
@router.delete("/contacts/{contact_id}", response_model=None)
def delete_contact(contact_id: int, request: Request, db: DbSession) -> dict[str, bool] | HTMLResponse:
"""Delete a contact by ID."""
contact = db.get(Contact, contact_id)
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
db.delete(contact)
db.commit()
if _is_htmx(request):
return HTMLResponse("")
return {"deleted": True}
@router.post("/contacts/{contact_id}/needs/{need_id}")
def add_need_to_contact(
contact_id: int,
need_id: int,
db: DbSession,
) -> dict[str, bool]:
"""Add a need to a contact."""
contact = db.get(Contact, contact_id)
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
need = db.get(Need, need_id)
if not need:
raise HTTPException(status_code=404, detail="Need not found")
if need not in contact.needs:
contact.needs.append(need)
db.commit()
return {"added": True}
@router.delete("/contacts/{contact_id}/needs/{need_id}", response_model=None)
def remove_need_from_contact(
contact_id: int,
need_id: int,
request: Request,
db: DbSession,
) -> dict[str, bool] | HTMLResponse:
"""Remove a need from a contact."""
contact = db.get(Contact, contact_id)
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
need = db.get(Need, need_id)
if not need:
raise HTTPException(status_code=404, detail="Need not found")
if need in contact.needs:
contact.needs.remove(need)
db.commit()
if _is_htmx(request):
return HTMLResponse("")
return {"removed": True}
@router.post(
"/contacts/{contact_id}/relationships",
response_model=ContactRelationshipResponse,
)
def add_contact_relationship(
contact_id: int,
relationship: ContactRelationshipCreate,
db: DbSession,
) -> ContactRelationship:
"""Add a relationship between two contacts."""
contact = db.get(Contact, contact_id)
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
related_contact = db.get(Contact, relationship.related_contact_id)
if not related_contact:
raise HTTPException(status_code=404, detail="Related contact not found")
if contact_id == relationship.related_contact_id:
raise HTTPException(status_code=400, detail="Cannot relate contact to itself")
# Use provided weight or default from relationship type
weight = relationship.closeness_weight
if weight is None:
weight = relationship.relationship_type.default_weight
db_relationship = ContactRelationship(
contact_id=contact_id,
related_contact_id=relationship.related_contact_id,
relationship_type=relationship.relationship_type.value,
closeness_weight=weight,
)
db.add(db_relationship)
db.commit()
db.refresh(db_relationship)
return db_relationship
@router.get(
"/contacts/{contact_id}/relationships",
response_model=list[ContactRelationshipResponse],
)
def get_contact_relationships(
contact_id: int,
db: DbSession,
) -> list[ContactRelationship]:
"""Get all relationships for a contact."""
contact = db.get(Contact, contact_id)
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
outgoing = list(db.scalars(select(ContactRelationship).where(ContactRelationship.contact_id == contact_id)).all())
incoming = list(
db.scalars(select(ContactRelationship).where(ContactRelationship.related_contact_id == contact_id)).all()
)
return outgoing + incoming
@router.patch(
"/contacts/{contact_id}/relationships/{related_contact_id}",
response_model=ContactRelationshipResponse,
)
def update_contact_relationship(
contact_id: int,
related_contact_id: int,
update: ContactRelationshipUpdate,
db: DbSession,
) -> ContactRelationship:
"""Update a relationship between two contacts."""
relationship = db.scalar(
select(ContactRelationship).where(
ContactRelationship.contact_id == contact_id,
ContactRelationship.related_contact_id == related_contact_id,
)
)
if not relationship:
raise HTTPException(status_code=404, detail="Relationship not found")
if update.relationship_type is not None:
relationship.relationship_type = update.relationship_type.value
if update.closeness_weight is not None:
relationship.closeness_weight = update.closeness_weight
db.commit()
db.refresh(relationship)
return relationship
@router.delete("/contacts/{contact_id}/relationships/{related_contact_id}", response_model=None)
def remove_contact_relationship(
contact_id: int,
related_contact_id: int,
request: Request,
db: DbSession,
) -> dict[str, bool] | HTMLResponse:
"""Remove a relationship between two contacts."""
relationship = db.scalar(
select(ContactRelationship).where(
ContactRelationship.contact_id == contact_id,
ContactRelationship.related_contact_id == related_contact_id,
)
)
if not relationship:
raise HTTPException(status_code=404, detail="Relationship not found")
db.delete(relationship)
db.commit()
if _is_htmx(request):
return HTMLResponse("")
return {"deleted": True}
@router.get("/relationship-types")
def list_relationship_types() -> list[RelationshipTypeInfo]:
"""List all available relationship types with their default weights."""
return [
RelationshipTypeInfo(
value=rt.value,
display_name=rt.display_name,
default_weight=rt.default_weight,
)
for rt in RelationshipType
]
@router.get("/graph")
def get_relationship_graph(db: DbSession) -> GraphData:
"""Get all contacts and relationships as graph data for visualization."""
contacts = list(db.scalars(select(Contact)).all())
relationships = list(db.scalars(select(ContactRelationship)).all())
nodes = [GraphNode(id=c.id, name=c.name, current_job=c.current_job) for c in contacts]
edges = [
GraphEdge(
source=rel.contact_id,
target=rel.related_contact_id,
relationship_type=rel.relationship_type,
closeness_weight=rel.closeness_weight,
)
for rel in relationships
]
return GraphData(nodes=nodes, edges=edges)
-345
View File
@@ -1,345 +0,0 @@
"""HTMX server-rendered view router."""
from pathlib import Path
from typing import Annotated, Any
from fastapi import APIRouter, Form, HTTPException, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.templating import Jinja2Templates
from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload
from python.fastapi_tools.db import DbSession
from python.orm.richie.contact import Contact, ContactRelationship, Need, RelationshipType
TEMPLATES_DIR = Path(__file__).parent.parent / "templates"
templates = Jinja2Templates(directory=TEMPLATES_DIR)
router = APIRouter(tags=["views"])
FAMILIAL_TYPES = {
"parent",
"child",
"sibling",
"grandparent",
"grandchild",
"aunt_uncle",
"niece_nephew",
"cousin",
"in_law",
}
FRIEND_TYPES = {"best_friend", "close_friend", "friend", "acquaintance", "neighbor"}
PARTNER_TYPES = {"spouse", "partner"}
PROFESSIONAL_TYPES = {"mentor", "mentee", "business_partner", "colleague", "manager", "direct_report", "client"}
CONTACT_STRING_FIELDS = (
"name",
"legal_name",
"suffix",
"gender",
"current_job",
"timezone",
"profile_pic",
"bio",
"goals",
"social_structure_style",
"safe_conversation_starters",
"topics_to_avoid",
"ssn",
)
CONTACT_INT_FIELDS = ("age", "self_sufficiency_score")
def _group_relationships(relationships: list[ContactRelationship]) -> dict[str, list[ContactRelationship]]:
"""Group relationships by category."""
groups: dict[str, list[ContactRelationship]] = {
"familial": [],
"partners": [],
"friends": [],
"professional": [],
"other": [],
}
for rel in relationships:
if rel.relationship_type in FAMILIAL_TYPES:
groups["familial"].append(rel)
elif rel.relationship_type in PARTNER_TYPES:
groups["partners"].append(rel)
elif rel.relationship_type in FRIEND_TYPES:
groups["friends"].append(rel)
elif rel.relationship_type in PROFESSIONAL_TYPES:
groups["professional"].append(rel)
else:
groups["other"].append(rel)
return groups
def _build_contact_name_map(database: Session, contact: Contact) -> dict[int, str]:
"""Build a mapping of contact IDs to names for relationship display."""
related_ids = {rel.related_contact_id for rel in contact.related_to}
related_ids |= {rel.contact_id for rel in contact.related_from}
related_ids.discard(contact.id)
if not related_ids:
return {}
related_contacts = list(database.scalars(select(Contact).where(Contact.id.in_(related_ids))).all())
return {related.id: related.name for related in related_contacts}
def _get_relationship_type_display() -> dict[str, str]:
"""Build a mapping of relationship type values to display names."""
return {rel_type.value: rel_type.display_name for rel_type in RelationshipType}
async def _parse_contact_form(request: Request) -> dict[str, Any]:
"""Parse contact form data from a multipart/form request."""
form_data = await request.form()
result: dict[str, Any] = {}
for field in CONTACT_STRING_FIELDS:
value = form_data.get(field, "")
result[field] = str(value) if value else None
for field in CONTACT_INT_FIELDS:
value = form_data.get(field, "")
result[field] = int(value) if value else None
result["need_ids"] = [int(value) for value in form_data.getlist("need_ids")]
return result
def _save_contact_from_form(database: Session, contact: Contact, form_result: dict[str, Any]) -> None:
"""Apply parsed form data to a Contact and save associated needs."""
need_ids = form_result.pop("need_ids")
for key, value in form_result.items():
setattr(contact, key, value)
if need_ids:
contact.needs = list(database.scalars(select(Need).where(Need.id.in_(need_ids))).all())
else:
contact.needs = []
@router.get("/", response_class=HTMLResponse)
@router.get("/contacts", response_class=HTMLResponse)
def contact_list_page(request: Request, database: DbSession) -> HTMLResponse:
"""Render the contacts list page."""
contacts = list(database.scalars(select(Contact)).all())
return templates.TemplateResponse(request, "contact_list.html", {"contacts": contacts})
@router.get("/contacts/new", response_class=HTMLResponse)
def new_contact_page(request: Request, database: DbSession) -> HTMLResponse:
"""Render the new contact form page."""
all_needs = list(database.scalars(select(Need)).all())
return templates.TemplateResponse(request, "contact_form.html", {"contact": None, "all_needs": all_needs})
@router.post("/htmx/contacts/new")
async def create_contact_form(request: Request, database: DbSession) -> RedirectResponse:
"""Handle the create contact form submission."""
form_result = await _parse_contact_form(request)
contact = Contact()
_save_contact_from_form(database, contact, form_result)
database.add(contact)
database.commit()
database.refresh(contact)
return RedirectResponse(url=f"/contacts/{contact.id}", status_code=303)
@router.get("/contacts/{contact_id}", response_class=HTMLResponse)
def contact_detail_page(contact_id: int, request: Request, database: DbSession) -> HTMLResponse:
"""Render the contact detail page."""
contact = database.scalar(
select(Contact)
.where(Contact.id == contact_id)
.options(
selectinload(Contact.needs),
selectinload(Contact.related_to),
selectinload(Contact.related_from),
)
)
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
contact_names = _build_contact_name_map(database, contact)
grouped_relationships = _group_relationships(contact.related_to)
all_contacts = list(database.scalars(select(Contact)).all())
all_needs = list(database.scalars(select(Need)).all())
available_needs = [need for need in all_needs if need not in contact.needs]
return templates.TemplateResponse(
request,
"contact_detail.html",
{
"contact": contact,
"contact_names": contact_names,
"grouped_relationships": grouped_relationships,
"all_contacts": all_contacts,
"available_needs": available_needs,
"relationship_types": list(RelationshipType),
},
)
@router.get("/contacts/{contact_id}/edit", response_class=HTMLResponse)
def edit_contact_page(contact_id: int, request: Request, database: DbSession) -> HTMLResponse:
"""Render the edit contact form page."""
contact = database.scalar(select(Contact).where(Contact.id == contact_id).options(selectinload(Contact.needs)))
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
all_needs = list(database.scalars(select(Need)).all())
return templates.TemplateResponse(request, "contact_form.html", {"contact": contact, "all_needs": all_needs})
@router.post("/htmx/contacts/{contact_id}/edit")
async def update_contact_form(contact_id: int, request: Request, database: DbSession) -> RedirectResponse:
"""Handle the edit contact form submission."""
contact = database.get(Contact, contact_id)
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
form_result = await _parse_contact_form(request)
_save_contact_from_form(database, contact, form_result)
database.commit()
return RedirectResponse(url=f"/contacts/{contact_id}", status_code=303)
@router.post("/htmx/contacts/{contact_id}/add-need", response_class=HTMLResponse)
def add_need_to_contact_htmx(
contact_id: int,
request: Request,
database: DbSession,
need_id: Annotated[int, Form()],
) -> HTMLResponse:
"""Add a need to a contact and return updated manage-needs partial."""
contact = database.scalar(select(Contact).where(Contact.id == contact_id).options(selectinload(Contact.needs)))
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
need = database.get(Need, need_id)
if not need:
raise HTTPException(status_code=404, detail="Need not found")
if need not in contact.needs:
contact.needs.append(need)
database.commit()
database.refresh(contact)
return templates.TemplateResponse(request, "partials/manage_needs.html", {"contact": contact})
@router.post("/htmx/contacts/{contact_id}/add-relationship", response_class=HTMLResponse)
def add_relationship_htmx(
contact_id: int,
request: Request,
database: DbSession,
related_contact_id: Annotated[int, Form()],
relationship_type: Annotated[str, Form()],
) -> HTMLResponse:
"""Add a relationship and return updated manage-relationships partial."""
contact = database.scalar(select(Contact).where(Contact.id == contact_id).options(selectinload(Contact.related_to)))
if not contact:
raise HTTPException(status_code=404, detail="Contact not found")
related_contact = database.get(Contact, related_contact_id)
if not related_contact:
raise HTTPException(status_code=404, detail="Related contact not found")
rel_type = RelationshipType(relationship_type)
weight = rel_type.default_weight
relationship = ContactRelationship(
contact_id=contact_id,
related_contact_id=related_contact_id,
relationship_type=relationship_type,
closeness_weight=weight,
)
database.add(relationship)
database.commit()
database.refresh(contact)
contact_names = _build_contact_name_map(database, contact)
return templates.TemplateResponse(
request,
"partials/manage_relationships.html",
{"contact": contact, "contact_names": contact_names},
)
@router.post("/htmx/contacts/{contact_id}/relationships/{related_contact_id}/weight")
def update_relationship_weight_htmx(
contact_id: int,
related_contact_id: int,
database: DbSession,
closeness_weight: Annotated[int, Form()],
) -> HTMLResponse:
"""Update a relationship's closeness weight from HTMX range input."""
relationship = database.scalar(
select(ContactRelationship).where(
ContactRelationship.contact_id == contact_id,
ContactRelationship.related_contact_id == related_contact_id,
)
)
if not relationship:
raise HTTPException(status_code=404, detail="Relationship not found")
relationship.closeness_weight = closeness_weight
database.commit()
return HTMLResponse("")
@router.post("/htmx/needs", response_class=HTMLResponse)
def create_need_htmx(
request: Request,
database: DbSession,
name: Annotated[str, Form()],
description: Annotated[str, Form()] = "",
) -> HTMLResponse:
"""Create a need via form data and return updated needs list."""
need = Need(name=name, description=description or None)
database.add(need)
database.commit()
needs = list(database.scalars(select(Need)).all())
return templates.TemplateResponse(request, "partials/need_items.html", {"needs": needs})
@router.get("/needs", response_class=HTMLResponse)
def needs_page(request: Request, database: DbSession) -> HTMLResponse:
"""Render the needs list page."""
needs = list(database.scalars(select(Need)).all())
return templates.TemplateResponse(request, "need_list.html", {"needs": needs})
@router.get("/graph", response_class=HTMLResponse)
def graph_page(request: Request, database: DbSession) -> HTMLResponse:
"""Render the relationship graph page."""
contacts = list(database.scalars(select(Contact)).all())
relationships = list(database.scalars(select(ContactRelationship)).all())
graph_data = {
"nodes": [{"id": contact.id, "name": contact.name, "current_job": contact.current_job} for contact in contacts],
"edges": [
{
"source": rel.contact_id,
"target": rel.related_contact_id,
"relationship_type": rel.relationship_type,
"closeness_weight": rel.closeness_weight,
}
for rel in relationships
],
}
return templates.TemplateResponse(
request,
"graph.html",
{
"graph_data": graph_data,
"relationship_type_display": _get_relationship_type_display(),
},
)
-198
View File
@@ -1,198 +0,0 @@
<!DOCTYPE html>
<html lang="en" data-theme="light">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}Contact Database{% endblock %}</title>
<script src="https://unpkg.com/htmx.org@2.0.4"></script>
<style>
:root {
--color-bg: #f5f5f5;
--color-bg-card: #ffffff;
--color-bg-hover: #f0f0f0;
--color-bg-muted: #f9f9f9;
--color-bg-error: #ffe0e0;
--color-text: #333333;
--color-text-muted: #666666;
--color-text-error: #cc0000;
--color-border: #dddddd;
--color-border-light: #eeeeee;
--color-border-lighter: #f0f0f0;
--color-primary: #0066cc;
--color-primary-hover: #0055aa;
--color-danger: #cc3333;
--color-danger-hover: #aa2222;
--color-tag-bg: #e0e0e0;
--shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
line-height: 1.5;
color: var(--color-text);
background-color: var(--color-bg);
}
[data-theme="dark"] {
--color-bg: #1a1a1a;
--color-bg-card: #2d2d2d;
--color-bg-hover: #3d3d3d;
--color-bg-muted: #252525;
--color-bg-error: #4a2020;
--color-text: #e0e0e0;
--color-text-muted: #a0a0a0;
--color-text-error: #ff6b6b;
--color-border: #404040;
--color-border-light: #353535;
--color-border-lighter: #303030;
--color-primary: #4da6ff;
--color-primary-hover: #7dbfff;
--color-danger: #ff6b6b;
--color-danger-hover: #ff8a8a;
--color-tag-bg: #404040;
--shadow: 0 1px 3px rgba(0, 0, 0, 0.3);
}
* { box-sizing: border-box; }
body { margin: 0; background: var(--color-bg); color: var(--color-text); }
.app { max-width: 1000px; margin: 0 auto; padding: 20px; }
nav { display: flex; align-items: center; gap: 20px; padding: 15px 0; border-bottom: 1px solid var(--color-border); margin-bottom: 20px; }
nav a { color: var(--color-primary); text-decoration: none; font-weight: 500; }
nav a:hover { text-decoration: underline; }
.theme-toggle { margin-left: auto; }
main { background: var(--color-bg-card); padding: 20px; border-radius: 8px; box-shadow: var(--shadow); }
.header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px; }
.header h1 { margin: 0; }
a { color: var(--color-primary); }
a:hover { text-decoration: underline; }
.btn { display: inline-block; padding: 8px 16px; border: 1px solid var(--color-border); border-radius: 4px; background: var(--color-bg-card); color: var(--color-text); text-decoration: none; cursor: pointer; font-size: 14px; margin-left: 8px; }
.btn:hover { background: var(--color-bg-hover); }
.btn-primary { background: var(--color-primary); border-color: var(--color-primary); color: white; }
.btn-primary:hover { background: var(--color-primary-hover); }
.btn-danger { background: var(--color-danger); border-color: var(--color-danger); color: white; }
.btn-danger:hover { background: var(--color-danger-hover); }
.btn-small { padding: 4px 8px; font-size: 12px; }
.btn:disabled { opacity: 0.6; cursor: not-allowed; }
table { width: 100%; border-collapse: collapse; }
th, td { padding: 12px; text-align: left; border-bottom: 1px solid var(--color-border-light); }
th { font-weight: 600; background: var(--color-bg-muted); }
tr:hover { background: var(--color-bg-muted); }
.error { background: var(--color-bg-error); color: var(--color-text-error); padding: 10px; border-radius: 4px; margin-bottom: 20px; }
.tag { display: inline-block; background: var(--color-tag-bg); padding: 2px 8px; border-radius: 12px; font-size: 12px; color: var(--color-text-muted); }
.add-form { display: flex; gap: 10px; margin-top: 15px; flex-wrap: wrap; }
.add-form select, .add-form input { padding: 8px; border: 1px solid var(--color-border); border-radius: 4px; min-width: 200px; background: var(--color-bg-card); color: var(--color-text); }
.form-group { margin-bottom: 20px; }
.form-group label { display: block; font-weight: 500; margin-bottom: 5px; }
.form-group input, .form-group textarea, .form-group select { width: 100%; padding: 10px; border: 1px solid var(--color-border); border-radius: 4px; font-size: 14px; background: var(--color-bg-card); color: var(--color-text); }
.form-group textarea { resize: vertical; }
.form-row { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }
.checkbox-group { display: flex; flex-wrap: wrap; gap: 15px; }
.checkbox-label { display: flex; align-items: center; gap: 5px; cursor: pointer; }
.form-actions { display: flex; gap: 10px; margin-top: 30px; padding-top: 20px; border-top: 1px solid var(--color-border-light); }
.need-form { background: var(--color-bg-muted); padding: 20px; border-radius: 4px; margin-bottom: 20px; }
.need-items { list-style: none; padding: 0; }
.need-items li { display: flex; justify-content: space-between; align-items: flex-start; padding: 15px; border: 1px solid var(--color-border-light); border-radius: 4px; margin-bottom: 10px; }
.need-info p { margin: 5px 0 0; color: var(--color-text-muted); font-size: 14px; }
.graph-container { width: 100%; }
.graph-hint { color: var(--color-text-muted); font-size: 14px; margin-bottom: 15px; }
.selected-info { margin-top: 15px; padding: 15px; background: var(--color-bg-muted); border-radius: 8px; }
.selected-info h3 { margin: 0 0 10px; }
.selected-info p { margin: 5px 0; color: var(--color-text-muted); }
.legend { margin-top: 20px; padding: 15px; background: var(--color-bg-muted); border-radius: 8px; }
.legend h4 { margin: 0 0 10px; font-size: 14px; }
.legend-items { display: flex; flex-wrap: wrap; gap: 15px; }
.legend-item { display: flex; align-items: center; gap: 8px; font-size: 12px; color: var(--color-text-muted); }
.legend-line { width: 30px; border-radius: 2px; }
.id-card { width: 100%; }
.id-card-inner { background: linear-gradient(135deg, #0a0a0f 0%, #1a1a2e 50%, #0a0a0f 100%); background-image: radial-gradient(white 1px, transparent 1px), linear-gradient(135deg, #0a0a0f 0%, #1a1a2e 50%, #0a0a0f 100%); background-size: 50px 50px, 100% 100%; color: #fff; border-radius: 12px; padding: 25px; min-height: 500px; position: relative; overflow: hidden; }
.id-card-header { display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 15px; }
.id-card-header-left { flex: 1; }
.id-card-header-right { display: flex; flex-direction: column; align-items: flex-end; gap: 10px; }
.id-card-title { font-size: 2.5rem; font-weight: 700; margin: 0; color: #fff; text-shadow: 2px 2px 4px rgba(0,0,0,0.5); }
.id-profile-pic { width: 80px; height: 80px; border-radius: 8px; object-fit: cover; border: 2px solid rgba(255,255,255,0.3); }
.id-profile-placeholder { width: 80px; height: 80px; border-radius: 8px; background: linear-gradient(135deg, #4ecdc4 0%, #44a8a0 100%); display: flex; align-items: center; justify-content: center; border: 2px solid rgba(255,255,255,0.3); }
.id-profile-placeholder span { font-size: 2rem; font-weight: 700; color: #fff; text-shadow: 1px 1px 2px rgba(0,0,0,0.3); }
.id-card-actions { display: flex; gap: 8px; }
.id-card-actions .btn { background: rgba(255,255,255,0.1); border-color: rgba(255,255,255,0.3); color: #fff; }
.id-card-actions .btn:hover { background: rgba(255,255,255,0.2); }
.id-card-body { display: grid; grid-template-columns: 1fr 1.5fr; gap: 30px; }
.id-card-left { display: flex; flex-direction: column; gap: 8px; }
.id-field { font-size: 1rem; line-height: 1.4; }
.id-field-block { margin-top: 15px; font-size: 0.95rem; line-height: 1.5; }
.id-label { color: #4ecdc4; font-weight: 500; }
.id-card-right { display: flex; flex-direction: column; gap: 20px; }
.id-bio { font-size: 0.9rem; line-height: 1.6; color: #e0e0e0; }
.id-relationships { margin-top: 10px; }
.id-section-title { font-size: 1.5rem; margin: 0 0 15px; color: #fff; border-bottom: 1px solid rgba(255,255,255,0.2); padding-bottom: 8px; }
.id-rel-group { margin-bottom: 12px; font-size: 0.9rem; line-height: 1.6; }
.id-rel-label { color: #a0a0a0; }
.id-rel-group a { color: #4ecdc4; text-decoration: none; }
.id-rel-group a:hover { text-decoration: underline; }
.id-rel-type { color: #888; font-size: 0.85em; }
.id-card-warnings { margin-top: 30px; padding-top: 20px; border-top: 1px solid rgba(255,255,255,0.2); display: flex; flex-wrap: wrap; gap: 20px; }
.id-warning { display: flex; align-items: center; gap: 8px; font-size: 0.9rem; color: #ff6b6b; }
.warning-dot { width: 8px; height: 8px; background: #ff6b6b; border-radius: 50%; flex-shrink: 0; }
.warning-desc { color: #ccc; }
.id-card-manage { margin-top: 20px; background: var(--color-bg-muted); border-radius: 8px; padding: 15px; }
.id-card-manage summary { cursor: pointer; font-weight: 600; font-size: 1.1rem; padding: 5px 0; }
.id-card-manage[open] summary { margin-bottom: 15px; border-bottom: 1px solid var(--color-border-light); padding-bottom: 10px; }
.manage-section { margin-bottom: 25px; }
.manage-section h3 { margin: 0 0 15px; font-size: 1rem; }
.manage-relationships { display: flex; flex-direction: column; gap: 10px; margin-bottom: 15px; }
.manage-rel-item { display: flex; align-items: center; gap: 12px; padding: 10px; background: var(--color-bg-card); border-radius: 6px; flex-wrap: wrap; }
.manage-rel-item a { font-weight: 500; min-width: 120px; }
.weight-control { display: flex; align-items: center; gap: 8px; font-size: 12px; color: var(--color-text-muted); }
.weight-control input[type="range"] { width: 80px; cursor: pointer; }
.weight-value { min-width: 20px; text-align: center; font-weight: 600; }
.manage-needs-list { list-style: none; padding: 0; margin: 0 0 15px; }
.manage-needs-list li { display: flex; align-items: center; gap: 12px; padding: 10px; background: var(--color-bg-card); border-radius: 6px; margin-bottom: 8px; }
.manage-needs-list li .btn { margin-left: auto; }
.htmx-indicator { display: none; }
.htmx-request .htmx-indicator { display: inline; }
.htmx-request.htmx-indicator { display: inline; }
@media (max-width: 768px) {
.id-card-body { grid-template-columns: 1fr; }
.id-card-title { font-size: 1.8rem; }
.id-card-header { flex-direction: column; gap: 15px; }
}
</style>
</head>
<body>
<div class="app">
<nav>
<a href="/contacts">Contacts</a>
<a href="/graph">Graph</a>
<a href="/needs">Needs</a>
<button class="btn btn-small theme-toggle" onclick="toggleTheme()">
<span id="theme-label">Dark</span>
</button>
</nav>
<main id="main-content">
{% block content %}{% endblock %}
</main>
</div>
<script>
function toggleTheme() {
const html = document.documentElement;
const current = html.getAttribute('data-theme');
const next = current === 'light' ? 'dark' : 'light';
html.setAttribute('data-theme', next);
localStorage.setItem('theme', next);
document.getElementById('theme-label').textContent = next === 'light' ? 'Dark' : 'Light';
}
(function() {
const saved = localStorage.getItem('theme') || 'light';
document.documentElement.setAttribute('data-theme', saved);
document.getElementById('theme-label').textContent = saved === 'light' ? 'Dark' : 'Light';
})();
</script>
</body>
</html>
-204
View File
@@ -1,204 +0,0 @@
{% extends "base.html" %}
{% block title %}{{ contact.name }}{% endblock %}
{% block content %}
<div class="id-card">
<div class="id-card-inner">
<div class="id-card-header">
<div class="id-card-header-left">
<h1 class="id-card-title">I.D.: {{ contact.name }}</h1>
</div>
<div class="id-card-header-right">
{% if contact.profile_pic %}
<img src="{{ contact.profile_pic }}" alt="{{ contact.name }}'s profile" class="id-profile-pic">
{% else %}
<div class="id-profile-placeholder">
<span>{{ contact.name[0]|upper }}</span>
</div>
{% endif %}
<div class="id-card-actions">
<a href="/contacts/{{ contact.id }}/edit" class="btn btn-small">Edit</a>
<a href="/contacts" class="btn btn-small">Back</a>
</div>
</div>
</div>
<div class="id-card-body">
<div class="id-card-left">
{% if contact.legal_name %}
<div class="id-field">Legal name: {{ contact.legal_name }}</div>
{% endif %}
{% if contact.suffix %}
<div class="id-field">Suffix: {{ contact.suffix }}</div>
{% endif %}
{% if contact.gender %}
<div class="id-field">Gender: {{ contact.gender }}</div>
{% endif %}
{% if contact.age %}
<div class="id-field">Age: {{ contact.age }}</div>
{% endif %}
{% if contact.current_job %}
<div class="id-field">Job: {{ contact.current_job }}</div>
{% endif %}
{% if contact.social_structure_style %}
<div class="id-field">Social style: {{ contact.social_structure_style }}</div>
{% endif %}
{% if contact.self_sufficiency_score is not none %}
<div class="id-field">Self-Sufficiency: {{ contact.self_sufficiency_score }}</div>
{% endif %}
{% if contact.timezone %}
<div class="id-field">Timezone: {{ contact.timezone }}</div>
{% endif %}
{% if contact.safe_conversation_starters %}
<div class="id-field-block">
<span class="id-label">Safe con starters:</span> {{ contact.safe_conversation_starters }}
</div>
{% endif %}
{% if contact.topics_to_avoid %}
<div class="id-field-block">
<span class="id-label">Topics to avoid:</span> {{ contact.topics_to_avoid }}
</div>
{% endif %}
{% if contact.goals %}
<div class="id-field-block">
<span class="id-label">Goals:</span> {{ contact.goals }}
</div>
{% endif %}
</div>
<div class="id-card-right">
{% if contact.bio %}
<div class="id-bio">
<span class="id-label">Bio:</span> {{ contact.bio }}
</div>
{% endif %}
<div class="id-relationships">
<h2 class="id-section-title">Relationships</h2>
{% if grouped_relationships.familial %}
<div class="id-rel-group">
<span class="id-rel-label">Familial:</span>
{% for rel in grouped_relationships.familial %}
<a href="/contacts/{{ rel.related_contact_id }}">{{ contact_names[rel.related_contact_id] }}</a><span class="id-rel-type">({{ rel.relationship_type|replace("_", " ")|title }})</span>{% if not loop.last %}, {% endif %}
{% endfor %}
</div>
{% endif %}
{% if grouped_relationships.partners %}
<div class="id-rel-group">
<span class="id-rel-label">Partners:</span>
{% for rel in grouped_relationships.partners %}
<a href="/contacts/{{ rel.related_contact_id }}">{{ contact_names[rel.related_contact_id] }}</a>{% if not loop.last %}, {% endif %}
{% endfor %}
</div>
{% endif %}
{% if grouped_relationships.friends %}
<div class="id-rel-group">
<span class="id-rel-label">Friends:</span>
{% for rel in grouped_relationships.friends %}
<a href="/contacts/{{ rel.related_contact_id }}">{{ contact_names[rel.related_contact_id] }}</a>{% if not loop.last %}, {% endif %}
{% endfor %}
</div>
{% endif %}
{% if grouped_relationships.professional %}
<div class="id-rel-group">
<span class="id-rel-label">Professional:</span>
{% for rel in grouped_relationships.professional %}
<a href="/contacts/{{ rel.related_contact_id }}">{{ contact_names[rel.related_contact_id] }}</a><span class="id-rel-type">({{ rel.relationship_type|replace("_", " ")|title }})</span>{% if not loop.last %}, {% endif %}
{% endfor %}
</div>
{% endif %}
{% if grouped_relationships.other %}
<div class="id-rel-group">
<span class="id-rel-label">Other:</span>
{% for rel in grouped_relationships.other %}
<a href="/contacts/{{ rel.related_contact_id }}">{{ contact_names[rel.related_contact_id] }}</a><span class="id-rel-type">({{ rel.relationship_type|replace("_", " ")|title }})</span>{% if not loop.last %}, {% endif %}
{% endfor %}
</div>
{% endif %}
{% if contact.related_from %}
<div class="id-rel-group">
<span class="id-rel-label">Known by:</span>
{% for rel in contact.related_from %}
<a href="/contacts/{{ rel.contact_id }}">{{ contact_names[rel.contact_id] }}</a>{% if not loop.last %}, {% endif %}
{% endfor %}
</div>
{% endif %}
</div>
</div>
</div>
{% if contact.needs %}
<div class="id-card-warnings">
{% for need in contact.needs %}
<div class="id-warning">
<span class="warning-dot"></span>
Warning: {{ need.name }}
{% if need.description %}<span class="warning-desc"> - {{ need.description }}</span>{% endif %}
</div>
{% endfor %}
</div>
{% endif %}
</div>
<details class="id-card-manage">
<summary>Manage Contact</summary>
<div class="manage-section">
<h3>Manage Relationships</h3>
<div id="manage-relationships" class="manage-relationships">
{% include "partials/manage_relationships.html" %}
</div>
{% if all_contacts %}
<form hx-post="/htmx/contacts/{{ contact.id }}/add-relationship"
hx-target="#manage-relationships"
hx-swap="innerHTML"
class="add-form">
<select name="related_contact_id" required>
<option value="">Select contact...</option>
{% for other in all_contacts %}
{% if other.id != contact.id %}
<option value="{{ other.id }}">{{ other.name }}</option>
{% endif %}
{% endfor %}
</select>
<select name="relationship_type" required>
<option value="">Select relationship type...</option>
{% for rel_type in relationship_types %}
<option value="{{ rel_type.value }}">{{ rel_type.display_name }}</option>
{% endfor %}
</select>
<button type="submit" class="btn btn-primary">Add Relationship</button>
</form>
{% endif %}
</div>
<div class="manage-section">
<h3>Manage Needs/Warnings</h3>
<div id="manage-needs">
{% include "partials/manage_needs.html" %}
</div>
{% if available_needs %}
<form hx-post="/htmx/contacts/{{ contact.id }}/add-need"
hx-target="#manage-needs"
hx-swap="innerHTML"
class="add-form">
<select name="need_id" required>
<option value="">Select a need...</option>
{% for need in available_needs %}
<option value="{{ need.id }}">{{ need.name }}</option>
{% endfor %}
</select>
<button type="submit" class="btn btn-primary">Add Need</button>
</form>
{% endif %}
</div>
</details>
</div>
{% endblock %}
-115
View File
@@ -1,115 +0,0 @@
{% extends "base.html" %}
{% block title %}{{ "Edit " + contact.name if contact else "New Contact" }}{% endblock %}
{% block content %}
<div class="contact-form">
<h1>{{ "Edit Contact" if contact else "New Contact" }}</h1>
{% if contact %}
<form method="post" action="/htmx/contacts/{{ contact.id }}/edit">
{% else %}
<form method="post" action="/htmx/contacts/new">
{% endif %}
<div class="form-group">
<label for="name">Name *</label>
<input id="name" name="name" type="text" value="{{ contact.name if contact else '' }}" required>
</div>
<div class="form-row">
<div class="form-group">
<label for="legal_name">Legal Name</label>
<input id="legal_name" name="legal_name" type="text" value="{{ contact.legal_name or '' }}">
</div>
<div class="form-group">
<label for="suffix">Suffix</label>
<input id="suffix" name="suffix" type="text" value="{{ contact.suffix or '' }}">
</div>
</div>
<div class="form-row">
<div class="form-group">
<label for="age">Age</label>
<input id="age" name="age" type="number" value="{{ contact.age if contact and contact.age is not none else '' }}">
</div>
<div class="form-group">
<label for="gender">Gender</label>
<input id="gender" name="gender" type="text" value="{{ contact.gender or '' }}">
</div>
</div>
<div class="form-group">
<label for="current_job">Current Job</label>
<input id="current_job" name="current_job" type="text" value="{{ contact.current_job or '' }}">
</div>
<div class="form-group">
<label for="timezone">Timezone</label>
<input id="timezone" name="timezone" type="text" value="{{ contact.timezone or '' }}">
</div>
<div class="form-group">
<label for="profile_pic">Profile Picture URL</label>
<input id="profile_pic" name="profile_pic" type="url" placeholder="https://example.com/photo.jpg" value="{{ contact.profile_pic or '' }}">
</div>
<div class="form-group">
<label for="bio">Bio</label>
<textarea id="bio" name="bio" rows="3">{{ contact.bio or '' }}</textarea>
</div>
<div class="form-group">
<label for="goals">Goals</label>
<textarea id="goals" name="goals" rows="3">{{ contact.goals or '' }}</textarea>
</div>
<div class="form-group">
<label for="social_structure_style">Social Structure Style</label>
<input id="social_structure_style" name="social_structure_style" type="text" value="{{ contact.social_structure_style or '' }}">
</div>
<div class="form-group">
<label for="self_sufficiency_score">Self-Sufficiency Score (1-10)</label>
<input id="self_sufficiency_score" name="self_sufficiency_score" type="number" min="1" max="10" value="{{ contact.self_sufficiency_score if contact and contact.self_sufficiency_score is not none else '' }}">
</div>
<div class="form-group">
<label for="safe_conversation_starters">Safe Conversation Starters</label>
<textarea id="safe_conversation_starters" name="safe_conversation_starters" rows="2">{{ contact.safe_conversation_starters or '' }}</textarea>
</div>
<div class="form-group">
<label for="topics_to_avoid">Topics to Avoid</label>
<textarea id="topics_to_avoid" name="topics_to_avoid" rows="2">{{ contact.topics_to_avoid or '' }}</textarea>
</div>
<div class="form-group">
<label for="ssn">SSN</label>
<input id="ssn" name="ssn" type="text" value="{{ contact.ssn or '' }}">
</div>
{% if all_needs %}
<div class="form-group">
<label>Needs/Accommodations</label>
<div class="checkbox-group">
{% for need in all_needs %}
<label class="checkbox-label">
<input type="checkbox" name="need_ids" value="{{ need.id }}"
{% if contact and need in contact.needs %}checked{% endif %}>
{{ need.name }}
</label>
{% endfor %}
</div>
</div>
{% endif %}
<div class="form-actions">
<button type="submit" class="btn btn-primary">Save</button>
{% if contact %}
<a href="/contacts/{{ contact.id }}" class="btn">Cancel</a>
{% else %}
<a href="/contacts" class="btn">Cancel</a>
{% endif %}
</div>
</form>
</div>
{% endblock %}
-14
View File
@@ -1,14 +0,0 @@
{% extends "base.html" %}
{% block title %}Contacts{% endblock %}
{% block content %}
<div class="contact-list">
<div class="header">
<h1>Contacts</h1>
<a href="/contacts/new" class="btn btn-primary">Add Contact</a>
</div>
<div id="contact-table">
{% include "partials/contact_table.html" %}
</div>
</div>
{% endblock %}
-198
View File
@@ -1,198 +0,0 @@
{% extends "base.html" %}
{% block title %}Relationship Graph{% endblock %}
{% block content %}
<div class="graph-container">
<div class="header">
<h1>Relationship Graph</h1>
</div>
<p class="graph-hint">Drag nodes to reposition. Closer relationships have shorter, darker edges.</p>
<canvas id="graph-canvas" width="900" height="600"
style="border: 1px solid var(--color-border); border-radius: 8px; background: var(--color-bg); cursor: grab;">
</canvas>
<div id="selected-info"></div>
<div class="legend">
<h4>Relationship Closeness (1-10)</h4>
<div class="legend-items">
<div class="legend-item">
<span class="legend-line" style="background: hsl(220, 70%, 40%); height: 4px; display: inline-block;"></span>
<span>10 - Very Close (Spouse, Partner)</span>
</div>
<div class="legend-item">
<span class="legend-line" style="background: hsl(220, 70%, 52%); height: 3px; display: inline-block;"></span>
<span>7 - Close (Family, Best Friend)</span>
</div>
<div class="legend-item">
<span class="legend-line" style="background: hsl(220, 70%, 64%); height: 2px; display: inline-block;"></span>
<span>4 - Moderate (Friend, Colleague)</span>
</div>
<div class="legend-item">
<span class="legend-line" style="background: hsl(220, 70%, 72%); height: 1px; display: inline-block;"></span>
<span>2 - Distant (Acquaintance)</span>
</div>
</div>
</div>
</div>
<script>
(function() {
const RELATIONSHIP_DISPLAY = {{ relationship_type_display|tojson }};
const graphData = {{ graph_data|tojson }};
const canvas = document.getElementById('graph-canvas');
const ctx = canvas.getContext('2d');
const width = canvas.width;
const height = canvas.height;
const centerX = width / 2;
const centerY = height / 2;
const nodes = graphData.nodes.map(function(node) {
return Object.assign({}, node, {
x: centerX + (Math.random() - 0.5) * 300,
y: centerY + (Math.random() - 0.5) * 300,
vx: 0,
vy: 0
});
});
const nodeMap = new Map(nodes.map(function(node) { return [node.id, node]; }));
const edges = graphData.edges.map(function(edge) {
const sourceNode = nodeMap.get(edge.source);
const targetNode = nodeMap.get(edge.target);
if (!sourceNode || !targetNode) return null;
return Object.assign({}, edge, { sourceNode: sourceNode, targetNode: targetNode });
}).filter(function(edge) { return edge !== null; });
let dragNode = null;
let selectedNode = null;
const repulsion = 5000;
const springStrength = 0.05;
const baseSpringLength = 150;
const damping = 0.9;
const centerPull = 0.01;
function simulate() {
for (const node of nodes) { node.vx = 0; node.vy = 0; }
for (let i = 0; i < nodes.length; i++) {
for (let j = i + 1; j < nodes.length; j++) {
const dx = nodes[j].x - nodes[i].x;
const dy = nodes[j].y - nodes[i].y;
const dist = Math.sqrt(dx * dx + dy * dy) || 1;
const force = repulsion / (dist * dist);
const fx = (dx / dist) * force;
const fy = (dy / dist) * force;
nodes[i].vx -= fx; nodes[i].vy -= fy;
nodes[j].vx += fx; nodes[j].vy += fy;
}
}
for (const edge of edges) {
const dx = edge.targetNode.x - edge.sourceNode.x;
const dy = edge.targetNode.y - edge.sourceNode.y;
const dist = Math.sqrt(dx * dx + dy * dy) || 1;
const normalizedWeight = edge.closeness_weight / 10;
const idealLength = baseSpringLength * (1.5 - normalizedWeight);
const displacement = dist - idealLength;
const force = springStrength * displacement;
const fx = (dx / dist) * force;
const fy = (dy / dist) * force;
edge.sourceNode.vx += fx; edge.sourceNode.vy += fy;
edge.targetNode.vx -= fx; edge.targetNode.vy -= fy;
}
for (const node of nodes) {
node.vx += (centerX - node.x) * centerPull;
node.vy += (centerY - node.y) * centerPull;
}
for (const node of nodes) {
if (node === dragNode) continue;
node.x += node.vx * damping;
node.y += node.vy * damping;
node.x = Math.max(30, Math.min(width - 30, node.x));
node.y = Math.max(30, Math.min(height - 30, node.y));
}
}
function getEdgeColor(weight) {
const normalized = weight / 10;
return 'hsl(220, 70%, ' + (80 - normalized * 40) + '%)';
}
function draw() {
ctx.clearRect(0, 0, width, height);
for (const edge of edges) {
const lineWidth = 1 + (edge.closeness_weight / 10) * 3;
ctx.strokeStyle = getEdgeColor(edge.closeness_weight);
ctx.lineWidth = lineWidth;
ctx.beginPath();
ctx.moveTo(edge.sourceNode.x, edge.sourceNode.y);
ctx.lineTo(edge.targetNode.x, edge.targetNode.y);
ctx.stroke();
const midX = (edge.sourceNode.x + edge.targetNode.x) / 2;
const midY = (edge.sourceNode.y + edge.targetNode.y) / 2;
ctx.fillStyle = '#666';
ctx.font = '10px sans-serif';
ctx.textAlign = 'center';
const label = RELATIONSHIP_DISPLAY[edge.relationship_type] || edge.relationship_type;
ctx.fillText(label, midX, midY - 5);
}
for (const node of nodes) {
const isSelected = node === selectedNode;
const radius = isSelected ? 25 : 20;
ctx.beginPath();
ctx.arc(node.x, node.y, radius, 0, Math.PI * 2);
ctx.fillStyle = isSelected ? '#0066cc' : '#fff';
ctx.fill();
ctx.strokeStyle = '#0066cc';
ctx.lineWidth = 2;
ctx.stroke();
ctx.fillStyle = isSelected ? '#fff' : '#333';
ctx.font = '12px sans-serif';
ctx.textAlign = 'center';
ctx.textBaseline = 'middle';
const name = node.name.length > 10 ? node.name.slice(0, 9) + '\u2026' : node.name;
ctx.fillText(name, node.x, node.y);
}
}
function animate() {
simulate();
draw();
requestAnimationFrame(animate);
}
animate();
function getNodeAt(x, y) {
for (const node of nodes) {
const dx = x - node.x;
const dy = y - node.y;
if (dx * dx + dy * dy < 400) return node;
}
return null;
}
canvas.addEventListener('mousedown', function(event) {
const rect = canvas.getBoundingClientRect();
const node = getNodeAt(event.clientX - rect.left, event.clientY - rect.top);
if (node) {
dragNode = node;
selectedNode = node;
const infoDiv = document.getElementById('selected-info');
let html = '<div class="selected-info"><h3>' + node.name + '</h3>';
if (node.current_job) html += '<p>Job: ' + node.current_job + '</p>';
html += '<a href="/contacts/' + node.id + '">View details</a></div>';
infoDiv.innerHTML = html;
}
});
canvas.addEventListener('mousemove', function(event) {
if (!dragNode) return;
const rect = canvas.getBoundingClientRect();
dragNode.x = event.clientX - rect.left;
dragNode.y = event.clientY - rect.top;
});
canvas.addEventListener('mouseup', function() { dragNode = null; });
canvas.addEventListener('mouseleave', function() { dragNode = null; });
})();
</script>
{% endblock %}
-31
View File
@@ -1,31 +0,0 @@
{% extends "base.html" %}
{% block title %}Needs{% endblock %}
{% block content %}
<div class="need-list">
<div class="header">
<h1>Needs / Accommodations</h1>
<button class="btn btn-primary" onclick="document.getElementById('need-form').toggleAttribute('hidden')">Add Need</button>
</div>
<form id="need-form" hidden
hx-post="/htmx/needs"
hx-target="#need-items"
hx-swap="innerHTML"
hx-on::after-request="if(event.detail.successful) this.reset()"
class="need-form">
<div class="form-group">
<label for="name">Name *</label>
<input id="name" name="name" type="text" placeholder="e.g., Light Sensitive, ADHD" required>
</div>
<div class="form-group">
<label for="description">Description</label>
<textarea id="description" name="description" placeholder="Optional description..." rows="2"></textarea>
</div>
<button type="submit" class="btn btn-primary">Create</button>
</form>
<div id="need-items">
{% include "partials/need_items.html" %}
</div>
</div>
{% endblock %}
@@ -1,33 +0,0 @@
{% if contacts %}
<table>
<thead>
<tr>
<th>Name</th>
<th>Job</th>
<th>Timezone</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
{% for contact in contacts %}
<tr id="contact-row-{{ contact.id }}">
<td><a href="/contacts/{{ contact.id }}">{{ contact.name }}</a></td>
<td>{{ contact.current_job or "-" }}</td>
<td>{{ contact.timezone or "-" }}</td>
<td>
<a href="/contacts/{{ contact.id }}/edit" class="btn">Edit</a>
<button class="btn btn-danger"
hx-delete="/api/contacts/{{ contact.id }}"
hx-target="#contact-row-{{ contact.id }}"
hx-swap="outerHTML"
hx-confirm="Delete this contact?">
Delete
</button>
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<p>No contacts yet.</p>
{% endif %}
@@ -1,14 +0,0 @@
<ul class="manage-needs-list">
{% for need in contact.needs %}
<li id="contact-need-{{ need.id }}">
<strong>{{ need.name }}</strong>
{% if need.description %}<span> - {{ need.description }}</span>{% endif %}
<button class="btn btn-small btn-danger"
hx-delete="/api/contacts/{{ contact.id }}/needs/{{ need.id }}"
hx-target="#contact-need-{{ need.id }}"
hx-swap="outerHTML">
Remove
</button>
</li>
{% endfor %}
</ul>
@@ -1,23 +0,0 @@
{% for rel in contact.related_to %}
<div class="manage-rel-item" id="rel-{{ contact.id }}-{{ rel.related_contact_id }}">
<a href="/contacts/{{ rel.related_contact_id }}">{{ contact_names[rel.related_contact_id] }}</a>
<span class="tag">{{ rel.relationship_type|replace("_", " ")|title }}</span>
<label class="weight-control">
<span>Closeness:</span>
<input type="range" min="1" max="10" value="{{ rel.closeness_weight }}"
hx-post="/htmx/contacts/{{ contact.id }}/relationships/{{ rel.related_contact_id }}/weight"
hx-trigger="change"
hx-include="this"
name="closeness_weight"
hx-swap="none"
oninput="this.nextElementSibling.textContent = this.value">
<span class="weight-value">{{ rel.closeness_weight }}</span>
</label>
<button class="btn btn-small btn-danger"
hx-delete="/api/contacts/{{ contact.id }}/relationships/{{ rel.related_contact_id }}"
hx-target="#rel-{{ contact.id }}-{{ rel.related_contact_id }}"
hx-swap="outerHTML">
Remove
</button>
</div>
{% endfor %}
@@ -1,21 +0,0 @@
{% if needs %}
<ul class="need-items">
{% for need in needs %}
<li id="need-item-{{ need.id }}">
<div class="need-info">
<strong>{{ need.name }}</strong>
{% if need.description %}<p>{{ need.description }}</p>{% endif %}
</div>
<button class="btn btn-danger"
hx-delete="/api/needs/{{ need.id }}"
hx-target="#need-item-{{ need.id }}"
hx-swap="outerHTML"
hx-confirm="Delete this need?">
Delete
</button>
</li>
{% endfor %}
</ul>
{% else %}
<p>No needs defined yet.</p>
{% endif %}
-3
View File
@@ -1,3 +0,0 @@
"""Data science CLI tools."""
from __future__ import annotations
-613
View File
@@ -1,613 +0,0 @@
"""Ingestion pipeline for loading congress data from unitedstates/congress JSON files.
Loads legislators, bills, votes, vote records, and bill text into the data_science_dev database.
Expects the parent directory to contain congress-tracker/ and congress-legislators/ as siblings.
Usage:
ingest-congress /path/to/parent/
ingest-congress /path/to/parent/ --congress 118
ingest-congress /path/to/parent/ --congress 118 --only bills
"""
from __future__ import annotations
import logging
from pathlib import Path # noqa: TC003 needed at runtime for typer CLI argument
from typing import TYPE_CHECKING, Annotated
import orjson
import typer
import yaml
from sqlalchemy import select
from sqlalchemy.orm import Session
from python.common import configure_logger
from python.orm.common import get_postgres_engine
from python.orm.data_science_dev.congress import Bill, BillText, Legislator, LegislatorSocialMedia, Vote, VoteRecord
if TYPE_CHECKING:
from collections.abc import Iterator
from sqlalchemy.engine import Engine
logger = logging.getLogger(__name__)
BATCH_SIZE = 10_000
app = typer.Typer(help="Ingest unitedstates/congress data into data_science_dev.")
@app.command()
def main(
parent_dir: Annotated[
Path,
typer.Argument(help="Parent directory containing congress-tracker/ and congress-legislators/"),
],
congress: Annotated[int | None, typer.Option(help="Only ingest a specific congress number")] = None,
only: Annotated[
str | None,
typer.Option(help="Only run a specific step: legislators, social-media, bills, votes, bill-text"),
] = None,
) -> None:
"""Ingest congress data from unitedstates/congress JSON files."""
configure_logger(level="INFO")
data_dir = parent_dir / "congress-tracker/congress/data/"
legislators_dir = parent_dir / "congress-legislators"
if not data_dir.is_dir():
typer.echo(f"Expected congress-tracker/ directory: {data_dir}", err=True)
raise typer.Exit(code=1)
if not legislators_dir.is_dir():
typer.echo(f"Expected congress-legislators/ directory: {legislators_dir}", err=True)
raise typer.Exit(code=1)
engine = get_postgres_engine(name="DATA_SCIENCE_DEV")
congress_dirs = _resolve_congress_dirs(data_dir, congress)
if not congress_dirs:
typer.echo("No congress directories found.", err=True)
raise typer.Exit(code=1)
logger.info("Found %d congress directories to process", len(congress_dirs))
steps: dict[str, tuple] = {
"legislators": (ingest_legislators, (engine, legislators_dir)),
"legislators-social-media": (ingest_social_media, (engine, legislators_dir)),
"bills": (ingest_bills, (engine, congress_dirs)),
"votes": (ingest_votes, (engine, congress_dirs)),
"bill-text": (ingest_bill_text, (engine, congress_dirs)),
}
if only:
if only not in steps:
typer.echo(f"Unknown step: {only}. Choose from: {', '.join(steps)}", err=True)
raise typer.Exit(code=1)
steps = {only: steps[only]}
for step_name, (step_func, step_args) in steps.items():
logger.info("=== Starting step: %s ===", step_name)
step_func(*step_args)
logger.info("=== Finished step: %s ===", step_name)
logger.info("ingest-congress done")
def _resolve_congress_dirs(data_dir: Path, congress: int | None) -> list[Path]:
"""Find congress number directories under data_dir."""
if congress is not None:
target = data_dir / str(congress)
return [target] if target.is_dir() else []
return sorted(path for path in data_dir.iterdir() if path.is_dir() and path.name.isdigit())
def _flush_batch(session: Session, batch: list[object], label: str) -> int:
"""Add a batch of ORM objects to the session and commit. Returns count added."""
if not batch:
return 0
session.add_all(batch)
session.commit()
count = len(batch)
logger.info("Committed %d %s", count, label)
batch.clear()
return count
# ---------------------------------------------------------------------------
# Legislators — loaded from congress-legislators YAML files
# ---------------------------------------------------------------------------
def ingest_legislators(engine: Engine, legislators_dir: Path) -> None:
"""Load legislators from congress-legislators YAML files."""
legislators_data = _load_legislators_yaml(legislators_dir)
logger.info("Loaded %d legislators from YAML files", len(legislators_data))
with Session(engine) as session:
existing_legislators = {
legislator.bioguide_id: legislator for legislator in session.scalars(select(Legislator)).all()
}
logger.info("Found %d existing legislators in DB", len(existing_legislators))
total_inserted = 0
total_updated = 0
for entry in legislators_data:
bioguide_id = entry.get("id", {}).get("bioguide")
if not bioguide_id:
continue
fields = _parse_legislator(entry)
if existing := existing_legislators.get(bioguide_id):
changed = False
for field, value in fields.items():
if value is not None and getattr(existing, field) != value:
setattr(existing, field, value)
changed = True
if changed:
total_updated += 1
else:
session.add(Legislator(bioguide_id=bioguide_id, **fields))
total_inserted += 1
session.commit()
logger.info("Inserted %d new legislators, updated %d existing", total_inserted, total_updated)
def _load_legislators_yaml(legislators_dir: Path) -> list[dict]:
"""Load and combine legislators-current.yaml and legislators-historical.yaml."""
legislators: list[dict] = []
for filename in ("legislators-current.yaml", "legislators-historical.yaml"):
path = legislators_dir / filename
if not path.exists():
logger.warning("Legislators file not found: %s", path)
continue
with path.open() as file:
data = yaml.safe_load(file)
if isinstance(data, list):
legislators.extend(data)
return legislators
def _parse_legislator(entry: dict) -> dict:
"""Extract Legislator fields from a congress-legislators YAML entry."""
ids = entry.get("id", {})
name = entry.get("name", {})
bio = entry.get("bio", {})
terms = entry.get("terms", [])
latest_term = terms[-1] if terms else {}
fec_ids = ids.get("fec")
fec_ids_joined = ",".join(fec_ids) if isinstance(fec_ids, list) else fec_ids
chamber = latest_term.get("type")
chamber_normalized = {"rep": "House", "sen": "Senate"}.get(chamber, chamber)
return {
"thomas_id": ids.get("thomas"),
"lis_id": ids.get("lis"),
"govtrack_id": ids.get("govtrack"),
"opensecrets_id": ids.get("opensecrets"),
"fec_ids": fec_ids_joined,
"first_name": name.get("first"),
"last_name": name.get("last"),
"official_full_name": name.get("official_full"),
"nickname": name.get("nickname"),
"birthday": bio.get("birthday"),
"gender": bio.get("gender"),
"current_party": latest_term.get("party"),
"current_state": latest_term.get("state"),
"current_district": latest_term.get("district"),
"current_chamber": chamber_normalized,
}
# ---------------------------------------------------------------------------
# Social Media — loaded from legislators-social-media.yaml
# ---------------------------------------------------------------------------
SOCIAL_MEDIA_PLATFORMS = {
"twitter": "https://twitter.com/{account}",
"facebook": "https://facebook.com/{account}",
"youtube": "https://youtube.com/{account}",
"instagram": "https://instagram.com/{account}",
"mastodon": None,
}
def ingest_social_media(engine: Engine, legislators_dir: Path) -> None:
"""Load social media accounts from legislators-social-media.yaml."""
social_media_path = legislators_dir / "legislators-social-media.yaml"
if not social_media_path.exists():
logger.warning("Social media file not found: %s", social_media_path)
return
with social_media_path.open() as file:
social_media_data = yaml.safe_load(file)
if not isinstance(social_media_data, list):
logger.warning("Unexpected format in %s", social_media_path)
return
logger.info("Loaded %d entries from legislators-social-media.yaml", len(social_media_data))
with Session(engine) as session:
legislator_map = _build_legislator_map(session)
existing_accounts = {
(account.legislator_id, account.platform)
for account in session.scalars(select(LegislatorSocialMedia)).all()
}
logger.info("Found %d existing social media accounts in DB", len(existing_accounts))
total_inserted = 0
total_updated = 0
for entry in social_media_data:
bioguide_id = entry.get("id", {}).get("bioguide")
if not bioguide_id:
continue
legislator_id = legislator_map.get(bioguide_id)
if legislator_id is None:
continue
social = entry.get("social", {})
for platform, url_template in SOCIAL_MEDIA_PLATFORMS.items():
account_name = social.get(platform)
if not account_name:
continue
url = url_template.format(account=account_name) if url_template else None
if (legislator_id, platform) in existing_accounts:
total_updated += 1
else:
session.add(
LegislatorSocialMedia(
legislator_id=legislator_id,
platform=platform,
account_name=str(account_name),
url=url,
source="https://github.com/unitedstates/congress-legislators",
)
)
existing_accounts.add((legislator_id, platform))
total_inserted += 1
session.commit()
logger.info("Inserted %d new social media accounts, updated %d existing", total_inserted, total_updated)
def _iter_voters(position_group: object) -> Iterator[dict]:
"""Yield voter dicts from a vote position group (handles list, single dict, or string)."""
if isinstance(position_group, dict):
yield position_group
elif isinstance(position_group, list):
for voter in position_group:
if isinstance(voter, dict):
yield voter
# ---------------------------------------------------------------------------
# Bills
# ---------------------------------------------------------------------------
def ingest_bills(engine: Engine, congress_dirs: list[Path]) -> None:
"""Load bill data.json files."""
with Session(engine) as session:
existing_bills = {(bill.congress, bill.bill_type, bill.number) for bill in session.scalars(select(Bill)).all()}
logger.info("Found %d existing bills in DB", len(existing_bills))
total_inserted = 0
batch: list[Bill] = []
for congress_dir in congress_dirs:
bills_dir = congress_dir / "bills"
if not bills_dir.is_dir():
continue
logger.info("Scanning bills from %s", congress_dir.name)
for bill_file in bills_dir.rglob("data.json"):
data = _read_json(bill_file)
if data is None:
continue
bill = _parse_bill(data, existing_bills)
if bill is not None:
batch.append(bill)
if len(batch) >= BATCH_SIZE:
total_inserted += _flush_batch(session, batch, "bills")
total_inserted += _flush_batch(session, batch, "bills")
logger.info("Inserted %d new bills total", total_inserted)
def _parse_bill(data: dict, existing_bills: set[tuple[int, str, int]]) -> Bill | None:
"""Parse a bill data.json dict into a Bill ORM object, skipping existing."""
raw_congress = data.get("congress")
bill_type = data.get("bill_type")
raw_number = data.get("number")
if raw_congress is None or bill_type is None or raw_number is None:
return None
congress = int(raw_congress)
number = int(raw_number)
if (congress, bill_type, number) in existing_bills:
return None
sponsor_bioguide = None
sponsor = data.get("sponsor")
if sponsor:
sponsor_bioguide = sponsor.get("bioguide_id")
return Bill(
congress=congress,
bill_type=bill_type,
number=number,
title=data.get("short_title") or data.get("official_title"),
title_short=data.get("short_title"),
official_title=data.get("official_title"),
status=data.get("status"),
status_at=data.get("status_at"),
sponsor_bioguide_id=sponsor_bioguide,
subjects_top_term=data.get("subjects_top_term"),
)
# ---------------------------------------------------------------------------
# Votes (and vote records)
# ---------------------------------------------------------------------------
def ingest_votes(engine: Engine, congress_dirs: list[Path]) -> None:
"""Load vote data.json files with their vote records."""
with Session(engine) as session:
legislator_map = _build_legislator_map(session)
logger.info("Loaded %d legislators into lookup map", len(legislator_map))
bill_map = _build_bill_map(session)
logger.info("Loaded %d bills into lookup map", len(bill_map))
existing_votes = {
(vote.congress, vote.chamber, vote.session, vote.number) for vote in session.scalars(select(Vote)).all()
}
logger.info("Found %d existing votes in DB", len(existing_votes))
total_inserted = 0
batch: list[Vote] = []
for congress_dir in congress_dirs:
votes_dir = congress_dir / "votes"
if not votes_dir.is_dir():
continue
logger.info("Scanning votes from %s", congress_dir.name)
for vote_file in votes_dir.rglob("data.json"):
data = _read_json(vote_file)
if data is None:
continue
vote = _parse_vote(data, legislator_map, bill_map, existing_votes)
if vote is not None:
batch.append(vote)
if len(batch) >= BATCH_SIZE:
total_inserted += _flush_batch(session, batch, "votes")
total_inserted += _flush_batch(session, batch, "votes")
logger.info("Inserted %d new votes total", total_inserted)
def _build_legislator_map(session: Session) -> dict[str, int]:
"""Build a mapping of bioguide_id -> legislator.id."""
return {legislator.bioguide_id: legislator.id for legislator in session.scalars(select(Legislator)).all()}
def _build_bill_map(session: Session) -> dict[tuple[int, str, int], int]:
"""Build a mapping of (congress, bill_type, number) -> bill.id."""
return {(bill.congress, bill.bill_type, bill.number): bill.id for bill in session.scalars(select(Bill)).all()}
def _parse_vote(
data: dict,
legislator_map: dict[str, int],
bill_map: dict[tuple[int, str, int], int],
existing_votes: set[tuple[int, str, int, int]],
) -> Vote | None:
"""Parse a vote data.json dict into a Vote ORM object with records."""
raw_congress = data.get("congress")
chamber = data.get("chamber")
raw_number = data.get("number")
vote_date = data.get("date")
if raw_congress is None or chamber is None or raw_number is None or vote_date is None:
return None
raw_session = data.get("session")
if raw_session is None:
return None
congress = int(raw_congress)
number = int(raw_number)
session_number = int(raw_session)
# Normalize chamber from "h"/"s" to "House"/"Senate"
chamber_normalized = {"h": "House", "s": "Senate"}.get(chamber, chamber)
if (congress, chamber_normalized, session_number, number) in existing_votes:
return None
# Resolve linked bill
bill_id = None
bill_ref = data.get("bill")
if bill_ref:
bill_key = (
int(bill_ref.get("congress", congress)),
bill_ref.get("type"),
int(bill_ref.get("number", 0)),
)
bill_id = bill_map.get(bill_key)
raw_votes = data.get("votes", {})
vote_counts = _count_votes(raw_votes)
vote_records = _build_vote_records(raw_votes, legislator_map)
return Vote(
congress=congress,
chamber=chamber_normalized,
session=session_number,
number=number,
vote_type=data.get("type"),
question=data.get("question"),
result=data.get("result"),
result_text=data.get("result_text"),
vote_date=vote_date[:10] if isinstance(vote_date, str) else vote_date,
bill_id=bill_id,
vote_records=vote_records,
**vote_counts,
)
def _count_votes(raw_votes: dict) -> dict[str, int]:
"""Count voters per position category, correctly handling dict and list formats."""
yea_count = 0
nay_count = 0
not_voting_count = 0
present_count = 0
for position, position_group in raw_votes.items():
voter_count = sum(1 for _ in _iter_voters(position_group))
if position in ("Yea", "Aye"):
yea_count += voter_count
elif position in ("Nay", "No"):
nay_count += voter_count
elif position == "Not Voting":
not_voting_count += voter_count
elif position == "Present":
present_count += voter_count
return {
"yea_count": yea_count,
"nay_count": nay_count,
"not_voting_count": not_voting_count,
"present_count": present_count,
}
def _build_vote_records(raw_votes: dict, legislator_map: dict[str, int]) -> list[VoteRecord]:
"""Build VoteRecord objects from raw vote data."""
records: list[VoteRecord] = []
for position, position_group in raw_votes.items():
for voter in _iter_voters(position_group):
bioguide_id = voter.get("id")
if not bioguide_id:
continue
legislator_id = legislator_map.get(bioguide_id)
if legislator_id is None:
continue
records.append(
VoteRecord(
legislator_id=legislator_id,
position=position,
)
)
return records
# ---------------------------------------------------------------------------
# Bill Text
# ---------------------------------------------------------------------------
def ingest_bill_text(engine: Engine, congress_dirs: list[Path]) -> None:
"""Load bill text from text-versions directories."""
with Session(engine) as session:
bill_map = _build_bill_map(session)
logger.info("Loaded %d bills into lookup map", len(bill_map))
existing_bill_texts = {
(bill_text.bill_id, bill_text.version_code) for bill_text in session.scalars(select(BillText)).all()
}
logger.info("Found %d existing bill text versions in DB", len(existing_bill_texts))
total_inserted = 0
batch: list[BillText] = []
for congress_dir in congress_dirs:
logger.info("Scanning bill texts from %s", congress_dir.name)
for bill_text in _iter_bill_texts(congress_dir, bill_map, existing_bill_texts):
batch.append(bill_text)
if len(batch) >= BATCH_SIZE:
total_inserted += _flush_batch(session, batch, "bill texts")
total_inserted += _flush_batch(session, batch, "bill texts")
logger.info("Inserted %d new bill text versions total", total_inserted)
def _iter_bill_texts(
congress_dir: Path,
bill_map: dict[tuple[int, str, int], int],
existing_bill_texts: set[tuple[int, str]],
) -> Iterator[BillText]:
"""Yield BillText objects for a single congress directory, skipping existing."""
bills_dir = congress_dir / "bills"
if not bills_dir.is_dir():
return
for bill_dir in bills_dir.rglob("text-versions"):
if not bill_dir.is_dir():
continue
bill_key = _bill_key_from_dir(bill_dir.parent, congress_dir)
if bill_key is None:
continue
bill_id = bill_map.get(bill_key)
if bill_id is None:
continue
for version_dir in sorted(bill_dir.iterdir()):
if not version_dir.is_dir():
continue
if (bill_id, version_dir.name) in existing_bill_texts:
continue
text_content = _read_bill_text(version_dir)
version_data = _read_json(version_dir / "data.json")
yield BillText(
bill_id=bill_id,
version_code=version_dir.name,
version_name=version_data.get("version_name") if version_data else None,
date=version_data.get("issued_on") if version_data else None,
text_content=text_content,
)
def _bill_key_from_dir(bill_dir: Path, congress_dir: Path) -> tuple[int, str, int] | None:
"""Extract (congress, bill_type, number) from directory structure."""
congress = int(congress_dir.name)
bill_type = bill_dir.parent.name
name = bill_dir.name
# Directory name is like "hr3590" — strip the type prefix to get the number
number_str = name[len(bill_type) :]
if not number_str.isdigit():
return None
return (congress, bill_type, int(number_str))
def _read_bill_text(version_dir: Path) -> str | None:
"""Read bill text from a version directory, preferring .txt over .xml."""
for extension in ("txt", "htm", "html", "xml"):
candidates = list(version_dir.glob(f"document.{extension}"))
if not candidates:
candidates = list(version_dir.glob(f"*.{extension}"))
if candidates:
try:
return candidates[0].read_text(encoding="utf-8")
except Exception:
logger.exception("Failed to read %s", candidates[0])
return None
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _read_json(path: Path) -> dict | None:
"""Read and parse a JSON file, returning None on failure."""
try:
return orjson.loads(path.read_bytes())
except FileNotFoundError:
return None
except Exception:
logger.exception("Failed to parse %s", path)
return None
if __name__ == "__main__":
app()
-247
View File
@@ -1,247 +0,0 @@
"""Ingestion pipeline for loading JSONL post files into the weekly-partitioned posts table.
Usage:
ingest-posts /path/to/files/
ingest-posts /path/to/single_file.jsonl
ingest-posts /data/dir/ --workers 4 --batch-size 5000
"""
from __future__ import annotations
import logging
from datetime import UTC, datetime
from pathlib import Path # noqa: TC003 this is needed for typer
from typing import TYPE_CHECKING, Annotated
import orjson
import psycopg
import typer
from python.common import configure_logger
from python.orm.common import get_connection_info
from python.parallelize import parallelize_process
if TYPE_CHECKING:
from collections.abc import Iterator
logger = logging.getLogger(__name__)
app = typer.Typer(help="Ingest JSONL post files into the partitioned posts table.")
@app.command()
def main(
path: Annotated[Path, typer.Argument(help="Directory containing JSONL files, or a single JSONL file")],
batch_size: Annotated[int, typer.Option(help="Rows per INSERT batch")] = 10000,
workers: Annotated[int, typer.Option(help="Parallel workers for multi-file ingestion")] = 4,
pattern: Annotated[str, typer.Option(help="Glob pattern for JSONL files")] = "*.jsonl",
) -> None:
"""Ingest JSONL post files into the weekly-partitioned posts table."""
configure_logger(level="INFO")
logger.info("starting ingest-posts")
logger.info("path=%s batch_size=%d workers=%d pattern=%s", path, batch_size, workers, pattern)
if path.is_file():
ingest_file(path, batch_size=batch_size)
elif path.is_dir():
ingest_directory(path, batch_size=batch_size, max_workers=workers, pattern=pattern)
else:
typer.echo(f"Path does not exist: {path}", err=True)
raise typer.Exit(code=1)
logger.info("ingest-posts done")
def ingest_directory(
directory: Path,
*,
batch_size: int,
max_workers: int,
pattern: str = "*.jsonl",
) -> None:
"""Ingest all JSONL files in a directory using parallel workers."""
files = sorted(directory.glob(pattern))
if not files:
logger.warning("No JSONL files found in %s", directory)
return
logger.info("Found %d JSONL files to ingest", len(files))
kwargs_list = [{"path": fp, "batch_size": batch_size} for fp in files]
parallelize_process(ingest_file, kwargs_list, max_workers=max_workers)
SCHEMA = "main"
COLUMNS = (
"post_id",
"user_id",
"instance",
"date",
"text",
"langs",
"like_count",
"reply_count",
"repost_count",
"reply_to",
"replied_author",
"thread_root",
"thread_root_author",
"repost_from",
"reposted_author",
"quotes",
"quoted_author",
"labels",
"sent_label",
"sent_score",
)
INSERT_FROM_STAGING = f"""
INSERT INTO {SCHEMA}.posts ({", ".join(COLUMNS)})
SELECT {", ".join(COLUMNS)} FROM pg_temp.staging
ON CONFLICT (post_id, date) DO NOTHING
""" # noqa: S608
FAILED_INSERT = f"""
INSERT INTO {SCHEMA}.failed_ingestion (raw_line, error)
VALUES (%(raw_line)s, %(error)s)
""" # noqa: S608
def get_psycopg_connection() -> psycopg.Connection:
"""Create a raw psycopg3 connection from environment variables."""
database, host, port, username, password = get_connection_info("DATA_SCIENCE_DEV")
return psycopg.connect(
dbname=database,
host=host,
port=int(port),
user=username,
password=password,
autocommit=False,
)
def ingest_file(path: Path, *, batch_size: int) -> None:
"""Ingest a single JSONL file into the posts table."""
log_trigger = max(100_000 // batch_size, 1)
failed_lines: list[dict] = []
try:
with get_psycopg_connection() as connection:
for index, batch in enumerate(read_jsonl_batches(path, batch_size, failed_lines), 1):
ingest_batch(connection, batch)
if index % log_trigger == 0:
logger.info("Ingested %d batches (%d rows) from %s", index, index * batch_size, path)
if failed_lines:
logger.warning("Recording %d malformed lines from %s", len(failed_lines), path.name)
with connection.cursor() as cursor:
cursor.executemany(FAILED_INSERT, failed_lines)
connection.commit()
except Exception:
logger.exception("Failed to ingest file: %s", path)
raise
def ingest_batch(connection: psycopg.Connection, batch: list[dict]) -> None:
"""COPY batch into a temp staging table, then INSERT ... ON CONFLICT into posts."""
if not batch:
return
try:
with connection.cursor() as cursor:
cursor.execute(f"""
CREATE TEMP TABLE IF NOT EXISTS staging
(LIKE {SCHEMA}.posts INCLUDING DEFAULTS)
ON COMMIT DELETE ROWS
""")
cursor.execute("TRUNCATE pg_temp.staging")
with cursor.copy(f"COPY pg_temp.staging ({', '.join(COLUMNS)}) FROM STDIN") as copy:
for row in batch:
copy.write_row(tuple(row.get(column) for column in COLUMNS))
cursor.execute(INSERT_FROM_STAGING)
connection.commit()
except Exception as error:
connection.rollback()
if len(batch) == 1:
logger.exception("Skipping bad row post_id=%s", batch[0].get("post_id"))
with connection.cursor() as cursor:
cursor.execute(
FAILED_INSERT,
{
"raw_line": orjson.dumps(batch[0], default=str).decode(),
"error": str(error),
},
)
connection.commit()
return
midpoint = len(batch) // 2
ingest_batch(connection, batch[:midpoint])
ingest_batch(connection, batch[midpoint:])
def read_jsonl_batches(file_path: Path, batch_size: int, failed_lines: list[dict]) -> Iterator[list[dict]]:
"""Stream a JSONL file and yield batches of transformed rows."""
batch: list[dict] = []
with file_path.open("r", encoding="utf-8") as handle:
for raw_line in handle:
line = raw_line.strip()
if not line:
continue
batch.extend(parse_line(line, file_path, failed_lines))
if len(batch) >= batch_size:
yield batch
batch = []
if batch:
yield batch
def parse_line(line: str, file_path: Path, failed_lines: list[dict]) -> Iterator[dict]:
"""Parse a JSONL line, handling concatenated JSON objects."""
try:
yield transform_row(orjson.loads(line))
except orjson.JSONDecodeError:
if "}{" not in line:
logger.warning("Skipping malformed line in %s: %s", file_path.name, line[:120])
failed_lines.append({"raw_line": line, "error": "malformed JSON"})
return
fragments = line.replace("}{", "}\n{").split("\n")
for fragment in fragments:
try:
yield transform_row(orjson.loads(fragment))
except (orjson.JSONDecodeError, KeyError, ValueError) as error:
logger.warning("Skipping malformed fragment in %s: %s", file_path.name, fragment[:120])
failed_lines.append({"raw_line": fragment, "error": str(error)})
except Exception as error:
logger.exception("Skipping bad row in %s: %s", file_path.name, line[:120])
failed_lines.append({"raw_line": line, "error": str(error)})
def transform_row(raw: dict) -> dict:
"""Transform a raw JSONL row into a dict matching the Posts table columns."""
raw["date"] = parse_date(raw["date"])
if raw.get("langs") is not None:
raw["langs"] = orjson.dumps(raw["langs"])
if raw.get("text") is not None:
raw["text"] = raw["text"].replace("\x00", "")
return raw
def parse_date(raw_date: int) -> datetime:
"""Parse compact YYYYMMDDHHmm integer into a naive datetime (input is UTC by spec)."""
return datetime(
raw_date // 100000000,
(raw_date // 1000000) % 100,
(raw_date // 10000) % 100,
(raw_date // 100) % 100,
raw_date % 100,
tzinfo=UTC,
)
if __name__ == "__main__":
app()
-59
View File
@@ -1,59 +0,0 @@
"""database."""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from sqlalchemy import inspect
from sqlalchemy.exc import NoInspectionAvailable
if TYPE_CHECKING:
from collections.abc import Sequence
from sqlalchemy.orm import Session
logger = logging.getLogger(__name__)
def safe_insert(orm_objects: Sequence[object], session: Session) -> list[tuple[Exception, object]]:
"""Safer insert at allows for partial rollbacks.
Args:
orm_objects (Sequence[object]): Tables to insert.
session (Session): Database session.
"""
if unmapped := [orm_object for orm_object in orm_objects if not _is_mapped_instance(orm_object)]:
error = f"safe_insert expects ORM-mapped instances {unmapped}"
raise TypeError(error)
return _safe_insert(orm_objects, session)
def _safe_insert(objects: Sequence[object], session: Session) -> list[tuple[Exception, object]]:
exceptions: list[tuple[Exception, object]] = []
try:
session.add_all(objects)
session.commit()
except Exception as error:
session.rollback()
objects_len = len(objects)
if objects_len == 1:
logger.exception(objects)
return [(error, objects[0])]
middle = objects_len // 2
exceptions.extend(_safe_insert(objects=objects[:middle], session=session))
exceptions.extend(_safe_insert(objects=objects[middle:], session=session))
return exceptions
def _is_mapped_instance(obj: object) -> bool:
"""Return True if `obj` is a SQLAlchemy ORM-mapped instance."""
try:
inspect(obj) # raises NoInspectionAvailable if not mapped
except NoInspectionAvailable:
return False
else:
return True
-129
View File
@@ -1,129 +0,0 @@
"""CLI wrapper around alembic for multi-database support.
Usage:
database <db_name> <command> [args...]
Examples:
database van_inventory upgrade head
database van_inventory downgrade head-1
database van_inventory revision --autogenerate -m "add meals table"
database van_inventory check
database richie check
database richie upgrade head
"""
from __future__ import annotations
from dataclasses import dataclass
from importlib import import_module
from typing import TYPE_CHECKING, Annotated
import typer
from alembic.config import CommandLine, Config
if TYPE_CHECKING:
from sqlalchemy.orm import DeclarativeBase
@dataclass(frozen=True)
class DatabaseConfig:
"""Configuration for a database."""
env_prefix: str
version_location: str
base_module: str
base_class_name: str
models_module: str
script_location: str = "python/alembic"
file_template: str = "%%(year)d_%%(month).2d_%%(day).2d-%%(slug)s_%%(rev)s"
def get_base(self) -> type[DeclarativeBase]:
"""Import and return the Base class."""
module = import_module(self.base_module)
return getattr(module, self.base_class_name)
def import_models(self) -> None:
"""Import ORM models so alembic autogenerate can detect them."""
import_module(self.models_module)
def alembic_config(self) -> Config:
"""Build an alembic Config for this database."""
# Runtime import needed — Config is in TYPE_CHECKING for the return type annotation
from alembic.config import Config as AlembicConfig # noqa: PLC0415
cfg = AlembicConfig()
cfg.set_main_option("script_location", self.script_location)
cfg.set_main_option("file_template", self.file_template)
cfg.set_main_option("prepend_sys_path", ".")
cfg.set_main_option("version_path_separator", "os")
cfg.set_main_option("version_locations", self.version_location)
cfg.set_main_option("revision_environment", "true")
cfg.set_section_option("post_write_hooks", "hooks", "dynamic_schema,import_postgresql,ruff")
cfg.set_section_option("post_write_hooks", "dynamic_schema.type", "dynamic_schema")
cfg.set_section_option("post_write_hooks", "import_postgresql.type", "import_postgresql")
cfg.set_section_option("post_write_hooks", "ruff.type", "ruff")
cfg.attributes["base"] = self.get_base()
cfg.attributes["env_prefix"] = self.env_prefix
self.import_models()
return cfg
DATABASES: dict[str, DatabaseConfig] = {
"richie": DatabaseConfig(
env_prefix="RICHIE",
version_location="python/alembic/richie/versions",
base_module="python.orm.richie.base",
base_class_name="RichieBase",
models_module="python.orm.richie",
),
"van_inventory": DatabaseConfig(
env_prefix="VAN_INVENTORY",
version_location="python/alembic/van_inventory/versions",
base_module="python.orm.van_inventory.base",
base_class_name="VanInventoryBase",
models_module="python.orm.van_inventory.models",
),
"signal_bot": DatabaseConfig(
env_prefix="SIGNALBOT",
version_location="python/alembic/signal_bot/versions",
base_module="python.orm.signal_bot.base",
base_class_name="SignalBotBase",
models_module="python.orm.signal_bot.models",
),
"data_science_dev": DatabaseConfig(
env_prefix="DATA_SCIENCE_DEV",
version_location="python/alembic/data_science_dev/versions",
base_module="python.orm.data_science_dev.base",
base_class_name="DataScienceDevBase",
models_module="python.orm.data_science_dev.models",
),
}
app = typer.Typer(help="Multi-database alembic wrapper.")
@app.command(
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
def main(
ctx: typer.Context,
db_name: Annotated[str, typer.Argument(help=f"Database name. Options: {', '.join(DATABASES)}")],
command: Annotated[str, typer.Argument(help="Alembic command (upgrade, downgrade, revision, check, etc.)")],
) -> None:
"""Run an alembic command against the specified database."""
db_config = DATABASES.get(db_name)
if not db_config:
typer.echo(f"Unknown database: {db_name!r}. Available: {', '.join(DATABASES)}", err=True)
raise typer.Exit(code=1)
alembic_cfg = db_config.alembic_config()
cmd_line = CommandLine()
options = cmd_line.parser.parse_args([command, *ctx.args])
cmd_line.run_cmd(alembic_cfg, options)
if __name__ == "__main__":
app()
-1
View File
@@ -1 +0,0 @@
"""EPUB search package."""
-57
View File
@@ -1,57 +0,0 @@
"""Grounded answer generation."""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from python.ebook_search.llm_interface import request_chat_completion
if TYPE_CHECKING:
from python.ebook_search.config import EbookSearchConfig
from python.ebook_search.search import SearchResult
logger = logging.getLogger(__name__)
def answer_query(query: str, results: list[SearchResult], config: EbookSearchConfig) -> str:
"""Answer a question using only retrieved chunks."""
if not config.answer_enabled:
logger.info("ebook_answer_skipped_disabled")
return "Answer generation is disabled. Source chunks are shown below."
if not results:
logger.info("ebook_answer_skipped_no_results")
return "No relevant sources were found."
logger.info(
"ebook_answer_request_start base_url=%s model=%s sources=%s query_length=%s",
config.vllm_base_url,
config.chat_model,
len(results),
len(query),
)
context = "\n\n".join(
f"[{index}] {result.source_title}{' - ' + result.chapter_title if result.chapter_title else ''}\n{result.text}"
for index, result in enumerate(results, start=1)
)
content = request_chat_completion(
config,
[
{
"role": "system",
"content": (
"Answer only from the provided context. Cite sources with bracketed numbers like [1]. "
"If the context is insufficient, say so."
),
},
{"role": "user", "content": f"Question:\n{query}\n\nContext:\n{context}"},
],
)
logger.info(
"ebook_answer_request_complete model=%s answer_length=%s",
config.chat_model,
len(content),
)
return content or "The model returned an empty answer."
-1
View File
@@ -1 +0,0 @@
"""Web and external API adapters for EPUB search."""
-60
View File
@@ -1,60 +0,0 @@
"""Background BM25 refresh tasks for the web app."""
from __future__ import annotations
import logging
from threading import Timer
from typing import TYPE_CHECKING
from sqlalchemy.orm import Session
from python.ebook_search.bm25_corpus import load_bm25_corpus, refresh_bm25_corpus
if TYPE_CHECKING:
from fastapi import FastAPI
from sqlalchemy.engine import Engine
from python.ebook_search.config import EbookSearchConfig
logger = logging.getLogger(__name__)
def schedule_bm25_refresh(app: FastAPI) -> None:
"""Schedule a delayed BM25 corpus refresh, replacing any pending refresh."""
existing_timer = getattr(app.state, "bm25_refresh_timer", None)
if existing_timer is not None:
existing_timer.cancel()
timer = Timer(app.state.config.bm25_refresh_delay_seconds, refresh_bm25_for_app, args=(app,))
timer.daemon = True
timer.start()
app.state.bm25_refresh_timer = timer
logger.info(
"ebook_bm25_refresh_scheduled delay_seconds=%s",
app.state.config.bm25_refresh_delay_seconds,
)
def cancel_bm25_refresh(app: FastAPI) -> None:
"""Cancel any pending BM25 corpus refresh."""
existing_timer = getattr(app.state, "bm25_refresh_timer", None)
if existing_timer is not None:
existing_timer.cancel()
app.state.bm25_refresh_timer = None
logger.info("ebook_bm25_refresh_cancelled")
def refresh_bm25_for_app(app: FastAPI) -> None:
"""Refresh the BM25 corpus using the app engine and config."""
try:
refresh_bm25_for_engine(app.state.engine, app.state.config)
except Exception:
logger.exception("ebook_bm25_refresh_failed")
def refresh_bm25_for_engine(engine: Engine, config: EbookSearchConfig) -> None:
"""Refresh the BM25 corpus using a SQLAlchemy engine."""
with Session(engine) as session:
refresh_bm25_corpus(session, config)
load_bm25_corpus.cache_clear()
logger.info("ebook_bm25_corpus_cache_cleared_after_refresh")
-77
View File
@@ -1,77 +0,0 @@
"""FastAPI HTMX app for EPUB search."""
from __future__ import annotations
import logging
from contextlib import asynccontextmanager
from typing import TYPE_CHECKING, Annotated
import typer
import uvicorn
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from sqlalchemy.orm import Session
from python.common import configure_logger
from python.ebook_search.api.bm25_tasks import cancel_bm25_refresh
from python.ebook_search.api.routes import admin_router, page_router, search_router
from python.ebook_search.api.web import STATIC_DIR
from python.ebook_search.bm25_corpus import ensure_bm25_corpus
from python.ebook_search.config import load_config
from python.orm.common import get_postgres_engine
if TYPE_CHECKING:
from collections.abc import AsyncIterator
logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
"""Manage application startup and shutdown resources."""
logger.info("ebook_search_startup")
app.state.engine = get_postgres_engine(name="RICHIE", vector_engine=True)
with Session(app.state.engine) as session:
ensure_bm25_corpus(session, app.state.config)
try:
yield
finally:
logger.info("ebook_search_shutdown")
cancel_bm25_refresh(app)
app.state.engine.dispose()
def create_app() -> FastAPI:
"""Create the EPUB search web app."""
app = FastAPI(title="EPUB Search", lifespan=lifespan)
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
app.state.config = load_config()
logger.info(
"ebook_search_config_loaded top_k=%s embedding_model=%s rerank_enabled=%s answer_enabled=%s library_paths=%s",
app.state.config.top_k,
app.state.config.embedding_model,
app.state.config.rerank.enabled,
app.state.config.answer_enabled,
len(app.state.config.library_paths),
)
app.include_router(admin_router)
app.include_router(page_router)
app.include_router(search_router)
return app
def serve(
host: Annotated[str, typer.Option("--host", "-h", help="Host to bind to")] = "127.0.0.1",
port: Annotated[int, typer.Option("--port", "-p", help="Port to bind to")] = 8070,
log_level: Annotated[str, typer.Option("--log-level", "-l", help="Log level")] = "INFO",
) -> None:
"""Start the EPUB search server."""
configure_logger(log_level)
uvicorn.run(create_app(), host=host, port=port)
if __name__ == "__main__":
typer.run(serve)
@@ -1,11 +0,0 @@
"""EPUB search web route modules."""
from python.ebook_search.api.routes.admin import router as admin_router
from python.ebook_search.api.routes.page import router as page_router
from python.ebook_search.api.routes.search import router as search_router
__all__ = [
"admin_router",
"page_router",
"search_router",
]
-107
View File
@@ -1,107 +0,0 @@
"""Admin routes for the EPUB search web UI."""
from __future__ import annotations
import logging
from dataclasses import replace
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from sqlalchemy.orm import Session
from python.ebook_search.api.bm25_tasks import schedule_bm25_refresh
from python.ebook_search.api.web import templates
from python.ebook_search.embeddings import embed_missing_chunks, embedding_model_stats
from python.ebook_search.ingest import ingest_configured_paths
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/admin")
EMBED_ALL_BATCH_SIZE = 32
@router.get("", response_class=HTMLResponse)
def admin(request: Request) -> HTMLResponse:
"""Render the admin page."""
with Session(request.app.state.engine) as session:
stats = embedding_model_stats(session)
logger.info("ebook_admin_page_loaded models=%s", len(stats))
return templates.TemplateResponse(request, "admin.html", {"config": request.app.state.config, "stats": stats})
@router.post("/scan", response_class=HTMLResponse)
def scan_library(request: Request) -> HTMLResponse:
"""Scan configured library paths for EPUB changes."""
try:
with Session(request.app.state.engine) as session:
count = ingest_configured_paths(session, request.app.state.config)
session.commit()
except Exception as error:
logger.exception("ebook_admin_scan_failed")
return templates.TemplateResponse(request, "partials/error.html", {"message": str(error)}, status_code=500)
logger.info("ebook_admin_scan_complete changed_files=%s", count)
if count > 0:
schedule_bm25_refresh(request.app)
return templates.TemplateResponse(request, "partials/admin_status.html", {"message": f"Indexed {count} EPUBs"})
@router.post("/embed-missing", response_class=HTMLResponse)
def embed_missing(request: Request) -> HTMLResponse:
"""Embed chunks missing vectors for the configured model."""
try:
with Session(request.app.state.engine) as session:
count = embed_missing_chunks(session, request.app.state.config)
session.commit()
except Exception as error:
logger.exception("ebook_admin_embed_missing_failed")
return templates.TemplateResponse(request, "partials/error.html", {"message": str(error)}, status_code=500)
logger.info("ebook_admin_embed_missing_complete chunks=%s", count)
return templates.TemplateResponse(
request,
"partials/admin_status.html",
{"message": f"Embedded {count} chunks"},
)
@router.post("/embed-all", response_class=HTMLResponse)
def embed_all(request: Request) -> HTMLResponse:
"""Embed all chunks missing vectors in fixed-size batches."""
total = 0
batches = 0
config = replace(request.app.state.config, embedding_batch_size=EMBED_ALL_BATCH_SIZE)
try:
with Session(request.app.state.engine) as session:
while True:
count = embed_missing_chunks(session, config)
if count == 0:
break
session.commit()
total += count
batches += 1
logger.info(
"ebook_admin_embed_all_batch_complete batch=%s chunks=%s total_chunks=%s",
batches,
count,
total,
)
except Exception as error:
logger.exception(
"ebook_admin_embed_all_failed batches=%s chunks=%s",
batches,
total,
)
return templates.TemplateResponse(
request,
"partials/error.html",
{"message": f"Embed all failed after {total} chunks in {batches} batches: {error}"},
status_code=500,
)
logger.info("ebook_admin_embed_all_complete batches=%s chunks=%s", batches, total)
return templates.TemplateResponse(
request,
"partials/admin_status.html",
{"message": f"Embedded {total} chunks in {batches} batches of {EMBED_ALL_BATCH_SIZE}"},
)
-57
View File
@@ -1,57 +0,0 @@
"""Page routes for the EPUB search web UI."""
from __future__ import annotations
import logging
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from sqlalchemy import select
from sqlalchemy.orm import Session
from python.ebook_search.api.web import templates
from python.orm.richie import EbookSource
logger = logging.getLogger(__name__)
router = APIRouter()
@router.get("/", response_class=HTMLResponse)
def index(request: Request) -> HTMLResponse:
"""Render the search page."""
return templates.TemplateResponse(request, "search.html", {"config": request.app.state.config})
@router.get("/books", response_class=HTMLResponse)
def books(request: Request) -> HTMLResponse:
"""Render the indexed books page."""
with Session(request.app.state.engine) as session:
sources = list(session.scalars(select(EbookSource).order_by(EbookSource.title)).all())
logger.info("ebook_books_page_loaded count=%s", len(sources))
return templates.TemplateResponse(request, "books.html", {"sources": sources})
@router.get("/books/{source_id}", response_class=HTMLResponse)
def book_detail(source_id: int, request: Request) -> HTMLResponse:
"""Render details for one indexed book."""
with Session(request.app.state.engine) as session:
source = session.get(EbookSource, source_id)
if source is not None:
chapter_count = len(source.chapters)
chunk_count = len(source.chunks)
else:
chapter_count = 0
chunk_count = 0
logger.info(
"ebook_book_detail_loaded source_id=%s found=%s chapters=%s chunks=%s",
source_id,
source is not None,
chapter_count,
chunk_count,
)
return templates.TemplateResponse(
request,
"book_detail.html",
{"chapter_count": chapter_count, "chunk_count": chunk_count, "source": source},
)
-58
View File
@@ -1,58 +0,0 @@
"""Search routes for the EPUB search web UI."""
from __future__ import annotations
import logging
from dataclasses import replace
from time import perf_counter
from typing import Annotated
from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse
from python.ebook_search.answer import answer_query
from python.ebook_search.api.web import templates
from python.ebook_search.search import search_ebooks
from python.ebook_search.timing import runtime_step_from_start
logger = logging.getLogger(__name__)
router = APIRouter()
@router.post("/search", response_class=HTMLResponse)
def search(
request: Request,
query: Annotated[str, Form()],
rerank: Annotated[str | None, Form()] = None,
) -> HTMLResponse:
"""Run a search and render HTMX results."""
try:
response = search_ebooks(request.app.state.engine, query, request.app.state.config, rerank=rerank == "true")
except Exception as error:
logger.exception("ebook_search_request_failed")
return templates.TemplateResponse(request, "partials/error.html", {"message": str(error)}, status_code=500)
answer_start = perf_counter()
if request.app.state.config.answer_enabled:
try:
answer = answer_query(query, response.results, request.app.state.config)
except RuntimeError as error:
logger.warning("ebook_answer_request_failed_falling_back error=%s", error)
answer = "Answer generation failed. Source chunks are still shown below."
else:
logger.info("ebook_answer_skipped_disabled")
answer = "Answer generation is disabled. Source chunks are shown below."
answer_step_name = "Answer generation" if request.app.state.config.answer_enabled else "Answer skipped"
response = replace(
response,
timings=(*response.timings, runtime_step_from_start(answer_step_name, answer_start)),
)
logger.info(
"ebook_search_request_complete results=%s rank_label=%s runtime_ms=%.1f",
len(response.results),
response.rank_label,
response.total_runtime_ms,
)
return templates.TemplateResponse(request, "partials/results.html", {"answer": answer, "response": response})
-140
View File
@@ -1,140 +0,0 @@
body {
margin: 0;
background: #f7f7f4;
color: #202124;
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
}
main {
max-width: 960px;
margin: 0 auto;
padding: 24px;
}
nav {
display: flex;
gap: 12px;
align-items: center;
margin-bottom: 20px;
}
nav form {
margin: 0;
}
.actions {
display: flex;
flex-wrap: wrap;
gap: 12px;
margin-bottom: 24px;
}
textarea {
display: block;
width: 100%;
margin: 8px 0 12px;
}
button {
padding: 8px 14px;
}
.check {
display: inline-flex;
gap: 8px;
align-items: center;
margin-right: 12px;
}
.rank-label {
margin-top: 24px;
font-weight: 700;
}
.results {
padding-left: 24px;
}
.meta,
.scores,
.status {
color: #626a73;
}
.scores {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin: 12px 0;
}
.scores div {
display: inline-flex;
gap: 4px;
align-items: baseline;
}
.scores dt {
font-weight: 700;
}
.scores dd {
margin: 0;
}
.runtime {
margin-top: 16px;
}
.timing-chart {
display: grid;
gap: 8px;
padding: 0;
list-style: none;
}
.timing-chart li {
display: grid;
grid-template-columns: minmax(150px, 1fr) minmax(160px, 2fr) auto auto;
gap: 8px;
align-items: center;
}
.timing-bar {
height: 10px;
overflow: hidden;
background: #e5e5df;
}
.timing-bar span {
display: block;
height: 100%;
background: #3767c8;
}
.timing-value,
.timing-remaining {
color: #626a73;
font-variant-numeric: tabular-nums;
}
table {
width: 100%;
border-collapse: collapse;
}
th,
td {
padding: 8px;
border-bottom: 1px solid #d8d8d2;
text-align: left;
}
th {
font-weight: 700;
}
.error {
color: #9f1d20;
font-weight: 700;
}
@@ -1,57 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>EPUB Admin</title>
<script src="https://unpkg.com/htmx.org@2.0.4"></script>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<main>
<nav>
<a href="/">Search</a>
<a href="/books">Books</a>
<a href="/admin">Admin</a>
</nav>
<h1>Admin</h1>
<section id="admin-status"></section>
<section class="actions">
<form hx-post="/admin/scan" hx-target="#admin-status" hx-swap="innerHTML">
<button type="submit">Scan</button>
</form>
<form hx-post="/admin/embed-missing" hx-target="#admin-status" hx-swap="innerHTML">
<button type="submit">Embed</button>
</form>
<form hx-post="/admin/embed-all" hx-target="#admin-status" hx-swap="innerHTML">
<button type="submit">Embed all</button>
</form>
</section>
<section>
<h2>Embeddings</h2>
<table>
<thead>
<tr>
<th>Model</th>
<th>Dimensions</th>
<th>Embedded</th>
<th>Missing</th>
<th>Total chunks</th>
</tr>
</thead>
<tbody>
{% for item in stats %}
<tr>
<td>{{ item.model_name }}</td>
<td>{{ item.dimension }}</td>
<td>{{ item.embedded_chunks }}</td>
<td>{{ item.missing_chunks }}</td>
<td>{{ item.total_chunks }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</section>
</main>
</body>
</html>
@@ -1,32 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% if source %}{{ source.title }}{% else %}Book not found{% endif %}</title>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<main>
<nav>
<a href="/">Search</a>
<a href="/books">Books</a>
<a href="/admin">Admin</a>
</nav>
{% if source %}
<h1>{{ source.title }}</h1>
<p class="meta">{{ source.author or "Unknown author" }}</p>
<dl>
<dt>File</dt>
<dd>{{ source.file_path }}</dd>
<dt>Chapters</dt>
<dd>{{ chapter_count }}</dd>
<dt>Chunks</dt>
<dd>{{ chunk_count }}</dd>
</dl>
{% else %}
<h1>Book not found</h1>
{% endif %}
</main>
</body>
</html>
@@ -1,31 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>EPUB Books</title>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<main>
<nav>
<a href="/">Search</a>
<a href="/books">Books</a>
<a href="/admin">Admin</a>
</nav>
<h1>Books</h1>
{% if sources %}
<ol class="results">
{% for source in sources %}
<li>
<h2><a href="/books/{{ source.id }}">{{ source.title }}</a></h2>
<p class="meta">{{ source.author or "Unknown author" }}</p>
</li>
{% endfor %}
</ol>
{% else %}
<p>No EPUBs indexed.</p>
{% endif %}
</main>
</body>
</html>
@@ -1 +0,0 @@
<p class="status">{{ message }}</p>
@@ -1 +0,0 @@
<p class="error">{{ message }}</p>
@@ -1,74 +0,0 @@
<div class="rank-label">{{ response.rank_label }}</div>
{% if response.timings %}
<section class="runtime">
<h2>Runtime</h2>
<p class="meta">Total {{ "%.1f"|format(response.total_runtime_ms) }} ms</p>
<ol class="timing-chart">
{% set total = response.total_runtime_ms %}
{% set ns = namespace(remaining=total) %}
{% for step in response.timings %}
{% set width = (step.duration_ms / total * 100) if total else 0 %}
{% if step.counts_toward_total %}
{% set ns.remaining = ns.remaining - step.duration_ms %}
{% endif %}
<li>
<span class="timing-label">{{ step.name }}</span>
<span class="timing-bar"><span style="width: {{ "%.2f"|format(width) }}%"></span></span>
<span class="timing-value">{{ "%.1f"|format(step.duration_ms) }} ms</span>
<span class="timing-remaining">{{ "%.1f"|format([ns.remaining, 0]|max) }} ms left</span>
</li>
{% endfor %}
</ol>
</section>
{% endif %}
<section class="answer">
<h2>Answer</h2>
<p>{{ answer }}</p>
</section>
{% if response.results %}
<ol class="results">
{% for result in response.results %}
<li>
<h2>{{ result.source_title }}</h2>
<p class="meta">
{% if result.source_author %}{{ result.source_author }}{% endif %}
{% if result.chapter_title %} · {{ result.chapter_title }}{% endif %}
{% if result.page_label %} · page {{ result.page_label }}{% endif %}
</p>
<p>{{ result.text }}</p>
<dl class="scores">
<div>
<dt>final</dt>
<dd>{{ "%.3f"|format(result.score) }}</dd>
</div>
{% if result.rerank_score is not none %}
<div>
<dt>rerank</dt>
<dd>{{ "%.3f"|format(result.rerank_score) }}</dd>
</div>
{% endif %}
{% if result.vector_score is not none %}
<div>
<dt>vector cosine</dt>
<dd>{{ "%.3f"|format(result.vector_score) }}</dd>
</div>
{% endif %}
{% if result.bm25_score is not none %}
<div>
<dt>BM25</dt>
<dd>{{ "%.6f"|format(result.bm25_score) }}</dd>
</div>
{% endif %}
{% if result.fused_score is not none %}
<div>
<dt>RRF</dt>
<dd>{{ "%.3f"|format(result.fused_score) }}</dd>
</div>
{% endif %}
</dl>
</li>
{% endfor %}
</ol>
{% else %}
<p>No results.</p>
{% endif %}
@@ -1,30 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>EPUB Search</title>
<script src="https://unpkg.com/htmx.org@2.0.4"></script>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<main>
<nav>
<a href="/">Search</a>
<a href="/books">Books</a>
<a href="/admin">Admin</a>
</nav>
<h1>EPUB Search</h1>
<form hx-post="/search" hx-target="#results" hx-swap="innerHTML">
<label for="query">Search</label>
<textarea id="query" name="query" rows="4" required></textarea>
<label class="check">
<input type="checkbox" name="rerank" value="true" {% if config.rerank.enabled %}checked{% endif %}>
Rerank
</label>
<button type="submit">Search</button>
</form>
<section id="results"></section>
</main>
</body>
</html>
-13
View File
@@ -1,13 +0,0 @@
"""Shared web UI resources for EPUB search."""
from __future__ import annotations
from pathlib import Path
from fastapi.templating import Jinja2Templates
PACKAGE_DIR = Path(__file__).resolve().parent
TEMPLATE_DIR = PACKAGE_DIR / "templates"
STATIC_DIR = PACKAGE_DIR / "static"
templates = Jinja2Templates(directory=TEMPLATE_DIR)
-237
View File
@@ -1,237 +0,0 @@
"""Persisted BM25 corpus management."""
from __future__ import annotations
import json
import logging
import shutil
import tempfile
from dataclasses import dataclass
from datetime import UTC, datetime
from functools import cache
from pathlib import Path
from typing import TYPE_CHECKING
import bm25s
from sqlalchemy import func, select, union_all
from python.orm.richie import EbookChapter, EbookChunk, EbookSource
if TYPE_CHECKING:
from sqlalchemy.orm import Session
from python.ebook_search.config import EbookSearchConfig
logger = logging.getLogger(__name__)
MANIFEST_NAME = "manifest.json"
REQUIRED_INDEX_FILES = frozenset(
{
"data.csc.index.npy",
"indices.csc.index.npy",
"indptr.csc.index.npy",
"params.index.json",
"vocab.index.json",
"corpus.jsonl",
}
)
@dataclass(frozen=True)
class BM25Manifest:
"""Metadata describing a persisted BM25 corpus."""
created_at: datetime
db_updated_at: datetime | None
chunk_count: int
@dataclass(frozen=True)
class BM25Corpus:
"""Loaded persisted BM25 corpus and retriever."""
retriever: object | None
records: tuple[dict[str, object], ...]
manifest: BM25Manifest
class BM25CorpusUnavailableError(RuntimeError):
"""Raised when the persisted BM25 corpus cannot be loaded."""
def bm25_index_path(config: EbookSearchConfig) -> Path:
"""Return the configured BM25 index path relative to the current working directory."""
path = Path(config.bm25_index_dir).expanduser()
if path.is_absolute():
return path
return Path.cwd() / path
def ensure_bm25_corpus(session: Session, config: EbookSearchConfig) -> None:
"""Create or refresh the persisted BM25 corpus when it is missing or stale."""
index_path = bm25_index_path(config)
manifest = read_bm25_manifest(index_path)
db_updated_at = corpus_last_updated_at(session)
if not bm25_index_exists(index_path, manifest):
logger.info("ebook_bm25_index_missing path=%s", index_path)
refresh_bm25_corpus(session, config, db_updated_at=db_updated_at)
return
if db_updated_at is not None and manifest is not None and manifest.created_at < db_updated_at:
logger.info(
"ebook_bm25_index_stale path=%s created_at=%s db_updated_at=%s",
index_path,
manifest.created_at.isoformat(),
db_updated_at.isoformat(),
)
refresh_bm25_corpus(session, config, db_updated_at=db_updated_at)
return
logger.info(
"ebook_bm25_index_current path=%s chunks=%s created_at=%s",
index_path,
manifest.chunk_count if manifest else 0,
manifest.created_at.isoformat() if manifest else None,
)
def refresh_bm25_corpus(
session: Session,
config: EbookSearchConfig,
*,
db_updated_at: datetime | None = None,
) -> BM25Manifest:
"""Rebuild and persist the BM25 corpus from the current database chunks."""
index_path = bm25_index_path(config)
records = fetch_bm25_corpus_records(session)
manifest = BM25Manifest(
created_at=datetime.now(tz=UTC),
db_updated_at=db_updated_at if db_updated_at is not None else corpus_last_updated_at(session),
chunk_count=len(records),
)
write_bm25_corpus(index_path, records, manifest)
logger.info(
"ebook_bm25_index_refreshed path=%s chunks=%s created_at=%s",
index_path,
manifest.chunk_count,
manifest.created_at.isoformat(),
)
return manifest
@cache
def load_bm25_corpus(config: EbookSearchConfig) -> BM25Corpus:
"""Load the BM25 corpus into memory once per process.
Background refresh tasks clear this cache after rebuilding the on-disk corpus.
"""
index_path = bm25_index_path(config)
logger.info("ebook_bm25_corpus_cache_load path=%s", index_path)
manifest = read_bm25_manifest(index_path)
if manifest is None or not bm25_index_exists(index_path, manifest):
msg = f"BM25 corpus is not available: {index_path}"
raise BM25CorpusUnavailableError(msg)
if manifest.chunk_count == 0:
return BM25Corpus(retriever=None, records=(), manifest=manifest)
retriever = bm25s.BM25.load(index_path, load_corpus=True, mmap=True)
records = tuple(dict(record) for record in retriever.corpus)
return BM25Corpus(retriever=retriever, records=records, manifest=manifest)
def score_bm25_corpus(query: str, corpus: BM25Corpus, *, limit: int) -> list[tuple[dict[str, object], float]]:
"""Score a query against a loaded BM25 corpus."""
if corpus.retriever is None or not corpus.records:
return []
k = min(limit, len(corpus.records))
documents, scores = corpus.retriever.retrieve(
bm25s.tokenize(query, show_progress=False),
corpus=list(corpus.records),
k=k,
show_progress=False,
)
results: list[tuple[dict[str, object], float]] = []
for document, score in zip(documents[0], scores[0], strict=True):
score_value = float(score)
if score_value <= 0:
continue
results.append((dict(document), score_value))
return results
def fetch_bm25_corpus_records(session: Session) -> list[dict[str, object]]:
"""Fetch BM25 corpus records from the database."""
statement = (
select(
EbookChunk.id.label("chunk_id"),
EbookChunk.text.label("text"),
EbookSource.title.label("source_title"),
EbookSource.author.label("source_author"),
EbookChapter.title.label("chapter_title"),
EbookChunk.page_label.label("page_label"),
EbookChunk.search_text.label("bm25_text"),
)
.select_from(EbookChunk)
.join(EbookSource, EbookSource.id == EbookChunk.source_id)
.outerjoin(EbookChapter, EbookChapter.id == EbookChunk.chapter_id)
.order_by(EbookChunk.id)
)
return [dict(row) for row in session.execute(statement).mappings()]
def corpus_last_updated_at(session: Session) -> datetime | None:
"""Return the latest source/chapter/chunk update timestamp relevant to BM25 text."""
update_times = union_all(
select(func.max(EbookSource.updated).label("updated")),
select(func.max(EbookChapter.updated).label("updated")),
select(func.max(EbookChunk.updated).label("updated")),
).subquery()
return session.scalar(select(func.max(update_times.c.updated)))
def write_bm25_corpus(index_path: Path, records: list[dict[str, object]], manifest: BM25Manifest) -> None:
"""Write a BM25 corpus and manifest atomically."""
index_path.parent.mkdir(parents=True, exist_ok=True)
temp_path = Path(tempfile.mkdtemp(prefix=f"{index_path.name}.", dir=index_path.parent))
try:
if records:
retriever = bm25s.BM25()
texts = [str(record["bm25_text"]) for record in records]
retriever.index(bm25s.tokenize(texts, show_progress=False), show_progress=False)
retriever.save(temp_path, corpus=records, show_progress=False)
write_bm25_manifest(temp_path, manifest)
if index_path.exists():
shutil.rmtree(index_path)
temp_path.rename(index_path)
except Exception:
shutil.rmtree(temp_path, ignore_errors=True)
raise
def read_bm25_manifest(index_path: Path) -> BM25Manifest | None:
"""Read the BM25 manifest if it exists and is valid."""
manifest_path = index_path / MANIFEST_NAME
if not manifest_path.exists():
return None
body = json.loads(manifest_path.read_text(encoding="utf-8"))
return BM25Manifest(
created_at=datetime.fromisoformat(str(body["created_at"])),
db_updated_at=datetime.fromisoformat(str(body["db_updated_at"])) if body.get("db_updated_at") else None,
chunk_count=int(body["chunk_count"]),
)
def write_bm25_manifest(index_path: Path, manifest: BM25Manifest) -> None:
"""Write the BM25 manifest to an index directory."""
body = {
"created_at": manifest.created_at.isoformat(),
"db_updated_at": manifest.db_updated_at.isoformat() if manifest.db_updated_at else None,
"chunk_count": manifest.chunk_count,
}
(index_path / MANIFEST_NAME).write_text(json.dumps(body, indent=2, sort_keys=True), encoding="utf-8")
def bm25_index_exists(index_path: Path, manifest: BM25Manifest | None) -> bool:
"""Return whether a usable persisted BM25 index exists."""
if manifest is None or not index_path.is_dir():
return False
if manifest.chunk_count == 0:
return True
return all((index_path / file_name).exists() for file_name in REQUIRED_INDEX_FILES)
-117
View File
@@ -1,117 +0,0 @@
"""Configuration for the EPUB search app."""
from __future__ import annotations
from dataclasses import dataclass
from os import getenv
def getenv_bool(name: str, *, default: bool) -> bool:
"""Read a boolean environment variable with a default fallback."""
value = getenv(name)
if value is None:
return default
return value.strip().lower() in {"1", "true", "yes", "on"}
def getenv_int(name: str, *, default: int) -> int:
"""Read an integer environment variable with a default fallback."""
value = getenv(name)
if value is None or not value.strip():
return default
return int(value)
@dataclass(frozen=True)
class RerankConfig:
"""vLLM reranker settings."""
enabled: bool = False
base_url: str = "http://192.168.90.25:8001"
model: str = "qwen3-reranker-06b"
candidates: int = 24
timeout_seconds: float = 30.0
@dataclass(frozen=True)
class EbookSearchConfig:
"""Runtime settings for EPUB search."""
rerank: RerankConfig
top_k: int = 12
library_paths: tuple[str, ...] = ()
vllm_base_url: str = "https://ollama.com/v1"
vllm_api_key: str = "not-needed"
chat_model: str = "deepseek-v4-flash"
answer_enabled: bool = True
embedding_base_url: str = "http://192.168.90.25:8000/v1"
embedding_api_key: str = "not-needed"
embedding_model: str = "qwen3-embedding-0.6b"
embedding_batch_size: int = 32
bm25_index_dir: str = ".ebook_search_bm25"
bm25_refresh_delay_seconds: int = 60
def load_rerank_config() -> RerankConfig:
"""Load reranker config from environment variables."""
return RerankConfig(
enabled=getenv_bool("EBOOK_SEARCH_RERANK_ENABLED", default=False),
base_url=getenv("EBOOK_SEARCH_RERANK_BASE_URL", "http://192.168.90.25:8001"),
model=getenv("EBOOK_SEARCH_RERANK_MODEL", "qwen3-reranker-06b"),
candidates=getenv_int("EBOOK_SEARCH_RERANK_CANDIDATES", default=24),
timeout_seconds=float(getenv_int("EBOOK_SEARCH_RERANK_TIMEOUT_SECONDS", default=30)),
)
def load_config() -> EbookSearchConfig:
"""Load EPUB search config from environment variables."""
return EbookSearchConfig(
rerank=load_rerank_config(),
top_k=getenv_int("EBOOK_SEARCH_TOP_K", default=12),
library_paths=library_paths_from_env(),
vllm_base_url=getenv("EBOOK_SEARCH_VLLM_BASE_URL", "https://ollama.com/v1"),
vllm_api_key=getenv("EBOOK_SEARCH_VLLM_API_KEY") or getenv("OLLAMA_API_KEY") or "not-needed",
chat_model=getenv("EBOOK_SEARCH_CHAT_MODEL", "deepseek-v4-flash"),
answer_enabled=getenv_bool("EBOOK_SEARCH_ANSWER_ENABLED", default=True),
embedding_base_url=getenv("EBOOK_SEARCH_EMBEDDING_BASE_URL", "http://192.168.90.25:8000/v1"),
embedding_api_key=getenv("EBOOK_SEARCH_EMBEDDING_API_KEY", "not-needed"),
embedding_model=normalize_embedding_model(),
embedding_batch_size=getenv_int("EBOOK_SEARCH_EMBEDDING_BATCH_SIZE", default=32),
bm25_index_dir=getenv("EBOOK_SEARCH_BM25_INDEX_DIR", ".ebook_search_bm25"),
bm25_refresh_delay_seconds=getenv_int("EBOOK_SEARCH_BM25_REFRESH_DELAY_SECONDS", default=60),
)
def normalize_embedding_model(default: str = "qwen3-embedding-0.6b") -> str:
"""Normalize supported embedding aliases to provider model names."""
aliases = {
"Qwen3-Embedding-0.6B": "qwen3-embedding-0.6b",
"Qwen3-Embedding-4B": "qwen3-embedding-4b",
"Qwen3-Embedding-8B": "qwen3-embedding-8b",
"Qwen/Qwen3-Embedding-0.6B": "qwen3-embedding-0.6b",
"Qwen/Qwen3-Embedding-4B": "qwen3-embedding-4b",
"Qwen/Qwen3-Embedding-8B": "qwen3-embedding-8b",
"qwen3-embedding:0.6b": "qwen3-embedding-0.6b",
"qwen3-embedding:4b": "qwen3-embedding-4b",
"qwen3-embedding:8b": "qwen3-embedding-8b",
"qwen3-embedding-0.6b": "qwen3-embedding-0.6b",
"qwen3-embedding-4b": "qwen3-embedding-4b",
"qwen3-embedding-8b": "qwen3-embedding-8b",
}
model = getenv("EBOOK_SEARCH_EMBEDDING_MODEL", default)
standard_model = aliases.get(model)
if standard_model is None:
error = f"Embedding model {model} is not supported. Supported models are {aliases.keys()}"
raise ValueError(error)
return standard_model
def library_paths_from_env() -> tuple[str, ...]:
"""Read configured EPUB library paths from the environment."""
value = getenv("EBOOK_SEARCH_LIBRARY_PATHS")
if value is None:
return ()
return tuple(path for path in value.split(":") if path)
-170
View File
@@ -1,170 +0,0 @@
"""Embedding model helpers."""
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING
from sqlalchemy import func, select
from sqlalchemy.dialects.postgresql import insert
from python.ebook_search.llm_interface import request_embeddings
from python.orm.richie import (
EbookChunk,
EbookChunkEmbedding1024,
EbookChunkEmbedding2560,
EbookChunkEmbedding4096,
EbookEmbeddingModel,
)
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from collections.abc import Sequence
from sqlalchemy.orm import Session
from python.ebook_search.config import EbookSearchConfig
MODEL_DIMENSIONS = {
"qwen3-embedding-0.6b": 1024,
"qwen3-embedding-4b": 2560,
"qwen3-embedding-8b": 4096,
}
def get_embedding_table(
dimension: int,
) -> type[EbookChunkEmbedding1024 | EbookChunkEmbedding2560 | EbookChunkEmbedding4096]:
"""Return the embedding table mapped to an embedding dimension."""
embedding_tables = {
1024: EbookChunkEmbedding1024,
2560: EbookChunkEmbedding2560,
4096: EbookChunkEmbedding4096,
}
table = embedding_tables.get(dimension)
if not table:
msg = f"Embedding dimension {dimension} is not supported"
raise ValueError(msg)
return table
@dataclass(frozen=True)
class EmbeddingModelStats:
"""Embedding coverage for one model."""
model_name: str
dimension: int
embedded_chunks: int
total_chunks: int
@property
def missing_chunks(self) -> int:
"""Return chunks missing this embedding model."""
return max(self.total_chunks - self.embedded_chunks, 0)
def embed_texts(texts: Sequence[str], config: EbookSearchConfig) -> list[list[float]]:
"""Embed text with the configured vLLM embedding model."""
logger.info(
"ebook_embed_request_start base_url=%s model=%s count=%s",
config.embedding_base_url,
config.embedding_model,
len(texts),
)
vectors = request_embeddings(texts, config)
expected_dimension = MODEL_DIMENSIONS[config.embedding_model]
for vector in vectors:
if len(vector) != expected_dimension:
msg = f"Expected {expected_dimension} dimensions, got {len(vector)}"
raise ValueError(msg)
logger.info(
"ebook_embed_request_complete model=%s count=%s dimension=%s",
config.embedding_model,
len(vectors),
expected_dimension,
)
return vectors
def embed_query(query: str, config: EbookSearchConfig) -> list[float]:
"""Embed a search query with the Qwen retrieval instruction."""
instructed_query = f"Instruct: Retrieve relevant passages for the query.\nQuery: {query}"
return embed_texts([instructed_query], config)[0]
def ensure_embedding_models(session: Session) -> None:
"""Ensure supported embedding model rows exist."""
for name, dimension in MODEL_DIMENSIONS.items():
existing = session.scalar(select(EbookEmbeddingModel).where(EbookEmbeddingModel.name == name))
if existing is None:
session.add(EbookEmbeddingModel(name=name, dimension=dimension, is_default=name == "qwen3-embedding-0.6b"))
logger.info("ebook_embedding_model_created model=%s dimension=%s", name, dimension)
session.flush()
def embedding_model_stats(session: Session) -> list[EmbeddingModelStats]:
"""Return embedding coverage counts for every supported model."""
total_chunks = session.scalar(select(func.count(EbookChunk.id))) or 0
models = {
model.name: model
for model in session.scalars(
select(EbookEmbeddingModel)
.where(EbookEmbeddingModel.name.in_(MODEL_DIMENSIONS))
.order_by(EbookEmbeddingModel.name)
)
}
stats: list[EmbeddingModelStats] = []
for model_name, dimension in MODEL_DIMENSIONS.items():
model = models.get(model_name)
embedded_chunks = 0
if model is not None:
table = get_embedding_table(dimension)
embedded_chunks = session.scalar(select(func.count(table.id)).where(table.model_id == model.id)) or 0
stats.append(
EmbeddingModelStats(
model_name=model_name,
dimension=dimension,
embedded_chunks=embedded_chunks,
total_chunks=total_chunks,
)
)
return stats
def embed_missing_chunks(session: Session, config: EbookSearchConfig) -> int:
"""Embed chunks missing embeddings for the configured model."""
ensure_embedding_models(session)
model = session.scalar(select(EbookEmbeddingModel).where(EbookEmbeddingModel.name == config.embedding_model))
if model is None:
supported_models = ", ".join(MODEL_DIMENSIONS)
msg = f"Unknown embedding model: {config.embedding_model}. Supported models: {supported_models}"
raise ValueError(msg)
table = get_embedding_table(model.dimension)
chunks = list(
session.scalars(
select(EbookChunk)
.outerjoin(table, (table.chunk_id == EbookChunk.id) & (table.model_id == model.id))
.where(table.id.is_(None))
.order_by(EbookChunk.id)
.limit(config.embedding_batch_size)
)
)
if not chunks:
logger.info("ebook_embed_missing_none model=%s", config.embedding_model)
return 0
logger.info("ebook_embed_missing_batch_start model=%s count=%s", config.embedding_model, len(chunks))
vectors = embed_texts([chunk.text for chunk in chunks], config)
rows = [
{"chunk_id": chunk.id, "model_id": model.id, "embedding": vector}
for chunk, vector in zip(chunks, vectors, strict=True)
]
statement = insert(table).values(rows).on_conflict_do_nothing(index_elements=["chunk_id", "model_id"])
session.execute(statement)
session.flush()
logger.info("ebook_embed_missing_batch_complete model=%s count=%s", config.embedding_model, len(rows))
return len(rows)
-95
View File
@@ -1,95 +0,0 @@
"""EPUB parsing helpers."""
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import TYPE_CHECKING
from bs4 import BeautifulSoup
from ebooklib import ITEM_DOCUMENT, epub
if TYPE_CHECKING:
from pathlib import Path
WHITESPACE_RE = re.compile(r"\s+")
@dataclass(frozen=True)
class ParsedChapter:
"""Text extracted from one EPUB spine document."""
title: str | None
href: str | None
text: str
page_labels: tuple[str, ...]
@dataclass(frozen=True)
class ParsedEpub:
"""Parsed EPUB metadata and text."""
title: str
author: str | None
language: str | None
publisher: str | None
identifier: str | None
chapters: tuple[ParsedChapter, ...]
def parse_epub(path: Path) -> ParsedEpub:
"""Parse EPUB metadata and spine text."""
book = epub.read_epub(path)
chapters = []
for item in book.get_items_of_type(ITEM_DOCUMENT):
soup = BeautifulSoup(item.get_content(), "html.parser")
title = chapter_title(soup)
page_labels = tuple(extract_page_labels(soup))
text = clean_text(soup.get_text(" "))
if text:
chapters.append(ParsedChapter(title=title, href=item.get_name(), text=text, page_labels=page_labels))
return ParsedEpub(
title=metadata_value(book, "title") or path.stem,
author=metadata_value(book, "creator"),
language=metadata_value(book, "language"),
publisher=metadata_value(book, "publisher"),
identifier=metadata_value(book, "identifier"),
chapters=tuple(chapters),
)
def metadata_value(book: epub.EpubBook, name: str) -> str | None:
"""Return the first non-empty Dublin Core metadata value for a name."""
values = book.get_metadata("DC", name)
if not values:
return None
value = values[0][0]
return str(value).strip() or None
def chapter_title(soup: BeautifulSoup) -> str | None:
"""Extract the best available title from an EPUB document soup."""
heading = soup.find(["h1", "h2", "h3"])
if heading is None:
title = soup.find("title")
if title is None:
return None
return clean_text(title.get_text(" ")) or None
return clean_text(heading.get_text(" ")) or None
def extract_page_labels(soup: BeautifulSoup) -> list[str]:
"""Extract EPUB page-break labels from a document soup."""
labels: list[str] = []
for tag in soup.find_all(attrs={"epub:type": "pagebreak"}):
label = tag.get("title") or tag.get("aria-label") or tag.get_text(" ")
clean = clean_text(str(label))
if clean:
labels.append(clean)
return labels
def clean_text(text: str) -> str:
"""Normalize whitespace in extracted EPUB text."""
return WHITESPACE_RE.sub(" ", text).strip()
-190
View File
@@ -1,190 +0,0 @@
"""EPUB ingestion into Richie DB."""
from __future__ import annotations
import hashlib
import logging
from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path
from typing import TYPE_CHECKING
import tiktoken
from sqlalchemy import or_, select
from python.ebook_search.epub_parse import parse_epub
from python.orm.richie import EbookChapter, EbookChunk, EbookSource
logger = logging.getLogger(__name__)
DEFAULT_CHUNK_TOKENS = 700
DEFAULT_CHUNK_OVERLAP = 100
if TYPE_CHECKING:
from sqlalchemy.orm import Session
from python.ebook_search.config import EbookSearchConfig
from python.ebook_search.epub_parse import ParsedChapter
@dataclass(frozen=True)
class TextChunk:
"""A token-bounded chunk of text."""
text: str
token_start: int
token_count: int
def chunk_text(
text: str,
*,
chunk_tokens: int = DEFAULT_CHUNK_TOKENS,
overlap_tokens: int = DEFAULT_CHUNK_OVERLAP,
) -> list[TextChunk]:
"""Split text into overlapping token chunks."""
if chunk_tokens <= 0:
msg = "chunk_tokens must be positive"
raise ValueError(msg)
if overlap_tokens < 0 or overlap_tokens >= chunk_tokens:
msg = "overlap_tokens must be non-negative and smaller than chunk_tokens"
raise ValueError(msg)
encoding = tiktoken.get_encoding("cl100k_base")
tokens = encoding.encode(text)
if not tokens:
return []
chunks: list[TextChunk] = []
step = chunk_tokens - overlap_tokens
for start in range(0, len(tokens), step):
chunk = tokens[start : start + chunk_tokens]
if not chunk:
continue
chunks.append(
TextChunk(
text=encoding.decode(chunk).strip(),
token_start=start,
token_count=len(chunk),
)
)
if start + chunk_tokens >= len(tokens):
break
return [chunk for chunk in chunks if chunk.text]
def ingest_configured_paths(session: Session, config: EbookSearchConfig) -> int:
"""Ingest every EPUB found under configured library paths."""
count = 0
for library_path in config.library_paths:
path = Path(library_path).expanduser()
logger.info("ebook_ingest_path_start path=%s", path)
if path.is_file() and path.suffix.lower() == ".epub":
count += int(ingest_file(session, path))
elif path.is_dir():
for epub_path in sorted(path.rglob("*.epub")):
count += int(ingest_file(session, epub_path))
else:
logger.warning("ebook_ingest_path_missing path=%s", path)
logger.info("ebook_ingest_paths_complete changed_files=%s configured_paths=%s", count, len(config.library_paths))
return count
def ingest_file(session: Session, path: Path) -> bool:
"""Ingest one EPUB file. Return True when the database changed."""
resolved_path = path.expanduser().resolve()
logger.info("ebook_ingest_file_start path=%s", resolved_path)
file_hash = sha256_file(resolved_path)
existing = find_existing_source(session, resolved_path, file_hash)
if existing is not None and existing.file_sha256 == file_hash:
stat = resolved_path.stat()
existing.file_path = str(resolved_path)
existing.file_mtime = datetime.fromtimestamp(stat.st_mtime, tz=UTC)
existing.file_size = stat.st_size
session.flush()
logger.info("ebook_ingest_file_unchanged source_id=%s path=%s", existing.id, resolved_path)
return False
if existing is not None:
logger.info("ebook_ingest_file_replacing source_id=%s path=%s", existing.id, resolved_path)
session.delete(existing)
session.flush()
stat = resolved_path.stat()
parsed = parse_epub(resolved_path)
source = EbookSource(
title=parsed.title,
author=parsed.author,
language=parsed.language,
publisher=parsed.publisher,
identifier=parsed.identifier,
file_path=str(resolved_path),
file_sha256=file_hash,
file_mtime=datetime.fromtimestamp(stat.st_mtime, tz=UTC),
file_size=stat.st_size,
)
session.add(source)
session.flush()
chunk_index = 0
for spine_index, parsed_chapter in enumerate(parsed.chapters):
chapter = EbookChapter(
source_id=source.id,
spine_index=spine_index,
title=parsed_chapter.title,
href=parsed_chapter.href,
)
session.add(chapter)
session.flush()
chunk_index = add_chapter_chunks(session, source, chapter, parsed_chapter, chunk_index)
session.flush()
logger.info(
"ebook_ingest_file_complete source_id=%s path=%s chapters=%s chunks=%s",
source.id,
resolved_path,
len(parsed.chapters),
chunk_index,
)
return True
def find_existing_source(session: Session, path: Path, file_hash: str) -> EbookSource | None:
"""Find an existing source by canonical path or file hash."""
return session.scalar(
select(EbookSource).where(or_(EbookSource.file_path == str(path), EbookSource.file_sha256 == file_hash))
)
def add_chapter_chunks(
session: Session,
source: EbookSource,
chapter: EbookChapter,
parsed_chapter: ParsedChapter,
chunk_index: int,
) -> int:
"""Add chunk rows for one parsed chapter and return the next chunk index."""
page_label = parsed_chapter.page_labels[0] if parsed_chapter.page_labels else None
for text_chunk in chunk_text(parsed_chapter.text):
session.add(
EbookChunk(
source_id=source.id,
chapter_id=chapter.id,
chunk_index=chunk_index,
text=text_chunk.text,
token_start=text_chunk.token_start,
token_count=text_chunk.token_count,
page_label=page_label,
content_sha256=hashlib.sha256(text_chunk.text.encode()).hexdigest(),
search_text=f"{source.title} {source.author or ''} {chapter.title or ''} {text_chunk.text}",
)
)
chunk_index += 1
return chunk_index
def sha256_file(path: Path) -> str:
"""Calculate the SHA-256 digest for a file."""
digest = hashlib.sha256()
with path.open("rb") as file:
for block in iter(lambda: file.read(1024 * 1024), b""):
digest.update(block)
return digest.hexdigest()
-143
View File
@@ -1,143 +0,0 @@
"""LLM provider HTTP adapters."""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
import httpx
if TYPE_CHECKING:
from collections.abc import Sequence
from python.ebook_search.config import EbookSearchConfig, RerankConfig
logger = logging.getLogger(__name__)
def auth_headers(api_key: str) -> dict[str, str]:
"""Build authorization headers when an API key is configured."""
if api_key == "not-needed":
return {}
return {"Authorization": f"Bearer {api_key}"}
def request_embeddings(texts: Sequence[str], config: EbookSearchConfig) -> list[list[float]]:
"""Request embeddings from the configured OpenAI-compatible endpoint."""
try:
response = httpx.post(
f"{config.embedding_base_url.rstrip('/')}/embeddings",
headers=auth_headers(config.embedding_api_key),
json={"model": config.embedding_model, "input": list(texts)},
timeout=60,
)
response.raise_for_status()
return embedding_vectors_from_response(response.json())
except (httpx.HTTPError, ValueError, KeyError, TypeError) as error:
logger.exception(
"ebook_embed_request_failed base_url=%s model=%s count=%s",
config.embedding_base_url,
config.embedding_model,
len(texts),
)
msg = f"Embedding request failed. base_url={config.embedding_base_url} model={config.embedding_model}"
raise RuntimeError(msg) from error
def embedding_vectors_from_response(body: object) -> list[list[float]]:
"""Extract embedding vectors from an OpenAI-compatible embedding response."""
if not isinstance(body, dict):
msg = "Embedding response is not an object"
raise TypeError(msg)
data = body["data"]
if not isinstance(data, list):
msg = "Embedding response data is not a list"
raise TypeError(msg)
vectors: list[list[float]] = []
for item in data:
if not isinstance(item, dict):
msg = "Embedding item is not an object"
raise TypeError(msg)
embedding = item["embedding"]
if not isinstance(embedding, list):
msg = "Embedding value is not a list"
raise TypeError(msg)
vectors.append([float(value) for value in embedding])
return vectors
def request_rerank(
query: str,
documents: Sequence[str],
config: RerankConfig,
) -> object | None:
"""Request rerank scores from the configured vLLM endpoint."""
payload = {
"model": config.model,
"query": query,
"documents": list(documents),
}
response = httpx.post(
f"{config.base_url.rstrip('/')}/rerank",
json=payload,
timeout=config.timeout_seconds,
)
response.raise_for_status()
try:
return response.json()
except ValueError:
logger.debug("ebook_rerank_response_invalid_json", extra={"response": response.text})
return None
def request_chat_completion(
config: EbookSearchConfig,
messages: Sequence[dict[str, str]],
) -> str:
"""Request a chat completion from the configured OpenAI-compatible endpoint."""
try:
response = httpx.post(
f"{config.vllm_base_url.rstrip('/')}/chat/completions",
headers=auth_headers(config.vllm_api_key),
json={
"model": config.chat_model,
"messages": list(messages),
"temperature": 0,
},
timeout=60,
)
response.raise_for_status()
return chat_content_from_response(response.json())
except (httpx.HTTPError, ValueError, KeyError, TypeError) as error:
msg = f"Chat request failed. base_url={config.vllm_base_url} model={config.chat_model}"
raise RuntimeError(msg) from error
def chat_content_from_response(body: object) -> str:
"""Extract text content from an OpenAI-compatible chat response."""
if not isinstance(body, dict):
msg = "Chat response is not an object"
raise TypeError(msg)
choices = body["choices"]
if not isinstance(choices, list) or not choices:
msg = "Chat response has no choices"
raise ValueError(msg)
first = choices[0]
if not isinstance(first, dict):
msg = "Chat choice is not an object"
raise TypeError(msg)
message = first["message"]
if not isinstance(message, dict):
msg = "Chat message is not an object"
raise TypeError(msg)
content = message.get("content") or ""
if not isinstance(content, str):
msg = "Chat content is not text"
raise TypeError(msg)
return content
-129
View File
@@ -1,129 +0,0 @@
"""vLLM-backed optional reranking."""
from __future__ import annotations
import logging
from dataclasses import dataclass, replace
from typing import TYPE_CHECKING
from python.ebook_search.llm_interface import request_rerank
if TYPE_CHECKING:
from python.ebook_search.config import RerankConfig
from python.ebook_search.search import SearchResult
logger = logging.getLogger(__name__)
RERANK_SCORE_WEIGHT = 0.7
HYBRID_SCORE_WEIGHT = 0.3
@dataclass(frozen=True)
class RerankResult:
"""A relevance score for one candidate chunk."""
chunk_id: int
score: float
def rerank_chunks(query: str, candidates: list[SearchResult], config: RerankConfig) -> list[SearchResult]:
"""Rerank candidates with a vLLM rerank endpoint."""
if not candidates:
return []
logger.info(
"ebook_rerank_request_start base_url=%s model=%s candidates=%s",
config.base_url,
config.model,
len(candidates),
)
scores = score_candidates(query, candidates, config)
results = sorted(
(
replace(
result,
score=final_rerank_score(result, scores[result.chunk_id].score, candidates),
rerank_score=scores[result.chunk_id].score,
)
for result in candidates
),
key=lambda result: result.score,
reverse=True,
)
logger.info(
"ebook_rerank_request_complete base_url=%s model=%s candidates=%s",
config.base_url,
config.model,
len(results),
)
return results
def score_candidates(
query: str,
candidates: list[SearchResult],
config: RerankConfig,
) -> dict[int, RerankResult]:
"""Score candidate chunks with the configured rerank API."""
body = request_rerank(query, [candidate.text for candidate in candidates], config)
if body is None:
return zero_rerank_scores(candidates)
scores = parse_vllm_scores(body, candidates)
for result in scores.values():
logger.debug("ebook_rerank_candidate_scored chunk_id=%s score=%s", result.chunk_id, result.score)
return scores
def parse_vllm_scores(body: object, candidates: list[SearchResult]) -> dict[int, RerankResult]:
"""Parse vLLM rerank scores into chunk-id keyed results."""
if not isinstance(body, dict):
logger.debug("ebook_rerank_response_not_object", extra={"response": body})
return zero_rerank_scores(candidates)
results = body.get("results") or body.get("data")
if not isinstance(results, list):
logger.debug("ebook_rerank_response_missing_results", extra={"response": body})
return zero_rerank_scores(candidates)
scores = zero_rerank_scores(candidates)
for item in results:
if not isinstance(item, dict):
continue
index = item.get("index")
score = item.get("relevance_score", item.get("score"))
if not isinstance(index, int) or index < 0 or index >= len(candidates):
continue
if not isinstance(score, int | float):
continue
chunk_id = candidates[index].chunk_id
scores[chunk_id] = RerankResult(chunk_id=chunk_id, score=clamp_score(float(score)))
return scores
def zero_rerank_scores(candidates: list[SearchResult]) -> dict[int, RerankResult]:
"""Return zero relevance scores for all candidate chunks."""
return {candidate.chunk_id: RerankResult(chunk_id=candidate.chunk_id, score=0.0) for candidate in candidates}
def clamp_score(score: float) -> float:
"""Clamp a rerank score into the supported 0.0 to 1.0 range."""
return min(max(score, 0.0), 1.0)
def final_rerank_score(result: SearchResult, rerank_score: float, candidates: list[SearchResult]) -> float:
"""Combine rerank relevance with normalized hybrid retrieval evidence."""
return (RERANK_SCORE_WEIGHT * rerank_score) + (HYBRID_SCORE_WEIGHT * normalized_hybrid_score(result, candidates))
def normalized_hybrid_score(result: SearchResult, candidates: list[SearchResult]) -> float:
"""Normalize a candidate hybrid score against the rerank candidate set."""
hybrid_scores = [
candidate.fused_score if candidate.fused_score is not None else candidate.score for candidate in candidates
]
low = min(hybrid_scores)
high = max(hybrid_scores)
if high == low:
return 1.0
score = result.fused_score if result.fused_score is not None else result.score
return (score - low) / (high - low)
-377
View File
@@ -1,377 +0,0 @@
"""Hybrid search orchestration."""
from __future__ import annotations
import logging
import re
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, replace
from typing import TYPE_CHECKING
from pgvector.sqlalchemy import Vector
from sqlalchemy import literal, select
from sqlalchemy.orm import Session
from python.ebook_search.bm25_corpus import (
load_bm25_corpus,
score_bm25_corpus,
)
from python.ebook_search.embeddings import MODEL_DIMENSIONS, embed_query, get_embedding_table
from python.ebook_search.rerank import rerank_chunks
from python.ebook_search.timing import RuntimeStep, timed_result
from python.orm.richie import (
EbookChapter,
EbookChunk,
EbookEmbeddingModel,
EbookSource,
)
if TYPE_CHECKING:
from collections.abc import Mapping
from sqlalchemy.engine import Engine
from python.ebook_search.config import EbookSearchConfig
logger = logging.getLogger(__name__)
BM25_CANDIDATE_LIMIT = 120
@dataclass(frozen=True)
class SearchResult:
"""One source chunk returned by search."""
chunk_id: int
text: str
source_title: str
score: float = 0.0
vector_score: float | None = None
bm25_score: float | None = None
fused_score: float | None = None
rerank_score: float | None = None
source_author: str | None = None
chapter_title: str | None = None
page_label: str | None = None
rank_source: str = "Hybrid"
@dataclass(frozen=True)
class SearchResponse:
"""Search output for the UI."""
query: str
results: list[SearchResult]
rank_label: str
timings: tuple[RuntimeStep, ...] = ()
@property
def total_runtime_ms(self) -> float:
"""Return total measured runtime for the response."""
return sum(step.duration_ms for step in self.timings if step.counts_toward_total)
@dataclass(frozen=True)
class RetrievalResponse:
"""Parallel retrieval output for vector and BM25 candidates."""
vector_results: list[SearchResult]
lexical_results: list[SearchResult]
timings: tuple[RuntimeStep, ...]
def search_ebooks(
engine: Engine,
query: str,
config: EbookSearchConfig,
*,
rerank: bool = False,
) -> SearchResponse:
"""Run hybrid vector/BM25 search and optional reranking."""
if not query.strip():
logger.info("ebook_search_empty_query")
return SearchResponse(query=query, results=[], rank_label="Hybrid")
logger.info("ebook_search_start query_length=%s rerank=%s", len(query), rerank)
timings: list[RuntimeStep] = []
bm25_query, timing = timed_result("BM25 query preparation", retrieval_query_from_text, query)
timings.append(timing)
retrieval, timing = timed_result(
"Hybrid retrieval",
parallel_retrieval,
engine,
query,
bm25_query,
config,
)
timings.extend(retrieval.timings)
timings.append(timing)
fused, timing = timed_result(
"Reciprocal rank fusion",
reciprocal_rank_fusion,
retrieval.vector_results,
retrieval.lexical_results,
)
timings.append(timing)
if config.rerank.enabled and rerank:
response, timing = timed_result("Rerank", apply_rerank, query, fused, config)
else:
response, timing = timed_result("Rerank skipped", skip_rerank, query, fused, config)
timings.append(timing)
response = replace(response, timings=tuple(timings))
logger.info(
"ebook_search_complete vector_candidates=%s lexical_candidates=%s "
"fused_candidates=%s returned=%s rank_label=%s runtime_ms=%.1f",
len(retrieval.vector_results),
len(retrieval.lexical_results),
len(fused),
len(response.results),
response.rank_label,
response.total_runtime_ms,
)
return response
def parallel_retrieval(
engine: Engine,
vector_query: str,
bm25_query: str,
config: EbookSearchConfig,
) -> RetrievalResponse:
"""Run vector and BM25 candidate retrieval concurrently with separate database sessions."""
with ThreadPoolExecutor(max_workers=2, thread_name_prefix="ebook-search") as executor:
vector_future = executor.submit(
timed_result,
"Embedding + vector search",
vector_candidates,
engine,
vector_query,
config,
)
bm25_future = executor.submit(
timed_result,
"BM25 search",
bm25_candidates,
bm25_query,
config,
)
vector_results, vector_timing = vector_future.result()
lexical_results, lexical_timing = bm25_future.result()
logger.info(
"ebook_parallel_retrieval_complete vector_candidates=%s lexical_candidates=%s",
len(vector_results),
len(lexical_results),
)
return RetrievalResponse(
vector_results=vector_results,
lexical_results=lexical_results,
timings=(
replace(vector_timing, counts_toward_total=False),
replace(lexical_timing, counts_toward_total=False),
),
)
def skip_rerank(
query: str,
candidates: list[SearchResult],
config: EbookSearchConfig,
) -> SearchResponse:
"""Return fused hybrid results without reranking."""
logger.info("ebook_rerank_skipped candidates=%s", len(candidates))
return SearchResponse(query=query, results=candidates[: config.top_k], rank_label="Hybrid")
def apply_rerank(
query: str,
candidates: list[SearchResult],
config: EbookSearchConfig,
) -> SearchResponse:
"""Rerank already-fused hybrid candidates."""
reranked = rerank_chunks(query, candidates[: config.rerank.candidates], config.rerank)
logger.info(
"ebook_rerank_complete input_candidates=%s returned=%s",
min(len(candidates), config.rerank.candidates),
len(reranked),
)
return SearchResponse(
query=query,
results=[replace(result, rank_source="Hybrid + rerank") for result in reranked[: config.top_k]],
rank_label="Hybrid + rerank",
)
def vector_candidates(engine: Engine, query: str, config: EbookSearchConfig) -> list[SearchResult]:
"""Return pgvector cosine candidates for a natural-language query."""
with Session(engine) as session:
model = session.scalar(select(EbookEmbeddingModel).where(EbookEmbeddingModel.name == config.embedding_model))
if model is None:
msg = f"Embedding model is not registered: {config.embedding_model}"
raise ValueError(msg)
expected_dimension = MODEL_DIMENSIONS[config.embedding_model]
if model.dimension != expected_dimension:
msg = f"Model row dimension {model.dimension} does not match configured dimension {expected_dimension}"
raise ValueError(msg)
embedding = embed_query(query, config)
limit = max(config.rerank.candidates, config.top_k) * 4
embedding_table = get_embedding_table(model.dimension)
embedding_param = literal(embedding, type_=Vector(model.dimension))
distance = embedding_table.embedding.op("<=>")(embedding_param)
score = (literal(1.0) - distance).label("score")
statement = (
select(
EbookChunk.id.label("chunk_id"),
EbookChunk.text.label("text"),
EbookSource.title.label("source_title"),
EbookSource.author.label("source_author"),
EbookChapter.title.label("chapter_title"),
EbookChunk.page_label.label("page_label"),
score,
)
.select_from(embedding_table)
.join(EbookChunk, EbookChunk.id == embedding_table.chunk_id)
.join(EbookSource, EbookSource.id == EbookChunk.source_id)
.outerjoin(EbookChapter, EbookChapter.id == EbookChunk.chapter_id)
.where(embedding_table.model_id == model.id)
.order_by(distance)
.limit(limit)
)
rows = session.execute(statement).mappings()
results = [search_result_from_row(row) for row in rows]
logger.info(
"ebook_vector_search_complete model=%s dimension=%s candidates=%s",
config.embedding_model,
model.dimension,
len(results),
)
return results
def bm25_candidates(query: str, config: EbookSearchConfig) -> list[SearchResult]:
"""Return BM25-ranked lexical candidates using the persisted corpus."""
corpus = load_bm25_corpus(config)
if not corpus.records:
logger.info("ebook_bm25_search_complete corpus=0 candidates=0")
return []
scored_records = score_bm25_corpus(query, corpus, limit=BM25_CANDIDATE_LIMIT)
results = [
replace(search_result_from_row(record), score=score, vector_score=None, bm25_score=score)
for record, score in scored_records
]
max_score = results[0].bm25_score if results else 0.0
logger.info(
"ebook_bm25_search_complete corpus=%s candidates=%s max_score=%.6f",
len(corpus.records),
len(results),
max_score,
)
return results
def reciprocal_rank_fusion(
vector_results: list[SearchResult],
lexical_results: list[SearchResult],
*,
rank_constant: int = 60,
) -> list[SearchResult]:
"""Fuse vector and lexical rankings with Reciprocal Rank Fusion."""
by_chunk: dict[int, SearchResult] = {}
scores: dict[int, float] = {}
vector_scores: dict[int, float] = {}
bm25_scores: dict[int, float] = {}
for rank, result in enumerate(vector_results, start=1):
by_chunk.setdefault(result.chunk_id, result)
vector_scores[result.chunk_id] = result.vector_score if result.vector_score is not None else result.score
scores[result.chunk_id] = scores.get(result.chunk_id, 0.0) + (1 / (rank_constant + rank))
for rank, result in enumerate(lexical_results, start=1):
by_chunk.setdefault(result.chunk_id, result)
bm25_scores[result.chunk_id] = result.bm25_score if result.bm25_score is not None else result.score
scores[result.chunk_id] = scores.get(result.chunk_id, 0.0) + (1 / (rank_constant + rank))
return sorted(
(
replace(
result,
score=scores[result.chunk_id],
vector_score=vector_scores.get(result.chunk_id),
bm25_score=bm25_scores.get(result.chunk_id),
fused_score=scores[result.chunk_id],
rank_source="Hybrid",
)
for result in by_chunk.values()
),
key=lambda result: result.score,
reverse=True,
)
def search_result_from_row(row: Mapping[str, object]) -> SearchResult:
"""Convert a database row mapping into a search result."""
return SearchResult(
chunk_id=int(row["chunk_id"]),
text=str(row["text"]),
source_title=str(row["source_title"]),
source_author=optional_str(row["source_author"]),
chapter_title=optional_str(row["chapter_title"]),
page_label=optional_str(row["page_label"]),
score=float(row["score"]) if "score" in row else 0.0,
vector_score=float(row["score"]) if "score" in row else None,
)
def optional_str(value: object) -> str | None:
"""Convert nullable database values to optional strings."""
if value is None:
return None
return str(value)
TOKEN_RE = re.compile(r"[A-Za-z0-9_]+")
def tokens(text_value: str) -> list[str]:
"""Extract tokens from a text value.
This is a simple approximation of the tokenization used by PostgreSQL's full-text search,
which is sufficient for BM25 candidate retrieval. It lowercases tokens and includes alphanumeric characters and
underscores.
"""
return [match.group(0).lower() for match in TOKEN_RE.finditer(text_value)]
QUERY_STOP_WORDS = {
"a",
"an",
"and",
"are",
"as",
"at",
"does",
"for",
"in",
"is",
"of",
"the",
"to",
"what",
"when",
"where",
"which",
"who",
"why",
}
def retrieval_query_from_text(query: str) -> str:
"""Remove generic question words while preserving entity and series terms."""
keywords = [token for token in tokens(query) if token not in QUERY_STOP_WORDS]
if not keywords:
return query
return " ".join(keywords)
-36
View File
@@ -1,36 +0,0 @@
"""Runtime timing helpers for EPUB search."""
from __future__ import annotations
from dataclasses import dataclass
from time import perf_counter
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from collections.abc import Callable
@dataclass(frozen=True)
class RuntimeStep:
"""Elapsed runtime for one named search step."""
name: str
duration_ms: float
counts_toward_total: bool = True
def runtime_step_from_start(name: str, start_seconds: float) -> RuntimeStep:
"""Create a runtime step from a prior perf_counter timestamp."""
return RuntimeStep(name=name, duration_ms=(perf_counter() - start_seconds) * 1000)
def timed_result[T, **P](
name: str,
operation: Callable[P, T],
*args: P.args,
**kwargs: P.kwargs,
) -> tuple[T, RuntimeStep]:
"""Run an operation and return its result plus elapsed runtime."""
start_seconds = perf_counter()
result = operation(*args, **kwargs)
return result, runtime_step_from_start(name, start_seconds)
-1
View File
@@ -1 +0,0 @@
"""Detect Nix evaluation warnings from build logs and create PRs with LLM-suggested fixes."""
-449
View File
@@ -1,449 +0,0 @@
"""Detect Nix evaluation warnings and create PRs with LLM-suggested fixes."""
from __future__ import annotations
import hashlib
import logging
import re
import subprocess
from dataclasses import dataclass
from io import BytesIO
from pathlib import Path
from typing import Annotated
from zipfile import ZipFile
import typer
from httpx import HTTPError, post
from python.common import configure_logger
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class EvalWarning:
"""A single Nix evaluation warning."""
system: str
message: str
@dataclass
class FileChange:
"""A file change suggested by the LLM."""
file_path: str
original: str
fixed: str
def run_cmd(cmd: list[str], *, check: bool = True) -> subprocess.CompletedProcess[str]:
"""Run a subprocess command and return the result.
Args:
cmd: Command and arguments.
check: Whether to raise on non-zero exit.
Returns:
CompletedProcess with captured stdout/stderr.
"""
logger.debug("Running: %s", " ".join(cmd))
return subprocess.run(cmd, capture_output=True, text=True, check=check)
def download_logs(run_id: str, repo: str) -> dict[str, str]:
"""Download build logs for a GitHub Actions run.
Args:
run_id: The workflow run ID.
repo: The GitHub repository (owner/repo).
Returns:
Dict mapping zip entry names to their text content, filtered to build log files.
Raises:
RuntimeError: If log download fails.
"""
result = subprocess.run(
["gh", "api", f"repos/{repo}/actions/runs/{run_id}/logs"],
capture_output=True,
check=False,
)
if result.returncode != 0:
msg = f"Failed to download logs: {result.stderr.decode(errors='replace')}"
raise RuntimeError(msg)
logs: dict[str, str] = {}
with ZipFile(BytesIO(result.stdout)) as zip_file:
for name in zip_file.namelist():
if name.startswith("build-") and name.endswith(".txt"):
logs[name] = zip_file.read(name).decode(errors="replace")
return logs
def parse_warnings(logs: dict[str, str]) -> set[EvalWarning]:
"""Parse Nix evaluation warnings from build log contents.
Args:
logs: Dict mapping zip entry names (e.g. "build-bob/2_Build.txt") to their text.
Returns:
Deduplicated set of warnings.
"""
warnings: set[EvalWarning] = set()
warning_pattern = re.compile(r"(?:^[\d\-T:.Z]+ )?(warning:|trace: warning:)")
timestamp_prefix = re.compile(r"^[\d\-T:.Z]+ ")
for name, content in sorted(logs.items()):
system = name.split("/")[0].removeprefix("build-")
for line in content.splitlines():
if warning_pattern.search(line):
message = timestamp_prefix.sub("", line).strip()
if message.startswith("warning: ignoring untrusted flake configuration setting"):
continue
logger.debug(f"Found warning: {line}")
warnings.add(EvalWarning(system=system, message=message))
logger.info("Found %d unique warnings", len(warnings))
return warnings
def extract_referenced_files(warnings: set[EvalWarning]) -> dict[str, str]:
"""Extract file paths referenced in warnings and read their contents.
Args:
warnings: List of parsed warnings.
Returns:
Dict mapping repo-relative file paths to their contents.
"""
paths: set[str] = set()
warning_text = "\n".join(w.message for w in warnings)
nix_store_path = re.compile(r"/nix/store/[^/]+-source/([^:]+\.nix)")
for match in nix_store_path.finditer(warning_text):
paths.add(match.group(1))
repo_relative_path = re.compile(r"(?<![/\w])(systems|common|users|overlays)/[^:\s]+\.nix")
for match in repo_relative_path.finditer(warning_text):
paths.add(match.group(0))
files: dict[str, str] = {}
for path_str in sorted(paths):
path = Path(path_str)
if path.is_file():
files[path_str] = path.read_text()
if not files and Path("flake.nix").is_file():
files["flake.nix"] = Path("flake.nix").read_text()
logger.info("Extracted %d referenced files", len(files))
return files
def compute_warning_hash(warnings: set[EvalWarning]) -> str:
"""Compute a short hash of the warning set for deduplication.
Args:
warnings: List of warnings.
Returns:
8-character hex hash.
"""
text = "\n".join(sorted(f"[{w.system}] {w.message}" for w in warnings))
return hashlib.sha256(text.encode()).hexdigest()[:8]
def check_duplicate_pr(warning_hash: str) -> bool:
"""Check if an open PR already exists for this warning hash.
Args:
warning_hash: The hash to check.
Returns:
True if a duplicate PR exists.
Raises:
RuntimeError: If the gh CLI call fails.
"""
result = run_cmd(
[
"gh",
"pr",
"list",
"--state",
"open",
"--label",
"eval-warning-fix",
"--json",
"title",
"--jq",
".[].title",
],
check=False,
)
if result.returncode != 0:
msg = f"Failed to check for duplicate PRs: {result.stderr}"
raise RuntimeError(msg)
for title in result.stdout.splitlines():
if warning_hash in title:
logger.info("Duplicate PR found for hash %s", warning_hash)
return True
return False
def query_ollama(
warnings: set[EvalWarning],
files: dict[str, str],
ollama_url: str,
) -> str | None:
"""Query Ollama for a fix suggestion.
Args:
warnings: List of warnings.
files: Referenced file contents.
ollama_url: Ollama API base URL.
Returns:
LLM response text, or None on failure.
"""
warning_text = "\n".join(f"[{w.system}] {w.message}" for w in warnings)
file_context = "\n".join(f"--- FILE: {path} ---\n{content}\n--- END FILE ---" for path, content in files.items())
prompt = f"""You are a NixOS configuration expert. \
Analyze the following Nix evaluation warnings and suggest fixes.
## Warnings
{warning_text}
## Referenced Files
{file_context}
## Instructions
- Identify the root cause of each warning
- Provide the exact file changes needed to fix the warnings
- Output your response in two clearly separated sections:
1. **REASONING**: Brief explanation of what causes each warning and how to fix it
2. **CHANGES**: For each file that needs changes, output a block like:
FILE: path/to/file.nix
<<<<<<< ORIGINAL
the original lines to replace
=======
the replacement lines
>>>>>>> FIXED
- Only suggest changes for files that exist in the repository
- Do not add unnecessary complexity
- Preserve the existing code style
- If a warning comes from upstream nixpkgs and cannot be fixed in this repo, \
say so in REASONING and do not suggest changes"""
try:
response = post(
f"{ollama_url}/api/generate",
json={
"model": "qwen3-coder:30b",
"prompt": prompt,
"stream": False,
"options": {"num_predict": 4096},
},
timeout=300,
)
response.raise_for_status()
except HTTPError:
logger.exception("Ollama request failed")
return None
return response.json().get("response")
def parse_changes(response: str) -> list[FileChange]:
"""Parse file changes from the **CHANGES** section of the LLM response.
Expects blocks in the format:
FILE: path/to/file.nix
<<<<<<< ORIGINAL
...
=======
...
>>>>>>> FIXED
Args:
response: Raw LLM response text.
Returns:
List of parsed file changes.
"""
if "**CHANGES**" not in response:
logger.warning("LLM response missing **CHANGES** section")
return []
changes_section = response.split("**CHANGES**", 1)[1]
changes: list[FileChange] = []
current_file = ""
section: str | None = None
original_lines: list[str] = []
fixed_lines: list[str] = []
for line in changes_section.splitlines():
stripped = line.strip()
if stripped.startswith("FILE:"):
current_file = stripped.removeprefix("FILE:").strip()
elif stripped == "<<<<<<< ORIGINAL":
section = "original"
original_lines = []
elif stripped == "=======" and section == "original":
section = "fixed"
fixed_lines = []
elif stripped == ">>>>>>> FIXED" and section == "fixed":
section = None
if current_file:
changes.append(FileChange(current_file, "\n".join(original_lines), "\n".join(fixed_lines)))
elif section == "original":
original_lines.append(line)
elif section == "fixed":
fixed_lines.append(line)
logger.info("Parsed %d file changes", len(changes))
return changes
def apply_changes(changes: list[FileChange]) -> int:
"""Apply file changes to the working directory.
Args:
changes: List of changes to apply.
Returns:
Number of changes successfully applied.
"""
applied = 0
cwd = Path.cwd().resolve()
for change in changes:
path = Path(change.file_path).resolve()
if not path.is_relative_to(cwd):
logger.warning("Path traversal blocked: %s", change.file_path)
continue
if not path.is_file():
logger.warning("File not found: %s", change.file_path)
continue
content = path.read_text()
if change.original not in content:
logger.warning("Original text not found in %s", change.file_path)
continue
path.write_text(content.replace(change.original, change.fixed, 1))
logger.info("Applied fix to %s", change.file_path)
applied += 1
return applied
def create_pr(
warning_hash: str,
warnings: set[EvalWarning],
llm_response: str,
run_url: str,
) -> None:
"""Create a git branch and PR with the applied fixes.
Args:
warning_hash: Short hash for branch naming and deduplication.
warnings: Original warnings for the PR body.
llm_response: Full LLM response for extracting reasoning.
run_url: URL to the triggering build run.
"""
branch = f"fix/eval-warning-{warning_hash}"
warning_text = "\n".join(f"[{w.system}] {w.message}" for w in warnings)
if "**REASONING**" not in llm_response:
logger.warning("LLM response missing **REASONING** section")
reasoning = ""
else:
_, after = llm_response.split("**REASONING**", 1)
reasoning = "\n".join(after.split("**CHANGES**", 1)[0].strip().splitlines()[:50])
run_cmd(["git", "config", "user.name", "github-actions[bot]"])
run_cmd(["git", "config", "user.email", "github-actions[bot]@users.noreply.github.com"])
run_cmd(["git", "checkout", "-b", branch])
run_cmd(["git", "add", "-A"])
diff_result = run_cmd(["git", "diff", "--cached", "--quiet"], check=False)
if diff_result.returncode == 0:
logger.info("No file changes to commit")
return
run_cmd(["git", "commit", "-m", f"fix: resolve nix evaluation warnings ({warning_hash})"])
run_cmd(["git", "push", "origin", branch, "--force"])
body = f"""## Nix Evaluation Warnings
Detected in [build_systems run]({run_url}):
```
{warning_text}
```
## LLM Analysis (qwen3-coder:30b)
{reasoning}
---
*Auto-generated by fix_eval_warnings. Review carefully before merging.*"""
run_cmd(
[
"gh",
"pr",
"create",
"--title",
f"fix: resolve nix eval warnings ({warning_hash})",
"--label",
"automated",
"--label",
"eval-warning-fix",
"--body",
body,
]
)
logger.info("PR created on branch %s", branch)
def main(
run_id: Annotated[str, typer.Option("--run-id", help="GitHub Actions run ID")],
repo: Annotated[str, typer.Option("--repo", help="GitHub repository (owner/repo)")],
ollama_url: Annotated[str, typer.Option("--ollama-url", help="Ollama API base URL")],
run_url: Annotated[str, typer.Option("--run-url", help="URL to the triggering build run")],
log_level: Annotated[str, typer.Option("--log-level", "-l", help="Log level")] = "INFO",
) -> None:
"""Detect Nix evaluation warnings and create PRs with LLM-suggested fixes."""
configure_logger(log_level)
logs = download_logs(run_id, repo)
warnings = parse_warnings(logs)
if not warnings:
return
warning_hash = compute_warning_hash(warnings)
if check_duplicate_pr(warning_hash):
return
files = extract_referenced_files(warnings)
llm_response = query_ollama(warnings, files, ollama_url)
if not llm_response:
return
changes = parse_changes(llm_response)
applied = apply_changes(changes)
if applied == 0:
logger.info("No changes could be applied")
return
create_pr(warning_hash, warnings, llm_response, run_url)
if __name__ == "__main__":
typer.run(main)
-6
View File
@@ -1,6 +0,0 @@
"""Reusable FastAPI tools."""
from python.fastapi_tools.db import DbSession, get_db
from python.fastapi_tools.zstd_middleware import ZstdMiddleware
__all__ = ["DbSession", "ZstdMiddleware", "get_db"]
-16
View File
@@ -1,16 +0,0 @@
"""FastAPI dependencies."""
from collections.abc import Iterator
from typing import Annotated
from fastapi import Depends, Request
from sqlalchemy.orm import Session
def get_db(request: Request) -> Iterator[Session]:
"""Get database session from app state."""
with Session(request.app.state.engine) as session:
yield session
DbSession = Annotated[Session, Depends(get_db)]
-49
View File
@@ -1,49 +0,0 @@
"""Zstd response compression middleware."""
from compression import zstd
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.requests import Request
from starlette.responses import Response
MINIMUM_RESPONSE_SIZE = 500
class ZstdMiddleware(BaseHTTPMiddleware):
"""Middleware that compresses responses with zstd when the client supports it."""
async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
"""Compress the response with zstd if the client accepts it."""
accepted_encodings = request.headers.get("accept-encoding", "")
if "zstd" not in accepted_encodings:
return await call_next(request)
response = await call_next(request)
if response.headers.get("content-encoding") or "text/event-stream" in response.headers.get("content-type", ""):
return response
body = b""
async for chunk in response.body_iterator:
body += chunk if isinstance(chunk, bytes) else chunk.encode()
if len(body) < MINIMUM_RESPONSE_SIZE:
return Response(
content=body,
status_code=response.status_code,
headers=dict(response.headers),
media_type=response.media_type,
)
compressed = zstd.compress(body)
headers = dict(response.headers)
headers["content-encoding"] = "zstd"
headers["content-length"] = str(len(compressed))
headers.pop("transfer-encoding", None)
return Response(
content=compressed,
status_code=response.status_code,
headers=headers,
media_type=response.media_type,
)

Some files were not shown because too many files have changed in this diff Show More