setting up resource monitoring for bob and jeeves
This commit is contained in:
@@ -0,0 +1,256 @@
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
let
|
||||
monitoringInterface = "ztwfunumly";
|
||||
nodeTextfileDir = "/var/lib/prometheus-node-exporter-textfile";
|
||||
|
||||
# Returns the process-exporter group-name template to use for a matcher.
#   perPid   - when true, ":{{.PID}}:{{.StartTime}}" is appended to the template.
#   template - base name template, returned unchanged when perPid is false.
mkProcessNameTemplate =
  perPid: template:
  let
    pidSuffix = if perPid then ":{{.PID}}:{{.StartTime}}" else "";
  in
  template + pidSuffix;
|
||||
|
||||
# Builds the `process_names` matcher list for process-exporter.
#   perPid - forwarded to mkProcessNameTemplate for every matcher's name.
# Each rule below pairs a base name template with its cmdline regexes; the
# list order is kept exactly as written, from most specific to the catch-all.
mkProcessMatchers =
  perPid:
  map
    (rule: {
      name = mkProcessNameTemplate perPid rule.template;
      cmdline = rule.patterns;
    })
    [
      # Store-path python started with `-m <module>`: named after the module.
      {
        template = "{{.Username}}:{{.Matches.Module}}";
        patterns = [ "^/nix/store[^ ]*/bin/python[^ ]* -m (?P<Module>[^ ]+)" ];
      }
      # Store-path python running a script from a store `bin/` directory:
      # named after the script, without a leading dot or `-wrapped` suffix.
      {
        template = "{{.Username}}:{{.Matches.Wrapped}}";
        patterns = [
          "^/nix/store[^ ]*/bin/python[^ ]* /nix/store[^ ]*/bin/\\.?(?P<Wrapped>[^ /]+?)(?:-wrapped)?(?:\\s|$)"
        ];
      }
      # Store-path node running something inside another store path: named
      # after the part of that store path captured before `-<digit>...`.
      {
        template = "{{.Username}}:{{.Matches.Wrapped}}";
        patterns = [
          "^/nix/store[^ ]*/bin/node /nix/store[^ ]*-(?P<Wrapped>[A-Za-z0-9._+-]+)-[0-9][^ /]*/"
        ];
      }
      # Any other command line starting in /nix/store: named after the final
      # path component, without a leading dot or `-wrapped` suffix.
      {
        template = "{{.Username}}:{{.Matches.Wrapped}}";
        patterns = [ "^/nix/store[^ ]*/(?:bin/|lib/[^ ]*/)?\\.?(?P<Wrapped>[^ /]+?)(?:-wrapped)?(?:\\s|$)" ];
      }
      # Catch-all: any non-empty command line, named by {{.ExeBase}}.
      {
        template = "{{.Username}}:{{.ExeBase}}";
        patterns = [ ".+" ];
      }
    ];
|
||||
|
||||
# Store file holding the process-exporter configuration for the per-PID
# instance. The content is the JSON rendering of the per-PID matcher list
# (mkProcessMatchers true), written under a `.yaml` file name.
perPidConfig =
  let
    perPidSettings = {
      process_names = mkProcessMatchers true;
    };
  in
  pkgs.writeText "process-exporter-per-pid.yaml" (builtins.toJSON perPidSettings);
|
||||
|
||||
# Shell script that takes one `zpool iostat` sample and writes the wait-time
# columns (fields 8-15 of the tab-separated output) as Prometheus gauges to
# `zpool.prom` inside nodeTextfileDir.
#
# Rows whose first field is a known pool name are emitted with vdev="_pool";
# every other row is attributed to the most recently seen pool and uses its
# first field as the vdev label. Values that are empty or "-" are skipped.
#
# The output is built in a temporary file in the same directory and renamed
# into place, so a reader never sees a partially written file. The temporary
# file is removed if the script exits early.
zpoolLatencyScript = pkgs.writeShellScript "zpool-latency-exporter" ''
  set -euo pipefail

  out_dir=${lib.escapeShellArg nodeTextfileDir}
  host=${lib.escapeShellArg config.networking.hostName}
  tmp_file="$(mktemp "$out_dir/zpool.prom.XXXXXX")"
  trap 'rm -f "$tmp_file"' EXIT

  pools="$(zpool list -H -o name | paste -sd, -)"

  cat >"$tmp_file" <<'EOF'
  # HELP zpool_iostat_total_wait_read_ns Average total read wait time reported by zpool iostat.
  # TYPE zpool_iostat_total_wait_read_ns gauge
  # HELP zpool_iostat_total_wait_write_ns Average total write wait time reported by zpool iostat.
  # TYPE zpool_iostat_total_wait_write_ns gauge
  # HELP zpool_iostat_disk_wait_read_ns Average disk read wait time reported by zpool iostat.
  # TYPE zpool_iostat_disk_wait_read_ns gauge
  # HELP zpool_iostat_disk_wait_write_ns Average disk write wait time reported by zpool iostat.
  # TYPE zpool_iostat_disk_wait_write_ns gauge
  # HELP zpool_iostat_syncq_wait_read_ns Average synchronous queue read wait time reported by zpool iostat.
  # TYPE zpool_iostat_syncq_wait_read_ns gauge
  # HELP zpool_iostat_syncq_wait_write_ns Average synchronous queue write wait time reported by zpool iostat.
  # TYPE zpool_iostat_syncq_wait_write_ns gauge
  # HELP zpool_iostat_asyncq_wait_read_ns Average asynchronous queue read wait time reported by zpool iostat.
  # TYPE zpool_iostat_asyncq_wait_read_ns gauge
  # HELP zpool_iostat_asyncq_wait_write_ns Average asynchronous queue write wait time reported by zpool iostat.
  # TYPE zpool_iostat_asyncq_wait_write_ns gauge
  EOF

  zpool iostat -Hplvy 1 1 | awk -F '\t' -v host="$host" -v pools="$pools" '
    function esc(str, out) {
      out = str
      gsub(/\\/, "\\\\", out)
      gsub(/"/, "\\\"", out)
      return out
    }

    function emit(metric, pool, vdev, value) {
      if (value == "" || value == "-") {
        return
      }

      printf "%s{host=\"%s\",pool=\"%s\",vdev=\"%s\"} %s\n",
        metric,
        esc(host),
        esc(pool),
        esc(vdev),
        value
    }

    BEGIN {
      split(pools, pool_names, ",")
      for (idx in pool_names) {
        if (pool_names[idx] != "") {
          known_pools[pool_names[idx]] = 1
        }
      }
    }

    NF == 0 {
      next
    }

    {
      row_name = $1

      if (row_name in known_pools) {
        current_pool = row_name
        current_vdev = "_pool"
      } else if (current_pool == "") {
        next
      } else {
        current_vdev = row_name
      }

      emit("zpool_iostat_total_wait_read_ns", current_pool, current_vdev, $8)
      emit("zpool_iostat_total_wait_write_ns", current_pool, current_vdev, $9)
      emit("zpool_iostat_disk_wait_read_ns", current_pool, current_vdev, $10)
      emit("zpool_iostat_disk_wait_write_ns", current_pool, current_vdev, $11)
      emit("zpool_iostat_syncq_wait_read_ns", current_pool, current_vdev, $12)
      emit("zpool_iostat_syncq_wait_write_ns", current_pool, current_vdev, $13)
      emit("zpool_iostat_asyncq_wait_read_ns", current_pool, current_vdev, $14)
      emit("zpool_iostat_asyncq_wait_write_ns", current_pool, current_vdev, $15)
    }
  ' >>"$tmp_file"

  # mktemp creates the file with mode 0600 and the rename keeps that mode.
  # Make it world-readable first, otherwise a node exporter running as a
  # different, unprivileged user cannot read the published metrics.
  chmod 0644 "$tmp_file"
  mv "$tmp_file" "$out_dir/zpool.prom"
  trap - EXIT
'';
|
||||
in
|
||||
{
|
||||
# TCP ports opened only on the monitoring interface.
networking.firewall.interfaces.${monitoringInterface} = {
  allowedTCPPorts = [
    9100
    9134
    9256
    9257 # listen port of the prometheus-process-pid-exporter unit
    9633
  ];
};
|
||||
|
||||
# Node exporter with extra collectors; the textfile collector reads the
# directory that the zpool latency script writes into.
services.prometheus.exporters.node = {
  enable = true;
  enabledCollectors = [
    "pressure"
    "processes"
    "systemd"
  ];
  extraFlags = [ "--collector.textfile.directory=${nodeTextfileDir}" ];
};

# Grouped (non per-PID) process exporter, run as root.
services.prometheus.exporters.process = {
  enable = true;
  user = "root";
  group = "root";
  settings = {
    process_names = mkProcessMatchers false;
  };
  extraFlags = [
    "-gather-smaps=false"
    "-remove-empty-groups=true"
    "-threads=false"
  ];
};

services.prometheus.exporters.smartctl.enable = true;
services.prometheus.exporters.zfs.enable = true;
|
||||
|
||||
# atop with its logging service, log-rotation timer and process-accounting
# service enabled; `interval` is set to 30.
programs.atop = {
  enable = true;
  atopService = {
    enable = true;
  };
  atopRotateTimer = {
    enable = true;
  };
  atopacctService = {
    enable = true;
  };
  settings = {
    interval = 30;
  };
};
|
||||
|
||||
# systemd units for the monitoring agent:
#   - prometheus-process-pid-exporter: second process-exporter instance on
#     port 9257 using the per-PID configuration file.
#   - zpool-latency-exporter (service + timer): runs the zpool latency script
#     2 minutes after boot and then 60 seconds after each activation.
#   - a tmpfiles rule creating the node exporter textfile directory.
systemd =
  let
    # Sandboxing options applied to the per-PID exporter on top of its basic
    # service settings.
    pidExporterSandbox = {
      CapabilityBoundingSet = [ "" ];
      DeviceAllow = [ "" ];
      LockPersonality = true;
      MemoryDenyWriteExecute = true;
      NoNewPrivileges = true;
      PrivateDevices = true;
      PrivateTmp = true;
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHome = true;
      ProtectHostname = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectSystem = "strict";
      RemoveIPC = true;
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
      ];
      RestrictNamespaces = true;
      RestrictRealtime = true;
      RestrictSUIDSGID = true;
      SystemCallArchitectures = "native";
      UMask = "0077";
    };
  in
  {
    services.prometheus-process-pid-exporter = {
      description = "Prometheus process exporter with per-PID naming";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];
      serviceConfig = {
        ExecStart = ''
          ${pkgs.prometheus-process-exporter}/bin/process-exporter \
            --web.listen-address 0.0.0.0:9257 \
            --config.path ${perPidConfig} \
            -children=false \
            -gather-smaps=false \
            -remove-empty-groups=true \
            -threads=false
        '';
        User = "root";
        Group = "root";
        Restart = "always";
        WorkingDirectory = "/tmp";
      }
      // pidExporterSandbox;
    };

    services.zpool-latency-exporter = {
      description = "Exports ZFS latency metrics for node_exporter textfile collection";
      after = [ "zfs-import.target" ];
      requires = [ "zfs-import.target" ];
      # Tools the script calls: zpool, coreutils (mktemp, paste, cat, mv, rm), awk.
      path = [
        config.boot.zfs.package
        pkgs.coreutils
        pkgs.gawk
      ];
      serviceConfig = {
        Type = "oneshot";
        ExecStart = zpoolLatencyScript;
      };
    };

    timers.zpool-latency-exporter = {
      wantedBy = [ "timers.target" ];
      timerConfig = {
        OnBootSec = "2m";
        OnUnitActiveSec = "60s";
        Unit = "zpool-latency-exporter.service";
      };
    };

    tmpfiles.rules = [ "d ${nodeTextfileDir} 0755 root root - -" ];
  };
|
||||
}
|
||||
@@ -7,6 +7,7 @@
|
||||
"${inputs.self}/common/global"
|
||||
"${inputs.self}/common/optional/docker.nix"
|
||||
"${inputs.self}/common/optional/scanner.nix"
|
||||
"${inputs.self}/common/optional/monitoring-agent.nix"
|
||||
"${inputs.self}/common/optional/steam.nix"
|
||||
"${inputs.self}/common/optional/syncthing_base.nix"
|
||||
"${inputs.self}/common/optional/systemd-boot.nix"
|
||||
|
||||
@@ -10,10 +10,12 @@ in
|
||||
"${inputs.self}/users/steve"
|
||||
"${inputs.self}/common/global"
|
||||
"${inputs.self}/common/optional/docker.nix"
|
||||
"${inputs.self}/common/optional/monitoring-agent.nix"
|
||||
"${inputs.self}/common/optional/ssh_decrypt.nix"
|
||||
"${inputs.self}/common/optional/syncthing_base.nix"
|
||||
"${inputs.self}/common/optional/update.nix"
|
||||
"${inputs.self}/common/optional/zerotier.nix"
|
||||
./monitoring
|
||||
./docker
|
||||
./services
|
||||
./web_services
|
||||
|
||||
@@ -0,0 +1,426 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))",
|
||||
"legendFormat": "{{instance}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "CPU Used",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))",
|
||||
"legendFormat": "{{instance}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "RAM Used",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes))",
|
||||
"legendFormat": "{{instance}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Swap Used",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "node_load1",
|
||||
"legendFormat": "{{instance}} load1",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "node_load5",
|
||||
"legendFormat": "{{instance}} load5",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "node_load15",
|
||||
"legendFormat": "{{instance}} load15",
|
||||
"range": true,
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Load",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by (instance) (rate(node_disk_read_bytes_total[5m]))",
|
||||
"legendFormat": "{{instance}} read",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by (instance) (rate(node_disk_written_bytes_total[5m]))",
|
||||
"legendFormat": "{{instance}} write",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Disk Throughput",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (1 - (node_filesystem_avail_bytes{mountpoint=~\"(/|/home|/var|/zfs.*)\",fstype!=\"\"} / node_filesystem_size_bytes{mountpoint=~\"(/|/home|/var|/zfs.*)\",fstype!=\"\"}))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{mountpoint}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Filesystem Usage",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 17
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, rate(namedprocess_namegroup_cpu_seconds_total[5m]))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top Grouped CPU",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 17
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, namedprocess_namegroup_memory_bytes{memtype=\"resident\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top Grouped Memory",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"monitoring"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Overview",
|
||||
"uid": "monitor-overview",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,216 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, rate(namedprocess_namegroup_cpu_seconds_total[5m]))",
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Grouped CPU",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, namedprocess_namegroup_memory_bytes{memtype=\"resident\"})",
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Grouped Resident Memory",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, rate(namedprocess_namegroup_read_bytes_total[5m]))",
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Grouped Read I/O",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 10
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, rate(namedprocess_namegroup_write_bytes_total[5m]))",
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Grouped Write I/O",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"monitoring",
|
||||
"process"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-7d",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Process History Grouped",
|
||||
"uid": "monitor-process-history",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,224 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, rate(namedprocess_namegroup_cpu_seconds_total[2m]))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top PID CPU",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, namedprocess_namegroup_memory_bytes{memtype=\"resident\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top PID RSS",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, rate(namedprocess_namegroup_read_bytes_total[2m]))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top PID Read I/O",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 10
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, rate(namedprocess_namegroup_write_bytes_total[2m]))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top PID Write I/O",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "15s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"monitoring",
|
||||
"process"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-10m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Process Live PID",
|
||||
"uid": "monitor-process-pid",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,351 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (zfs_pool_allocated_bytes / zfs_pool_size_bytes)",
|
||||
"legendFormat": "{{instance}} {{pool}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pool Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "zfs_pool_free_bytes",
|
||||
"legendFormat": "{{instance}} {{pool}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pool Free Bytes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, zfs_dataset_used_bytes{type=\"filesystem\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top Filesystems by Used Bytes",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ns"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, zpool_iostat_total_wait_read_ns{vdev!=\"_pool\"})",
|
||||
"legendFormat": "{{host}} {{pool}} {{vdev}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "ZFS Read Wait",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ns"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, zpool_iostat_total_wait_write_ns{vdev!=\"_pool\"})",
|
||||
"legendFormat": "{{host}} {{pool}} {{vdev}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "ZFS Write Wait",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "celsius"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 17
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "smartctl_device_temperature{temperature_type=\"current\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{device}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk Temperature",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 17
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": false,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "smartctl_device_smart_status",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{device}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "SMART Health",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"monitoring",
|
||||
"zfs"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Storage and ZFS",
|
||||
"uid": "monitor-storage",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,182 @@
|
||||
{
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
let
|
||||
vars = import ../vars.nix;
|
||||
|
||||
|
||||
prometheusDataRoot = "${vars.database}/prometheus";
|
||||
mainPrometheusDataDir = "${prometheusDataRoot}/main";
|
||||
pidPrometheusDataDir = "${prometheusDataRoot}/pid-short";
|
||||
|
||||
prometheusYaml = pkgs.formats.yaml { };
|
||||
|
||||
mkPrometheusConfig =
|
||||
name: cfg:
|
||||
let
|
||||
configFile = prometheusYaml.generate "${name}.yaml" cfg;
|
||||
in
|
||||
pkgs.runCommand "${name}-checked.yaml"
|
||||
{
|
||||
nativeBuildInputs = [ pkgs.prometheus.cli ];
|
||||
}
|
||||
''
|
||||
promtool check config ${configFile}
|
||||
cp ${configFile} $out
|
||||
'';
|
||||
|
||||
mkTarget = host: address: {
|
||||
targets = [ address ];
|
||||
labels.instance = host;
|
||||
};
|
||||
|
||||
mainPrometheusConfig = mkPrometheusConfig "prometheus-main" {
|
||||
global = {
|
||||
scrape_interval = "30s";
|
||||
scrape_timeout = "10s";
|
||||
evaluation_interval = "30s";
|
||||
};
|
||||
scrape_configs = [
|
||||
{
|
||||
job_name = "node";
|
||||
static_configs = [
|
||||
(mkTarget "jeeves" "192.168.90.40:9100")
|
||||
(mkTarget "bob" "192.168.90.25:9100")
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "process_grouped";
|
||||
static_configs = [
|
||||
(mkTarget "jeeves" "192.168.90.40:9256")
|
||||
(mkTarget "bob" "192.168.90.25:9256")
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "smartctl";
|
||||
static_configs = [
|
||||
(mkTarget "jeeves" "192.168.90.40:9633")
|
||||
(mkTarget "bob" "192.168.90.25:9633")
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "zfs";
|
||||
static_configs = [
|
||||
(mkTarget "jeeves" "192.168.90.40:9134")
|
||||
(mkTarget "bob" "192.168.90.25:9134")
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
pidPrometheusConfig = mkPrometheusConfig "prometheus-pid-short" {
|
||||
global = {
|
||||
scrape_interval = "15s";
|
||||
scrape_timeout = "10s";
|
||||
evaluation_interval = "15s";
|
||||
};
|
||||
scrape_configs = [
|
||||
{
|
||||
job_name = "process_pid";
|
||||
static_configs = [
|
||||
(mkTarget "jeeves" "192.168.90.40:9257")
|
||||
(mkTarget "bob" "192.168.90.25:9257")
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
mkPrometheusService =
|
||||
{
|
||||
dataDir,
|
||||
configFile,
|
||||
port,
|
||||
retention,
|
||||
}:
|
||||
{
|
||||
after = [ "network.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig = {
|
||||
ExecStart = "${lib.getExe pkgs.prometheus} ${
|
||||
lib.escapeShellArgs [
|
||||
"--config.file=${configFile}"
|
||||
"--storage.tsdb.path=${dataDir}"
|
||||
"--storage.tsdb.retention.time=${retention}"
|
||||
"--web.listen-address=127.0.0.1:${toString port}"
|
||||
]
|
||||
}";
|
||||
User = "prometheus";
|
||||
Group = "prometheus";
|
||||
Restart = "always";
|
||||
RestartSec = "5s";
|
||||
WorkingDirectory = dataDir;
|
||||
ReadWritePaths = [ dataDir ];
|
||||
CapabilityBoundingSet = [ "" ];
|
||||
DeviceAllow = [ "/dev/null rw" ];
|
||||
DevicePolicy = "strict";
|
||||
LockPersonality = true;
|
||||
MemoryDenyWriteExecute = true;
|
||||
NoNewPrivileges = true;
|
||||
PrivateDevices = true;
|
||||
PrivateTmp = true;
|
||||
ProtectClock = true;
|
||||
ProtectControlGroups = true;
|
||||
ProtectHome = true;
|
||||
ProtectHostname = true;
|
||||
ProtectKernelLogs = true;
|
||||
ProtectKernelModules = true;
|
||||
ProtectKernelTunables = true;
|
||||
ProtectProc = "invisible";
|
||||
ProtectSystem = "strict";
|
||||
RemoveIPC = true;
|
||||
RestrictAddressFamilies = [
|
||||
"AF_INET"
|
||||
"AF_INET6"
|
||||
"AF_UNIX"
|
||||
];
|
||||
RestrictNamespaces = true;
|
||||
RestrictRealtime = true;
|
||||
RestrictSUIDSGID = true;
|
||||
SystemCallArchitectures = "native";
|
||||
SystemCallFilter = [
|
||||
"@system-service"
|
||||
"~@privileged"
|
||||
];
|
||||
};
|
||||
};
|
||||
in
|
||||
{
|
||||
users = {
|
||||
groups.prometheus = { };
|
||||
users.prometheus = {
|
||||
isSystemUser = true;
|
||||
group = "prometheus";
|
||||
description = "Prometheus daemon user";
|
||||
};
|
||||
};
|
||||
|
||||
systemd = {
|
||||
services = {
|
||||
prometheus-main = mkPrometheusService {
|
||||
configFile = mainPrometheusConfig;
|
||||
dataDir = mainPrometheusDataDir;
|
||||
port = 9090;
|
||||
retention = "90d";
|
||||
};
|
||||
|
||||
prometheus-pid-short = mkPrometheusService {
|
||||
configFile = pidPrometheusConfig;
|
||||
dataDir = pidPrometheusDataDir;
|
||||
port = 9092;
|
||||
retention = "10m";
|
||||
};
|
||||
};
|
||||
|
||||
tmpfiles.rules = [
|
||||
"d ${prometheusDataRoot} 0755 root root - -"
|
||||
"d ${mainPrometheusDataDir} 0750 prometheus prometheus - -"
|
||||
"d ${pidPrometheusDataDir} 0750 prometheus prometheus - -"
|
||||
];
|
||||
};
|
||||
}
|
||||
@@ -23,6 +23,7 @@ sudo zfs create media/secure/home_assistant -o compression=zstd-19
|
||||
sudo zfs create media/secure/notes -o copies=2
|
||||
sudo zfs create media/secure/postgres -o mountpoint=/zfs/media/database/postgres -o recordsize=16k -o primarycache=metadata
|
||||
sudo zfs create media/secure/postgres-wal -o mountpoint=/zfs/media/database/postgres-wal -o recordsize=32k -o primarycache=metadata -o special_small_blocks=32K -o compression=lz4 -o secondarycache=none -o logbias=latency
|
||||
sudo zfs create media/secure/prometheus -o mountpoint=/zfs/media/database/prometheus -o compression=lz4
|
||||
sudo zfs create media/secure/services -o compression=zstd-9
|
||||
sudo zfs create media/secure/share -o mountpoint=/zfs/media/share -o exec=off
|
||||
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
{
|
||||
...
|
||||
}:
|
||||
let
|
||||
vars = import ../vars.nix;
|
||||
grafanaDataDir = "${vars.services}/grafana";
|
||||
in
|
||||
{
|
||||
networking.firewall.allowedTCPPorts = [ 3000 ];
|
||||
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
dataDir = grafanaDataDir;
|
||||
settings = {
|
||||
database.type = "sqlite3";
|
||||
security = {
|
||||
admin_password = "$__file{${vars.secrets}/services/grafana/admin_password}";
|
||||
admin_user = "admin";
|
||||
secret_key = "$__file{${vars.secrets}/services/grafana/secret_key}";
|
||||
};
|
||||
server = {
|
||||
http_addr = "192.168.90.40";
|
||||
http_port = 3000;
|
||||
root_url = "http://192.168.90.40:3000/"; # externally visible URL; matches http_addr and http_port in this server block
|
||||
};
|
||||
};
|
||||
provision = {
|
||||
enable = true;
|
||||
dashboards.settings = {
|
||||
apiVersion = 1;
|
||||
providers = [
|
||||
{
|
||||
name = "monitoring";
|
||||
folder = "Monitoring";
|
||||
type = "file";
|
||||
disableDeletion = false;
|
||||
editable = false;
|
||||
allowUiUpdates = false;
|
||||
updateIntervalSeconds = 30;
|
||||
options.path = ../monitoring/dashboards;
|
||||
}
|
||||
];
|
||||
};
|
||||
datasources.settings = {
|
||||
apiVersion = 1;
|
||||
prune = true;
|
||||
datasources = [
|
||||
{
|
||||
access = "proxy";
|
||||
editable = false;
|
||||
isDefault = true;
|
||||
name = "prom-main";
|
||||
type = "prometheus";
|
||||
uid = "prom-main";
|
||||
url = "http://127.0.0.1:9090";
|
||||
}
|
||||
{
|
||||
access = "proxy";
|
||||
editable = false;
|
||||
name = "prom-pid-short";
|
||||
type = "prometheus";
|
||||
uid = "prom-pid-short";
|
||||
url = "http://127.0.0.1:9092";
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
systemd = {
|
||||
services.grafana.after = [
|
||||
"prometheus-main.service"
|
||||
"prometheus-pid-short.service"
|
||||
];
|
||||
|
||||
tmpfiles.rules = [
|
||||
"d ${grafanaDataDir} 0750 grafana grafana - -"
|
||||
];
|
||||
};
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
{
|
||||
services.hedgedoc = {
|
||||
enable = true;
|
||||
settings = {
|
||||
host = "0.0.0.0";
|
||||
port = 3000;
|
||||
domain = "192.168.90.40";
|
||||
urlAddPort = true;
|
||||
protocolUseSSL = false;
|
||||
db = {
|
||||
dialect = "postgres";
|
||||
database = "hedgedoc";
|
||||
username = "hedgedoc";
|
||||
host = "/run/postgresql";
|
||||
};
|
||||
};
|
||||
};
|
||||
networking.firewall.allowedTCPPorts = [ 3000 ];
|
||||
|
||||
systemd.services.hedgedoc = {
|
||||
after = [ "postgresql.service" ];
|
||||
requires = [ "postgresql.service" ];
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user