setting up resource monitoring for bob and jeeves
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
"${inputs.self}/common/global"
|
||||
"${inputs.self}/common/optional/docker.nix"
|
||||
"${inputs.self}/common/optional/scanner.nix"
|
||||
"${inputs.self}/common/optional/monitoring-agent.nix"
|
||||
"${inputs.self}/common/optional/steam.nix"
|
||||
"${inputs.self}/common/optional/syncthing_base.nix"
|
||||
"${inputs.self}/common/optional/systemd-boot.nix"
|
||||
|
||||
@@ -10,10 +10,12 @@ in
|
||||
"${inputs.self}/users/steve"
|
||||
"${inputs.self}/common/global"
|
||||
"${inputs.self}/common/optional/docker.nix"
|
||||
"${inputs.self}/common/optional/monitoring-agent.nix"
|
||||
"${inputs.self}/common/optional/ssh_decrypt.nix"
|
||||
"${inputs.self}/common/optional/syncthing_base.nix"
|
||||
"${inputs.self}/common/optional/update.nix"
|
||||
"${inputs.self}/common/optional/zerotier.nix"
|
||||
./monitoring
|
||||
./docker
|
||||
./services
|
||||
./web_services
|
||||
|
||||
@@ -0,0 +1,426 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])))",
|
||||
"legendFormat": "{{instance}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "CPU Used",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))",
|
||||
"legendFormat": "{{instance}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "RAM Used",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes))",
|
||||
"legendFormat": "{{instance}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Swap Used",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "node_load1",
|
||||
"legendFormat": "{{instance}} load1",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "node_load5",
|
||||
"legendFormat": "{{instance}} load5",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "node_load15",
|
||||
"legendFormat": "{{instance}} load15",
|
||||
"range": true,
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Load",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by (instance) (rate(node_disk_read_bytes_total[5m]))",
|
||||
"legendFormat": "{{instance}} read",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by (instance) (rate(node_disk_written_bytes_total[5m]))",
|
||||
"legendFormat": "{{instance}} write",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Disk Throughput",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (1 - (node_filesystem_avail_bytes{mountpoint=~\"(/|/home|/var|/zfs.*)\",fstype!=\"\"} / node_filesystem_size_bytes{mountpoint=~\"(/|/home|/var|/zfs.*)\",fstype!=\"\"}))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{mountpoint}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Filesystem Usage",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 17
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, rate(namedprocess_namegroup_cpu_seconds_total[5m]))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top Grouped CPU",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 17
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, namedprocess_namegroup_memory_bytes{memtype=\"resident\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top Grouped Memory",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"monitoring"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Overview",
|
||||
"uid": "monitor-overview",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,216 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, rate(namedprocess_namegroup_cpu_seconds_total[5m]))",
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Grouped CPU",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, namedprocess_namegroup_memory_bytes{memtype=\"resident\"})",
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Grouped Resident Memory",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, rate(namedprocess_namegroup_read_bytes_total[5m]))",
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Grouped Read I/O",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 10
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(10, rate(namedprocess_namegroup_write_bytes_total[5m]))",
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Grouped Write I/O",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"monitoring",
|
||||
"process"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-7d",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Process History Grouped",
|
||||
"uid": "monitor-process-history",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,224 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, rate(namedprocess_namegroup_cpu_seconds_total[2m]))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top PID CPU",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, namedprocess_namegroup_memory_bytes{memtype=\"resident\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top PID RSS",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, rate(namedprocess_namegroup_read_bytes_total[2m]))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top PID Read I/O",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 10
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-pid-short"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, rate(namedprocess_namegroup_write_bytes_total[2m]))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{groupname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top PID Write I/O",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "15s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"monitoring",
|
||||
"process"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-10m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Process Live PID",
|
||||
"uid": "monitor-process-pid",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,351 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "100 * (zfs_pool_allocated_bytes / zfs_pool_size_bytes)",
|
||||
"legendFormat": "{{instance}} {{pool}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pool Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "zfs_pool_free_bytes",
|
||||
"legendFormat": "{{instance}} {{pool}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pool Free Bytes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, zfs_dataset_used_bytes{type=\"filesystem\"})",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top Filesystems by Used Bytes",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ns"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, zpool_iostat_total_wait_read_ns{vdev!=\"_pool\"})",
|
||||
"legendFormat": "{{host}} {{pool}} {{vdev}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "ZFS Read Wait",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ns"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk(20, zpool_iostat_total_wait_write_ns{vdev!=\"_pool\"})",
|
||||
"legendFormat": "{{host}} {{pool}} {{vdev}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "ZFS Write Wait",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "celsius"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 17
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "smartctl_device_temperature{temperature_type=\"current\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{device}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk Temperature",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 17
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": false,
|
||||
"displayName": "Value"
|
||||
}
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prom-main"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "smartctl_device_smart_status",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{instance}} {{device}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "SMART Health",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"monitoring",
|
||||
"zfs"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Storage and ZFS",
|
||||
"uid": "monitor-storage",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,182 @@
|
||||
# Two local Prometheus instances for the monitoring stack:
#   * prometheus-main      - port 9090, 30s scrape interval, 90d retention
#   * prometheus-pid-short - port 9092, 15s scrape interval, 10m retention
# Both listen on 127.0.0.1 only and keep their TSDB below
# ${vars.database}/prometheus.
{
  lib,
  pkgs,
  ...
}:
let
  vars = import ../vars.nix;

  dataRoot = "${vars.database}/prometheus";
  dataDirs = {
    main = "${dataRoot}/main";
    pidShort = "${dataRoot}/pid-short";
  };

  # Scraped machines, listed in the order they appear in every scrape job.
  hosts = [
    {
      name = "jeeves";
      ip = "192.168.90.40";
    }
    {
      name = "bob";
      ip = "192.168.90.25";
    }
  ];

  yamlFormat = pkgs.formats.yaml { };

  # Render `settings` to YAML and run `promtool check config` on it inside the
  # build, so the checked copy is only produced when promtool accepts the file.
  checkedConfig =
    name: settings:
    let
      rawYaml = yamlFormat.generate "${name}.yaml" settings;
    in
    pkgs.runCommand "${name}-checked.yaml"
      {
        nativeBuildInputs = [ pkgs.prometheus.cli ];
      }
      ''
        promtool check config ${rawYaml}
        cp ${rawYaml} $out
      '';

  # One scrape job that hits the same exporter port on every host and labels
  # each series with the host name instead of the ip:port pair.
  scrapeJob = jobName: port: {
    job_name = jobName;
    static_configs = map (host: {
      targets = [ "${host.ip}:${toString port}" ];
      labels.instance = host.name;
    }) hosts;
  };

  # Full Prometheus config for one instance; scrape and evaluation intervals
  # are kept equal, the scrape timeout is always 10s.
  instanceConfig =
    name: interval: jobs:
    checkedConfig name {
      global = {
        scrape_interval = interval;
        scrape_timeout = "10s";
        evaluation_interval = interval;
      };
      scrape_configs = jobs;
    };

  mainConfig = instanceConfig "prometheus-main" "30s" [
    (scrapeJob "node" 9100)
    (scrapeJob "process_grouped" 9256)
    (scrapeJob "smartctl" 9633)
    (scrapeJob "zfs" 9134)
  ];

  pidShortConfig = instanceConfig "prometheus-pid-short" "15s" [
    (scrapeJob "process_pid" 9257)
  ];

  # systemd sandboxing shared by both instances.
  hardening = {
    CapabilityBoundingSet = [ "" ];
    DeviceAllow = [ "/dev/null rw" ];
    DevicePolicy = "strict";
    LockPersonality = true;
    MemoryDenyWriteExecute = true;
    NoNewPrivileges = true;
    PrivateDevices = true;
    PrivateTmp = true;
    ProtectClock = true;
    ProtectControlGroups = true;
    ProtectHome = true;
    ProtectHostname = true;
    ProtectKernelLogs = true;
    ProtectKernelModules = true;
    ProtectKernelTunables = true;
    ProtectProc = "invisible";
    ProtectSystem = "strict";
    RemoveIPC = true;
    RestrictAddressFamilies = [
      "AF_INET"
      "AF_INET6"
      "AF_UNIX"
    ];
    RestrictNamespaces = true;
    RestrictRealtime = true;
    RestrictSUIDSGID = true;
    SystemCallArchitectures = "native";
    SystemCallFilter = [
      "@system-service"
      "~@privileged"
    ];
  };

  # Build the systemd unit for one Prometheus instance.
  #   dataDir    - TSDB directory; also the working directory and the only
  #                path listed in ReadWritePaths
  #   configFile - checked config produced by `instanceConfig`
  #   port       - TCP port to listen on (bound to 127.0.0.1)
  #   retention  - value passed to --storage.tsdb.retention.time
  mkPrometheusService =
    {
      dataDir,
      configFile,
      port,
      retention,
    }:
    let
      flags = [
        "--config.file=${configFile}"
        "--storage.tsdb.path=${dataDir}"
        "--storage.tsdb.retention.time=${retention}"
        "--web.listen-address=127.0.0.1:${toString port}"
      ];
    in
    {
      after = [ "network.target" ];
      wantedBy = [ "multi-user.target" ];
      serviceConfig = hardening // {
        ExecStart = lib.concatStringsSep " " [
          (lib.getExe pkgs.prometheus)
          (lib.escapeShellArgs flags)
        ];
        User = "prometheus";
        Group = "prometheus";
        Restart = "always";
        RestartSec = "5s";
        WorkingDirectory = dataDir;
        ReadWritePaths = [ dataDir ];
      };
    };
in
{
  # Dedicated system account that both instances run as.
  users.groups.prometheus = { };
  users.users.prometheus = {
    isSystemUser = true;
    group = "prometheus";
    description = "Prometheus daemon user";
  };

  systemd.services.prometheus-main = mkPrometheusService {
    configFile = mainConfig;
    dataDir = dataDirs.main;
    port = 9090;
    retention = "90d";
  };

  systemd.services.prometheus-pid-short = mkPrometheusService {
    configFile = pidShortConfig;
    dataDir = dataDirs.pidShort;
    port = 9092;
    retention = "10m";
  };

  # Create the data directories: the root stays root-owned, each instance
  # directory belongs to the prometheus user.
  systemd.tmpfiles.rules = [
    "d ${dataRoot} 0755 root root - -"
    "d ${dataDirs.main} 0750 prometheus prometheus - -"
    "d ${dataDirs.pidShort} 0750 prometheus prometheus - -"
  ];
}
|
||||
@@ -23,6 +23,7 @@ sudo zfs create media/secure/home_assistant -o compression=zstd-19
|
||||
sudo zfs create media/secure/notes -o copies=2
|
||||
sudo zfs create media/secure/postgres -o mountpoint=/zfs/media/database/postgres -o recordsize=16k -o primarycache=metadata
|
||||
sudo zfs create media/secure/postgres-wal -o mountpoint=/zfs/media/database/postgres-wal -o recordsize=32k -o primarycache=metadata -o special_small_blocks=32K -o compression=lz4 -o secondarycache=none -o logbias=latency
|
||||
sudo zfs create media/secure/prometheus -o mountpoint=/zfs/media/database/prometheus -o compression=lz4
|
||||
sudo zfs create media/secure/services -o compression=zstd-9
|
||||
sudo zfs create media/secure/share -o mountpoint=/zfs/media/share -o exec=off
|
||||
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
# Grafana front end for the monitoring stack.
#
# Listens on the LAN address defined below, keeps its state under
# ${vars.services}/grafana, and provisions its dashboards and both Prometheus
# datasources declaratively.
{
  ...
}:
let
  vars = import ../vars.nix;
  grafanaDataDir = "${vars.services}/grafana";

  # Single source of truth for the listener: the firewall rule, the bind
  # address/port and root_url are all derived from these two values so they
  # cannot drift apart.
  grafanaAddress = "192.168.90.40";
  grafanaPort = 3000;
in
{
  networking.firewall.allowedTCPPorts = [ grafanaPort ];

  services.grafana = {
    enable = true;
    dataDir = grafanaDataDir;
    settings = {
      database.type = "sqlite3";
      security = {
        # "$__file{...}" is Grafana's file-provider syntax, not a Nix
        # interpolation: only the path ends up in the generated settings and
        # Grafana reads the secret from that file itself.
        admin_password = "$__file{${vars.secrets}/services/grafana/admin_password}";
        admin_user = "admin";
        secret_key = "$__file{${vars.secrets}/services/grafana/secret_key}";
      };
      server = {
        http_addr = grafanaAddress;
        http_port = grafanaPort;
        # Public base URL. This used to read "http://$192.168.90.40:3000/";
        # the stray "$" made the host part invalid, so it is now built from
        # the same address and port Grafana actually binds to.
        root_url = "http://${grafanaAddress}:${toString grafanaPort}/";
      };
    };
    provision = {
      enable = true;
      dashboards.settings = {
        apiVersion = 1;
        providers = [
          {
            # File-based provider: dashboards come from the repository
            # directory below and are not editable from the UI.
            name = "monitoring";
            folder = "Monitoring";
            type = "file";
            disableDeletion = false;
            editable = false;
            allowUiUpdates = false;
            updateIntervalSeconds = 30;
            options.path = ../monitoring/dashboards;
          }
        ];
      };
      datasources.settings = {
        apiVersion = 1;
        prune = true;
        datasources = [
          {
            # Default datasource, pointing at the Prometheus on port 9090.
            access = "proxy";
            editable = false;
            isDefault = true;
            name = "prom-main";
            type = "prometheus";
            uid = "prom-main";
            url = "http://127.0.0.1:9090";
          }
          {
            # Second datasource, pointing at the Prometheus on port 9092.
            access = "proxy";
            editable = false;
            name = "prom-pid-short";
            type = "prometheus";
            uid = "prom-pid-short";
            url = "http://127.0.0.1:9092";
          }
        ];
      };
    };
  };

  systemd = {
    # Ordering only: start Grafana after the two Prometheus units when they
    # are part of the same transaction.
    services.grafana.after = [
      "prometheus-main.service"
      "prometheus-pid-short.service"
    ];

    tmpfiles.rules = [
      "d ${grafanaDataDir} 0750 grafana grafana - -"
    ];
  };
}
|
||||
@@ -1,24 +0,0 @@
|
||||
# HedgeDoc served over plain HTTP on port 3000, backed by the local
# PostgreSQL instance through its unix socket directory.
let
  listenPort = 3000;
  postgresUnit = [ "postgresql.service" ];
in
{
  networking.firewall.allowedTCPPorts = [ listenPort ];

  services.hedgedoc.enable = true;
  services.hedgedoc.settings = {
    # Listen on all interfaces; generated links use the LAN address with the
    # port appended and without SSL.
    host = "0.0.0.0";
    port = listenPort;
    domain = "192.168.90.40";
    urlAddPort = true;
    protocolUseSSL = false;

    db.dialect = "postgres";
    db.database = "hedgedoc";
    db.username = "hedgedoc";
    db.host = "/run/postgresql";
  };

  # HedgeDoc both requires PostgreSQL and is ordered after it.
  systemd.services.hedgedoc.after = postgresUnit;
  systemd.services.hedgedoc.requires = postgresUnit;
}
|
||||
Reference in New Issue
Block a user