main
1{
2 globals,
3 lib,
4 pkgs,
5 monitoring,
6 config,
7 ...
8}:
9let
10 # Get machines that should be monitored
11 # Exclude: kyushu (laptop), shikoku (temporarily stopped), nagoya (not yet configured)
12 nodeExporterMachines = lib.filterAttrs (
13 name: _machine:
14 !builtins.elem name [
15 "kyushu"
16 "shikoku"
17 "nagoya"
18 ]
19 ) (monitoring.machinesWithNodeExporter globals.machines);
20
21 # Generate node exporter targets
22 nodeExporterTargets = monitoring.mkPrometheusTargets {
23 machines = nodeExporterMachines;
24 port = 9000;
25 };
26
27 # Machines with BIND DNS
28 bindMachines = lib.filterAttrs (
29 _name: _machine:
30 builtins.elem _name [
31 "demeter"
32 "athena"
33 ]
34 ) globals.machines;
35 bindTargets = monitoring.mkPrometheusTargets {
36 machines = bindMachines;
37 port = 9009;
38 };
39
40 # PostgreSQL hosts
41 postgresTargets = map (host: "${host}.sbr.pm:9187") [
42 "rhea"
43 "sakhalin"
44 ];
45
46 # Exportarr services configuration
47 exportarrServices = {
48 sonarr = {
49 port = 9707;
50 };
51 radarr = {
52 port = 9708;
53 };
54 lidarr = {
55 port = 9709;
56 };
57 prowlarr = {
58 port = 9710;
59 };
60 bazarr = {
61 port = 9712;
62 };
63 };
64 exportarrTargets = lib.mapAttrsToList (
65 _name: cfg: "rhea.sbr.pm:${toString cfg.port}"
66 ) exportarrServices;
67
68 # Docker hosts with metrics enabled
69 dockerMachines = lib.filterAttrs (
70 _name: _machine:
71 builtins.elem _name [
72 "sakhalin"
73 "aomi"
74 ]
75 ) globals.machines;
76 dockerTargets = monitoring.mkPrometheusTargets {
77 machines = dockerMachines;
78 port = 9323;
79 };
80in
81{
82
83 imports = [
84 ../common/services/containers.nix
85 ../common/services/docker.nix
86 ../common/services/binfmt.nix
87
88 ../common/services/prometheus-exporters-postgres.nix
89 ];
90
91 # Disable TPM2 (hardware has no TPM chip)
92 security.tpm2.enable = lib.mkForce false;
93
94 # Age secrets
95 age.secrets."grafana-admin-password" = {
96 file = ../../secrets/sakhalin/grafana-admin-password.age;
97 mode = "400";
98 owner = "grafana";
99 };
100 age.secrets."grafana-secret-key" = {
101 file = ../../secrets/sakhalin/grafana-secret-key.age;
102 mode = "400";
103 owner = "grafana";
104 };
105 age.secrets."ntfy-token" = {
106 file = ../../secrets/sakhalin/ntfy-token.age;
107 mode = "440";
108 owner = "root";
109 group = "root";
110 };
111 age.secrets."homeassistant-prometheus-token" = {
112 file = ../../secrets/sakhalin/homeassistant-prometheus-token.age;
113 mode = "400";
114 owner = "prometheus";
115 };
116 age.secrets."searxng-secret-key" = {
117 file = ../../secrets/sakhalin/searxng-secret-key.age;
118 mode = "400";
119 owner = "searx";
120 group = "searx";
121 };
122
123 systemd.services.n8n.environment = {
124 N8N_SECURE_COOKIE = "false";
125 PATH = lib.mkForce "/run/current-system/sw/bin";
126 };
127
128 services = {
129 atuin = {
130 enable = true;
131 host = "0.0.0.0";
132 openRegistration = false;
133 };
134 # PostgreSQL backups
135 postgresqlBackup = {
136 enable = true;
137 databases = [ ];
138 location = "/var/backup/postgresql";
139 startAt = "*-*-* 02:15:00"; # Daily at 2:15 AM
140 };
141
142 grafana = {
143 enable = true;
144 settings = {
145 server = {
146 http_addr = "0.0.0.0";
147 http_port = 3000;
148 domain = "grafana.sbr.pm";
149 root_url = "https://grafana.sbr.pm";
150 };
151 security.secret_key = "$__file{${config.age.secrets."grafana-secret-key".path}}";
152 };
153
154 provision = {
155 enable = true;
156 datasources.settings = {
157 apiVersion = 1;
158 datasources = [
159 {
160 name = "Prometheus";
161 type = "prometheus";
162 access = "proxy";
163 url = "http://localhost:9001";
164 isDefault = true;
165 jsonData = {
166 timeInterval = "30s";
167 };
168 }
169 ];
170 };
171
172 dashboards.settings = {
173 apiVersion = 1;
174 providers = [
175 {
176 name = "Default";
177 type = "file";
178 disableDeletion = false;
179 allowUiUpdates = true;
180 options.path = "/var/lib/grafana/dashboards";
181 }
182 ];
183 };
184 };
185 };
186 prometheus = {
187 enable = true;
188 port = 9001;
189 checkConfig = false; # Disable config check due to agenix secrets not available at build time
190
191 # Alert rules
192 ruleFiles = [
193 (pkgs.writeText "prometheus-alerts.yml" (builtins.toJSON (import ./prometheus-alerts.nix)))
194 ];
195
196 # Alertmanager configuration
197 alertmanagers = [
198 {
199 static_configs = [
200 {
201 targets = [ "localhost:9093" ];
202 }
203 ];
204 }
205 ];
206
207 scrapeConfigs = [
208 {
209 job_name = "node";
210 static_configs = [
211 {
212 targets = nodeExporterTargets;
213 }
214 ];
215 }
216 {
217 job_name = "bind";
218 static_configs = [
219 {
220 targets = bindTargets;
221 }
222 ];
223 }
224 {
225 job_name = "postgres";
226 static_configs = [
227 {
228 targets = postgresTargets;
229 }
230 ];
231 }
232 {
233 job_name = "traefik";
234 static_configs = [
235 {
236 targets = [ "rhea.sbr.pm:8080" ];
237 }
238 ];
239 }
240 {
241 job_name = "caddy";
242 static_configs = [
243 {
244 targets = [ "${builtins.head globals.machines.carthage.net.vpn.ips}:2019" ];
245 }
246 ];
247 }
248 {
249 job_name = "exportarr";
250 static_configs = [
251 {
252 targets = exportarrTargets;
253 }
254 ];
255 }
256 # Mosquitto MQTT exporter disabled - package broken in nixpkgs
257 # {
258 # job_name = "mosquitto";
259 # static_configs = [
260 # {
261 # targets = [ "demeter.sbr.pm:9234" ];
262 # }
263 # ];
264 # }
265 {
266 job_name = "homeassistant";
267 static_configs = [
268 {
269 targets = [ "${builtins.head globals.machines.hass.net.ips}:8123" ];
270 }
271 ];
272 metrics_path = "/api/prometheus";
273 bearer_token_file = config.age.secrets."homeassistant-prometheus-token".path;
274 }
275 {
276 job_name = "docker";
277 static_configs = [
278 {
279 targets = dockerTargets;
280 }
281 ];
282 }
283 {
284 job_name = "restic";
285 static_configs = [
286 {
287 targets = [ "aion.sbr.pm:9753" ];
288 }
289 ];
290 }
291 ];
292 };
293
294 # Alertmanager for routing alerts
295 prometheus.alertmanager = {
296 enable = true;
297 port = 9093;
298 webExternalUrl = "http://localhost:9093";
299
300 configuration = {
301 global = {
302 resolve_timeout = "5m";
303 };
304
305 route = {
306 group_by = [
307 "alertname"
308 "instance"
309 ];
310 group_wait = "30s";
311 group_interval = "5m";
312 repeat_interval = "12h";
313 receiver = "ntfy";
314 };
315
316 receivers = [
317 {
318 name = "ntfy";
319 webhook_configs = [
320 {
321 url = "http://localhost:8081/hook"; # alertmanager-ntfy bridge
322 send_resolved = true;
323 }
324 ];
325 }
326 ];
327 };
328 };
329
330 tarsnap = {
331 enable = true;
332 archives = {
333 documents = {
334 directories = [ "/home/vincent/desktop/documents" ];
335 period = "daily";
336 keyfile = "/etc/nixos/assets/tarsnap.documents.key";
337 };
338 org = {
339 directories = [ "/home/vincent/desktop/org" ];
340 period = "daily";
341 keyfile = "/etc/nixos/assets/tarsnap.org.key";
342 };
343 };
344 };
345 nfs.server = {
346 enable = true;
347 exports = ''
348 /export 192.168.1.0/24(rw,fsid=0,no_subtree_check) 10.100.0.0/24(rw,fsid=0,no_subtree_check)
349 /export/gaia 192.168.1.0/24(rw,fsid=1,no_subtree_check) 10.100.0.0/24(rw,fsid=1,no_subtree_check)
350 /export/toshito 192.168.1.0/24(rw,fsid=2,no_subtree_check) 10.100.0.0/24(rw,fsid=2,no_subtree_check)
351 '';
352 };
353
354 };
355
356 # Create Grafana dashboard directory and deploy Ollama dashboards
357 systemd.tmpfiles.rules = [
358 "d /var/lib/grafana/dashboards 0755 grafana grafana -"
359
360 ];
361
362 # Set Grafana admin password from secret file
363 systemd.services.grafana-set-admin-password = {
364 description = "Set Grafana admin password from secret file";
365 after = [ "grafana.service" ];
366 wantedBy = [ "multi-user.target" ];
367 serviceConfig = {
368 Type = "oneshot";
369 User = "grafana";
370 RemainAfterExit = true;
371 };
372 script = ''
373 # Only set password if admin user exists (database initialized)
374 if ${pkgs.grafana}/bin/grafana-cli --homepath /var/lib/grafana admin reset-admin-password --password-from-stdin < ${
375 config.age.secrets."grafana-admin-password".path
376 } 2>/dev/null; then
377 echo "Admin password updated successfully"
378 else
379 echo "Failed to update password or admin user doesn't exist yet"
380 fi
381 '';
382 };
383
384 # ntfy-alertmanager bridge - manual service configuration with token support
385 systemd.services.alertmanager-ntfy = {
386 description = "Alertmanager to ntfy bridge";
387 after = [ "network.target" ];
388 wantedBy = [ "multi-user.target" ];
389
390 serviceConfig = {
391 Type = "simple";
392 DynamicUser = true;
393 StateDirectory = "alertmanager-ntfy";
394 Restart = "on-failure";
395 RestartSec = "5s";
396 ExecStart = "${pkgs.alertmanager-ntfy}/bin/alertmanager-ntfy --configs /var/lib/alertmanager-ntfy/config.yml";
397 # Run config preparation as root (+ prefix) before starting the main process
398 ExecStartPre =
399 "+"
400 + pkgs.writeShellScript "prepare-alertmanager-ntfy-config" ''
401 # Read the token from the secret file
402 TOKEN=$(cat ${config.age.secrets."ntfy-token".path})
403
404 # Generate config with the actual token
405 cat > /var/lib/alertmanager-ntfy/config.yml <<'EOF'
406 http:
407 addr: 127.0.0.1:8081
408
409 ntfy:
410 baseurl: https://ntfy.sbr.pm
411 auth:
412 token: TOKEN_PLACEHOLDER
413 notification:
414 topic: homelab
415 priority: 'status == "firing" ? "urgent" : "default"'
416 tags:
417 - tag: rotating_light
418 condition: 'status == "firing" && labels.severity == "critical"'
419 - tag: warning
420 condition: 'status == "firing" && labels.severity == "warning"'
421 - tag: "+1"
422 condition: 'status == "resolved"'
423 templates:
424 title: '{{ if eq .Status "resolved" }}✅ Resolved: {{ end }}{{ if eq .Status "firing" }}🔥 {{ end }}{{ index .Annotations "summary" }}'
425 description: '{{ index .Annotations "description" }}'
426 EOF
427 # Replace placeholder with actual token
428 ${pkgs.gnused}/bin/sed -i "s/TOKEN_PLACEHOLDER/$TOKEN/" /var/lib/alertmanager-ntfy/config.yml
429 # Make config readable by the dynamic user
430 chmod 644 /var/lib/alertmanager-ntfy/config.yml
431 '';
432 };
433 };
434
435 environment.systemPackages = with pkgs; [ yt-dlp ];
436 # mr -i u daily
437 systemd.services.mr = {
438 description = "Update configs daily";
439 requires = [ "network-online.target" ];
440 after = [ "network-online.target" ];
441
442 restartIfChanged = false;
443 unitConfig.X-StopOnRemoval = false;
444
445 serviceConfig = {
446 Type = "oneshot";
447 User = "vincent";
448 OnFailure = "status-email-root@%n.service";
449 };
450
451 path = with pkgs; [
452 git
453 mr
454 ];
455 script = ''
456 set -e
457 cd /mnt/gaia/src/configs/
458 mr -t run git reset --hard
459 mr -t u
460 '';
461
462 startAt = "daily";
463 };
464 # Kiwix serve
465 systemd.services.kiwix-serve = {
466 description = "Kiwix offline content server";
467 wantedBy = [ "multi-user.target" ];
468 after = [ "network.target" ];
469
470 serviceConfig = {
471 Type = "simple";
472 User = "vincent";
473 ExecStart = "${pkgs.bash}/bin/bash -c '${pkgs.kiwix-tools}/bin/kiwix-serve --port=8080 /mnt/gaia/kiwix/*.zim'";
474 Restart = "on-failure";
475 RestartSec = "5s";
476 };
477 };
478
479 # Open firewall for services accessible from the network
480 networking.firewall.allowedTCPPorts = [
481 8000 # Paperless-ngx web interface
482 8090 # SearXNG metasearch engine
483 ];
484}