# flake-update-20260201
  1{
  2  libx,
  3  globals,
  4  lib,
  5  pkgs,
  6  monitoring,
  7  config,
  8  ...
  9}:
 10let
 11  # Get machines that should be monitored
 12  # Exclude: kyushu (laptop), shikoku (temporarily stopped), nagoya (not yet configured)
 13  nodeExporterMachines = lib.filterAttrs (
 14    name: _machine:
 15    !builtins.elem name [
 16      "kyushu"
 17      "shikoku"
 18      "nagoya"
 19    ]
 20  ) (monitoring.machinesWithNodeExporter globals.machines);
 21
 22  # Generate node exporter targets
 23  nodeExporterTargets = monitoring.mkPrometheusTargets {
 24    machines = nodeExporterMachines;
 25    port = 9000;
 26  };
 27
 28  # Machines with BIND DNS
 29  bindMachines = lib.filterAttrs (
 30    _name: _machine:
 31    builtins.elem _name [
 32      "demeter"
 33      "athena"
 34    ]
 35  ) globals.machines;
 36  bindTargets = monitoring.mkPrometheusTargets {
 37    machines = bindMachines;
 38    port = 9009;
 39  };
 40
 41  # PostgreSQL hosts
 42  postgresTargets = map (host: "${host}.sbr.pm:9187") [
 43    "rhea"
 44    "sakhalin"
 45  ];
 46
 47  # Exportarr services configuration
 48  exportarrServices = {
 49    sonarr = {
 50      port = 9707;
 51    };
 52    radarr = {
 53      port = 9708;
 54    };
 55    lidarr = {
 56      port = 9709;
 57    };
 58    prowlarr = {
 59      port = 9710;
 60    };
 61    bazarr = {
 62      port = 9712;
 63    };
 64  };
 65  exportarrTargets = lib.mapAttrsToList (
 66    _name: cfg: "rhea.sbr.pm:${toString cfg.port}"
 67  ) exportarrServices;
 68
 69  # Docker hosts with metrics enabled
 70  dockerMachines = lib.filterAttrs (
 71    _name: _machine:
 72    builtins.elem _name [
 73      "sakhalin"
 74      "aomi"
 75    ]
 76  ) globals.machines;
 77  dockerTargets = monitoring.mkPrometheusTargets {
 78    machines = dockerMachines;
 79    port = 9323;
 80  };
 81in
 82{
 83
 84  imports = [
 85    ../common/services/containers.nix
 86    ../common/services/docker.nix
 87    ../common/services/binfmt.nix
 88    ../common/services/prometheus-exporters-node.nix
 89    ../common/services/prometheus-exporters-postgres.nix
 90  ];
 91
 92  # Disable TPM2 (hardware has no TPM chip)
 93  security.tpm2.enable = lib.mkForce false;
 94
 95  # Age secrets
 96  age.secrets."grafana-admin-password" = {
 97    file = ../../secrets/sakhalin/grafana-admin-password.age;
 98    mode = "400";
 99    owner = "grafana";
100  };
101  age.secrets."ntfy-token" = {
102    file = ../../secrets/sakhalin/ntfy-token.age;
103    mode = "440";
104    owner = "root";
105    group = "root";
106  };
107  age.secrets."homeassistant-prometheus-token" = {
108    file = ../../secrets/sakhalin/homeassistant-prometheus-token.age;
109    mode = "400";
110    owner = "prometheus";
111  };
112
113  # TODO make it an option ? (otherwise I'll add it for all)
114  users.users.vincent.linger = true;
115
116  systemd.services.n8n.environment = {
117    N8N_SECURE_COOKIE = "false";
118    PATH = lib.mkForce "/run/current-system/sw/bin";
119  };
120
121  services = {
122    atuin = {
123      enable = true;
124      host = "0.0.0.0";
125      openRegistration = false;
126    };
127
128    n8n = {
129      enable = true;
130      openFirewall = true;
131      # webhookUrl = "";
132    };
133    paperless = {
134      enable = true;
135      address = "0.0.0.0"; # Listen on all interfaces for access via LAN and VPN
136      port = 8000;
137      dataDir = "/mnt/gaia/paperless/data";
138      mediaDir = "/mnt/gaia/paperless/media";
139      consumptionDir = "/mnt/gaia/paperless/consume";
140      settings = {
141        PAPERLESS_URL = "https://paperless.sbr.pm";
142        PAPERLESS_EMPTY_TRASH_DIR = "/mnt/gaia/paperless/trash";
143        PAPERLESS_FILENAME_FORMAT = "{{ created_year }}/{{ document_type }}/{{ created }} - {{ title }} ({{ doc_pk }})";
144        PAPERLESS_FILENAME_FORMAT_REMOVE_NONE = "true";
145      };
146    };
147    # PostgreSQL backups
148    postgresqlBackup = {
149      enable = true;
150      databases = [ ];
151      location = "/var/backup/postgresql";
152      startAt = "*-*-* 02:15:00"; # Daily at 2:15 AM
153    };
154
155    grafana = {
156      enable = true;
157      settings = {
158        server = {
159          http_addr = "0.0.0.0";
160          http_port = 3000;
161          domain = "grafana.sbr.pm";
162          root_url = "https://grafana.sbr.pm";
163        };
164      };
165
166      provision = {
167        enable = true;
168        datasources.settings = {
169          apiVersion = 1;
170          datasources = [
171            {
172              name = "Prometheus";
173              type = "prometheus";
174              access = "proxy";
175              url = "http://localhost:9001";
176              isDefault = true;
177              jsonData = {
178                timeInterval = "30s";
179              };
180            }
181          ];
182        };
183
184        dashboards.settings = {
185          apiVersion = 1;
186          providers = [
187            {
188              name = "Default";
189              type = "file";
190              disableDeletion = false;
191              allowUiUpdates = true;
192              options.path = "/var/lib/grafana/dashboards";
193            }
194          ];
195        };
196      };
197    };
198    prometheus = {
199      enable = true;
200      port = 9001;
201      checkConfig = false; # Disable config check due to agenix secrets not available at build time
202
203      # Alert rules
204      ruleFiles = [
205        (pkgs.writeText "prometheus-alerts.yml" (builtins.toJSON (import ./prometheus-alerts.nix)))
206      ];
207
208      # Alertmanager configuration
209      alertmanagers = [
210        {
211          static_configs = [
212            {
213              targets = [ "localhost:9093" ];
214            }
215          ];
216        }
217      ];
218
219      scrapeConfigs = [
220        {
221          job_name = "node";
222          static_configs = [
223            {
224              targets = nodeExporterTargets;
225            }
226          ];
227        }
228        {
229          job_name = "bind";
230          static_configs = [
231            {
232              targets = bindTargets;
233            }
234          ];
235        }
236        {
237          job_name = "postgres";
238          static_configs = [
239            {
240              targets = postgresTargets;
241            }
242          ];
243        }
244        {
245          job_name = "traefik";
246          static_configs = [
247            {
248              targets = [ "rhea.sbr.pm:8080" ];
249            }
250          ];
251        }
252        {
253          job_name = "caddy";
254          static_configs = [
255            {
256              targets = [ "${builtins.head globals.machines.kerkouane.net.vpn.ips}:2019" ];
257            }
258          ];
259        }
260        {
261          job_name = "exportarr";
262          static_configs = [
263            {
264              targets = exportarrTargets;
265            }
266          ];
267        }
268        # Mosquitto MQTT exporter disabled - package broken in nixpkgs
269        # {
270        #   job_name = "mosquitto";
271        #   static_configs = [
272        #     {
273        #       targets = [ "demeter.sbr.pm:9234" ];
274        #     }
275        #   ];
276        # }
277        {
278          job_name = "homeassistant";
279          static_configs = [
280            {
281              targets = [ "${builtins.head globals.machines.hass.net.ips}:8123" ];
282            }
283          ];
284          metrics_path = "/api/prometheus";
285          bearer_token_file = config.age.secrets."homeassistant-prometheus-token".path;
286        }
287        {
288          job_name = "docker";
289          static_configs = [
290            {
291              targets = dockerTargets;
292            }
293          ];
294        }
295        {
296          job_name = "ollama";
297          static_configs = [
298            {
299              targets = [ "${builtins.head globals.machines.aomi.net.ips}:8000" ];
300            }
301          ];
302        }
303        {
304          job_name = "restic";
305          static_configs = [
306            {
307              targets = [ "aion.sbr.pm:9753" ];
308            }
309          ];
310        }
311      ];
312    };
313
314    # Alertmanager for routing alerts
315    prometheus.alertmanager = {
316      enable = true;
317      port = 9093;
318      webExternalUrl = "http://localhost:9093";
319
320      configuration = {
321        global = {
322          resolve_timeout = "5m";
323        };
324
325        route = {
326          group_by = [
327            "alertname"
328            "instance"
329          ];
330          group_wait = "30s";
331          group_interval = "5m";
332          repeat_interval = "12h";
333          receiver = "ntfy";
334        };
335
336        receivers = [
337          {
338            name = "ntfy";
339            webhook_configs = [
340              {
341                url = "http://localhost:8081/hook"; # alertmanager-ntfy bridge
342                send_resolved = true;
343              }
344            ];
345          }
346        ];
347      };
348    };
349
350    tarsnap = {
351      enable = true;
352      archives = {
353        documents = {
354          directories = [ "/home/vincent/desktop/documents" ];
355          period = "daily";
356          keyfile = "/etc/nixos/assets/tarsnap.documents.key";
357        };
358        org = {
359          directories = [ "/home/vincent/desktop/org" ];
360          period = "daily";
361          keyfile = "/etc/nixos/assets/tarsnap.org.key";
362        };
363      };
364    };
365    nfs.server = {
366      enable = true;
367      exports = ''
368        /export                      192.168.1.0/24(rw,fsid=0,no_subtree_check) 10.100.0.0/24(rw,fsid=0,no_subtree_check)
369        /export/gaia                 192.168.1.0/24(rw,fsid=1,no_subtree_check) 10.100.0.0/24(rw,fsid=1,no_subtree_check)
370        /export/toshito              192.168.1.0/24(rw,fsid=2,no_subtree_check) 10.100.0.0/24(rw,fsid=2,no_subtree_check)
371      '';
372    };
373
374    wireguard = {
375      enable = true;
376      ips = libx.wg-ips globals.machines.sakhalin.net.vpn.ips;
377      endpoint = "${globals.net.vpn.endpoint}";
378      endpointPublicKey = "${globals.machines.kerkouane.net.vpn.pubkey}";
379    };
380  };
381
382  # Create Grafana dashboard directory and deploy Ollama dashboards
383  systemd.tmpfiles.rules = [
384    "d /mnt/gaia/paperless 0755 paperless paperless -"
385    "d /mnt/gaia/paperless/consume 0755 paperless paperless -"
386    "d /mnt/gaia/paperless/data 0755 paperless paperless -"
387    "d /mnt/gaia/paperless/media 0755 paperless paperless -"
388    "d /mnt/gaia/paperless/trash 0755 paperless paperless -"
389    "d /var/lib/grafana/dashboards 0755 grafana grafana -"
390    "C /var/lib/grafana/dashboards/ollama-metrics.json 0644 grafana grafana - ${../../tools/ollama-exporter/grafana-dashboard.json}"
391    "C /var/lib/grafana/dashboards/ollama-performance.json 0644 grafana grafana - ${../../tools/ollama-exporter/grafana-dashboard-custom.json}"
392  ];
393
394  # Add trash directory to paperless services ReadWritePaths
395  systemd.services.paperless-scheduler.serviceConfig.ReadWritePaths = [ "/mnt/gaia/paperless/trash" ];
396  systemd.services.paperless-task-queue.serviceConfig.ReadWritePaths = [
397    "/mnt/gaia/paperless/trash"
398  ];
399  systemd.services.paperless-consumer.serviceConfig.ReadWritePaths = [ "/mnt/gaia/paperless/trash" ];
400  systemd.services.paperless-web.serviceConfig.ReadWritePaths = [ "/mnt/gaia/paperless/trash" ];
401
402  # Set Grafana admin password from secret file
403  systemd.services.grafana-set-admin-password = {
404    description = "Set Grafana admin password from secret file";
405    after = [ "grafana.service" ];
406    wantedBy = [ "multi-user.target" ];
407    serviceConfig = {
408      Type = "oneshot";
409      User = "grafana";
410      RemainAfterExit = true;
411    };
412    script = ''
413      # Only set password if admin user exists (database initialized)
414      if ${pkgs.grafana}/bin/grafana-cli --homepath /var/lib/grafana admin reset-admin-password --password-from-stdin < ${
415        config.age.secrets."grafana-admin-password".path
416      } 2>/dev/null; then
417        echo "Admin password updated successfully"
418      else
419        echo "Failed to update password or admin user doesn't exist yet"
420      fi
421    '';
422  };
423
424  # ntfy-alertmanager bridge - manual service configuration with token support
425  systemd.services.alertmanager-ntfy = {
426    description = "Alertmanager to ntfy bridge";
427    after = [ "network.target" ];
428    wantedBy = [ "multi-user.target" ];
429
430    serviceConfig = {
431      Type = "simple";
432      DynamicUser = true;
433      StateDirectory = "alertmanager-ntfy";
434      Restart = "on-failure";
435      RestartSec = "5s";
436      ExecStart = "${pkgs.alertmanager-ntfy}/bin/alertmanager-ntfy --configs /var/lib/alertmanager-ntfy/config.yml";
437      # Run config preparation as root (+ prefix) before starting the main process
438      ExecStartPre =
439        "+"
440        + pkgs.writeShellScript "prepare-alertmanager-ntfy-config" ''
441                  # Read the token from the secret file
442                  TOKEN=$(cat ${config.age.secrets."ntfy-token".path})
443
444                  # Generate config with the actual token
445                  cat > /var/lib/alertmanager-ntfy/config.yml <<'EOF'
446          http:
447            addr: 127.0.0.1:8081
448
449          ntfy:
450            baseurl: https://ntfy.sbr.pm
451            auth:
452              token: TOKEN_PLACEHOLDER
453            notification:
454              topic: homelab
455              priority: 'status == "firing" ? "urgent" : "default"'
456              tags:
457                - tag: rotating_light
458                  condition: 'status == "firing" && labels.severity == "critical"'
459                - tag: warning
460                  condition: 'status == "firing" && labels.severity == "warning"'
461                - tag: "+1"
462                  condition: 'status == "resolved"'
463              templates:
464                title: '{{ if eq .Status "resolved" }} Resolved: {{ end }}{{ if eq .Status "firing" }}🔥 {{ end }}{{ index .Annotations "summary" }}'
465                description: '{{ index .Annotations "description" }}'
466          EOF
467                  # Replace placeholder with actual token
468                  ${pkgs.gnused}/bin/sed -i "s/TOKEN_PLACEHOLDER/$TOKEN/" /var/lib/alertmanager-ntfy/config.yml
469                  # Make config readable by the dynamic user
470                  chmod 644 /var/lib/alertmanager-ntfy/config.yml
471        '';
472    };
473  };
474
475  environment.systemPackages = with pkgs; [ yt-dlp ];
476  # mr -i u daily
477  systemd.services.mr = {
478    description = "Update configs daily";
479    requires = [ "network-online.target" ];
480    after = [ "network-online.target" ];
481
482    restartIfChanged = false;
483    unitConfig.X-StopOnRemoval = false;
484
485    serviceConfig = {
486      Type = "oneshot";
487      User = "vincent";
488      OnFailure = "status-email-root@%n.service";
489    };
490
491    path = with pkgs; [
492      git
493      mr
494    ];
495    script = ''
496      set -e
497       cd /mnt/gaia/src/configs/
498       mr -t run git reset --hard
499       mr -t u
500    '';
501
502    startAt = "daily";
503  };
504  # Kiwix serve
505  systemd.services.kiwix-serve = {
506    description = "Kiwix offline content server";
507    wantedBy = [ "multi-user.target" ];
508    after = [ "network.target" ];
509
510    serviceConfig = {
511      Type = "simple";
512      User = "vincent";
513      ExecStart = "${pkgs.bash}/bin/bash -c '${pkgs.kiwix-tools}/bin/kiwix-serve --port=8080 /mnt/gaia/kiwix/*.zim'";
514      Restart = "on-failure";
515      RestartSec = "5s";
516    };
517  };
518
519  # Open firewall for services accessible from the network
520  networking.firewall.allowedTCPPorts = [
521    8000 # Paperless-ngx web interface
522  ];
523}