main
  1{
  2  globals,
  3  lib,
  4  pkgs,
  5  monitoring,
  6  config,
  7  ...
  8}:
  9let
 10  # Get machines that should be monitored
 11  # Exclude: kyushu (laptop), shikoku (temporarily stopped), nagoya (not yet configured)
 12  nodeExporterMachines = lib.filterAttrs (
 13    name: _machine:
 14    !builtins.elem name [
 15      "kyushu"
 16      "shikoku"
 17      "nagoya"
 18    ]
 19  ) (monitoring.machinesWithNodeExporter globals.machines);
 20
 21  # Generate node exporter targets
 22  nodeExporterTargets = monitoring.mkPrometheusTargets {
 23    machines = nodeExporterMachines;
 24    port = 9000;
 25  };
 26
 27  # Machines with BIND DNS
 28  bindMachines = lib.filterAttrs (
 29    _name: _machine:
 30    builtins.elem _name [
 31      "demeter"
 32      "athena"
 33    ]
 34  ) globals.machines;
 35  bindTargets = monitoring.mkPrometheusTargets {
 36    machines = bindMachines;
 37    port = 9009;
 38  };
 39
 40  # PostgreSQL hosts
 41  postgresTargets = map (host: "${host}.sbr.pm:9187") [
 42    "rhea"
 43    "sakhalin"
 44  ];
 45
 46  # Exportarr services configuration
 47  exportarrServices = {
 48    sonarr = {
 49      port = 9707;
 50    };
 51    radarr = {
 52      port = 9708;
 53    };
 54    lidarr = {
 55      port = 9709;
 56    };
 57    prowlarr = {
 58      port = 9710;
 59    };
 60    bazarr = {
 61      port = 9712;
 62    };
 63  };
 64  exportarrTargets = lib.mapAttrsToList (
 65    _name: cfg: "rhea.sbr.pm:${toString cfg.port}"
 66  ) exportarrServices;
 67
 68  # Docker hosts with metrics enabled
 69  dockerMachines = lib.filterAttrs (
 70    _name: _machine:
 71    builtins.elem _name [
 72      "sakhalin"
 73      "aomi"
 74    ]
 75  ) globals.machines;
 76  dockerTargets = monitoring.mkPrometheusTargets {
 77    machines = dockerMachines;
 78    port = 9323;
 79  };
 80in
 81{
 82
 83  imports = [
 84    ../common/services/containers.nix
 85    ../common/services/docker.nix
 86    ../common/services/binfmt.nix
 87
 88    ../common/services/prometheus-exporters-postgres.nix
 89  ];
 90
 91  # Disable TPM2 (hardware has no TPM chip)
 92  security.tpm2.enable = lib.mkForce false;
 93
 94  # Age secrets
 95  age.secrets."grafana-admin-password" = {
 96    file = ../../secrets/sakhalin/grafana-admin-password.age;
 97    mode = "400";
 98    owner = "grafana";
 99  };
100  age.secrets."grafana-secret-key" = {
101    file = ../../secrets/sakhalin/grafana-secret-key.age;
102    mode = "400";
103    owner = "grafana";
104  };
105  age.secrets."ntfy-token" = {
106    file = ../../secrets/sakhalin/ntfy-token.age;
107    mode = "440";
108    owner = "root";
109    group = "root";
110  };
111  age.secrets."homeassistant-prometheus-token" = {
112    file = ../../secrets/sakhalin/homeassistant-prometheus-token.age;
113    mode = "400";
114    owner = "prometheus";
115  };
116  age.secrets."searxng-secret-key" = {
117    file = ../../secrets/sakhalin/searxng-secret-key.age;
118    mode = "400";
119    owner = "searx";
120    group = "searx";
121  };
122
123  systemd.services.n8n.environment = {
124    N8N_SECURE_COOKIE = "false";
125    PATH = lib.mkForce "/run/current-system/sw/bin";
126  };
127
128  services = {
129    atuin = {
130      enable = true;
131      host = "0.0.0.0";
132      openRegistration = false;
133    };
134    # PostgreSQL backups
135    postgresqlBackup = {
136      enable = true;
137      databases = [ ];
138      location = "/var/backup/postgresql";
139      startAt = "*-*-* 02:15:00"; # Daily at 2:15 AM
140    };
141
142    grafana = {
143      enable = true;
144      settings = {
145        server = {
146          http_addr = "0.0.0.0";
147          http_port = 3000;
148          domain = "grafana.sbr.pm";
149          root_url = "https://grafana.sbr.pm";
150        };
151        security.secret_key = "$__file{${config.age.secrets."grafana-secret-key".path}}";
152      };
153
154      provision = {
155        enable = true;
156        datasources.settings = {
157          apiVersion = 1;
158          datasources = [
159            {
160              name = "Prometheus";
161              type = "prometheus";
162              access = "proxy";
163              url = "http://localhost:9001";
164              isDefault = true;
165              jsonData = {
166                timeInterval = "30s";
167              };
168            }
169          ];
170        };
171
172        dashboards.settings = {
173          apiVersion = 1;
174          providers = [
175            {
176              name = "Default";
177              type = "file";
178              disableDeletion = false;
179              allowUiUpdates = true;
180              options.path = "/var/lib/grafana/dashboards";
181            }
182          ];
183        };
184      };
185    };
186    prometheus = {
187      enable = true;
188      port = 9001;
189      checkConfig = false; # Disable config check due to agenix secrets not available at build time
190
191      # Alert rules
192      ruleFiles = [
193        (pkgs.writeText "prometheus-alerts.yml" (builtins.toJSON (import ./prometheus-alerts.nix)))
194      ];
195
196      # Alertmanager configuration
197      alertmanagers = [
198        {
199          static_configs = [
200            {
201              targets = [ "localhost:9093" ];
202            }
203          ];
204        }
205      ];
206
207      scrapeConfigs = [
208        {
209          job_name = "node";
210          static_configs = [
211            {
212              targets = nodeExporterTargets;
213            }
214          ];
215        }
216        {
217          job_name = "bind";
218          static_configs = [
219            {
220              targets = bindTargets;
221            }
222          ];
223        }
224        {
225          job_name = "postgres";
226          static_configs = [
227            {
228              targets = postgresTargets;
229            }
230          ];
231        }
232        {
233          job_name = "traefik";
234          static_configs = [
235            {
236              targets = [ "rhea.sbr.pm:8080" ];
237            }
238          ];
239        }
240        {
241          job_name = "caddy";
242          static_configs = [
243            {
244              targets = [ "${builtins.head globals.machines.carthage.net.vpn.ips}:2019" ];
245            }
246          ];
247        }
248        {
249          job_name = "exportarr";
250          static_configs = [
251            {
252              targets = exportarrTargets;
253            }
254          ];
255        }
256        # Mosquitto MQTT exporter disabled - package broken in nixpkgs
257        # {
258        #   job_name = "mosquitto";
259        #   static_configs = [
260        #     {
261        #       targets = [ "demeter.sbr.pm:9234" ];
262        #     }
263        #   ];
264        # }
265        {
266          job_name = "homeassistant";
267          static_configs = [
268            {
269              targets = [ "${builtins.head globals.machines.hass.net.ips}:8123" ];
270            }
271          ];
272          metrics_path = "/api/prometheus";
273          bearer_token_file = config.age.secrets."homeassistant-prometheus-token".path;
274        }
275        {
276          job_name = "docker";
277          static_configs = [
278            {
279              targets = dockerTargets;
280            }
281          ];
282        }
283        {
284          job_name = "restic";
285          static_configs = [
286            {
287              targets = [ "aion.sbr.pm:9753" ];
288            }
289          ];
290        }
291      ];
292    };
293
294    # Alertmanager for routing alerts
295    prometheus.alertmanager = {
296      enable = true;
297      port = 9093;
298      webExternalUrl = "http://localhost:9093";
299
300      configuration = {
301        global = {
302          resolve_timeout = "5m";
303        };
304
305        route = {
306          group_by = [
307            "alertname"
308            "instance"
309          ];
310          group_wait = "30s";
311          group_interval = "5m";
312          repeat_interval = "12h";
313          receiver = "ntfy";
314        };
315
316        receivers = [
317          {
318            name = "ntfy";
319            webhook_configs = [
320              {
321                url = "http://localhost:8081/hook"; # alertmanager-ntfy bridge
322                send_resolved = true;
323              }
324            ];
325          }
326        ];
327      };
328    };
329
330    tarsnap = {
331      enable = true;
332      archives = {
333        documents = {
334          directories = [ "/home/vincent/desktop/documents" ];
335          period = "daily";
336          keyfile = "/etc/nixos/assets/tarsnap.documents.key";
337        };
338        org = {
339          directories = [ "/home/vincent/desktop/org" ];
340          period = "daily";
341          keyfile = "/etc/nixos/assets/tarsnap.org.key";
342        };
343      };
344    };
345    nfs.server = {
346      enable = true;
347      exports = ''
348        /export                      192.168.1.0/24(rw,fsid=0,no_subtree_check) 10.100.0.0/24(rw,fsid=0,no_subtree_check)
349        /export/gaia                 192.168.1.0/24(rw,fsid=1,no_subtree_check) 10.100.0.0/24(rw,fsid=1,no_subtree_check)
350        /export/toshito              192.168.1.0/24(rw,fsid=2,no_subtree_check) 10.100.0.0/24(rw,fsid=2,no_subtree_check)
351      '';
352    };
353
354  };
355
356  # Create Grafana dashboard directory and deploy Ollama dashboards
357  systemd.tmpfiles.rules = [
358    "d /var/lib/grafana/dashboards 0755 grafana grafana -"
359
360  ];
361
362  # Set Grafana admin password from secret file
363  systemd.services.grafana-set-admin-password = {
364    description = "Set Grafana admin password from secret file";
365    after = [ "grafana.service" ];
366    wantedBy = [ "multi-user.target" ];
367    serviceConfig = {
368      Type = "oneshot";
369      User = "grafana";
370      RemainAfterExit = true;
371    };
372    script = ''
373      # Only set password if admin user exists (database initialized)
374      if ${pkgs.grafana}/bin/grafana-cli --homepath /var/lib/grafana admin reset-admin-password --password-from-stdin < ${
375        config.age.secrets."grafana-admin-password".path
376      } 2>/dev/null; then
377        echo "Admin password updated successfully"
378      else
379        echo "Failed to update password or admin user doesn't exist yet"
380      fi
381    '';
382  };
383
384  # ntfy-alertmanager bridge - manual service configuration with token support
385  systemd.services.alertmanager-ntfy = {
386    description = "Alertmanager to ntfy bridge";
387    after = [ "network.target" ];
388    wantedBy = [ "multi-user.target" ];
389
390    serviceConfig = {
391      Type = "simple";
392      DynamicUser = true;
393      StateDirectory = "alertmanager-ntfy";
394      Restart = "on-failure";
395      RestartSec = "5s";
396      ExecStart = "${pkgs.alertmanager-ntfy}/bin/alertmanager-ntfy --configs /var/lib/alertmanager-ntfy/config.yml";
397      # Run config preparation as root (+ prefix) before starting the main process
398      ExecStartPre =
399        "+"
400        + pkgs.writeShellScript "prepare-alertmanager-ntfy-config" ''
401                  # Read the token from the secret file
402                  TOKEN=$(cat ${config.age.secrets."ntfy-token".path})
403
404                  # Generate config with the actual token
405                  cat > /var/lib/alertmanager-ntfy/config.yml <<'EOF'
406          http:
407            addr: 127.0.0.1:8081
408
409          ntfy:
410            baseurl: https://ntfy.sbr.pm
411            auth:
412              token: TOKEN_PLACEHOLDER
413            notification:
414              topic: homelab
415              priority: 'status == "firing" ? "urgent" : "default"'
416              tags:
417                - tag: rotating_light
418                  condition: 'status == "firing" && labels.severity == "critical"'
419                - tag: warning
420                  condition: 'status == "firing" && labels.severity == "warning"'
421                - tag: "+1"
422                  condition: 'status == "resolved"'
423              templates:
424                title: '{{ if eq .Status "resolved" }} Resolved: {{ end }}{{ if eq .Status "firing" }}🔥 {{ end }}{{ index .Annotations "summary" }}'
425                description: '{{ index .Annotations "description" }}'
426          EOF
427                  # Replace placeholder with actual token
428                  ${pkgs.gnused}/bin/sed -i "s/TOKEN_PLACEHOLDER/$TOKEN/" /var/lib/alertmanager-ntfy/config.yml
429                  # Make config readable by the dynamic user
430                  chmod 644 /var/lib/alertmanager-ntfy/config.yml
431        '';
432    };
433  };
434
435  environment.systemPackages = with pkgs; [ yt-dlp ];
436  # mr -i u daily
437  systemd.services.mr = {
438    description = "Update configs daily";
439    requires = [ "network-online.target" ];
440    after = [ "network-online.target" ];
441
442    restartIfChanged = false;
443    unitConfig.X-StopOnRemoval = false;
444
445    serviceConfig = {
446      Type = "oneshot";
447      User = "vincent";
448      OnFailure = "status-email-root@%n.service";
449    };
450
451    path = with pkgs; [
452      git
453      mr
454    ];
455    script = ''
456      set -e
457       cd /mnt/gaia/src/configs/
458       mr -t run git reset --hard
459       mr -t u
460    '';
461
462    startAt = "daily";
463  };
464  # Kiwix serve
465  systemd.services.kiwix-serve = {
466    description = "Kiwix offline content server";
467    wantedBy = [ "multi-user.target" ];
468    after = [ "network.target" ];
469
470    serviceConfig = {
471      Type = "simple";
472      User = "vincent";
473      ExecStart = "${pkgs.bash}/bin/bash -c '${pkgs.kiwix-tools}/bin/kiwix-serve --port=8080 /mnt/gaia/kiwix/*.zim'";
474      Restart = "on-failure";
475      RestartSec = "5s";
476    };
477  };
478
479  # Open firewall for services accessible from the network
480  networking.firewall.allowedTCPPorts = [
481    8000 # Paperless-ngx web interface
482    8090 # SearXNG metasearch engine
483  ];
484}