Commit b2b01e6cce5a

Vincent Demeester <vincent@sbr.pm>
2025-12-18 21:16:26
feat(monitoring): comprehensive homelab prometheus and grafana setup
- Enable service discovery auto-generating targets from globals.machines
- Monitor critical infrastructure (rhea, aion, sakhalin, demeter, kerkouane, athena)
- Provision Grafana datasources and dashboards declaratively
- Track hardware sensors, databases, DNS, web servers, MQTT, and services

Signed-off-by: Vincent Demeester <vincent@sbr.pm>
1 parent 82a6224
lib/default.nix
@@ -7,6 +7,7 @@
 }:
 {
   libx = import ./functions.nix { inherit (inputs.nixpkgs) lib; };
+  monitoring = import ./monitoring.nix { inherit (inputs.nixpkgs) lib; };
   # Function for generating home-manage configs
   mkHome =
     {
@@ -63,6 +64,7 @@
           system
           ;
         libx = import ./functions.nix { inherit (pkgsInput) lib; };
+        monitoring = import ./monitoring.nix { inherit (pkgsInput) lib; };
       };
     in
     pkgsInput.lib.nixosSystem {
@@ -107,6 +109,7 @@
           globals
           ;
         libx = import ./functions.nix { inherit (pkgsInput) lib; };
+        monitoring = import ./monitoring.nix { inherit (pkgsInput) lib; };
       };
     in
     pkgsInput.lib.nixosSystem {
@@ -160,6 +163,7 @@
           nixos-raspberrypi
           ;
         libx = import ./functions.nix { inherit (pkgsInput) lib; };
+        monitoring = import ./monitoring.nix { inherit (pkgsInput) lib; };
       };
     in
     inputs.nixos-raspberrypi.lib.nixosSystemFull {
@@ -219,6 +223,7 @@
           globals
           ;
         libx = import ./functions.nix { inherit (pkgsInput) lib; };
+        monitoring = import ./monitoring.nix { inherit (pkgsInput) lib; };
       };
     in
     inputs.system-manager.lib.makeSystemConfig {
lib/monitoring.nix
@@ -0,0 +1,36 @@
+{ lib }:
+let
+  # Machine names that are never treated as node exporter hosts, whatever
+  # network configuration they declare.
+  excludedHosts = [
+    # Mobile devices and tablets
+    "hokkaido"
+    "suzu"
+    "osaka"
+    # Non-NixOS machines that don't run node exporter
+    "synodine"
+    "hass"
+    "wakasu"
+    "okinawa"
+    "kobe"
+  ];
+
+  # True for a machine that declares net.names (taken as the sign of a real
+  # host) and whose name is not in the exclusion list.
+  runsNodeExporter = host: spec: (spec ? net.names) && !(lib.elem host excludedHosts);
+in
+{
+  # Keep only the machines expected to run a node exporter: entries with a
+  # net.names attribute whose name is not in the exclusion list.
+  machinesWithNodeExporter = machines: lib.filterAttrs runsNodeExporter machines;
+
+  # Build one "<name>.<domain>:<port>" string per attribute of `machines`.
+  # Only the attribute names are used; `domain` defaults to "sbr.pm".
+  mkPrometheusTargets =
+    {
+      machines,
+      domain ? "sbr.pm",
+      port,
+    }:
+    map (host: "${host}.${domain}:${toString port}") (lib.attrNames machines);
+}
secrets/sakhalin/grafana-admin-password.age
@@ -0,0 +1,9 @@
+age-encryption.org/v1
+-> piv-p256 ItIHHA A9sErWr6TLyCBocDszjWBBYqkQdP/73ry7yzcgsAhs4H
+SGRIFBd5bRT6/v0SLokkOUWVE+ZREC0lRpnVcLWzPNA
+-> piv-p256 ViCCtQ ArNDm/h5079GC798nbwpasYgz9Q3oXoxG0OuCdVwHo8s
+1pCxHsOMkMYKDttQiiMj+z/Ie6JfCruS7Ck3hu5Sclo
+-> ssh-ed25519 /TxA1A fLNAvEsLu7bhIwhwh44Y+R/XvKLZa0iEOnkbI4IrMF8
+jJM/fcAyKebfQsqScbtXUaZYSR1qEYlITN5+4tlROEU
+--- PMDEDe/P+6wqbmFYfrEfL4wDJPfOJ8AixvFebe8hqRk
+�7@�f��.�X�	���K�9����]lpa��Cg�>��^q��Ƣiو1�
\ No newline at end of file
systems/aion/extra.nix
@@ -26,6 +26,10 @@ let
   };
 in
 {
+  imports = [
+    ../common/services/prometheus-exporters-node.nix
+  ];
+
   users.users.vincent.linger = true;
 
   services = {
@@ -86,7 +90,10 @@ in
 
   networking = {
     useDHCP = lib.mkDefault true;
-    firewall.allowedTCPPorts = [ 4533 ]; # Navidrome
+    firewall.allowedTCPPorts = [
+      4533 # Navidrome
+      9000 # Node exporter
+    ];
   };
 
   environment.systemPackages = with pkgs; [
systems/aomi/openshift-port-forward.nix
@@ -70,6 +70,9 @@
               # Allow OpenShift ports
               tcp dport { 80, 443, 6443 } accept
 
+              # Allow Prometheus node exporter
+              tcp dport 9000 accept
+
               # Allow libvirt
               tcp dport 16509 accept
 
systems/common/services/bind.nix
@@ -24,6 +24,11 @@ in
     extraOptions = ''
       dnssec-validation no;
     '';
+    extraConfig = ''
+      statistics-channels {
+        inet 127.0.0.1 port 8053 allow { 127.0.0.1; };
+      };
+    '';
     cacheNetworks = [ "127.0.0.0/8" ] ++ globals.net.dns.cacheNetworks;
 
     zones = [
systems/common/services/prometheus-exporters-bind.nix
@@ -2,5 +2,6 @@ _: {
   services.prometheus.exporters.bind = {
     enable = true;
     port = 9009;
+    bindURI = "http://localhost:8053";
   };
 }
systems/common/services/prometheus-exporters-node.nix
@@ -6,6 +6,8 @@
       default = [
         "systemd"
         "processes"
+        "hwmon" # Hardware sensors (lm_sensors)
+        "thermal_zone" # CPU thermal zones
       ];
     };
     prometheus-exporters-node.extraFlags = lib.mkOption {
systems/common/services/prometheus-exporters-postgres.nix
@@ -0,0 +1,14 @@
+# Enables the Prometheus PostgreSQL exporter and opens its port in the firewall.
+_:
+let
+  # Bound once so the exporter port and the firewall rule cannot drift apart.
+  exporterPort = 9187;
+in
+{
+  services.prometheus.exporters.postgres = {
+    enable = true;
+    port = exporterPort;
+    runAsLocalSuperUser = true;
+  };
+
+  networking.firewall.allowedTCPPorts = [ exporterPort ];
+}
systems/demeter/extra.nix
@@ -37,6 +37,17 @@
       ];
     };
 
+    prometheus.exporters.mqtt = {
+      enable = true;
+      port = 9234; # target of sakhalin's "mosquitto" scrape job; NOTE(review): no firewall change for 9234 is visible here — confirm it is reachable
+      mqttAddress = "127.0.0.1";
+      mqttPort = 1883;
+      mqttTopic = "#"; # Subscribe to all topics
+      mqttUsername = "homeassistant";
+      environmentFile = config.age.secrets."mosquitto-homeassistant-password".path; # NOTE(review): presumably must hold KEY=value lines rather than a bare password — confirm the secret's format
+      logLevel = "INFO";
+    };
+
     wireguard = {
       enable = true;
       ips = libx.wg-ips globals.machines.demeter.net.vpn.ips;
systems/kerkouane/extra.nix
@@ -76,7 +76,7 @@ in
     enable = true;
     email = "vincent@sbr.pm";
 
-    # Enable Prometheus metrics on VPN interface
+    # Enable Prometheus metrics on VPN interface only
     globalConfig = ''
       admin ${builtins.head globals.machines.kerkouane.net.vpn.ips}:2019
 
systems/rhea/extra.nix
@@ -51,6 +51,8 @@ in
   imports = [
     ../common/services/samba.nix
     ../common/services/homepage.nix
+    ../common/services/prometheus-exporters-node.nix
+    ../common/services/prometheus-exporters-postgres.nix
     ../../modules/audible-sync
     ../../modules/jellyfin-auto-collections
     ../../modules/music-playlist-dl
@@ -97,6 +99,13 @@ in
           insecure = false;
         };
 
+        # Prometheus metrics
+        metrics.prometheus = {
+          addEntryPointsLabels = true;
+          addRoutersLabels = true;
+          addServicesLabels = true;
+        };
+
         # Entry points
         entryPoints = {
           web = {
@@ -649,6 +658,8 @@ in
       443
       1883 # MQTT
       8883 # MQTTS
+      8080 # Traefik metrics
+      9000 # Node exporter
       # NFS ports
       111 # rpcbind
       2049 # NFS daemon
systems/sakhalin/extra.nix
@@ -3,8 +3,61 @@
   globals,
   lib,
   pkgs,
+  monitoring,
+  config,
   ...
 }:
+let
+  # Target lists for the Prometheus scrape jobs defined further down in this
+  # file.
+
+  # Subset of globals.machines selected by monitoring.machinesWithNodeExporter.
+  nodeExporterMachines = monitoring.machinesWithNodeExporter globals.machines;
+
+  # Node exporter targets: one per selected machine, all on port 9000.
+  nodeExporterTargets = monitoring.mkPrometheusTargets {
+    port = 9000;
+    machines = nodeExporterMachines;
+  };
+
+  # Entries of globals.machines that are scraped by the "bind" job; names
+  # missing from globals.machines are silently left out.
+  bindMachines =
+    let
+      bindHosts = [
+        "demeter"
+        "athena"
+      ];
+    in
+    lib.filterAttrs (host: _: lib.elem host bindHosts) globals.machines;
+
+  # BIND exporter targets on port 9009.
+  bindTargets = monitoring.mkPrometheusTargets {
+    port = 9009;
+    machines = bindMachines;
+  };
+
+  # PostgreSQL exporter targets (port 9187 on each host), spelled out
+  # literally with the sbr.pm domain.
+  postgresTargets = [
+    "rhea.sbr.pm:9187"
+    "sakhalin.sbr.pm:9187"
+  ];
+
+  # Exportarr exporter ports, keyed by the service each one reports on.
+  exportarrServices = {
+    sonarr.port = 9707;
+    radarr.port = 9708;
+    lidarr.port = 9709;
+    prowlarr.port = 9710;
+    bazarr.port = 9712;
+  };
+
+  # Every exportarr exporter is scraped on rhea at its own port.
+  exportarrTargets = lib.mapAttrsToList (
+    _: svc: "rhea.sbr.pm:${toString svc.port}"
+  ) exportarrServices;
+in
 {
 
   imports = [
@@ -12,9 +65,17 @@
     ../common/services/docker.nix
     ../common/desktop/binfmt.nix # TODO: move to something else than desktop
     ../common/services/prometheus-exporters-node.nix
+    ../common/services/prometheus-exporters-postgres.nix
     ../common/services/linkwarden.nix
   ];
 
+  # Age-encrypted Grafana admin password, owned by and readable only by the grafana user
+  age.secrets."grafana-admin-password" = {
+    file = ../../secrets/sakhalin/grafana-admin-password.age;
+    mode = "400";
+    owner = "grafana";
+  };
+
   # TODO make it an option ? (otherwise I'll add it for all)
   users.users.vincent.linger = true;
 
@@ -53,7 +114,40 @@
         server = {
           http_addr = "0.0.0.0";
           http_port = 3000;
-          domain = "graphana.sbr.pm";
+          domain = "grafana.sbr.pm";
+          root_url = "https://grafana.sbr.pm";
+        };
+      };
+
+      provision = { # declarative datasource and dashboard provisioning
+        enable = true;
+        datasources.settings = {
+          apiVersion = 1;
+          datasources = [
+            {
+              name = "Prometheus";
+              type = "prometheus";
+              access = "proxy";
+              url = "http://localhost:9001"; # NOTE(review): assumes the local Prometheus listens on 9001 — confirm against its configured port
+              isDefault = true;
+              jsonData = {
+                timeInterval = "30s";
+              };
+            }
+          ];
+        };
+
+        dashboards.settings = {
+          apiVersion = 1;
+          providers = [
+            {
+              name = "Default";
+              type = "file";
+              disableDeletion = false;
+              allowUiUpdates = true;
+              options.path = "/var/lib/grafana/dashboards"; # same directory the systemd.tmpfiles rule in this file creates
+            }
+          ];
+        };
+      };
         };
       };
     };
@@ -65,17 +159,7 @@
           job_name = "node";
           static_configs = [
             {
-              # TODO: make this dynamic
-              targets = [
-                "aion.sbr.pm:9100"
-                "aix.sbr.pm:9000"
-                "aomi.sbr.pm:9000"
-                "athena.sbr.pm:9000"
-                "demeter.sbr.pm:9000"
-                "kerkouane.sbr.pm:9000"
-                "sakhalin.sbr.pm:9000"
-                "shikoku.sbr.pm:9000"
-              ];
+              targets = nodeExporterTargets;
             }
           ];
         }
@@ -83,10 +167,23 @@
           job_name = "bind";
           static_configs = [
             {
-              targets = [
-                "demeter.sbr.pm:9009"
-                "athena.sbr.pm:9009"
-              ];
+              targets = bindTargets;
+            }
+          ];
+        }
+        {
+          job_name = "postgres";
+          static_configs = [
+            {
+              targets = postgresTargets;
+            }
+          ];
+        }
+        {
+          job_name = "traefik";
+          static_configs = [
+            {
+              targets = [ "rhea.sbr.pm:8080" ];
             }
           ];
         }
@@ -94,7 +191,7 @@
           job_name = "caddy";
           static_configs = [
             {
-              targets = [ "kerkouane.sbr.pm:2019" ];
+              targets = [ "${builtins.head globals.machines.kerkouane.net.vpn.ips}:2019" ];
             }
           ];
         }
@@ -102,31 +199,28 @@
           job_name = "exportarr";
           static_configs = [
             {
-              targets = [
-                "rhea.sbr.pm:9707" # sonarr
-                "rhea.sbr.pm:9708" # radarr
-                "rhea.sbr.pm:9709" # lidarr
-                "rhea.sbr.pm:9710" # prowlarr
-                "rhea.sbr.pm:9711" # readarr
-                "rhea.sbr.pm:9712" # bazarr
-              ];
+              targets = exportarrTargets;
             }
           ];
         }
+        {
+          job_name = "mosquitto";
+          static_configs = [
+            {
+              targets = [ "demeter.sbr.pm:9234" ];
+            }
+          ];
+        }
+        {
+          job_name = "homeassistant";
+          static_configs = [
+            {
+              targets = [ "home.sbr.pm:8123" ];
+            }
+          ];
+          metrics_path = "/api/prometheus";
+        }
       ];
-      exporters.node = {
-        enable = true;
-        port = 9000;
-        enabledCollectors = [
-          "systemd"
-          "processes"
-        ];
-        extraFlags = [
-          "--collector.ethtool"
-          "--collector.softirqs"
-          "--collector.tcpstat"
-        ];
-      };
     };
     tarsnap = {
       enable = true;
@@ -159,6 +253,34 @@
       endpointPublicKey = "${globals.machines.kerkouane.net.vpn.pubkey}";
     };
   };
+
+  # Create the directory that Grafana's file-based dashboard provider is pointed at
+  systemd.tmpfiles.rules = [
+    "d /var/lib/grafana/dashboards 0755 grafana grafana -"
+  ];
+
+  # Best-effort reset of the Grafana admin password from the age secret, run after
+  # grafana.service. The unit succeeds either way; grafana-cli errors stay in the journal.
+  systemd.services.grafana-set-admin-password = {
+    description = "Set Grafana admin password from secret file";
+    after = [ "grafana.service" ];
+    wantedBy = [ "multi-user.target" ];
+    serviceConfig = {
+      Type = "oneshot";
+      User = "grafana";
+      RemainAfterExit = true;
+    };
+    script = ''
+      if ${config.services.grafana.package}/bin/grafana-cli --homepath ${config.services.grafana.dataDir} admin reset-admin-password --password-from-stdin < ${
+        config.age.secrets."grafana-admin-password".path
+      }; then
+        echo "Admin password updated successfully"
+      else
+        echo "Failed to update password or admin user doesn't exist yet" >&2
+      fi
+    '';
+  };
+
   environment.systemPackages = with pkgs; [ yt-dlp ]; # -----------------------------------
   environment.etc."vrsync".text = ''
     /home/vincent/desktop/pictures/screenshots/ vincent@synodine.home:/volumeUSB2/usbshare/pictures/screenshots/
secrets.nix
@@ -101,5 +101,6 @@ in
   "secrets/rhea/exportarr-bazarr-apikey.age".publicKeys = users ++ [ rhea ];
   "secrets/rhea/jellyfin-auto-collections-api-key.age".publicKeys = users ++ [ rhea ];
   "secrets/rhea/jellyfin-auto-collections-jellyseerr-password.age".publicKeys = users ++ [ rhea ];
+  "secrets/sakhalin/grafana-admin-password.age".publicKeys = users ++ [ sakhalin ];
   "secrets/demeter/mosquitto-homeassistant-password.age".publicKeys = users ++ [ demeter ];
 }