Commit 7a75718bf5e8

Vincent Demeester <vincent@sbr.pm>
2026-02-19 12:05:32
fix(monitoring): remove ollama monitoring
Remove the ollama scrape job, alert rules, Grafana dashboards, and the ollama.A DNS record after the aomi decommission. Keep the llm.A DNS record pointing to rhea.
1 parent 6ce926e
Changed files (3)
systems
systems/common/services/dns/sbr.pm-gandi.nix
@@ -19,13 +19,10 @@ baseZone
     jellyfin.A = [ "167.99.17.238" ];
     audiobookshelf.A = [ "167.99.17.238" ];
 
-    # Internal services accessible via VPN only
-    ollama.A = [ "10.100.0.50" ]; # rhea VPN IP
-
     # Service aliases
     music.A = [ "167.99.17.238" ]; # navidrome
     photos.A = [ "167.99.17.238" ]; # immich
     podcasts.A = [ "167.99.17.238" ]; # audiobookshelf
-    llm.A = [ "10.100.0.50" ]; # ollama (rhea VPN IP)
+    llm.A = [ "10.100.0.50" ]; # rhea VPN IP
   };
 }
systems/sakhalin/extra.nix
@@ -294,14 +294,6 @@ in
             }
           ];
         }
-        {
-          job_name = "ollama";
-          static_configs = [
-            {
-              targets = [ "${builtins.head globals.machines.aomi.net.ips}:8000" ];
-            }
-          ];
-        }
         {
           job_name = "restic";
           static_configs = [
@@ -463,8 +455,7 @@ in
   # Create Grafana dashboard directory and deploy Ollama dashboards
   systemd.tmpfiles.rules = [
     "d /var/lib/grafana/dashboards 0755 grafana grafana -"
-    "C /var/lib/grafana/dashboards/ollama-metrics.json 0644 grafana grafana - ${../../tools/ollama-exporter/grafana-dashboard.json}"
-    "C /var/lib/grafana/dashboards/ollama-performance.json 0644 grafana grafana - ${../../tools/ollama-exporter/grafana-dashboard-custom.json}"
+
   ];
 
   # Set Grafana admin password from secret file
systems/sakhalin/prometheus-alerts.nix
@@ -367,68 +367,6 @@
       ];
     }
 
-    {
-      name = "ollama_alerts";
-      interval = "30s";
-      rules = [
-        # Ollama service down
-        {
-          alert = "OllamaDown";
-          expr = "up{job=\"ollama\"} == 0";
-          for = "2m";
-          labels = {
-            severity = "critical";
-          };
-          annotations = {
-            summary = "Ollama service down on {{ $labels.instance }}";
-            description = "Ollama LLM service has been unreachable for more than 2 minutes - check aomi ollama-exporter";
-          };
-        }
-
-        # High latency (P95 > 5 seconds)
-        {
-          alert = "OllamaHighLatency";
-          expr = "histogram_quantile(0.95, sum(rate(ollama_response_seconds_bucket[5m])) by (le, model)) > 5";
-          for = "5m";
-          labels = {
-            severity = "warning";
-          };
-          annotations = {
-            summary = "High Ollama inference latency";
-            description = "Model {{ $labels.model }} P95 latency is {{ $value | humanizeDuration }} (threshold: 5s) - CPU may be overloaded";
-          };
-        }
-
-        # Low throughput (< 5 tokens/sec for 10+ minutes)
-        {
-          alert = "OllamaLowThroughput";
-          expr = "rate(ollama_tokens_generated_total[5m]) < 5";
-          for = "10m";
-          labels = {
-            severity = "warning";
-          };
-          annotations = {
-            summary = "Low Ollama token generation rate";
-            description = "Token generation rate is {{ $value | humanize }} tokens/sec (expected: 7-15 for CPU) - check aomi CPU usage";
-          };
-        }
-
-        # High error rate (> 5% for 5+ minutes)
-        {
-          alert = "OllamaHighErrorRate";
-          expr = "(sum(rate(ollama_requests_total{status=~\"5..\"}[5m])) / sum(rate(ollama_requests_total[5m]))) * 100 > 5";
-          for = "5m";
-          labels = {
-            severity = "critical";
-          };
-          annotations = {
-            summary = "High Ollama error rate";
-            description = "Error rate is {{ $value | humanizePercentage }} (threshold: 5%) - check ollama logs on aomi";
-          };
-        }
-      ];
-    }
-
     {
       name = "backup_alerts";
       interval = "1h";