Commit 7a75718bf5e8
Changed files (3)
systems
common
services
dns
sakhalin
systems/common/services/dns/sbr.pm-gandi.nix
@@ -19,13 +19,10 @@ baseZone
jellyfin.A = [ "167.99.17.238" ];
audiobookshelf.A = [ "167.99.17.238" ];
- # Internal services accessible via VPN only
- ollama.A = [ "10.100.0.50" ]; # rhea VPN IP
-
# Service aliases
music.A = [ "167.99.17.238" ]; # navidrome
photos.A = [ "167.99.17.238" ]; # immich
podcasts.A = [ "167.99.17.238" ]; # audiobookshelf
- llm.A = [ "10.100.0.50" ]; # ollama (rhea VPN IP)
+ llm.A = [ "10.100.0.50" ]; # rhea VPN IP
};
}
systems/sakhalin/extra.nix
@@ -294,14 +294,6 @@ in
}
];
}
- {
- job_name = "ollama";
- static_configs = [
- {
- targets = [ "${builtins.head globals.machines.aomi.net.ips}:8000" ];
- }
- ];
- }
{
job_name = "restic";
static_configs = [
@@ -463,8 +455,7 @@ in
# Create Grafana dashboard directory
systemd.tmpfiles.rules = [
"d /var/lib/grafana/dashboards 0755 grafana grafana -"
- "C /var/lib/grafana/dashboards/ollama-metrics.json 0644 grafana grafana - ${../../tools/ollama-exporter/grafana-dashboard.json}"
- "C /var/lib/grafana/dashboards/ollama-performance.json 0644 grafana grafana - ${../../tools/ollama-exporter/grafana-dashboard-custom.json}"
+
];
# Set Grafana admin password from secret file
systems/sakhalin/prometheus-alerts.nix
@@ -367,68 +367,6 @@
];
}
- {
- name = "ollama_alerts";
- interval = "30s";
- rules = [
- # Ollama service down
- {
- alert = "OllamaDown";
- expr = "up{job=\"ollama\"} == 0";
- for = "2m";
- labels = {
- severity = "critical";
- };
- annotations = {
- summary = "Ollama service down on {{ $labels.instance }}";
- description = "Ollama LLM service has been unreachable for more than 2 minutes - check aomi ollama-exporter";
- };
- }
-
- # High latency (P95 > 5 seconds)
- {
- alert = "OllamaHighLatency";
- expr = "histogram_quantile(0.95, sum(rate(ollama_response_seconds_bucket[5m])) by (le, model)) > 5";
- for = "5m";
- labels = {
- severity = "warning";
- };
- annotations = {
- summary = "High Ollama inference latency";
- description = "Model {{ $labels.model }} P95 latency is {{ $value | humanizeDuration }} (threshold: 5s) - CPU may be overloaded";
- };
- }
-
- # Low throughput (< 5 tokens/sec for 10+ minutes)
- {
- alert = "OllamaLowThroughput";
- expr = "rate(ollama_tokens_generated_total[5m]) < 5";
- for = "10m";
- labels = {
- severity = "warning";
- };
- annotations = {
- summary = "Low Ollama token generation rate";
- description = "Token generation rate is {{ $value | humanize }} tokens/sec (expected: 7-15 for CPU) - check aomi CPU usage";
- };
- }
-
- # High error rate (> 5% for 5+ minutes)
- {
- alert = "OllamaHighErrorRate";
- expr = "(sum(rate(ollama_requests_total{status=~\"5..\"}[5m])) / sum(rate(ollama_requests_total[5m]))) * 100 > 5";
- for = "5m";
- labels = {
- severity = "critical";
- };
- annotations = {
- summary = "High Ollama error rate";
- description = "Error rate is {{ $value | humanizePercentage }} (threshold: 5%) - check ollama logs on aomi";
- };
- }
- ];
- }
-
{
name = "backup_alerts";
interval = "1h";