Commit a44f34fc60a1
Changed files (3)
systems
aomi
sakhalin
systems/aomi/extra.nix
@@ -1,6 +1,7 @@
{
globals,
libx,
+ pkgs,
...
}:
{
@@ -126,4 +127,42 @@
# };
};
+ # Ollama Prometheus Exporter (Docker-based).
+ # Runs the ghcr.io/frcooper/ollama-exporter image as a foreground container
+ # supervised by systemd. The container shares the host network namespace
+ # (--network host), so it reaches Ollama through localhost:11434 and its
+ # listening port is bound directly on the host.
+ # NOTE(review): the exporter presumably listens on port 8000 (the port the
+ # original -p mapping named) - confirm against the image documentation.
+ systemd.services.ollama-exporter = {
+   description = "Ollama Prometheus Exporter";
+   # Ordering only: start once Docker (and Ollama, if it is being started) is up.
+   after = [
+     "docker.service"
+     "ollama.service"
+   ];
+   # Hard dependency: without the Docker daemon nothing below can run.
+   requires = [ "docker.service" ];
+   wantedBy = [ "multi-user.target" ];
+
+   serviceConfig = {
+     Type = "simple";
+     Restart = "always";
+     RestartSec = "10s";
+
+     # A leading "-" makes systemd ignore a non-zero exit status of that command.
+     ExecStartPre = [
+       # Stop and remove a leftover container from a previous run (may not exist).
+       "-${pkgs.docker}/bin/docker stop ollama-exporter"
+       "-${pkgs.docker}/bin/docker rm ollama-exporter"
+       # Refresh the image on a best-effort basis: if the registry is unreachable
+       # the service still starts from the locally cached image instead of failing.
+       "-${pkgs.docker}/bin/docker pull ghcr.io/frcooper/ollama-exporter:latest"
+     ];
+
+     # No -p/--publish here: Docker discards published ports (and warns) when the
+     # container uses host networking, so the mapping was a no-op.
+     ExecStart = ''
+       ${pkgs.docker}/bin/docker run --rm --name ollama-exporter \
+         -e OLLAMA_HOST=http://localhost:11434 \
+         --network host \
+         ghcr.io/frcooper/ollama-exporter:latest
+     '';
+
+     ExecStop = "${pkgs.docker}/bin/docker stop ollama-exporter";
+   };
+ };
+
+ # Open TCP port 8000 in the host firewall so the Ollama exporter can be scraped
+ # from other machines.
+ # NOTE(review): allowedTCPPorts is not limited to one interface - confirm that
+ # exposing the exporter on every interface of this host is intended.
+ networking.firewall.allowedTCPPorts = [ 8000 ];
+
}
systems/sakhalin/extra.nix
@@ -287,6 +287,14 @@ in
}
];
}
+ # Scrape job "ollama": a single static target, port 8000 on the first IP
+ # listed for aomi in globals.machines.aomi.net.ips.
+ {
+   job_name = "ollama";
+   static_configs = [
+     { targets = [ "${builtins.head globals.machines.aomi.net.ips}:8000" ]; }
+   ];
+ }
];
};
systems/sakhalin/prometheus-alerts.nix
@@ -367,6 +367,68 @@
];
}
+ {
+ name = "ollama_alerts";
+ interval = "30s";
+ rules = [
+ # Scrape target of job "ollama" has reported up == 0 for 2 minutes.
+ {
+   alert = "OllamaDown";
+   labels.severity = "critical";
+   for = "2m";
+   expr = ''up{job="ollama"} == 0'';
+   annotations = {
+     description = "Ollama LLM service has been unreachable for more than 2 minutes - check aomi ollama-exporter";
+     summary = "Ollama service down on {{ $labels.instance }}";
+   };
+ }
+
+ # Per-model P95 of the ollama_response_seconds histogram (5m rate window)
+ # has stayed above 5 seconds for 5 minutes.
+ {
+   alert = "OllamaHighLatency";
+   labels.severity = "warning";
+   for = "5m";
+   expr = "histogram_quantile(0.95, sum(rate(ollama_response_seconds_bucket[5m])) by (le, model)) > 5";
+   annotations = {
+     description = "Model {{ $labels.model }} P95 latency is {{ $value | humanizeDuration }} (threshold: 5s) - CPU may be overloaded";
+     summary = "High Ollama inference latency";
+   };
+ }
+
+ # Low throughput (< 5 tokens per second of response time for 10+ minutes).
+ # Tokens are divided by the time spent serving requests rather than by
+ # wall-clock time: a bare rate() of the token counter is 0 whenever Ollama is
+ # idle, which would keep this alert firing on a quiet host. With no traffic the
+ # ratio is 0/0 = NaN, the comparison yields nothing, and the alert stays silent.
+ # NOTE(review): assumes the exporter exposes the standard `_sum` series of the
+ # ollama_response_seconds histogram and that it covers token generation time -
+ # confirm against the exporter's /metrics output.
+ {
+   alert = "OllamaLowThroughput";
+   expr = "sum(rate(ollama_tokens_generated_total[5m])) / sum(rate(ollama_response_seconds_sum[5m])) < 5";
+   for = "10m";
+   labels = {
+     severity = "warning";
+   };
+   annotations = {
+     summary = "Low Ollama token generation rate";
+     description = "Token generation rate is {{ $value | humanize }} tokens/sec (expected: 7-15 for CPU) - check aomi CPU usage";
+   };
+ }
+
+ # High error rate (> 5% of requests with a 5xx status for 5+ minutes).
+ # The expression deliberately yields a 0-1 ratio (threshold 0.05), because the
+ # humanizePercentage template function used in the description expects a ratio
+ # and does the conversion to percent itself; feeding it a value already
+ # multiplied by 100 would render 6% as "600%".
+ {
+   alert = "OllamaHighErrorRate";
+   expr = "sum(rate(ollama_requests_total{status=~\"5..\"}[5m])) / sum(rate(ollama_requests_total[5m])) > 0.05";
+   for = "5m";
+   labels = {
+     severity = "critical";
+   };
+   annotations = {
+     summary = "High Ollama error rate";
+     description = "Error rate is {{ $value | humanizePercentage }} (threshold: 5%) - check ollama logs on aomi";
+   };
+ }
+ ];
+ }
+
{
name = "backup_alerts";
interval = "1h";