Commit 214c6c6e9f27

Vincent Demeester <vincent@sbr.pm>
2026-02-16 09:38:05
feat(aomi): removed ollama service and exporter
Removed the Ollama service configuration along with its Prometheus exporter to reduce resource usage on the aomi host. The Ollama models were consuming significant disk space and memory.
1 parent b9e4749
Changed files (1)
systems
systems/aomi/extra.nix
@@ -325,39 +325,7 @@
       endpoint = "${globals.net.vpn.endpoint}";
       endpointPublicKey = "${globals.machines.kerkouane.net.vpn.pubkey}";
     };
-    ollama = {
-      enable = true;
-      # acceleration = "cuda"; # no nvidia :D
-      host = "0.0.0.0"; # Listen on all interfaces for network access
-      port = 11434;
-      loadModels = [
-        # Coding Models
-        "qwen2.5-coder:7b" # Best coding: 88.4% HumanEval, Apache 2.0 (~4-5GB, 10-15 tok/s)
-        "codestral" # Latest coding (Jan 2025): 86.6% HumanEval, #1 LMsys (~14GB, 8-10 tok/s)
 
-        # Reasoning Models
-        "phi4-reasoning" # Best 14B reasoning: outperforms 70B distillation, MIT (~9GB, 6-10 tok/s)
-        "deepseek-r1:7b" # Lightweight reasoning: MIT license (~4.5GB, 8-12 tok/s)
-
-        # Multimodal
-        "qwen2.5vl:7b" # Best vision: beats Llama 3.2 11B, Apache 2.0 (~6GB, 5-8 tok/s)
-
-        # Quick Tasks
-        "phi3.5:3.8b" # Ultra-fast all-rounder: MIT license (~2.4GB, 15-25 tok/s)
-
-        # Tool Calling / OpenCode Support
-        "llama3.1:8b" # Native tool support, good for OpenCode (~4.7GB)
-        "mistral-nemo" # Fast tool calling support (~7.7GB)
-
-        # Legacy (keeping for compatibility)
-        "mistral:7b-instruct-q4_K_M" # General purpose fallback
-      ];
-      environmentVariables = {
-        OLLAMA_MODELS = "/var/lib/ollama/models";
-        OLLAMA_NUM_PARALLEL = "1"; # CPU-only, keep it simple
-        OLLAMA_KEEP_ALIVE = "10m";
-      };
-    };
     smartd = {
       enable = true;
       devices = [ { device = "/dev/nvme0n1"; } ];
@@ -404,49 +372,10 @@
     jayrah
   ];
 
-  # Ollama Prometheus Exporter (Docker-based, built locally)
   systemd.tmpfiles.rules = [
-    "d /var/lib/ollama-exporter 0755 root root -"
     "d /var/lib/git-builds 0755 builder users -"
   ];
 
-  systemd.services.ollama-exporter = {
-    description = "Ollama Prometheus Exporter";
-    after = [
-      "docker.service"
-      "ollama.service"
-    ];
-    requires = [ "docker.service" ];
-    wantedBy = [ "multi-user.target" ];
-
-    serviceConfig = {
-      Type = "simple";
-      Restart = "always";
-      RestartSec = "10s";
-
-      ExecStartPre = [
-        # Stop and remove existing container
-        "-${pkgs.docker}/bin/docker stop ollama-exporter"
-        "-${pkgs.docker}/bin/docker rm ollama-exporter"
-        # Copy source files to build directory (for future manual rebuilds if needed)
-        "${pkgs.coreutils}/bin/cp ${../../tools/ollama-exporter/Dockerfile} /var/lib/ollama-exporter/Dockerfile"
-        "${pkgs.coreutils}/bin/cp ${../../tools/ollama-exporter/ollama_exporter.py} /var/lib/ollama-exporter/ollama_exporter.py"
-        # Build image locally only if it doesn't exist (to avoid DNS timeout issues)
-        "-${pkgs.bash}/bin/bash -c '${pkgs.docker}/bin/docker image inspect ollama-exporter:local >/dev/null 2>&1 || ${pkgs.docker}/bin/docker build -t ollama-exporter:local /var/lib/ollama-exporter'"
-      ];
-
-      ExecStart = ''
-        ${pkgs.docker}/bin/docker run --rm --name ollama-exporter \
-          -p 8000:8000 \
-          -e OLLAMA_HOST=http://localhost:11434 \
-          --network host \
-          ollama-exporter:local
-      '';
-
-      ExecStop = "${pkgs.docker}/bin/docker stop ollama-exporter";
-    };
-  };
-
   # NOTE: NixOS firewall is disabled (see openshift-port-forward.nix).
   # Firewall rules must be added to the nftables config there instead.
   # networking.firewall.allowedTCPPorts = [ 8000 8888 ];