Commit 038bffa3c7e6

Vincent Demeester <vincent@sbr.pm>
2026-01-06 13:43:16
feat(aomi): Configure Ollama with local LLM models
- Enable local LLM infrastructure for AI experimentation
- Load qwen2:1.5b for fast testing and mistral:7b for production use
- Optimize settings for CPU-only operation with resource limits
- Open firewall port 11434 for API access

Signed-off-by: Vincent Demeester <vincent@sbr.pm>
1 parent 7033c82
Changed files (2)
systems/aomi/extra.nix
@@ -57,7 +57,18 @@
     };
     ollama = {
       enable = true;
-      # acceleration = "cuda"; # no nivida :D
+      # acceleration = "cuda"; # no nvidia :D
+      host = "0.0.0.0";
+      port = 11434;
+      loadModels = [
+        "qwen2:1.5b" # Small fast model for testing (2-4GB RAM, 25-30 tok/s)
+        "mistral:7b-instruct-q4_K_M" # Production balanced model (3.5GB RAM, 25+ tok/s)
+      ];
+      environmentVariables = {
+        OLLAMA_MODELS = "/var/lib/ollama/models";
+        OLLAMA_NUM_PARALLEL = "1"; # CPU-only, keep it simple
+        OLLAMA_KEEP_ALIVE = "10m";
+      };
     };
     smartd = {
       enable = true;
systems/aomi/openshift-port-forward.nix
@@ -76,6 +76,9 @@
               # Allow Docker Prometheus metrics
               tcp dport 9323 accept
 
+              # Allow Ollama API
+              tcp dport 11434 accept
+
               # Allow libvirt
               tcp dport 16509 accept