Commit `038bffa3c7e6`

Vincent Demeester <vincent@sbr.pm>

2026-01-06 13:43:16

feat(aomi): Configure Ollama with local LLM models

- Enable local LLM infrastructure for AI experimentation
- Load qwen2:1.5b for fast testing and mistral:7b for production use
- Optimize settings for CPU-only operation with resource limits
- Open firewall port 11434 for API access

Signed-off-by: Vincent Demeester <vincent@sbr.pm>

main

1 parent 7033c82

Changed files (2)

systems

aomi

extra.nix

openshift-port-forward.nix

@@ -57,7 +57,18 @@
     };
     ollama = {
       enable = true;
-      # acceleration = "cuda"; # no nivida :D
+      # acceleration = "cuda"; # no nvidia :D
+      host = "0.0.0.0"; # bind on all interfaces so the API is reachable from other hosts; NOTE(review): confirm exposing it without an auth layer is intended
+      port = 11434; # same port as the "Allow Ollama API" firewall rule (tcp dport 11434)
+      loadModels = [ # models made available to the service (presumably pulled automatically; confirm against the module docs)
+        "qwen2:1.5b" # Small fast model for testing (2-4GB RAM, 25-30 tok/s)
+        "mistral:7b-instruct-q4_K_M" # Production balanced model (3.5GB RAM, 25+ tok/s)
+      ];
+      environmentVariables = {
+        OLLAMA_MODELS = "/var/lib/ollama/models"; # model storage directory; NOTE(review): services.ollama also has a `models` option that may take precedence, so confirm this is not redundant
+        OLLAMA_NUM_PARALLEL = "1"; # CPU-only host: limit to a single parallel request
+        OLLAMA_KEEP_ALIVE = "10m"; # idle time a loaded model is kept in memory (per Ollama docs; confirm)
+      };
     };
     smartd = {
       enable = true;

@@ -76,6 +76,9 @@
               # Allow Docker Prometheus metrics
               tcp dport 9323 accept
 
+              # Allow Ollama API
+              tcp dport 11434 accept
+
               # Allow libvirt
               tcp dport 16509 accept

Commit 038bffa3c7e6

Commit `038bffa3c7e6`