Commit 386f52ec3a2b

Vincent Demeester <vincent@sbr.pm>
2026-02-13 14:09:08
feat(okinawa): switched ollama to vulkan GPU backend
Replaced pkgs.ollama (CPU-only) with pkgs.ollama-vulkan for actual GPU acceleration on the RX 6700S. Disabled ROCm via HIP_VISIBLE_DEVICES to avoid backend conflicts. Benchmarks show Vulkan is 2.6x faster for prompt eval and 1.2x faster for generation compared to ROCm on RDNA2.
1 parent 5906bec
Changed files (1)
systems
okinawa
systems/okinawa/extra.nix
@@ -53,9 +53,9 @@
   # Ollama for local LLM inference with dGPU
   services.ollama = {
     enable = true;
-    # Use regular ollama with Vulkan instead of ollama-rocm
+    # Use ollama-vulkan for GPU acceleration on RDNA2 (RX 6700S)
     # ollama-rocm has GGML_ASSERT(max_blocks_per_sm > 0) failures on RDNA2
-    package = pkgs.ollama; # Vulkan support for AMD GPU (more stable than ROCm)
+    package = pkgs.ollama-vulkan;
     host = "0.0.0.0";
     port = 11434;
 
@@ -77,8 +77,9 @@
     ];
 
     environmentVariables = {
-      # Vulkan is used automatically for AMD GPUs
-      # No HSA_OVERRIDE_GFX_VERSION needed with Vulkan backend
+      # Disable ROCm (via an empty HIP_VISIBLE_DEVICES) to avoid conflicts with the Vulkan backend
+      # Vulkan is 2.6x faster at prompt eval and 1.2x faster at generation than ROCm on RDNA2
+      HIP_VISIBLE_DEVICES = "";
       OLLAMA_KEEP_ALIVE = "10m";
       OLLAMA_NUM_PARALLEL = "1";
     };