Commit 985b808cd3f7
systems/okinawa/extra.nix
@@ -53,7 +53,9 @@
# Ollama for local LLM inference with dGPU
services.ollama = {
enable = true;
- package = pkgs.ollama-rocm; # ROCm support for AMD GPU
+ # Use regular ollama with Vulkan instead of ollama-rocm
+ # ollama-rocm has GGML_ASSERT(max_blocks_per_sm > 0) failures on RDNA2
+ package = pkgs.ollama; # Intended: Vulkan backend on the AMD GPU (more stable than ROCm) - TODO confirm this build enables Vulkan
host = "0.0.0.0";
port = 11434;
@@ -75,14 +77,14 @@
];
environmentVariables = {
- # Critical: RX 6700S (gfx1032) needs this override
- HSA_OVERRIDE_GFX_VERSION = "10.3.0";
+ # Vulkan is used automatically for AMD GPUs
+ # No HSA_OVERRIDE_GFX_VERSION needed with Vulkan backend
OLLAMA_KEEP_ALIVE = "10m";
OLLAMA_NUM_PARALLEL = "1";
};
};
- # ROCm environment variables
+ # GPU environment variables (needed for ROCm with RX 6700S / gfx1032)
environment.variables = {
HSA_OVERRIDE_GFX_VERSION = "10.3.0";
};
systems/okinawa/home.nix
@@ -23,6 +23,13 @@ in
../../home/common/shell/gh.nix
];
+ # llama-cpp defaults for RX 6700S (RDNA2)
+ # Flash attention crashes under ROCm on this GPU (gfx1032 run as gfx1030 via HSA_OVERRIDE_GFX_VERSION): GGML_ASSERT(max_blocks_per_sm > 0)
+ home.sessionVariables = {
+ LLAMA_ARG_FLASH_ATTN = "off";
+ LLAMA_ARG_MAIN_GPU = "0";
+ };
+
home.packages = with pkgs; [
nautilus