Commit 054a0f9098a1

Vincent Demeester <vincent@sbr.pm>
2026-02-18 15:14:30
feat(pi): track local models config
Removed dots/pi/agent/models.json from .gitignore so the file is now tracked, and pointed the llama-cpp provider's baseUrl at https://llm.sbr.pm instead of localhost so the config is portable across machines.
1 parent bd8ee03
Changed files (2)
dots/pi/agent/models.json
@@ -0,0 +1,65 @@
+{
+  "providers": {
+    "llama-cpp": {
+      "baseUrl": "https://llm.sbr.pm",
+      "api": "openai-completions",
+      "apiKey": "local-test",
+      "models": [
+        {
+          "id": "Qwen/Qwen3-Coder-Next-GGUF:Q3_K_M",
+          "name": "Qwen3 Coder Next 80B-A3B (best coding, ~40GB RAM)",
+          "reasoning": false,
+          "input": ["text"],
+          "contextWindow": 262144,
+          "maxTokens": 32768,
+          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
+        },
+        {
+          "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q4_K_M",
+          "name": "Qwen3 Coder 30B-A3B (sweet spot coding, ~19GB RAM)",
+          "reasoning": false,
+          "input": ["text"],
+          "contextWindow": 131072,
+          "maxTokens": 32768,
+          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
+        },
+        {
+          "id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF:Q4_K_M",
+          "name": "Qwen 2.5 Coder 7B (lightweight coding, ~5GB)",
+          "reasoning": false,
+          "input": ["text"],
+          "contextWindow": 32768,
+          "maxTokens": 16384,
+          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
+        },
+        {
+          "id": "Qwen/Qwen3-8B-GGUF:Q4_K_M",
+          "name": "Qwen3 8B (general purpose, reasoning, ~5GB)",
+          "reasoning": true,
+          "input": ["text"],
+          "contextWindow": 131072,
+          "maxTokens": 32768,
+          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
+        },
+        {
+          "id": "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M",
+          "name": "DeepSeek R1 Distill 7B (deep reasoning, ~5GB)",
+          "reasoning": true,
+          "input": ["text"],
+          "contextWindow": 131072,
+          "maxTokens": 32768,
+          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
+        },
+        {
+          "id": "unsloth/Phi-4-mini-instruct-GGUF:Q4_K_M",
+          "name": "Phi-4 mini 3.8B (ultra-fast utility, ~2.5GB)",
+          "reasoning": false,
+          "input": ["text"],
+          "contextWindow": 16384,
+          "maxTokens": 8192,
+          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
+        }
+      ]
+    }
+  }
+}
.gitignore
@@ -30,4 +30,4 @@ hardware-configuration.nix
 .playwright-mcp/
 .claude/skills/
 node_modules/
-dots/pi/agent/models.json
+