models.json

 1{
 2  "providers": {
 3    "llama-cpp": {
 4      "baseUrl": "https://llm.sbr.pm/v1",
 5      "api": "openai-completions",
 6      "apiKey": "local-test",
 7      "models": [
 8        {
 9          "id": "Qwen/Qwen3-Coder-Next-GGUF:Q3_K_M",
10          "name": "Qwen3 Coder Next 80B-A3B (best coding, ~40GB RAM)",
11          "reasoning": false,
12          "input": ["text"],
13          "contextWindow": 262144,
14          "maxTokens": 32768,
15          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
16        },
17        {
18          "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q4_K_M",
19          "name": "Qwen3 Coder 30B-A3B (sweet spot coding, ~19GB RAM)",
20          "reasoning": false,
21          "input": ["text"],
22          "contextWindow": 131072,
23          "maxTokens": 32768,
24          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
25        },
26        {
27          "id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF:Q4_K_M",
28          "name": "Qwen 2.5 Coder 7B (lightweight coding, ~5GB)",
29          "reasoning": false,
30          "input": ["text"],
31          "contextWindow": 32768,
32          "maxTokens": 16384,
33          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
34        },
35        {
36          "id": "Qwen/Qwen3-8B-GGUF:Q4_K_M",
37          "name": "Qwen3 8B (general purpose, reasoning, ~5GB)",
38          "reasoning": true,
39          "input": ["text"],
40          "contextWindow": 131072,
41          "maxTokens": 32768,
42          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
43        },
44        {
45          "id": "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M",
46          "name": "DeepSeek R1 Distill 7B (deep reasoning, ~5GB)",
47          "reasoning": true,
48          "input": ["text"],
49          "contextWindow": 131072,
50          "maxTokens": 32768,
51          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
52        },
53        {
54          "id": "unsloth/Phi-4-mini-instruct-GGUF:Q4_K_M",
55          "name": "Phi-4 mini 3.8B (ultra-fast utility, ~2.5GB)",
56          "reasoning": false,
57          "input": ["text"],
58          "contextWindow": 16384,
59          "maxTokens": 8192,
60          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
61        }
62      ]
63    }
64  }
65}