{
  "providers": {
    "llama-cpp": {
      "baseUrl": "https://llm.sbr.pm/v1",
      "api": "openai-completions",
      "apiKey": "local-test",
      "models": [
        {
          "id": "Qwen/Qwen3-Coder-Next-GGUF:Q3_K_M",
          "name": "Qwen3 Coder Next 80B-A3B (best coding, ~40GB RAM)",
          "reasoning": false,
          "input": ["text"],
          "contextWindow": 262144,
          "maxTokens": 32768,
          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
        },
        {
          "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q4_K_M",
          "name": "Qwen3 Coder 30B-A3B (sweet spot coding, ~19GB RAM)",
          "reasoning": false,
          "input": ["text"],
          "contextWindow": 131072,
          "maxTokens": 32768,
          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
        },
        {
          "id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF:Q4_K_M",
          "name": "Qwen 2.5 Coder 7B (lightweight coding, ~5GB)",
          "reasoning": false,
          "input": ["text"],
          "contextWindow": 32768,
          "maxTokens": 16384,
          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
        },
        {
          "id": "Qwen/Qwen3-8B-GGUF:Q4_K_M",
          "name": "Qwen3 8B (general purpose, reasoning, ~5GB)",
          "reasoning": true,
          "input": ["text"],
          "contextWindow": 131072,
          "maxTokens": 32768,
          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
        },
        {
          "id": "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M",
          "name": "DeepSeek R1 Distill 7B (deep reasoning, ~5GB)",
          "reasoning": true,
          "input": ["text"],
          "contextWindow": 131072,
          "maxTokens": 32768,
          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
        },
        {
          "id": "unsloth/Phi-4-mini-instruct-GGUF:Q4_K_M",
          "name": "Phi-4 mini 3.8B (ultra-fast utility, ~2.5GB)",
          "reasoning": false,
          "input": ["text"],
          "contextWindow": 16384,
          "maxTokens": 8192,
          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
        }
      ]
    }
  }
}