auto-update-daily-20260202
1;;; emacs-ollama-config.el --- Ollama integration for Emacs -*- lexical-binding: t -*-
2
3;; Copyright (C) 2025 Vincent Demeester
4;; Author: Vincent Demeester <vincent@sbr.pm>
5
6;; This file is NOT part of GNU Emacs.
7
8;;; Commentary:
9;; Configuration for integrating Ollama LLM with Emacs using gptel.
10;; Add this to your init.el after the gptel use-package declaration.
11
12;;; Code:
13
;; The forms below call `gptel-make-ollama', so evaluate them once gptel is
;; loaded, e.g. from inside your (use-package gptel :config ...) block.
15
;; Recommended default backend: talks to the exporter on port 8000, which
;; is the endpoint that provides Prometheus metrics.
;; The measured overhead is about 25ms per request, i.e. roughly 0.5-1.25%
;; on small models and below 0.1% on larger ones, so it is negligible.
;; Streaming is turned off because of gptel compatibility issues with
;; Ollama streaming (the "stuck typing" issue).
(gptel-make-ollama "Ollama (with metrics)"
  :stream nil                        ; nil avoids the "stuck typing" issue
  :host "192.168.1.23:8000"          ; exporter endpoint (Prometheus metrics)
  :models
  (list
   ;; -- Tool calling / OpenCode support --
   "llama3.1:8b"                     ; best for tool calling
   "mistral-nemo:latest"             ; fast tool calling

   ;; -- Coding --
   "qwen2.5-coder:7b"                ; best coding performance
   "codestral:latest"                ; large coding model (22B)
   "qwen-opencode:latest"            ; custom OpenCode model

   ;; -- Reasoning --
   "deepseek-r1:7b"                  ; lightweight reasoning
   "phi4-reasoning:latest"           ; 14B reasoning

   ;; -- Multimodal --
   "qwen2.5vl:7b"                    ; vision support

   ;; -- Quick tasks --
   "phi3.5:3.8b"))                   ; fastest model
40
;; Alternative backend that connects to Ollama directly on port 11434,
;; bypassing metrics collection and saving the ~25ms per request.
;; Use it only for benchmarking or when metrics are explicitly unwanted.
(gptel-make-ollama "Ollama (direct)"
  :stream nil                        ; nil avoids the "stuck typing" issue
  :host "192.168.1.23:11434"         ; Ollama itself, no metrics collection
  :models (list "llama3.1:8b"
                "qwen2.5-coder:7b"
                "phi3.5:3.8b"
                "deepseek-r1:7b"))
50
51;; Optional: Set Ollama as default backend
52;; Uncomment these lines to use Ollama by default instead of Gemini:
53;;
;; (setq gptel-model 'llama3.1:8b      ; gptel expects a symbol here
;;       gptel-backend (gptel-make-ollama "Ollama"
;;                       :host "192.168.1.23:8000"
;;                       :stream nil   ; keep off: avoids the "stuck typing" issue
;;                       :models '("llama3.1:8b"
;;                                 "qwen2.5-coder:7b"
;;                                 "phi3.5:3.8b")))
61
62;;; Usage:
63;;
64;; 1. Open a buffer and start gptel:
65;; M-x gptel
66;; or C-c a g (if you have the binding)
67;;
68;; 2. Switch backend using the transient menu:
69;; C-c C-m (gptel-menu)
70;; Then select backend and model
71;;
72;; 3. Send requests:
73;; C-c C-c (gptel-send)
74;;
75;; 4. Abort requests:
76;; C-c C-k (gptel-abort)
77;;
78;; 5. Quick selection:
79;; In any buffer, select text and call:
80;; M-x gptel-send
81;; This will send the selected region to the LLM
82
83;;; Performance Notes:
84;;
85;; - CPU-only models: First request 30-90s, subsequent 15-45s
86;; - Model stays loaded: 10 minutes after last request
87;; - Fastest model: phi3.5:3.8b (~2-5 seconds with warm model)
88;; - Best coding: qwen2.5-coder:7b (~30-60 seconds)
89;; - Exporter overhead: ~25ms (negligible - use "with metrics" backend by default)
90;; - Only use "direct" backend for synthetic benchmarks or troubleshooting
91
92;;; Recommended Models by Task:
93;;
94;; Coding:
95;; - qwen2.5-coder:7b (best performance)
96;; - codestral:latest (larger, more capable but slower)
97;;
98;; Quick queries:
99;; - phi3.5:3.8b (fastest)
100;;
101;; Reasoning:
102;; - deepseek-r1:7b (good reasoning)
103;; - phi4-reasoning:latest (better but slower)
104;;
105;; Tool calling (for agent-like behavior):
106;; - llama3.1:8b
107;; - mistral-nemo:latest
108
;; Declare the `emacs-ollama-config' feature provided by this file.
(provide 'emacs-ollama-config)
110;;; emacs-ollama-config.el ends here