flake-update-20260505
  1;;; emacs-ollama-config.el --- Ollama integration for Emacs -*- lexical-binding: t -*-
  2
  3;; Copyright (C) 2025 Vincent Demeester
  4;; Author: Vincent Demeester <vincent@sbr.pm>
  5
  6;; This file is NOT part of GNU Emacs.
  7
  8;;; Commentary:
  9;; Configuration for integrating Ollama LLM with Emacs using gptel.
 10;; Add this to your init.el, inside the :config section of your gptel use-package declaration.
 11
 12;;; Code:
 13
 14;; Add this inside your (use-package gptel :config ...) block
 15
 16;; Ollama backend with metrics tracking (RECOMMENDED - default choice)
 17;; Measured overhead: ~25ms (negligible - 0.5-1.25% on small models, <0.1% on larger models)
 18;; Note: Streaming disabled due to gptel compatibility issues with Ollama streaming
 19(gptel-make-ollama "Ollama (with metrics)"
 20  ;; Non-streaming responses; avoids the "stuck typing" issue.
 21  :stream nil
 22  ;; Requests go to the exporter endpoint so Prometheus metrics are collected.
 23  :host "192.168.1.23:8000"
 24  :models
 25  (list
 26   ;; Tool calling / OpenCode support
 27   "llama3.1:8b"            ; best for tool calling
 28   "mistral-nemo:latest"    ; fast tool calling
 29   ;; Coding
 30   "qwen2.5-coder:7b"       ; best coding performance
 31   "codestral:latest"       ; large coding model (22B)
 32   "qwen-opencode:latest"   ; custom OpenCode model
 33   ;; Reasoning
 34   "deepseek-r1:7b"         ; lightweight reasoning
 35   "phi4-reasoning:latest"  ; 14B reasoning
 36   ;; Multimodal
 37   "qwen2.5vl:7b"           ; vision support
 38   ;; Quick tasks
 39   "phi3.5:3.8b"))          ; fastest model
 40
 41;; Direct Ollama backend (no metrics, saves ~25ms per request)
 42;; Only use for benchmarking or when you explicitly don't want metrics
 43(gptel-make-ollama "Ollama (direct)"
 44  ;; Non-streaming responses; avoids the "stuck typing" issue.
 45  :stream nil
 46  ;; Talks to Ollama directly, so metrics collection is bypassed.
 47  :host "192.168.1.23:11434"
 48  :models (list "llama3.1:8b" "qwen2.5-coder:7b"
 49                "phi3.5:3.8b" "deepseek-r1:7b"))
 50
 51;; Optional: Set Ollama as default backend
 52;; Uncomment these lines to use Ollama by default instead of Gemini:
 53;;
 54;; (setq gptel-model "llama3.1:8b"
 55;;       gptel-backend (gptel-make-ollama "Ollama"
 56;;                       :host "192.168.1.23:8000"
 57;;                       :stream nil  ; streaming stays off (see the "stuck typing" note)
 58;;                       :models '("llama3.1:8b"
 59;;                                 "qwen2.5-coder:7b"
 60;;                                 "phi3.5:3.8b")))
 61
 62;;; Usage:
 63;;
 64;; 1. Open a buffer and start gptel:
 65;;    M-x gptel
 66;;    or C-c a g (if you have the binding)
 67;;
 68;; 2. Switch backend using the transient menu:
 69;;    C-c C-m (gptel-menu)
 70;;    Then select backend and model
 71;;
 72;; 3. Send requests:
 73;;    C-c C-c (gptel-send)
 74;;
 75;; 4. Abort requests:
 76;;    C-c C-k (gptel-abort)
 77;;
 78;; 5. Quick selection:
 79;;    In any buffer, select text and call:
 80;;    M-x gptel-send
 81;;    This will send the selected region to the LLM
 82
 83;;; Performance Notes:
 84;;
 85;; - CPU-only models: First request 30-90s, subsequent 15-45s
 86;; - Model stays loaded: 10 minutes after last request
 87;; - Fastest model: phi3.5:3.8b (~2-5 seconds with warm model)
 88;; - Best coding: qwen2.5-coder:7b (~30-60 seconds)
 89;; - Exporter overhead: ~25ms (negligible - use "with metrics" backend by default)
 90;; - Only use "direct" backend for synthetic benchmarks or troubleshooting
 91
 92;;; Recommended Models by Task:
 93;;
 94;; Coding:
 95;;   - qwen2.5-coder:7b (best performance)
 96;;   - codestral:latest (larger, more capable but slower)
 97;;
 98;; Quick queries:
 99;;   - phi3.5:3.8b (fastest)
100;;
101;; Reasoning:
102;;   - deepseek-r1:7b (good reasoning)
103;;   - phi4-reasoning:latest (better but slower)
104;;
105;; Tool calling (for agent-like behavior):
106;;   - llama3.1:8b
107;;   - mistral-nemo:latest
108
109(provide 'emacs-ollama-config)  ; Announce the `emacs-ollama-config' feature to `require'.
110;;; emacs-ollama-config.el ends here