Commit bf7cc1246686

Vincent Demeester <vincent@sbr.pm>
2026-01-07 15:33:01
feat(ollama-exporter): Add support for OpenAI-compatible v1 API endpoints
- Add /v1/chat/completions and /v1/completions endpoint handlers
- Extract token usage from v1 API 'usage' field when available
- Maintain backward compatibility with native Ollama API
- Enable metrics collection for OpenCode and other v1 API clients
1 parent b36ba49
Changed files (1)
tools
ollama-exporter
tools/ollama-exporter/ollama_exporter.py
@@ -43,7 +43,11 @@ def extract_and_record_metrics(response_data, model):
     if not isinstance(response_data, dict):
         return
 
+    # Support both native Ollama API and OpenAI-compatible v1 API
+    # The native API has timing data at the top level; the v1 API might have it in a different location
     # https://github.com/ollama/ollama/blob/main/docs/api.md#response
+
+    # Try to extract from native Ollama format first
     total_duration = response_data.get("total_duration", 0) # total time spent in nanoseconds generating the response
     load_duration = response_data.get("load_duration", 0) # time spent in nanoseconds loading the model
     prompt_eval_duration = response_data.get("prompt_eval_duration", 0) # time spent in nanoseconds evaluating the prompt
@@ -51,6 +55,13 @@ def extract_and_record_metrics(response_data, model):
     eval_duration = response_data.get("eval_duration", 0) # time spent in nanoseconds generating the response
     eval_count = response_data.get("eval_count", 0) # number of tokens in the response
 
+    # For v1 API, try to extract from usage field if available
+    usage = response_data.get("usage", {})
+    if usage and not prompt_eval_count:
+        prompt_eval_count = usage.get("prompt_tokens", 0)
+    if usage and not eval_count:
+        eval_count = usage.get("completion_tokens", 0)
+
     if total_duration > 0:
         total_duration_seconds = total_duration / 1_000_000_000
         OLLAMA_TOTAL_DURATION.labels(model=model).observe(total_duration_seconds)
@@ -85,6 +96,8 @@ def metrics():
 
 @app.post("/api/chat")
 @app.post("/api/generate")
+@app.post("/v1/chat/completions")
+@app.post("/v1/completions")
 async def chat_with_metrics(request: Request):
     """Handle chat and generate requests with streaming support and metrics extraction."""
     body = await request.json()