Commit 4032aeed8340

Vincent Demeester <vincent@sbr.pm>
2025-10-06 11:59:14
home/ai: some aichat update configurations...
.. stolen from chmouel. I also deactivated the activation script, so I will need to run genconf.py manually. Signed-off-by: Vincent Demeester <vincent@sbr.pm>
1 parent 440554c
home/common/dev/ai.nix
@@ -28,14 +28,19 @@
   ];
 
   xdg.configFile."aichat/config.yaml.in".source = ./aichat.yaml;
-  xdg.configFile."aichat/update-config" = {
-    source = ./aichat-update-config;
+  xdg.configFile."aichat/models-override.yaml".source = ./aichat-models-override.yaml;
+  xdg.configFile."aichat/genconf.py" = {
+    source = ./genconf.py;
     executable = true;
   };
-  home.activation = {
-    # linkGeneration writeBoundary
-    aichat-configuration = lib.hm.dag.entryAfter [ "linkGeneration" ] ''
-      /home/vincent/.config/aichat/update-config
-    '';
-  };
+  # xdg.configFile."aichat/update-config" = {
+  #   source = ./aichat-update-config;
+  #   executable = true;
+  # };
+  # home.activation = {
+  #   # linkGeneration writeBoundary
+  #   aichat-configuration = lib.hm.dag.entryAfter [ "linkGeneration" ] ''
+  #     /home/vincent/.config/aichat/genconf.py
+  #   '';
+  # };
 }
home/common/dev/aichat-models-override.yaml
@@ -0,0 +1,2190 @@
+version: 0.29.0
+list:
+- provider: openai
+  models:
+  - name: gpt-4o
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.5
+    output_price: 10.0
+    max_output_tokens: 16384
+    supports_vision: true
+    supports_function_calling: true
+  - name: gpt-4o-search-preview
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.5
+    output_price: 10.0
+    max_output_tokens: 16384
+    supports_vision: true
+  - name: chatgpt-4o-latest
+    type: chat
+    max_input_tokens: 128000
+    input_price: 5.0
+    output_price: 15.0
+    max_output_tokens: 16384
+    supports_vision: true
+    supports_function_calling: true
+  - name: gpt-4o-mini
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.15
+    output_price: 0.6
+    max_output_tokens: 16384
+    supports_vision: true
+    supports_function_calling: true
+  - name: gpt-4o-mini-search-preview
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.15
+    output_price: 0.6
+    max_output_tokens: 16384
+    supports_vision: true
+  - name: gpt-4-turbo
+    type: chat
+    max_input_tokens: 128000
+    input_price: 10.0
+    output_price: 30.0
+    max_output_tokens: 4096
+    supports_vision: true
+    supports_function_calling: true
+  - name: gpt-4.5-preview
+    type: chat
+    max_input_tokens: 128000
+    input_price: 75.0
+    output_price: 150.0
+    max_output_tokens: 16384
+    supports_vision: true
+    supports_function_calling: true
+  - name: o3-mini
+    type: chat
+    max_input_tokens: 200000
+    input_price: 1.1
+    output_price: 4.4
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: o3-mini-high
+    type: chat
+    real_name: o3-mini
+    max_input_tokens: 200000
+    input_price: 1.1
+    output_price: 4.4
+    patch:
+      body:
+        reasoning_effort: high
+        max_tokens: null
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: o1-pro
+    type: chat
+    max_input_tokens: 200000
+    input_price: 150.0
+    output_price: 600.0
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: o1
+    type: chat
+    max_input_tokens: 200000
+    input_price: 15.0
+    output_price: 60.0
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: o1-preview
+    type: chat
+    max_input_tokens: 128000
+    input_price: 15.0
+    output_price: 60.0
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    max_output_tokens: 32768
+    no_system_message: true
+  - name: o1-mini
+    type: chat
+    max_input_tokens: 128000
+    input_price: 3.0
+    output_price: 12.0
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    max_output_tokens: 65536
+    no_system_message: true
+  - name: gpt-3.5-turbo
+    type: chat
+    max_input_tokens: 16385
+    input_price: 0.5
+    output_price: 1.5
+    max_output_tokens: 4096
+    supports_function_calling: true
+  - name: text-embedding-3-large
+    type: embedding
+    input_price: 0.13
+    max_tokens_per_chunk: 8191
+    default_chunk_size: 2000
+    max_batch_size: 100
+  - name: text-embedding-3-small
+    type: embedding
+    input_price: 0.02
+    max_tokens_per_chunk: 8191
+    default_chunk_size: 2000
+    max_batch_size: 100
+- provider: gemini
+  models:
+  - name: gemini-2.0-flash
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 8192
+    supports_vision: true
+    supports_function_calling: true
+  - name: gemini-2.0-flash-lite
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 8192
+    supports_vision: true
+    supports_function_calling: true
+  - name: gemini-2.5-pro-exp-03-25
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 65536
+    supports_vision: true
+    supports_function_calling: true
+  - name: gemma-3-27b-it
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 8192
+  - name: gemini-1.5-pro-latest
+    type: chat
+    max_input_tokens: 2097152
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 8192
+    supports_vision: true
+    supports_function_calling: true
+  - name: gemini-1.5-flash-latest
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 8192
+    supports_vision: true
+    supports_function_calling: true
+  - name: gemini-1.5-flash-8b-latest
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 8192
+    supports_vision: true
+    supports_function_calling: true
+  - name: text-embedding-004
+    type: embedding
+    input_price: 0.0
+    max_tokens_per_chunk: 2048
+    default_chunk_size: 1500
+    max_batch_size: 100
+- provider: claude
+  models:
+  - name: claude-3-7-sonnet-20250219
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-7-sonnet-20250219:thinking
+    type: chat
+    real_name: claude-3-7-sonnet-20250219
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    patch:
+      body:
+        temperature: null
+        top_p: null
+        thinking:
+          type: enabled
+          budget_tokens: 16000
+    max_output_tokens: 24000
+    require_max_tokens: true
+    supports_vision: true
+  - name: claude-3-5-sonnet-20241022
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-5-sonnet-20240620
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-5-haiku-20241022
+    type: chat
+    max_input_tokens: 200000
+    input_price: 0.8
+    output_price: 4.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-opus-20240229
+    type: chat
+    max_input_tokens: 200000
+    input_price: 15.0
+    output_price: 75.0
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-sonnet-20240229
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-haiku-20240307
+    type: chat
+    max_input_tokens: 200000
+    input_price: 0.25
+    output_price: 1.25
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+- provider: mistral
+  models:
+  - name: mistral-large-latest
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 6.0
+    supports_function_calling: true
+  - name: mistral-small-latest
+    type: chat
+    max_input_tokens: 32000
+    input_price: 0.1
+    output_price: 0.3
+    supports_function_calling: true
+  - name: codestral-latest
+    type: chat
+    max_input_tokens: 256000
+    input_price: 0.3
+    output_price: 0.9
+    supports_function_calling: true
+  - name: ministral-8b-latest
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.1
+    output_price: 0.1
+    supports_function_calling: true
+  - name: open-mistral-nemo
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.15
+    output_price: 0.15
+    supports_function_calling: true
+  - name: pixtral-large-latest
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 6.0
+    supports_vision: true
+  - name: pixtral-12b-latest
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.15
+    output_price: 0.15
+    supports_vision: true
+  - name: mistral-embed
+    type: embedding
+    max_input_tokens: 8092
+    input_price: 0.1
+    max_tokens_per_chunk: 8092
+    default_chunk_size: 2000
+- provider: ai21
+  models:
+  - name: jamba-large
+    type: chat
+    max_input_tokens: 256000
+    input_price: 2.0
+    output_price: 8.0
+    supports_function_calling: true
+  - name: jamba-mini
+    type: chat
+    max_input_tokens: 256000
+    input_price: 0.2
+    output_price: 0.4
+    supports_function_calling: true
+- provider: cohere
+  models:
+  - name: command-a-03-2025
+    type: chat
+    max_input_tokens: 256000
+    input_price: 2.5
+    output_price: 10.0
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: command-r-plus-08-2024
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.5
+    output_price: 10.0
+    max_output_tokens: 4096
+    supports_function_calling: true
+  - name: command-r-08-2024
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.15
+    output_price: 0.6
+    max_output_tokens: 4096
+    supports_function_calling: true
+  - name: command-r7b-12-2024
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.0375
+    output_price: 0.15
+    max_output_tokens: 4096
+  - name: embed-english-v3.0
+    type: embedding
+    input_price: 0.1
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 96
+  - name: embed-english-light-v3.0
+    type: embedding
+    input_price: 0.1
+    max_tokens_per_chunk: 512
+    default_chunk_size: 700
+    max_batch_size: 96
+  - name: embed-multilingual-v3.0
+    type: embedding
+    input_price: 0.1
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 96
+  - name: embed-multilingual-light-v3.0
+    type: embedding
+    input_price: 0.1
+    max_tokens_per_chunk: 512
+    default_chunk_size: 700
+    max_batch_size: 96
+  - name: rerank-v3.5
+    type: reranker
+    max_input_tokens: 4096
+  - name: rerank-english-v3.0
+    type: reranker
+    max_input_tokens: 4096
+  - name: rerank-multilingual-v3.0
+    type: reranker
+    max_input_tokens: 4096
+- provider: xai
+  models:
+  - name: grok-3-latest
+    type: chat
+    max_input_tokens: 131072
+    input_price: 3.0
+    output_price: 15.0
+    supports_function_calling: true
+  - name: grok-3-fast-latest
+    type: chat
+    max_input_tokens: 131072
+    input_price: 5.0
+    output_price: 25.0
+    supports_function_calling: true
+  - name: grok-3-mini-latest
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.3
+    output_price: 0.5
+  - name: grok-3-mini-fast-latest
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.6
+    output_price: 4.0
+  - name: grok-2-latest
+    type: chat
+    max_input_tokens: 131072
+    input_price: 2.0
+    output_price: 10.0
+    supports_function_calling: true
+  - name: grok-2-vision-latest
+    type: chat
+    max_input_tokens: 32768
+    input_price: 2.0
+    output_price: 10.0
+    supports_vision: true
+    supports_function_calling: true
+- provider: perplexity
+  models:
+  - name: sonar-pro
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+  - name: sonar
+    type: chat
+    max_input_tokens: 128000
+    input_price: 1.0
+    output_price: 1.0
+  - name: sonar-reasoning-pro
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 8.0
+  - name: sonar-reasoning
+    type: chat
+    max_input_tokens: 128000
+    input_price: 1.0
+    output_price: 5.0
+  - name: sonar-deep-research
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 8.0
+  - name: r1-1776
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 8.0
+- provider: groq
+  models:
+  - name: meta-llama/llama-4-maverick-17b-128e-instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    supports_vision: true
+    supports_function_calling: true
+  - name: meta-llama/llama-4-scout-17b-16e-instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    supports_vision: true
+    supports_function_calling: true
+  - name: llama-3.3-70b-versatile
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    supports_function_calling: true
+  - name: llama-3.1-8b-instant
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    supports_function_calling: true
+  - name: qwen-qwq-32b
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    supports_function_calling: true
+- provider: vertexai
+  models:
+  - name: gemini-2.0-flash-001
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.15
+    output_price: 0.6
+    max_output_tokens: 8192
+    supports_vision: true
+    supports_function_calling: true
+  - name: gemini-2.0-flash-lite-001
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.075
+    output_price: 0.3
+    max_output_tokens: 8192
+    supports_vision: true
+    supports_function_calling: true
+  - name: gemini-2.5-pro-preview-03-25
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 1.25
+    output_price: 10.0
+    max_output_tokens: 65536
+    supports_vision: true
+    supports_function_calling: true
+  - name: gemini-1.5-pro-002
+    type: chat
+    max_input_tokens: 2097152
+    input_price: 1.25
+    output_price: 3.75
+    max_output_tokens: 8192
+    supports_vision: true
+    supports_function_calling: true
+  - name: gemini-1.5-flash-002
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.019
+    output_price: 0.075
+    max_output_tokens: 8192
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-7-sonnet@20250219
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-7-sonnet@20250219:thinking
+    type: chat
+    real_name: claude-3-7-sonnet@20250219
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    patch:
+      body:
+        temperature: null
+        top_p: null
+        thinking:
+          type: enabled
+          budget_tokens: 16000
+    max_output_tokens: 24000
+    require_max_tokens: true
+    supports_vision: true
+  - name: claude-3-5-sonnet-v2@20241022
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-5-sonnet@20240620
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-5-haiku@20241022
+    type: chat
+    max_input_tokens: 200000
+    input_price: 0.8
+    output_price: 4.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-opus@20240229
+    type: chat
+    max_input_tokens: 200000
+    input_price: 15.0
+    output_price: 75.0
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-sonnet@20240229
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: claude-3-haiku@20240307
+    type: chat
+    max_input_tokens: 200000
+    input_price: 0.25
+    output_price: 1.25
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: mistral-large-2411
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 6.0
+    supports_function_calling: true
+  - name: mistral-small-2503
+    type: chat
+    max_input_tokens: 32000
+    input_price: 0.1
+    output_price: 0.3
+    supports_function_calling: true
+  - name: codestral-2501
+    type: chat
+    max_input_tokens: 256000
+    input_price: 0.3
+    output_price: 0.9
+    supports_function_calling: true
+  - name: text-embedding-005
+    type: embedding
+    max_input_tokens: 20000
+    input_price: 0.025
+    max_tokens_per_chunk: 2048
+    default_chunk_size: 1500
+    max_batch_size: 5
+  - name: text-multilingual-embedding-002
+    type: embedding
+    max_input_tokens: 20000
+    input_price: 0.2
+    max_tokens_per_chunk: 2048
+    default_chunk_size: 1500
+    max_batch_size: 5
+- provider: bedrock
+  models:
+  - name: us.anthropic.claude-3-7-sonnet-20250219-v1:0
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: us.anthropic.claude-3-7-sonnet-20250219-v1:0:thinking
+    type: chat
+    real_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    patch:
+      body:
+        inferenceConfig:
+          temperature: null
+          topP: null
+        additionalModelRequestFields:
+          thinking:
+            type: enabled
+            budget_tokens: 16000
+    max_output_tokens: 24000
+    require_max_tokens: true
+    supports_vision: true
+  - name: anthropic.claude-3-5-sonnet-20241022-v2:0
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic.claude-3-5-sonnet-20240620-v1:0
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic.claude-3-5-haiku-20241022-v1:0
+    type: chat
+    max_input_tokens: 200000
+    input_price: 0.8
+    output_price: 4.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic.claude-3-opus-20240229-v1:0
+    type: chat
+    max_input_tokens: 200000
+    input_price: 15.0
+    output_price: 75.0
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic.claude-3-sonnet-20240229-v1:0
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic.claude-3-haiku-20240307-v1:0
+    type: chat
+    max_input_tokens: 200000
+    input_price: 0.25
+    output_price: 1.25
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: us.meta.llama3-3-70b-instruct-v1:0
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.72
+    output_price: 0.72
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_function_calling: true
+  - name: meta.llama3-1-405b-instruct-v1:0
+    type: chat
+    max_input_tokens: 131072
+    input_price: 2.4
+    output_price: 2.4
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_function_calling: true
+  - name: meta.llama3-1-70b-instruct-v1:0
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.72
+    output_price: 0.72
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_function_calling: true
+  - name: meta.llama3-1-8b-instruct-v1:0
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.22
+    output_price: 0.22
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_function_calling: true
+  - name: us.meta.llama3-2-90b-instruct-v1:0
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.72
+    output_price: 0.72
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: us.meta.llama3-2-11b-instruct-v1:0
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.16
+    output_price: 0.16
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: us.amazon.nova-pro-v1:0
+    type: chat
+    max_input_tokens: 300000
+    input_price: 0.8
+    output_price: 3.2
+    max_output_tokens: 5120
+    supports_vision: true
+  - name: us.amazon.nova-lite-v1:0
+    type: chat
+    max_input_tokens: 300000
+    input_price: 0.06
+    output_price: 0.24
+    max_output_tokens: 5120
+    supports_vision: true
+  - name: us.amazon.nova-micro-v1:0
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.035
+    output_price: 0.14
+    max_output_tokens: 5120
+  - name: mistral.mistral-large-2407-v1:0
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 6.0
+    supports_function_calling: true
+  - name: cohere.command-r-plus-v1:0
+    type: chat
+    max_input_tokens: 128000
+    input_price: 3.0
+    output_price: 15.0
+    supports_function_calling: true
+  - name: cohere.command-r-v1:0
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.5
+    output_price: 1.5
+    supports_function_calling: true
+  - name: cohere.embed-english-v3
+    type: embedding
+    input_price: 0.1
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 96
+  - name: cohere.embed-multilingual-v3
+    type: embedding
+    input_price: 0.1
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 96
+  - name: ai21.jamba-1-5-large-v1:0
+    type: chat
+    max_input_tokens: 256000
+    input_price: 2.0
+    output_price: 8.0
+    supports_function_calling: true
+  - name: ai21.jamba-1-5-mini-v1:0
+    type: chat
+    max_input_tokens: 256000
+    input_price: 0.2
+    output_price: 0.4
+    supports_function_calling: true
+  - name: us.deepseek.r1-v1:0
+    type: chat
+    max_input_tokens: 128000
+    input_price: 1.35
+    output_price: 5.4
+- provider: cloudflare
+  models:
+  - name: '@cf/meta/llama-4-scout-17b-16e-instruct'
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 2048
+    require_max_tokens: true
+  - name: '@cf/meta/llama-3.3-70b-instruct-fp8-fast'
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 2048
+    require_max_tokens: true
+  - name: '@cf/meta/llama-3.1-70b-instruct'
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 2048
+    require_max_tokens: true
+  - name: '@cf/meta/llama-3.1-8b-instruct'
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 2048
+    require_max_tokens: true
+  - name: '@cf/qwen/qwq-32b'
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 2048
+    require_max_tokens: true
+  - name: '@cf/qwen/qwen2.5-coder-32b-instruct'
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 2048
+    require_max_tokens: true
+  - name: '@cf/google/gemma-3-12b-it'
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 2048
+    require_max_tokens: true
+  - name: '@cf/mistralai/mistral-small-3.1-24b-instruct'
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 2048
+    require_max_tokens: true
+  - name: '@cf/baai/bge-large-en-v1.5'
+    type: embedding
+    input_price: 0.0
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 100
+- provider: ernie
+  models:
+  - name: ernie-4.5-8k-preview
+    type: chat
+    max_input_tokens: 8192
+    input_price: 0.56
+    output_price: 2.24
+    supports_function_calling: true
+  - name: ernie-x1-32k-preview
+    type: chat
+    max_input_tokens: 32768
+    input_price: 0.28
+    output_price: 1.12
+  - name: ernie-4.0-turbo-8k-latest
+    type: chat
+    max_input_tokens: 8192
+    input_price: 0.42
+    output_price: 1.26
+    supports_function_calling: true
+  - name: ernie-4.0-turbo-128k
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.42
+    output_price: 1.26
+    supports_function_calling: true
+  - name: ernie-4.0-8k-latest
+    type: chat
+    max_input_tokens: 8192
+    input_price: 0.56
+    output_price: 2.24
+    supports_function_calling: true
+  - name: ernie-3.5-128k
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.112
+    output_price: 0.28
+    supports_function_calling: true
+  - name: ernie-speed-pro-128k
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.042
+    output_price: 0.084
+  - name: deepseek-v3
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.112
+    output_price: 0.224
+  - name: deepseek-r1
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.28
+    output_price: 1.12
+  - name: qwq-32b
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.28
+    output_price: 0.84
+  - name: bge-large-zh
+    type: embedding
+    input_price: 0.07
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 16
+  - name: bge-large-en
+    type: embedding
+    input_price: 0.07
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 16
+  - name: bce-reranker-base
+    type: reranker
+    max_input_tokens: 1024
+    input_price: 0.07
+- provider: qianwen
+  models:
+  - name: qwen-max-latest
+    type: chat
+    max_input_tokens: 32678
+    input_price: 1.6
+    output_price: 6.4
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: qwen-plus-latest
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.112
+    output_price: 0.28
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: qwen-turbo-latest
+    type: chat
+    max_input_tokens: 1000000
+    input_price: 0.042
+    output_price: 0.084
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: qwen-long
+    type: chat
+    max_input_tokens: 1000000
+    input_price: 0.07
+    output_price: 0.28
+  - name: qwen-omni-turbo-latest
+    type: chat
+    max_input_tokens: 32768
+    max_output_tokens: 2048
+    supports_vision: true
+  - name: qwq-plus-latest
+    type: chat
+    max_input_tokens: 131072
+    max_output_tokens: 8192
+  - name: qwq-32b
+    type: chat
+    max_input_tokens: 131072
+    max_output_tokens: 8192
+  - name: qwen-vl-max-latest
+    type: chat
+    max_input_tokens: 30720
+    input_price: 0.42
+    output_price: 1.26
+    max_output_tokens: 2048
+    supports_vision: true
+  - name: qwen-vl-plus-latest
+    type: chat
+    max_input_tokens: 30000
+    input_price: 0.21
+    output_price: 0.63
+    max_output_tokens: 2048
+    supports_vision: true
+  - name: qwen2.5-72b-instruct
+    type: chat
+    max_input_tokens: 129024
+    input_price: 0.56
+    output_price: 1.68
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: qwen2.5-vl-72b-instruct
+    type: chat
+    max_input_tokens: 129024
+    input_price: 2.24
+    output_price: 6.72
+    max_output_tokens: 8192
+    supports_vision: true
+  - name: qwen2.5-coder-32b-instruct
+    type: chat
+    max_input_tokens: 129024
+    input_price: 0.49
+    output_price: 0.98
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: deepseek-v3
+    type: chat
+    max_input_tokens: 65792
+    input_price: 0.14
+    output_price: 0.56
+  - name: deepseek-r1
+    type: chat
+    max_input_tokens: 65792
+    input_price: 0.28
+    output_price: 1.12
+  - name: text-embedding-v3
+    type: embedding
+    input_price: 0.1
+    max_tokens_per_chunk: 8192
+    default_chunk_size: 2000
+    max_batch_size: 6
+  - name: text-embedding-v2
+    type: embedding
+    input_price: 0.1
+    max_tokens_per_chunk: 2048
+    default_chunk_size: 2000
+    max_batch_size: 25
+- provider: hunyuan
+  models:
+  - name: hunyuan-turbos-latest
+    type: chat
+    max_input_tokens: 24000
+    input_price: 0.112
+    output_price: 0.28
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: hunyuan-t1-latest
+    type: chat
+    max_input_tokens: 28000
+    input_price: 0.14
+    output_price: 0.56
+    max_output_tokens: 64000
+  - name: hunyuan-turbo-latest
+    type: chat
+    max_input_tokens: 28000
+    input_price: 0.336
+    output_price: 1.344
+    max_output_tokens: 4096
+    supports_function_calling: true
+  - name: hunyuan-large
+    type: chat
+    max_input_tokens: 28000
+    input_price: 0.56
+    output_price: 1.68
+    max_output_tokens: 4096
+    supports_function_calling: true
+  - name: hunyuan-large-longcontext
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.84
+    output_price: 2.52
+    max_output_tokens: 6144
+    supports_function_calling: true
+  - name: hunyuan-standard
+    type: chat
+    max_input_tokens: 30000
+    input_price: 0.112
+    output_price: 0.28
+    max_output_tokens: 2048
+    supports_function_calling: true
+  - name: hunyuan-standard-256K
+    type: chat
+    max_input_tokens: 250000
+    input_price: 0.07
+    output_price: 0.28
+    max_output_tokens: 6144
+    supports_function_calling: true
+  - name: hunyuan-lite
+    type: chat
+    max_input_tokens: 250000
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 6144
+    supports_function_calling: true
+  - name: hunyuan-turbo-vision
+    type: chat
+    max_input_tokens: 6144
+    input_price: 11.2
+    output_price: 11.2
+    max_output_tokens: 2048
+    supports_vision: true
+  - name: hunyuan-vision
+    type: chat
+    max_input_tokens: 6144
+    input_price: 2.52
+    output_price: 2.52
+    max_output_tokens: 2048
+    supports_vision: true
+  - name: hunyuan-embedding
+    type: embedding
+    input_price: 0.01
+    max_tokens_per_chunk: 1024
+    default_chunk_size: 1000
+    max_batch_size: 100
+- provider: moonshot
+  models:
+  - name: kimi-latest
+    type: chat
+    supports_vision: true
+    supports_function_calling: true
+  - name: moonshot-v1-8k
+    type: chat
+    max_input_tokens: 8192
+    input_price: 1.68
+    output_price: 1.68
+    supports_function_calling: true
+  - name: moonshot-v1-32k
+    type: chat
+    max_input_tokens: 32768
+    input_price: 3.36
+    output_price: 3.36
+    supports_function_calling: true
+  - name: moonshot-v1-128k
+    type: chat
+    max_input_tokens: 131072
+    input_price: 8.4
+    output_price: 8.4
+    supports_function_calling: true
+  - name: moonshot-v1-8k-vision-preview
+    type: chat
+    max_input_tokens: 8192
+    input_price: 1.68
+    output_price: 1.68
+    supports_vision: true
+  - name: moonshot-v1-32k-vision-preview
+    type: chat
+    max_input_tokens: 32768
+    input_price: 3.36
+    output_price: 3.36
+    supports_vision: true
+  - name: moonshot-v1-128k-vision-preview
+    type: chat
+    max_input_tokens: 131072
+    input_price: 8.4
+    output_price: 8.4
+    supports_vision: true
+- provider: deepseek
+  models:
+  - name: deepseek-chat
+    type: chat
+    max_input_tokens: 64000
+    input_price: 0.27
+    output_price: 1.1
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: deepseek-reasoner
+    type: chat
+    max_input_tokens: 64000
+    input_price: 0.55
+    output_price: 2.19
+    max_output_tokens: 8192
+- provider: zhipuai
+  models:
+  - name: glm-4-plus
+    type: chat
+    max_input_tokens: 128000
+    input_price: 7.0
+    output_price: 7.0
+    max_output_tokens: 4096
+    supports_function_calling: true
+  - name: glm-4-alltools
+    type: chat
+    max_input_tokens: 128000
+    input_price: 14.0
+    output_price: 14.0
+    max_output_tokens: 4096
+    supports_function_calling: true
+  - name: glm-4-long
+    type: chat
+    max_input_tokens: 1000000
+    input_price: 0.14
+    output_price: 0.14
+    max_output_tokens: 4096
+    supports_function_calling: true
+  - name: glm-4-flash
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.0
+    output_price: 0.0
+    max_output_tokens: 4096
+    supports_function_calling: true
+  - name: glm-4v-plus
+    type: chat
+    max_input_tokens: 8192
+    input_price: 0.56
+    output_price: 0.56
+    supports_vision: true
+  - name: glm-4v-flash
+    type: chat
+    max_input_tokens: 8192
+    input_price: 0.0
+    output_price: 0.0
+    supports_vision: true
+  - name: glm-zero-preview
+    type: chat
+    max_input_tokens: 16384
+    input_price: 1.4
+    output_price: 1.4
+  - name: embedding-3
+    type: embedding
+    max_input_tokens: 8192
+    input_price: 0.07
+    max_tokens_per_chunk: 8192
+    default_chunk_size: 2000
+  - name: rerank
+    type: reranker
+    max_input_tokens: 4096
+    input_price: 0.112
+- provider: lingyiwanwu
+  models:
+  - name: yi-lightning
+    type: chat
+    max_input_tokens: 16384
+    input_price: 0.14
+    output_price: 0.14
+  - name: yi-vision-v2
+    type: chat
+    max_input_tokens: 16384
+    input_price: 0.84
+    output_price: 0.84
+    supports_vision: true
+- provider: minimax
+  models:
+  - name: minimax-text-01
+    type: chat
+    max_input_tokens: 1000192
+    input_price: 0.14
+    output_price: 1.12
+    supports_vision: true
+  - name: abab6.5s-chat
+    type: chat
+    max_input_tokens: 245760
+    input_price: 0.14
+    output_price: 0.14
+    supports_vision: true
+  - name: deepseek-r1
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.56
+    output_price: 2.24
+- provider: openrouter
+  models:
+  - name: openai/gpt-4o
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.5
+    output_price: 10.0
+    supports_vision: true
+    supports_function_calling: true
+  - name: openai/gpt-4o-search-preview
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.5
+    output_price: 10.0
+    max_output_tokens: 16384
+    supports_vision: true
+  - name: openai/chatgpt-4o-latest
+    type: chat
+    max_input_tokens: 128000
+    input_price: 5.0
+    output_price: 15.0
+    supports_vision: true
+    supports_function_calling: true
+  - name: openai/gpt-4o-mini
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.15
+    output_price: 0.6
+    supports_vision: true
+    supports_function_calling: true
+  - name: openai/gpt-4o-mini-search-preview
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.15
+    output_price: 0.6
+    max_output_tokens: 16384
+    supports_vision: true
+  - name: openai/gpt-4-turbo
+    type: chat
+    max_input_tokens: 128000
+    input_price: 10.0
+    output_price: 30.0
+    supports_vision: true
+    supports_function_calling: true
+  - name: openai/gpt-4.5-preview
+    type: chat
+    max_input_tokens: 128000
+    input_price: 75.0
+    output_price: 150.0
+    max_output_tokens: 16384
+    supports_vision: true
+    supports_function_calling: true
+  - name: openai/o3-mini
+    type: chat
+    max_input_tokens: 200000
+    input_price: 1.1
+    output_price: 4.4
+    patch:
+      body:
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: openai/o3-mini-high
+    type: chat
+    max_input_tokens: 200000
+    input_price: 1.1
+    output_price: 4.4
+    patch:
+      body:
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: openai/o1-pro
+    type: chat
+    max_input_tokens: 200000
+    input_price: 150.0
+    output_price: 600.0
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: openai/o1
+    type: chat
+    max_input_tokens: 128000
+    input_price: 15.0
+    output_price: 60.0
+    patch:
+      body:
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: openai/o1-preview
+    type: chat
+    max_input_tokens: 128000
+    input_price: 15.0
+    output_price: 60.0
+    patch:
+      body:
+        temperature: null
+        top_p: null
+    no_system_message: true
+  - name: openai/o1-mini
+    type: chat
+    max_input_tokens: 128000
+    input_price: 3.0
+    output_price: 12.0
+    patch:
+      body:
+        temperature: null
+        top_p: null
+    no_system_message: true
+  - name: openai/gpt-3.5-turbo
+    type: chat
+    max_input_tokens: 16385
+    input_price: 0.5
+    output_price: 1.5
+    supports_function_calling: true
+  - name: google/gemini-2.0-flash-001
+    type: chat
+    max_input_tokens: 1000000
+    input_price: 0.1
+    output_price: 0.4
+    supports_vision: true
+    supports_function_calling: true
+  - name: google/gemini-2.0-flash-lite-001
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.075
+    output_price: 0.3
+    supports_vision: true
+    supports_function_calling: true
+  - name: google/gemini-2.5-pro-preview-03-25
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 1.25
+    output_price: 10.0
+    supports_vision: true
+    supports_function_calling: true
+  - name: google/gemma-3-27b-it
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.1
+    output_price: 0.2
+  - name: google/gemini-pro-1.5
+    type: chat
+    max_input_tokens: 2000000
+    input_price: 1.25
+    output_price: 5.0
+    supports_vision: true
+    supports_function_calling: true
+  - name: google/gemini-flash-1.5
+    type: chat
+    max_input_tokens: 1000000
+    input_price: 0.075
+    output_price: 0.3
+    supports_vision: true
+    supports_function_calling: true
+  - name: google/gemini-flash-1.5-8b
+    type: chat
+    max_input_tokens: 1000000
+    input_price: 0.0375
+    output_price: 0.15
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic/claude-3.7-sonnet
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic/claude-3.7-sonnet:thinking
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    patch:
+      body:
+        include_reasoning: true
+    max_output_tokens: 24000
+    require_max_tokens: true
+    supports_vision: true
+  - name: anthropic/claude-3.5-sonnet
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic/claude-3-5-haiku
+    type: chat
+    max_input_tokens: 200000
+    input_price: 0.8
+    output_price: 4.0
+    max_output_tokens: 8192
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic/claude-3-opus
+    type: chat
+    max_input_tokens: 200000
+    input_price: 15.0
+    output_price: 75.0
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic/claude-3-sonnet
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: anthropic/claude-3-haiku
+    type: chat
+    max_input_tokens: 200000
+    input_price: 0.25
+    output_price: 1.25
+    max_output_tokens: 4096
+    require_max_tokens: true
+    supports_vision: true
+    supports_function_calling: true
+  - name: meta-llama/llama-4-maverick
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.18
+    output_price: 0.6
+    supports_vision: true
+    supports_function_calling: true
+  - name: meta-llama/llama-4-scout
+    type: chat
+    max_input_tokens: 327680
+    input_price: 0.08
+    output_price: 0.3
+    supports_vision: true
+    supports_function_calling: true
+  - name: meta-llama/llama-3.3-70b-instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.12
+    output_price: 0.3
+  - name: meta-llama/llama-3.1-405b-instruct
+    type: chat
+    max_input_tokens: 32768
+    input_price: 0.8
+    output_price: 0.8
+    supports_function_calling: true
+  - name: meta-llama/llama-3.1-70b-instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.12
+    output_price: 0.3
+    supports_function_calling: true
+  - name: meta-llama/llama-3.1-8b-instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.02
+    output_price: 0.05
+  - name: meta-llama/llama-3.2-90b-vision-instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.9
+    output_price: 0.9
+    supports_vision: true
+  - name: meta-llama/llama-3.2-11b-vision-instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.055
+    output_price: 0.055
+    supports_vision: true
+  - name: mistralai/mistral-large-2411
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 6.0
+    supports_function_calling: true
+  - name: mistralai/mistral-small-3.1-24b-instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.1
+    output_price: 0.3
+  - name: mistralai/codestral-2501
+    type: chat
+    max_input_tokens: 256000
+    input_price: 0.3
+    output_price: 0.9
+    supports_function_calling: true
+  - name: mistralai/ministral-8b
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.1
+    output_price: 0.1
+    supports_function_calling: true
+  - name: mistralai/mistral-nemo
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.035
+    output_price: 0.08
+    supports_function_calling: true
+  - name: mistralai/pixtral-large-2411
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 6.0
+    supports_vision: true
+  - name: mistralai/pixtral-12b
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.1
+    output_price: 0.1
+    supports_vision: true
+  - name: ai21/jamba-1.6-large
+    type: chat
+    max_input_tokens: 256000
+    input_price: 2.0
+    output_price: 8.0
+    supports_function_calling: true
+  - name: ai21/jamba-1.6-mini
+    type: chat
+    max_input_tokens: 256000
+    input_price: 0.2
+    output_price: 0.4
+    supports_function_calling: true
+  - name: cohere/command-a
+    type: chat
+    max_input_tokens: 256000
+    input_price: 2.5
+    output_price: 10.0
+    supports_function_calling: true
+  - name: cohere/command-r-plus-08-2024
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.5
+    output_price: 10.0
+    supports_function_calling: true
+  - name: cohere/command-r-08-2024
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.15
+    output_price: 0.6
+    supports_function_calling: true
+  - name: cohere/command-r7b-12-2024
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.0375
+    output_price: 0.15
+    max_output_tokens: 4096
+  - name: deepseek/deepseek-chat-v3-0324
+    type: chat
+    max_input_tokens: 64000
+    input_price: 0.27
+    output_price: 1.1
+    supports_function_calling: true
+  - name: deepseek/deepseek-r1
+    type: chat
+    max_input_tokens: 163840
+    input_price: 0.55
+    output_price: 2.19
+    patch:
+      body:
+        include_reasoning: true
+  - name: qwen/qwen-max
+    type: chat
+    max_input_tokens: 32768
+    input_price: 1.6
+    output_price: 6.4
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: qwen/qwen-plus
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.4
+    output_price: 1.2
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: qwen/qwen-turbo
+    type: chat
+    max_input_tokens: 1000000
+    input_price: 0.05
+    output_price: 0.2
+    max_output_tokens: 8192
+    supports_function_calling: true
+  - name: qwen/qwen-vl-plus
+    type: chat
+    max_input_tokens: 7500
+    input_price: 0.21
+    output_price: 0.63
+    supports_vision: true
+  - name: qwen/qwq-32b
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.29
+    output_price: 0.39
+  - name: qwen/qwen-2.5-72b-instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.35
+    output_price: 0.4
+    supports_function_calling: true
+  - name: qwen/qwen2.5-vl-72b-instruct
+    type: chat
+    max_input_tokens: 32000
+    input_price: 0.7
+    output_price: 0.7
+    supports_vision: true
+  - name: qwen/qwen-2.5-coder-32b-instruct
+    type: chat
+    max_input_tokens: 32768
+    input_price: 0.18
+    output_price: 0.18
+  - name: x-ai/grok-3-beta
+    type: chat
+    max_input_tokens: 131072
+    input_price: 3.0
+    output_price: 15.0
+    supports_function_calling: true
+  - name: x-ai/grok-3-mini-beta
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.3
+    output_price: 0.5
+  - name: x-ai/grok-2-1212
+    type: chat
+    max_input_tokens: 131072
+    input_price: 2.0
+    output_price: 10.0
+    supports_function_calling: true
+  - name: x-ai/grok-2-vision-1212
+    type: chat
+    max_input_tokens: 32768
+    input_price: 2.0
+    output_price: 10.0
+    supports_vision: true
+    supports_function_calling: true
+  - name: amazon/nova-pro-v1
+    type: chat
+    max_input_tokens: 300000
+    input_price: 0.8
+    output_price: 3.2
+    max_output_tokens: 5120
+    supports_vision: true
+  - name: amazon/nova-lite-v1
+    type: chat
+    max_input_tokens: 300000
+    input_price: 0.06
+    output_price: 0.24
+    max_output_tokens: 5120
+    supports_vision: true
+  - name: amazon/nova-micro-v1
+    type: chat
+    max_input_tokens: 128000
+    input_price: 0.035
+    output_price: 0.14
+    max_output_tokens: 5120
+  - name: perplexity/sonar-pro
+    type: chat
+    max_input_tokens: 200000
+    input_price: 3.0
+    output_price: 15.0
+  - name: perplexity/sonar
+    type: chat
+    max_input_tokens: 127072
+    input_price: 1.0
+    output_price: 1.0
+  - name: perplexity/sonar-reasoning-pro
+    type: chat
+    max_input_tokens: 128000
+    input_price: 2.0
+    output_price: 8.0
+    patch:
+      body:
+        include_reasoning: true
+  - name: perplexity/sonar-reasoning
+    type: chat
+    max_input_tokens: 127000
+    input_price: 1.0
+    output_price: 5.0
+    patch:
+      body:
+        include_reasoning: true
+  - name: perplexity/sonar-deep-research
+    type: chat
+    max_input_tokens: 200000
+    input_price: 2.0
+    output_price: 8.0
+    patch:
+      body:
+        include_reasoning: true
+  - name: perplexity/r1-1776
+    type: chat
+    max_input_tokens: 127000
+    input_price: 2.0
+    output_price: 8.0
+    patch:
+      body:
+        include_reasoning: true
+  - name: minimax/minimax-01
+    type: chat
+    max_input_tokens: 1000192
+    input_price: 0.2
+    output_price: 1.1
+- provider: github
+  models:
+  - name: gpt-4o
+    type: chat
+    max_input_tokens: 128000
+    supports_function_calling: true
+  - name: gpt-4o-mini
+    type: chat
+    max_input_tokens: 128000
+    supports_function_calling: true
+  - name: o3-mini
+    type: chat
+    max_input_tokens: 200000
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: o3-mini-high
+    type: chat
+    real_name: o3-mini
+    max_input_tokens: 200000
+    patch:
+      body:
+        reasoning_effort: high
+        max_tokens: null
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: o1
+    type: chat
+    max_input_tokens: 200000
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    supports_vision: true
+    supports_function_calling: true
+    system_prompt_prefix: Formatting re-enabled
+  - name: o1-preview
+    type: chat
+    max_input_tokens: 128000
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    no_stream: true
+    no_system_message: true
+  - name: o1-mini
+    type: chat
+    max_input_tokens: 128000
+    patch:
+      body:
+        max_tokens: null
+        temperature: null
+        top_p: null
+    no_stream: true
+    no_system_message: true
+  - name: text-embedding-3-large
+    type: embedding
+    max_tokens_per_chunk: 8191
+    default_chunk_size: 2000
+    max_batch_size: 100
+  - name: text-embedding-3-small
+    type: embedding
+    max_tokens_per_chunk: 8191
+    default_chunk_size: 2000
+    max_batch_size: 100
+  - name: llama-4-maverick-17b-128e-instruct-fp8
+    type: chat
+    max_input_tokens: 1048576
+    supports_vision: true
+  - name: llama-4-scout-17b-16e-instruct
+    type: chat
+    max_input_tokens: 327680
+    supports_vision: true
+  - name: llama-3.3-70b-instruct
+    type: chat
+    max_input_tokens: 131072
+  - name: meta-llama-3.1-405b-instruct
+    type: chat
+    max_input_tokens: 131072
+  - name: meta-llama-3.1-70b-instruct
+    type: chat
+    max_input_tokens: 131072
+  - name: meta-llama-3.1-8b-instruct
+    type: chat
+    max_input_tokens: 131072
+  - name: llama-3.2-90b-vision-instruct
+    type: chat
+    max_input_tokens: 131072
+    supports_vision: true
+  - name: llama-3.2-11b-vision-instruct
+    type: chat
+    max_input_tokens: 131072
+    supports_vision: true
+  - name: mistral-large-2411
+    type: chat
+    max_input_tokens: 128000
+    supports_function_calling: true
+  - name: mistral-small-2503
+    type: chat
+    max_input_tokens: 128000
+    supports_function_calling: true
+  - name: codestral-2501
+    type: chat
+    max_input_tokens: 256000
+    supports_function_calling: true
+  - name: mistral-nemo
+    type: chat
+    max_input_tokens: 128000
+    supports_function_calling: true
+  - name: cohere-command-r-plus-08-2024
+    type: chat
+    max_input_tokens: 128000
+    supports_function_calling: true
+  - name: cohere-command-r-08-2024
+    type: chat
+    max_input_tokens: 128000
+    supports_function_calling: true
+  - name: cohere-embed-v3-english
+    type: embedding
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 96
+  - name: cohere-embed-v3-multilingual
+    type: embedding
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 96
+  - name: ai21-jamba-1.5-large
+    type: chat
+    max_input_tokens: 256000
+    supports_function_calling: true
+  - name: ai21-jamba-1.5-mini
+    type: chat
+    max_input_tokens: 256000
+    supports_function_calling: true
+  - name: deepseek-r1
+    type: chat
+    max_input_tokens: 163840
+  - name: deepseek-v3-0324
+    type: chat
+    max_input_tokens: 163840
+  - name: phi-4
+    type: chat
+    max_input_tokens: 16384
+  - name: phi-4-mini-instruct
+    type: chat
+    max_input_tokens: 128000
+- provider: deepinfra
+  models:
+  - name: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+    type: chat
+    max_input_tokens: 1048576
+    input_price: 0.18
+    output_price: 0.6
+    supports_vision: true
+  - name: meta-llama/Llama-4-Scout-17B-16E-Instruct
+    type: chat
+    max_input_tokens: 327680
+    input_price: 0.08
+    output_price: 0.3
+    supports_vision: true
+  - name: meta-llama/Llama-3.3-70B-Instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.23
+    output_price: 0.4
+  - name: meta-llama/Meta-Llama-3.1-405B-Instruct
+    type: chat
+    max_input_tokens: 32768
+    input_price: 0.8
+    output_price: 0.8
+    supports_function_calling: true
+  - name: meta-llama/Meta-Llama-3.1-70B-Instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.23
+    output_price: 0.4
+    supports_function_calling: true
+  - name: meta-llama/Meta-Llama-3.1-8B-Instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.03
+    output_price: 0.05
+    supports_function_calling: true
+  - name: meta-llama/Llama-3.2-90B-Vision-Instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.35
+    output_price: 0.4
+  - name: meta-llama/Llama-3.2-11B-Vision-Instruct
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.055
+    output_price: 0.055
+  - name: Qwen/Qwen2.5-72B-Instruct
+    type: chat
+    max_input_tokens: 32768
+    input_price: 0.23
+    output_price: 0.4
+    supports_function_calling: true
+  - name: Qwen/QwQ-32B
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.12
+    output_price: 0.18
+  - name: Qwen/Qwen2.5-Coder-32B-Instruct
+    type: chat
+    max_input_tokens: 32768
+    input_price: 0.07
+    output_price: 0.16
+  - name: deepseek-ai/DeepSeek-V3-0324
+    type: chat
+    max_input_tokens: 163840
+    input_price: 0.4
+    output_price: 0.89
+  - name: deepseek-ai/DeepSeek-R1
+    type: chat
+    max_input_tokens: 65536
+    input_price: 0.75
+    output_price: 2.4
+  - name: google/gemma-3-27b-it
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.1
+    output_price: 0.2
+  - name: mistralai/Mistral-Small-24B-Instruct-2501
+    type: chat
+    max_input_tokens: 32768
+    input_price: 0.07
+    output_price: 0.14
+  - name: mistralai/Mistral-Nemo-Instruct-2407
+    type: chat
+    max_input_tokens: 131072
+    input_price: 0.035
+    output_price: 0.08
+  - name: BAAI/bge-large-en-v1.5
+    type: embedding
+    input_price: 0.01
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 100
+  - name: BAAI/bge-m3
+    type: embedding
+    input_price: 0.01
+    max_tokens_per_chunk: 8192
+    default_chunk_size: 2000
+    max_batch_size: 100
+  - name: intfloat/e5-large-v2
+    type: embedding
+    input_price: 0.01
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 100
+  - name: intfloat/multilingual-e5-large
+    type: embedding
+    input_price: 0.01
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 100
+  - name: thenlper/gte-large
+    type: embedding
+    input_price: 0.01
+    max_tokens_per_chunk: 512
+    default_chunk_size: 1000
+    max_batch_size: 100
+- provider: jina
+  models:
+  - name: jina-embeddings-v3
+    type: embedding
+    input_price: 0.0
+    max_tokens_per_chunk: 8192
+    default_chunk_size: 2000
+    max_batch_size: 100
+  - name: jina-colbert-v2
+    type: embedding
+    input_price: 0.0
+    max_tokens_per_chunk: 8192
+    default_chunk_size: 1500
+    max_batch_size: 100
+  - name: jina-clip-v2
+    type: embedding
+    input_price: 0.0
+    max_tokens_per_chunk: 8192
+    default_chunk_size: 1500
+    max_batch_size: 100
+  - name: jina-colbert-v2
+    type: reranker
+    max_input_tokens: 8192
+    input_price: 0.0
+  - name: jina-reranker-v2-base-multilingual
+    type: reranker
+    max_input_tokens: 8192
+    input_price: 0.0
+- provider: voyageai
+  models:
+  - name: voyage-3-large
+    type: embedding
+    max_input_tokens: 120000
+    input_price: 0.18
+    max_tokens_per_chunk: 32000
+    default_chunk_size: 2000
+    max_batch_size: 128
+  - name: voyage-3
+    type: embedding
+    max_input_tokens: 320000
+    input_price: 0.06
+    max_tokens_per_chunk: 32000
+    default_chunk_size: 2000
+    max_batch_size: 128
+  - name: voyage-3-lite
+    type: embedding
+    max_input_tokens: 1000000
+    input_price: 0.02
+    max_tokens_per_chunk: 32000
+    default_chunk_size: 1000
+    max_batch_size: 128
+  - name: rerank-2
+    type: reranker
+    max_input_tokens: 16000
+    input_price: 0.05
+  - name: rerank-2-lite
+    type: reranker
+    max_input_tokens: 8000
+    input_price: 0.02
home/common/dev/aichat.yaml
@@ -1,11 +1,11 @@
-model: gemini:gemini-2.5-flash-preview-04-17
+model: gemini:gemini-2.5-flash-lite-preview-09-2025
 wrap: 150
 save_session: true
 clients:
   - type: gemini
     name: gemini
     api_base: https://generativelanguage.googleapis.com/v1beta
-    api_key: "passage::ai/gemini/api_key"
+    api_key: "passage::redhat/google/osp/vdeemest-api-key"
     patch:
       chat_completions:
         ".*":
@@ -28,7 +28,7 @@ clients:
   - type: openai-compatible
     name: groq
     api_base: https://api.groq.com/openai/v1
-    api_key: "passage::ai/groq/api_key"
+    api_key: "passage::ai/groq/wakasu"
 
   # See https://platform.deepseek.com/api-docs/
   - type: openai-compatible
@@ -41,11 +41,11 @@ clients:
     api_base: https://openrouter.ai/api/v1
     api_key: "passage::ai/openroute/api_key"
 
-  # - type: openai-compatible
-  #   name: redhat-maas-deepseek
-  #   api_base: https://deepseek-r1-distill-qwen-14b-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com/v1
-  #   api_key: "passage::ai/gemini/api_key"
-  #   models:
-  #     - name: deepseek-r1-distill-qwen-14b
-  #       description: DeepSeek R1 Distill Qwen 14B
+  - type: openai-compatible
+    name: redhat-maas-deepseek
+    api_base: https://deepseek-r1-distill-qwen-14b-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com/v1
+    api_key: "passage::redhat/maas.deepseek.distill.api.key"
+    models:
+      - name: deepseek-r1-distill-qwen-14b
+        description: DeepSeek R1 Distill Qwen 14B
 
home/common/dev/genconf.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env -S uv run --script
+# /// script
+# dependencies = [
+#     "requests",
+#     "PyYAML",
+#     "google-generativeai",
+#
+# ]
+# ///
+
+import os.path
+import subprocess
+import socket
+import sys
+from typing import Any, Dict, List, Optional
+
+import requests
+import yaml  # pip install pyyaml types-pyyaml
+import urllib.parse as urlparse
+import google.generativeai as genai
+
+
+def debug(msg: str):
+    print(f"[DEBUG] {msg}", file=sys.stderr)
+
+
+def check_running(api_base, timeout=0.5) -> bool:
+    """Quickly check if Ollama is accessible at the given host and port."""
+    url = urlparse.urlparse(api_base)
+    port = url.port or (80 if url.scheme == "http" else 443)
+    debug(f"Checking if {url.hostname}:{port} is running (api_base={api_base})")
+    try:
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.settimeout(timeout)
+        result = sock.connect_ex((url.hostname, port))
+        sock.close()
+        debug(f"Socket connect_ex result for {url.hostname}:{port}: {result}")
+        return result == 0
+    except Exception as e:
+        debug(f"Exception in check_running: {e}")
+        raise e
+
+
+def load_config(config_path: str) -> Dict[str, Any]:
+    debug(f"Loading config from {config_path}")
+    with open(config_path, "r") as file:
+        config = yaml.safe_load(file)
+        debug(f"Loaded config: {config}")
+        return config
+
+
+def get_models(api_base: str, api_key_config: str | None) -> List[Dict[str, str]]:
+    """Query the models endpoint and return a list of model data"""
+    debug(
+        f"get_models called with api_base={api_base}, api_key_config={api_key_config}"
+    )
+    actual_api_key = None
+    if api_key_config:
+        if api_key_config.startswith("passage::"):
+            passage_path = api_key_config.split("::", 1)[1]
+            debug(f"Retrieving API key from passage at {passage_path}")
+            actual_api_key = get_passageword(passage_path)
+            if not actual_api_key:
+                print(
+                    f"Could not retrieve API key from passage for path: {passage_path}",
+                    file=sys.stderr,
+                )
+                debug(f"Failed to retrieve API key from passage for {passage_path}")
+                # Decide how to handle failure: skip, return empty, etc.
+                # Here we'll proceed without a key, which might fail later.
+        else:
+            actual_api_key = api_key_config  # Use the key directly if not a passage path
+            debug("Using API key directly from config")
+
+    headers = {}
+    if actual_api_key:
+        headers["Authorization"] = f"Bearer {actual_api_key}"
+        debug("Authorization header set")
+
+    # Ensure the URL is properly formatted
+    if not api_base.endswith("/"):
+        api_base = api_base + "/"
+        debug(f"api_base adjusted to {api_base}")
+
+    models_url = f"{api_base}models"
+    debug(f"Querying models endpoint: {models_url}")
+
+    try:
+        response = requests.get(models_url, headers=headers, timeout=10)
+        debug(f"HTTP GET {models_url} status_code={response.status_code}")
+        response.raise_for_status()
+        data = response.json()
+        debug(f"Response JSON: {data}")
+
+        # Extract models from response
+        models = data.get("data", [])
+        debug(f"Extracted models: {models}")
+        return [
+            {"name": model.get("id"), "description": model.get("id")}
+            for model in models
+        ]
+    except Exception as e:
+        print(f"Error querying {api_base}: {str(e)}", file=sys.stderr)
+        debug(f"Exception in get_models: {e}")
+        return []
+
+
+def get_gemini_models(api_key: Optional[str]) -> List[Dict[str, str]]:
+    """Query Google's Gemini API and return a list of available models"""
+    debug(f"get_gemini_models called with api_key={'***' if api_key else None}")
+    if not api_key:
+        print("Error: API key is required for Google Gemini API", file=sys.stderr)
+        debug("No API key provided to get_gemini_models")
+        return []
+
+    try:
+        # Configure the Gemini API with the provided key
+        debug("Configuring genai with provided API key")
+        genai.configure(api_key=api_key)
+
+        # Get list of available models
+        debug("Listing models from genai")
+        models_list = genai.list_models()
+        debug(f"Models list: {models_list}")
+
+        # Filter for Gemini models
+        gemini_models = [
+            {"name": model.name.split("/")[-1], "description": model.description}
+            for model in models_list
+            if "gemini" in model.name.lower()
+        ]
+        debug(f"Filtered Gemini models: {gemini_models}")
+
+        return gemini_models
+    except Exception as e:
+        print(f"Error querying Google Gemini API: {str(e)}", file=sys.stderr)
+        debug(f"Exception in get_gemini_models: {e}")
+        return []
+
+
+def get_passageword(passage_path: str) -> str | None:
+    """Retrieve passageword from passage using the given path."""
+    debug(f"get_passageword called for passage_path={passage_path}")
+    try:
+        result = subprocess.run(
+            ["passage", "show", passage_path], capture_output=True, text=True, check=True
+        )
+        # Return the first line of the output, stripping newline
+        passageword = result.stdout.splitlines()[0]
+        debug(f"Passageword retrieved from passage for {passage_path}")
+        return passageword
+    except FileNotFoundError:
+        print(
+            "Error: 'passage' command not found. Is passage installed and in your PATH?",
+            file=sys.stderr,
+        )
+        debug("'passage' command not found")
+        return None
+    except subprocess.CalledProcessError as e:
+        print(f"Error running passage show {passage_path}: {e.stderr}", file=sys.stderr)
+        debug(f"subprocess.CalledProcessError in get_passageword: {e}")
+        return None
+    except IndexError:
+        print(f"Error: 'passage show {passage_path}' returned empty output.", file=sys.stderr)
+        debug(f"IndexError: passage show {passage_path} returned empty output")
+        return None
+
+
+def main():
+    config_path = os.path.expanduser("~/.config/aichat/config.yaml.in")
+    debug(f"main: config_path={config_path}")
+    config_data = load_config(config_path)
+
+    if "clients" in config_data:
+        updated_clients = []
+        debug(f"main: found {len(config_data.get('clients', []))} clients")
+        for client in config_data.get("clients", []):
+            # Make a copy to avoid modifying the original dict during iteration if needed elsewhere
+            updated_client = client.copy()
+            debug(f"Processing client: {updated_client}")
+
+            actual_api_key = None
+            api_key_config = updated_client.get("api_key")
+            if api_key_config and api_key_config.startswith("passage::"):
+                passage_path = api_key_config.split("::", 1)[1]
+                debug(f"main: retrieving api_key from passage for {passage_path}")
+                actual_api_key = get_passageword(passage_path)
+            else:
+                actual_api_key = api_key_config
+            updated_client["api_key"] = actual_api_key
+            debug("main: actual_api_key set for client")
+
+            # For OpenAI-compatible clients, query and potentially add models
+            if updated_client.get("type") == "openai-compatible":
+                # Check if models are NOT already defined in config or are empty
+                if not updated_client.get("models"):
+                    api_base = updated_client.get("api_base")
+                    api_key = updated_client.get("api_key")
+                    debug(
+                        f"main: openai-compatible client, api_base={api_base}, api_key={'***' if api_key else None}"
+                    )
+
+                    # Skip ollama explicitly if needed, or handle based on your logic
+                    if api_base:
+                        if not check_running(api_base):
+                            debug(f"main: {api_base} not running, skipping client")
+                            continue
+                        # Try to fetch models from API
+                        fetched_models = get_models(api_base, api_key)
+                        if fetched_models:
+                            updated_client["models"] = fetched_models
+                            debug("main: models fetched and set for client")
+                        else:
+                            # Keep models empty/undefined or add an empty list
+                            updated_client["models"] = []
+                            debug("main: no models fetched, set empty list")
+                    else:
+                        # Handle cases where type is openai-compatible but no api_base
+                        updated_client["models"] = []
+                        debug(
+                            "main: openai-compatible client with no api_base, set empty models"
+                        )
+
+            # For Google Gemini clients, query and potentially add models
+            elif updated_client.get("type") == "gemini":
+                # Check if models are NOT already defined in config or are empty
+                if not updated_client.get("models"):
+                    api_key = updated_client.get("api_key")
+                    debug(f"main: gemini client, api_key={'***' if api_key else None}")
+                    fetched_models = get_gemini_models(api_key)
+                    if fetched_models:
+                        updated_client["models"] = fetched_models
+                        debug("main: gemini models fetched and set for client")
+                    else:
+                        updated_client["models"] = []
+                        debug("main: no gemini models fetched, set empty list")
+
+            updated_clients.append(updated_client)
+            debug("main: client processed and added to updated_clients")
+
+        # Replace the original clients list with the updated one
+        config_data["clients"] = updated_clients
+        debug("main: updated_clients set in config_data")
+
+    # Print the entire (potentially updated) configuration as YAML
+    debug("main: dumping config_data as YAML")
+    print(yaml.dump(config_data, default_flow_style=False, sort_keys=False))
+
+
+# Run the generator only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()