Commit 4032aeed8340
Changed files (4)
home
common
home/common/dev/ai.nix
@@ -28,14 +28,19 @@
];
xdg.configFile."aichat/config.yaml.in".source = ./aichat.yaml;
- xdg.configFile."aichat/update-config" = {
- source = ./aichat-update-config;
+ xdg.configFile."aichat/models-override.yaml".source = ./aichat-models-override.yaml;
+ xdg.configFile."aichat/genconf.py" = {
+ source = ./genconf.py;
executable = true;
};
- home.activation = {
- # linkGeneration writeBoundary
- aichat-configuration = lib.hm.dag.entryAfter [ "linkGeneration" ] ''
- /home/vincent/.config/aichat/update-config
- '';
- };
+ # xdg.configFile."aichat/update-config" = {
+ # source = ./aichat-update-config;
+ # executable = true;
+ # };
+ # home.activation = {
+ # # linkGeneration writeBoundary
+ # aichat-configuration = lib.hm.dag.entryAfter [ "linkGeneration" ] ''
+ # /home/vincent/.config/aichat/genconf.py
+ # '';
+ # };
}
home/common/dev/aichat-models-override.yaml
@@ -0,0 +1,2190 @@
+version: 0.29.0
+list:
+- provider: openai
+ models:
+ - name: gpt-4o
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.5
+ output_price: 10.0
+ max_output_tokens: 16384
+ supports_vision: true
+ supports_function_calling: true
+ - name: gpt-4o-search-preview
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.5
+ output_price: 10.0
+ max_output_tokens: 16384
+ supports_vision: true
+ - name: chatgpt-4o-latest
+ type: chat
+ max_input_tokens: 128000
+ input_price: 5.0
+ output_price: 15.0
+ max_output_tokens: 16384
+ supports_vision: true
+ supports_function_calling: true
+ - name: gpt-4o-mini
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.15
+ output_price: 0.6
+ max_output_tokens: 16384
+ supports_vision: true
+ supports_function_calling: true
+ - name: gpt-4o-mini-search-preview
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.15
+ output_price: 0.6
+ max_output_tokens: 16384
+ supports_vision: true
+ - name: gpt-4-turbo
+ type: chat
+ max_input_tokens: 128000
+ input_price: 10.0
+ output_price: 30.0
+ max_output_tokens: 4096
+ supports_vision: true
+ supports_function_calling: true
+ - name: gpt-4.5-preview
+ type: chat
+ max_input_tokens: 128000
+ input_price: 75.0
+ output_price: 150.0
+ max_output_tokens: 16384
+ supports_vision: true
+ supports_function_calling: true
+ - name: o3-mini
+ type: chat
+ max_input_tokens: 200000
+ input_price: 1.1
+ output_price: 4.4
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: o3-mini-high
+ type: chat
+ real_name: o3-mini
+ max_input_tokens: 200000
+ input_price: 1.1
+ output_price: 4.4
+ patch:
+ body:
+ reasoning_effort: high
+ max_tokens: null
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: o1-pro
+ type: chat
+ max_input_tokens: 200000
+ input_price: 150.0
+ output_price: 600.0
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: o1
+ type: chat
+ max_input_tokens: 200000
+ input_price: 15.0
+ output_price: 60.0
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: o1-preview
+ type: chat
+ max_input_tokens: 128000
+ input_price: 15.0
+ output_price: 60.0
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ max_output_tokens: 32768
+ no_system_message: true
+ - name: o1-mini
+ type: chat
+ max_input_tokens: 128000
+ input_price: 3.0
+ output_price: 12.0
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ max_output_tokens: 65536
+ no_system_message: true
+ - name: gpt-3.5-turbo
+ type: chat
+ max_input_tokens: 16385
+ input_price: 0.5
+ output_price: 1.5
+ max_output_tokens: 4096
+ supports_function_calling: true
+ - name: text-embedding-3-large
+ type: embedding
+ input_price: 0.13
+ max_tokens_per_chunk: 8191
+ default_chunk_size: 2000
+ max_batch_size: 100
+ - name: text-embedding-3-small
+ type: embedding
+ input_price: 0.02
+ max_tokens_per_chunk: 8191
+ default_chunk_size: 2000
+ max_batch_size: 100
+- provider: gemini
+ models:
+ - name: gemini-2.0-flash
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 8192
+ supports_vision: true
+ supports_function_calling: true
+ - name: gemini-2.0-flash-lite
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 8192
+ supports_vision: true
+ supports_function_calling: true
+ - name: gemini-2.5-pro-exp-03-25
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 65536
+ supports_vision: true
+ supports_function_calling: true
+ - name: gemma-3-27b-it
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 8192
+ - name: gemini-1.5-pro-latest
+ type: chat
+ max_input_tokens: 2097152
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 8192
+ supports_vision: true
+ supports_function_calling: true
+ - name: gemini-1.5-flash-latest
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 8192
+ supports_vision: true
+ supports_function_calling: true
+ - name: gemini-1.5-flash-8b-latest
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 8192
+ supports_vision: true
+ supports_function_calling: true
+ - name: text-embedding-004
+ type: embedding
+ input_price: 0.0
+ max_tokens_per_chunk: 2048
+ default_chunk_size: 1500
+ max_batch_size: 100
+- provider: claude
+ models:
+ - name: claude-3-7-sonnet-20250219
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-7-sonnet-20250219:thinking
+ type: chat
+ real_name: claude-3-7-sonnet-20250219
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ patch:
+ body:
+ temperature: null
+ top_p: null
+ thinking:
+ type: enabled
+ budget_tokens: 16000
+ max_output_tokens: 24000
+ require_max_tokens: true
+ supports_vision: true
+ - name: claude-3-5-sonnet-20241022
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-5-sonnet-20240620
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-5-haiku-20241022
+ type: chat
+ max_input_tokens: 200000
+ input_price: 0.8
+ output_price: 4.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-opus-20240229
+ type: chat
+ max_input_tokens: 200000
+ input_price: 15.0
+ output_price: 75.0
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-sonnet-20240229
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-haiku-20240307
+ type: chat
+ max_input_tokens: 200000
+ input_price: 0.25
+ output_price: 1.25
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+- provider: mistral
+ models:
+ - name: mistral-large-latest
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 6.0
+ supports_function_calling: true
+ - name: mistral-small-latest
+ type: chat
+ max_input_tokens: 32000
+ input_price: 0.1
+ output_price: 0.3
+ supports_function_calling: true
+ - name: codestral-latest
+ type: chat
+ max_input_tokens: 256000
+ input_price: 0.3
+ output_price: 0.9
+ supports_function_calling: true
+ - name: ministral-8b-latest
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.1
+ output_price: 0.1
+ supports_function_calling: true
+ - name: open-mistral-nemo
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.15
+ output_price: 0.15
+ supports_function_calling: true
+ - name: pixtral-large-latest
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 6.0
+ supports_vision: true
+ - name: pixtral-12b-latest
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.15
+ output_price: 0.15
+ supports_vision: true
+ - name: mistral-embed
+ type: embedding
+ max_input_tokens: 8092
+ input_price: 0.1
+ max_tokens_per_chunk: 8092
+ default_chunk_size: 2000
+- provider: ai21
+ models:
+ - name: jamba-large
+ type: chat
+ max_input_tokens: 256000
+ input_price: 2.0
+ output_price: 8.0
+ supports_function_calling: true
+ - name: jamba-mini
+ type: chat
+ max_input_tokens: 256000
+ input_price: 0.2
+ output_price: 0.4
+ supports_function_calling: true
+- provider: cohere
+ models:
+ - name: command-a-03-2025
+ type: chat
+ max_input_tokens: 256000
+ input_price: 2.5
+ output_price: 10.0
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: command-r-plus-08-2024
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.5
+ output_price: 10.0
+ max_output_tokens: 4096
+ supports_function_calling: true
+ - name: command-r-08-2024
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.15
+ output_price: 0.6
+ max_output_tokens: 4096
+ supports_function_calling: true
+ - name: command-r7b-12-2024
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.0375
+ output_price: 0.15
+ max_output_tokens: 4096
+ - name: embed-english-v3.0
+ type: embedding
+ input_price: 0.1
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 96
+ - name: embed-english-light-v3.0
+ type: embedding
+ input_price: 0.1
+ max_tokens_per_chunk: 512
+ default_chunk_size: 700
+ max_batch_size: 96
+ - name: embed-multilingual-v3.0
+ type: embedding
+ input_price: 0.1
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 96
+ - name: embed-multilingual-light-v3.0
+ type: embedding
+ input_price: 0.1
+ max_tokens_per_chunk: 512
+ default_chunk_size: 700
+ max_batch_size: 96
+ - name: rerank-v3.5
+ type: reranker
+ max_input_tokens: 4096
+ - name: rerank-english-v3.0
+ type: reranker
+ max_input_tokens: 4096
+ - name: rerank-multilingual-v3.0
+ type: reranker
+ max_input_tokens: 4096
+- provider: xai
+ models:
+ - name: grok-3-latest
+ type: chat
+ max_input_tokens: 131072
+ input_price: 3.0
+ output_price: 15.0
+ supports_function_calling: true
+ - name: grok-3-fast-latest
+ type: chat
+ max_input_tokens: 131072
+ input_price: 5.0
+ output_price: 25.0
+ supports_function_calling: true
+ - name: grok-3-mini-latest
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.3
+ output_price: 0.5
+ - name: grok-3-mini-fast-latest
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.6
+ output_price: 4.0
+ - name: grok-2-latest
+ type: chat
+ max_input_tokens: 131072
+ input_price: 2.0
+ output_price: 10.0
+ supports_function_calling: true
+ - name: grok-2-vision-latest
+ type: chat
+ max_input_tokens: 32768
+ input_price: 2.0
+ output_price: 10.0
+ supports_vision: true
+ supports_function_calling: true
+- provider: perplexity
+ models:
+ - name: sonar-pro
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ - name: sonar
+ type: chat
+ max_input_tokens: 128000
+ input_price: 1.0
+ output_price: 1.0
+ - name: sonar-reasoning-pro
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 8.0
+ - name: sonar-reasoning
+ type: chat
+ max_input_tokens: 128000
+ input_price: 1.0
+ output_price: 5.0
+ - name: sonar-deep-research
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 8.0
+ - name: r1-1776
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 8.0
+- provider: groq
+ models:
+ - name: meta-llama/llama-4-maverick-17b-128e-instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ supports_vision: true
+ supports_function_calling: true
+ - name: meta-llama/llama-4-scout-17b-16e-instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ supports_vision: true
+ supports_function_calling: true
+ - name: llama-3.3-70b-versatile
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ supports_function_calling: true
+ - name: llama-3.1-8b-instant
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ supports_function_calling: true
+ - name: qwen-qwq-32b
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ supports_function_calling: true
+- provider: vertexai
+ models:
+ - name: gemini-2.0-flash-001
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.15
+ output_price: 0.6
+ max_output_tokens: 8192
+ supports_vision: true
+ supports_function_calling: true
+ - name: gemini-2.0-flash-lite-001
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.075
+ output_price: 0.3
+ max_output_tokens: 8192
+ supports_vision: true
+ supports_function_calling: true
+ - name: gemini-2.5-pro-preview-03-25
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 1.25
+ output_price: 10.0
+ max_output_tokens: 65536
+ supports_vision: true
+ supports_function_calling: true
+ - name: gemini-1.5-pro-002
+ type: chat
+ max_input_tokens: 2097152
+ input_price: 1.25
+ output_price: 3.75
+ max_output_tokens: 8192
+ supports_vision: true
+ supports_function_calling: true
+ - name: gemini-1.5-flash-002
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.019
+ output_price: 0.075
+ max_output_tokens: 8192
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-7-sonnet@20250219
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-7-sonnet@20250219:thinking
+ type: chat
+ real_name: claude-3-7-sonnet@20250219
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ patch:
+ body:
+ temperature: null
+ top_p: null
+ thinking:
+ type: enabled
+ budget_tokens: 16000
+ max_output_tokens: 24000
+ require_max_tokens: true
+ supports_vision: true
+ - name: claude-3-5-sonnet-v2@20241022
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-5-sonnet@20240620
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-5-haiku@20241022
+ type: chat
+ max_input_tokens: 200000
+ input_price: 0.8
+ output_price: 4.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-opus@20240229
+ type: chat
+ max_input_tokens: 200000
+ input_price: 15.0
+ output_price: 75.0
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-sonnet@20240229
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: claude-3-haiku@20240307
+ type: chat
+ max_input_tokens: 200000
+ input_price: 0.25
+ output_price: 1.25
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: mistral-large-2411
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 6.0
+ supports_function_calling: true
+ - name: mistral-small-2503
+ type: chat
+ max_input_tokens: 32000
+ input_price: 0.1
+ output_price: 0.3
+ supports_function_calling: true
+ - name: codestral-2501
+ type: chat
+ max_input_tokens: 256000
+ input_price: 0.3
+ output_price: 0.9
+ supports_function_calling: true
+ - name: text-embedding-005
+ type: embedding
+ max_input_tokens: 20000
+ input_price: 0.025
+ max_tokens_per_chunk: 2048
+ default_chunk_size: 1500
+ max_batch_size: 5
+ - name: text-multilingual-embedding-002
+ type: embedding
+ max_input_tokens: 20000
+ input_price: 0.2
+ max_tokens_per_chunk: 2048
+ default_chunk_size: 1500
+ max_batch_size: 5
+- provider: bedrock
+ models:
+ - name: us.anthropic.claude-3-7-sonnet-20250219-v1:0
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: us.anthropic.claude-3-7-sonnet-20250219-v1:0:thinking
+ type: chat
+ real_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ patch:
+ body:
+ inferenceConfig:
+ temperature: null
+ topP: null
+ additionalModelRequestFields:
+ thinking:
+ type: enabled
+ budget_tokens: 16000
+ max_output_tokens: 24000
+ require_max_tokens: true
+ supports_vision: true
+ - name: anthropic.claude-3-5-sonnet-20241022-v2:0
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic.claude-3-5-sonnet-20240620-v1:0
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic.claude-3-5-haiku-20241022-v1:0
+ type: chat
+ max_input_tokens: 200000
+ input_price: 0.8
+ output_price: 4.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic.claude-3-opus-20240229-v1:0
+ type: chat
+ max_input_tokens: 200000
+ input_price: 15.0
+ output_price: 75.0
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic.claude-3-sonnet-20240229-v1:0
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic.claude-3-haiku-20240307-v1:0
+ type: chat
+ max_input_tokens: 200000
+ input_price: 0.25
+ output_price: 1.25
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: us.meta.llama3-3-70b-instruct-v1:0
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.72
+ output_price: 0.72
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_function_calling: true
+ - name: meta.llama3-1-405b-instruct-v1:0
+ type: chat
+ max_input_tokens: 131072
+ input_price: 2.4
+ output_price: 2.4
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_function_calling: true
+ - name: meta.llama3-1-70b-instruct-v1:0
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.72
+ output_price: 0.72
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_function_calling: true
+ - name: meta.llama3-1-8b-instruct-v1:0
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.22
+ output_price: 0.22
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_function_calling: true
+ - name: us.meta.llama3-2-90b-instruct-v1:0
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.72
+ output_price: 0.72
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: us.meta.llama3-2-11b-instruct-v1:0
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.16
+ output_price: 0.16
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: us.amazon.nova-pro-v1:0
+ type: chat
+ max_input_tokens: 300000
+ input_price: 0.8
+ output_price: 3.2
+ max_output_tokens: 5120
+ supports_vision: true
+ - name: us.amazon.nova-lite-v1:0
+ type: chat
+ max_input_tokens: 300000
+ input_price: 0.06
+ output_price: 0.24
+ max_output_tokens: 5120
+ supports_vision: true
+ - name: us.amazon.nova-micro-v1:0
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.035
+ output_price: 0.14
+ max_output_tokens: 5120
+ - name: mistral.mistral-large-2407-v1:0
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 6.0
+ supports_function_calling: true
+ - name: cohere.command-r-plus-v1:0
+ type: chat
+ max_input_tokens: 128000
+ input_price: 3.0
+ output_price: 15.0
+ supports_function_calling: true
+ - name: cohere.command-r-v1:0
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.5
+ output_price: 1.5
+ supports_function_calling: true
+ - name: cohere.embed-english-v3
+ type: embedding
+ input_price: 0.1
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 96
+ - name: cohere.embed-multilingual-v3
+ type: embedding
+ input_price: 0.1
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 96
+ - name: ai21.jamba-1-5-large-v1:0
+ type: chat
+ max_input_tokens: 256000
+ input_price: 2.0
+ output_price: 8.0
+ supports_function_calling: true
+ - name: ai21.jamba-1-5-mini-v1:0
+ type: chat
+ max_input_tokens: 256000
+ input_price: 0.2
+ output_price: 0.4
+ supports_function_calling: true
+ - name: us.deepseek.r1-v1:0
+ type: chat
+ max_input_tokens: 128000
+ input_price: 1.35
+ output_price: 5.4
+- provider: cloudflare
+ models:
+ - name: '@cf/meta/llama-4-scout-17b-16e-instruct'
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 2048
+ require_max_tokens: true
+ - name: '@cf/meta/llama-3.3-70b-instruct-fp8-fast'
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 2048
+ require_max_tokens: true
+ - name: '@cf/meta/llama-3.1-70b-instruct'
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 2048
+ require_max_tokens: true
+ - name: '@cf/meta/llama-3.1-8b-instruct'
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 2048
+ require_max_tokens: true
+ - name: '@cf/qwen/qwq-32b'
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 2048
+ require_max_tokens: true
+ - name: '@cf/qwen/qwen2.5-coder-32b-instruct'
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 2048
+ require_max_tokens: true
+ - name: '@cf/google/gemma-3-12b-it'
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 2048
+ require_max_tokens: true
+ - name: '@cf/mistralai/mistral-small-3.1-24b-instruct'
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 2048
+ require_max_tokens: true
+ - name: '@cf/baai/bge-large-en-v1.5'
+ type: embedding
+ input_price: 0.0
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 100
+- provider: ernie
+ models:
+ - name: ernie-4.5-8k-preview
+ type: chat
+ max_input_tokens: 8192
+ input_price: 0.56
+ output_price: 2.24
+ supports_function_calling: true
+ - name: ernie-x1-32k-preview
+ type: chat
+ max_input_tokens: 32768
+ input_price: 0.28
+ output_price: 1.12
+ - name: ernie-4.0-turbo-8k-latest
+ type: chat
+ max_input_tokens: 8192
+ input_price: 0.42
+ output_price: 1.26
+ supports_function_calling: true
+ - name: ernie-4.0-turbo-128k
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.42
+ output_price: 1.26
+ supports_function_calling: true
+ - name: ernie-4.0-8k-latest
+ type: chat
+ max_input_tokens: 8192
+ input_price: 0.56
+ output_price: 2.24
+ supports_function_calling: true
+ - name: ernie-3.5-128k
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.112
+ output_price: 0.28
+ supports_function_calling: true
+ - name: ernie-speed-pro-128k
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.042
+ output_price: 0.084
+ - name: deepseek-v3
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.112
+ output_price: 0.224
+ - name: deepseek-r1
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.28
+ output_price: 1.12
+ - name: qwq-32b
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.28
+ output_price: 0.84
+ - name: bge-large-zh
+ type: embedding
+ input_price: 0.07
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 16
+ - name: bge-large-en
+ type: embedding
+ input_price: 0.07
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 16
+ - name: bce-reranker-base
+ type: reranker
+ max_input_tokens: 1024
+ input_price: 0.07
+- provider: qianwen
+ models:
+ - name: qwen-max-latest
+ type: chat
+ max_input_tokens: 32678
+ input_price: 1.6
+ output_price: 6.4
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: qwen-plus-latest
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.112
+ output_price: 0.28
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: qwen-turbo-latest
+ type: chat
+ max_input_tokens: 1000000
+ input_price: 0.042
+ output_price: 0.084
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: qwen-long
+ type: chat
+ max_input_tokens: 1000000
+ input_price: 0.07
+ output_price: 0.28
+ - name: qwen-omni-turbo-latest
+ type: chat
+ max_input_tokens: 32768
+ max_output_tokens: 2048
+ supports_vision: true
+ - name: qwq-plus-latest
+ type: chat
+ max_input_tokens: 131072
+ max_output_tokens: 8192
+ - name: qwq-32b
+ type: chat
+ max_input_tokens: 131072
+ max_output_tokens: 8192
+ - name: qwen-vl-max-latest
+ type: chat
+ max_input_tokens: 30720
+ input_price: 0.42
+ output_price: 1.26
+ max_output_tokens: 2048
+ supports_vision: true
+ - name: qwen-vl-plus-latest
+ type: chat
+ max_input_tokens: 30000
+ input_price: 0.21
+ output_price: 0.63
+ max_output_tokens: 2048
+ supports_vision: true
+ - name: qwen2.5-72b-instruct
+ type: chat
+ max_input_tokens: 129024
+ input_price: 0.56
+ output_price: 1.68
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: qwen2.5-vl-72b-instruct
+ type: chat
+ max_input_tokens: 129024
+ input_price: 2.24
+ output_price: 6.72
+ max_output_tokens: 8192
+ supports_vision: true
+ - name: qwen2.5-coder-32b-instruct
+ type: chat
+ max_input_tokens: 129024
+ input_price: 0.49
+ output_price: 0.98
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: deepseek-v3
+ type: chat
+ max_input_tokens: 65792
+ input_price: 0.14
+ output_price: 0.56
+ - name: deepseek-r1
+ type: chat
+ max_input_tokens: 65792
+ input_price: 0.28
+ output_price: 1.12
+ - name: text-embedding-v3
+ type: embedding
+ input_price: 0.1
+ max_tokens_per_chunk: 8192
+ default_chunk_size: 2000
+ max_batch_size: 6
+ - name: text-embedding-v2
+ type: embedding
+ input_price: 0.1
+ max_tokens_per_chunk: 2048
+ default_chunk_size: 2000
+ max_batch_size: 25
+- provider: hunyuan
+ models:
+ - name: hunyuan-turbos-latest
+ type: chat
+ max_input_tokens: 24000
+ input_price: 0.112
+ output_price: 0.28
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: hunyuan-t1-latest
+ type: chat
+ max_input_tokens: 28000
+ input_price: 0.14
+ output_price: 0.56
+ max_output_tokens: 64000
+ - name: hunyuan-turbo-latest
+ type: chat
+ max_input_tokens: 28000
+ input_price: 0.336
+ output_price: 1.344
+ max_output_tokens: 4096
+ supports_function_calling: true
+ - name: hunyuan-large
+ type: chat
+ max_input_tokens: 28000
+ input_price: 0.56
+ output_price: 1.68
+ max_output_tokens: 4096
+ supports_function_calling: true
+ - name: hunyuan-large-longcontext
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.84
+ output_price: 2.52
+ max_output_tokens: 6144
+ supports_function_calling: true
+ - name: hunyuan-standard
+ type: chat
+ max_input_tokens: 30000
+ input_price: 0.112
+ output_price: 0.28
+ max_output_tokens: 2048
+ supports_function_calling: true
+ - name: hunyuan-standard-256K
+ type: chat
+ max_input_tokens: 250000
+ input_price: 0.07
+ output_price: 0.28
+ max_output_tokens: 6144
+ supports_function_calling: true
+ - name: hunyuan-lite
+ type: chat
+ max_input_tokens: 250000
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 6144
+ supports_function_calling: true
+ - name: hunyuan-turbo-vision
+ type: chat
+ max_input_tokens: 6144
+ input_price: 11.2
+ output_price: 11.2
+ max_output_tokens: 2048
+ supports_vision: true
+ - name: hunyuan-vision
+ type: chat
+ max_input_tokens: 6144
+ input_price: 2.52
+ output_price: 2.52
+ max_output_tokens: 2048
+ supports_vision: true
+ - name: hunyuan-embedding
+ type: embedding
+ input_price: 0.01
+ max_tokens_per_chunk: 1024
+ default_chunk_size: 1000
+ max_batch_size: 100
+- provider: moonshot
+ models:
+ - name: kimi-latest
+ type: chat
+ supports_vision: true
+ supports_function_calling: true
+ - name: moonshot-v1-8k
+ type: chat
+ max_input_tokens: 8192
+ input_price: 1.68
+ output_price: 1.68
+ supports_function_calling: true
+ - name: moonshot-v1-32k
+ type: chat
+ max_input_tokens: 32768
+ input_price: 3.36
+ output_price: 3.36
+ supports_function_calling: true
+ - name: moonshot-v1-128k
+ type: chat
+ max_input_tokens: 131072
+ input_price: 8.4
+ output_price: 8.4
+ supports_function_calling: true
+ - name: moonshot-v1-8k-vision-preview
+ type: chat
+ max_input_tokens: 8192
+ input_price: 1.68
+ output_price: 1.68
+ supports_vision: true
+ - name: moonshot-v1-32k-vision-preview
+ type: chat
+ max_input_tokens: 32768
+ input_price: 3.36
+ output_price: 3.36
+ supports_vision: true
+ - name: moonshot-v1-128k-vision-preview
+ type: chat
+ max_input_tokens: 131072
+ input_price: 8.4
+ output_price: 8.4
+ supports_vision: true
+- provider: deepseek
+ models:
+ - name: deepseek-chat
+ type: chat
+ max_input_tokens: 64000
+ input_price: 0.27
+ output_price: 1.1
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: deepseek-reasoner
+ type: chat
+ max_input_tokens: 64000
+ input_price: 0.55
+ output_price: 2.19
+ max_output_tokens: 8192
+- provider: zhipuai
+ models:
+ - name: glm-4-plus
+ type: chat
+ max_input_tokens: 128000
+ input_price: 7.0
+ output_price: 7.0
+ max_output_tokens: 4096
+ supports_function_calling: true
+ - name: glm-4-alltools
+ type: chat
+ max_input_tokens: 128000
+ input_price: 14.0
+ output_price: 14.0
+ max_output_tokens: 4096
+ supports_function_calling: true
+ - name: glm-4-long
+ type: chat
+ max_input_tokens: 1000000
+ input_price: 0.14
+ output_price: 0.14
+ max_output_tokens: 4096
+ supports_function_calling: true
+ - name: glm-4-flash
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.0
+ output_price: 0.0
+ max_output_tokens: 4096
+ supports_function_calling: true
+ - name: glm-4v-plus
+ type: chat
+ max_input_tokens: 8192
+ input_price: 0.56
+ output_price: 0.56
+ supports_vision: true
+ - name: glm-4v-flash
+ type: chat
+ max_input_tokens: 8192
+ input_price: 0.0
+ output_price: 0.0
+ supports_vision: true
+ - name: glm-zero-preview
+ type: chat
+ max_input_tokens: 16384
+ input_price: 1.4
+ output_price: 1.4
+ - name: embedding-3
+ type: embedding
+ max_input_tokens: 8192
+ input_price: 0.07
+ max_tokens_per_chunk: 8192
+ default_chunk_size: 2000
+ - name: rerank
+ type: reranker
+ max_input_tokens: 4096
+ input_price: 0.112
+- provider: lingyiwanwu
+ models:
+ - name: yi-lightning
+ type: chat
+ max_input_tokens: 16384
+ input_price: 0.14
+ output_price: 0.14
+ - name: yi-vision-v2
+ type: chat
+ max_input_tokens: 16384
+ input_price: 0.84
+ output_price: 0.84
+ supports_vision: true
+- provider: minimax
+ models:
+ - name: minimax-text-01
+ type: chat
+ max_input_tokens: 1000192
+ input_price: 0.14
+ output_price: 1.12
+ supports_vision: true
+ - name: abab6.5s-chat
+ type: chat
+ max_input_tokens: 245760
+ input_price: 0.14
+ output_price: 0.14
+ supports_vision: true
+ - name: deepseek-r1
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.56
+ output_price: 2.24
+- provider: openrouter
+ models:
+ - name: openai/gpt-4o
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.5
+ output_price: 10.0
+ supports_vision: true
+ supports_function_calling: true
+ - name: openai/gpt-4o-search-preview
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.5
+ output_price: 10.0
+ max_output_tokens: 16384
+ supports_vision: true
+ - name: openai/chatgpt-4o-latest
+ type: chat
+ max_input_tokens: 128000
+ input_price: 5.0
+ output_price: 15.0
+ supports_vision: true
+ supports_function_calling: true
+ - name: openai/gpt-4o-mini
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.15
+ output_price: 0.6
+ supports_vision: true
+ supports_function_calling: true
+ - name: openai/gpt-4o-mini-search-preview
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.15
+ output_price: 0.6
+ max_output_tokens: 16384
+ supports_vision: true
+ - name: openai/gpt-4-turbo
+ type: chat
+ max_input_tokens: 128000
+ input_price: 10.0
+ output_price: 30.0
+ supports_vision: true
+ supports_function_calling: true
+ - name: openai/gpt-4.5-preview
+ type: chat
+ max_input_tokens: 128000
+ input_price: 75.0
+ output_price: 150.0
+ max_output_tokens: 16384
+ supports_vision: true
+ supports_function_calling: true
+ - name: openai/o3-mini
+ type: chat
+ max_input_tokens: 200000
+ input_price: 1.1
+ output_price: 4.4
+ patch:
+ body:
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: openai/o3-mini-high
+ type: chat
+ max_input_tokens: 200000
+ input_price: 1.1
+ output_price: 4.4
+ patch:
+ body:
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: openai/o1-pro
+ type: chat
+ max_input_tokens: 200000
+ input_price: 150.0
+ output_price: 600.0
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: openai/o1
+ type: chat
+ max_input_tokens: 128000
+ input_price: 15.0
+ output_price: 60.0
+ patch:
+ body:
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: openai/o1-preview
+ type: chat
+ max_input_tokens: 128000
+ input_price: 15.0
+ output_price: 60.0
+ patch:
+ body:
+ temperature: null
+ top_p: null
+ no_system_message: true
+ - name: openai/o1-mini
+ type: chat
+ max_input_tokens: 128000
+ input_price: 3.0
+ output_price: 12.0
+ patch:
+ body:
+ temperature: null
+ top_p: null
+ no_system_message: true
+ - name: openai/gpt-3.5-turbo
+ type: chat
+ max_input_tokens: 16385
+ input_price: 0.5
+ output_price: 1.5
+ supports_function_calling: true
+ - name: google/gemini-2.0-flash-001
+ type: chat
+ max_input_tokens: 1000000
+ input_price: 0.1
+ output_price: 0.4
+ supports_vision: true
+ supports_function_calling: true
+ - name: google/gemini-2.0-flash-lite-001
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.075
+ output_price: 0.3
+ supports_vision: true
+ supports_function_calling: true
+ - name: google/gemini-2.5-pro-preview-03-25
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 1.25
+ output_price: 10.0
+ supports_vision: true
+ supports_function_calling: true
+ - name: google/gemma-3-27b-it
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.1
+ output_price: 0.2
+ - name: google/gemini-pro-1.5
+ type: chat
+ max_input_tokens: 2000000
+ input_price: 1.25
+ output_price: 5.0
+ supports_vision: true
+ supports_function_calling: true
+ - name: google/gemini-flash-1.5
+ type: chat
+ max_input_tokens: 1000000
+ input_price: 0.075
+ output_price: 0.3
+ supports_vision: true
+ supports_function_calling: true
+ - name: google/gemini-flash-1.5-8b
+ type: chat
+ max_input_tokens: 1000000
+ input_price: 0.0375
+ output_price: 0.15
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic/claude-3.7-sonnet
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic/claude-3.7-sonnet:thinking
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ patch:
+ body:
+ include_reasoning: true
+ max_output_tokens: 24000
+ require_max_tokens: true
+ supports_vision: true
+ - name: anthropic/claude-3.5-sonnet
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic/claude-3-5-haiku
+ type: chat
+ max_input_tokens: 200000
+ input_price: 0.8
+ output_price: 4.0
+ max_output_tokens: 8192
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic/claude-3-opus
+ type: chat
+ max_input_tokens: 200000
+ input_price: 15.0
+ output_price: 75.0
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic/claude-3-sonnet
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: anthropic/claude-3-haiku
+ type: chat
+ max_input_tokens: 200000
+ input_price: 0.25
+ output_price: 1.25
+ max_output_tokens: 4096
+ require_max_tokens: true
+ supports_vision: true
+ supports_function_calling: true
+ - name: meta-llama/llama-4-maverick
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.18
+ output_price: 0.6
+ supports_vision: true
+ supports_function_calling: true
+ - name: meta-llama/llama-4-scout
+ type: chat
+ max_input_tokens: 327680
+ input_price: 0.08
+ output_price: 0.3
+ supports_vision: true
+ supports_function_calling: true
+ - name: meta-llama/llama-3.3-70b-instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.12
+ output_price: 0.3
+ - name: meta-llama/llama-3.1-405b-instruct
+ type: chat
+ max_input_tokens: 32768
+ input_price: 0.8
+ output_price: 0.8
+ supports_function_calling: true
+ - name: meta-llama/llama-3.1-70b-instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.12
+ output_price: 0.3
+ supports_function_calling: true
+ - name: meta-llama/llama-3.1-8b-instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.02
+ output_price: 0.05
+ - name: meta-llama/llama-3.2-90b-vision-instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.9
+ output_price: 0.9
+ supports_vision: true
+ - name: meta-llama/llama-3.2-11b-vision-instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.055
+ output_price: 0.055
+ supports_vision: true
+ - name: mistralai/mistral-large-2411
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 6.0
+ supports_function_calling: true
+ - name: mistralai/mistral-small-3.1-24b-instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.1
+ output_price: 0.3
+ - name: mistralai/codestral-2501
+ type: chat
+ max_input_tokens: 256000
+ input_price: 0.3
+ output_price: 0.9
+ supports_function_calling: true
+ - name: mistralai/ministral-8b
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.1
+ output_price: 0.1
+ supports_function_calling: true
+ - name: mistralai/mistral-nemo
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.035
+ output_price: 0.08
+ supports_function_calling: true
+ - name: mistralai/pixtral-large-2411
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 6.0
+ supports_vision: true
+ - name: mistralai/pixtral-12b
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.1
+ output_price: 0.1
+ supports_vision: true
+ - name: ai21/jamba-1.6-large
+ type: chat
+ max_input_tokens: 256000
+ input_price: 2.0
+ output_price: 8.0
+ supports_function_calling: true
+ - name: ai21/jamba-1.6-mini
+ type: chat
+ max_input_tokens: 256000
+ input_price: 0.2
+ output_price: 0.4
+ supports_function_calling: true
+ - name: cohere/command-a
+ type: chat
+ max_input_tokens: 256000
+ input_price: 2.5
+ output_price: 10.0
+ supports_function_calling: true
+ - name: cohere/command-r-plus-08-2024
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.5
+ output_price: 10.0
+ supports_function_calling: true
+ - name: cohere/command-r-08-2024
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.15
+ output_price: 0.6
+ supports_function_calling: true
+ - name: cohere/command-r7b-12-2024
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.0375
+ output_price: 0.15
+ max_output_tokens: 4096
+ - name: deepseek/deepseek-chat-v3-0324
+ type: chat
+ max_input_tokens: 64000
+ input_price: 0.27
+ output_price: 1.1
+ supports_function_calling: true
+ - name: deepseek/deepseek-r1
+ type: chat
+ max_input_tokens: 163840
+ input_price: 0.55
+ output_price: 2.19
+ patch:
+ body:
+ include_reasoning: true
+ - name: qwen/qwen-max
+ type: chat
+ max_input_tokens: 32768
+ input_price: 1.6
+ output_price: 6.4
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: qwen/qwen-plus
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.4
+ output_price: 1.2
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: qwen/qwen-turbo
+ type: chat
+ max_input_tokens: 1000000
+ input_price: 0.05
+ output_price: 0.2
+ max_output_tokens: 8192
+ supports_function_calling: true
+ - name: qwen/qwen-vl-plus
+ type: chat
+ max_input_tokens: 7500
+ input_price: 0.21
+ output_price: 0.63
+ supports_vision: true
+ - name: qwen/qwq-32b
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.29
+ output_price: 0.39
+ - name: qwen/qwen-2.5-72b-instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.35
+ output_price: 0.4
+ supports_function_calling: true
+ - name: qwen/qwen2.5-vl-72b-instruct
+ type: chat
+ max_input_tokens: 32000
+ input_price: 0.7
+ output_price: 0.7
+ supports_vision: true
+ - name: qwen/qwen-2.5-coder-32b-instruct
+ type: chat
+ max_input_tokens: 32768
+ input_price: 0.18
+ output_price: 0.18
+ - name: x-ai/grok-3-beta
+ type: chat
+ max_input_tokens: 131072
+ input_price: 3.0
+ output_price: 15.0
+ supports_function_calling: true
+ - name: x-ai/grok-3-mini-beta
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.3
+ output_price: 0.5
+ - name: x-ai/grok-2-1212
+ type: chat
+ max_input_tokens: 131072
+ input_price: 2.0
+ output_price: 10.0
+ supports_function_calling: true
+ - name: x-ai/grok-2-vision-1212
+ type: chat
+ max_input_tokens: 32768
+ input_price: 2.0
+ output_price: 10.0
+ supports_vision: true
+ supports_function_calling: true
+ - name: amazon/nova-pro-v1
+ type: chat
+ max_input_tokens: 300000
+ input_price: 0.8
+ output_price: 3.2
+ max_output_tokens: 5120
+ supports_vision: true
+ - name: amazon/nova-lite-v1
+ type: chat
+ max_input_tokens: 300000
+ input_price: 0.06
+ output_price: 0.24
+ max_output_tokens: 5120
+ supports_vision: true
+ - name: amazon/nova-micro-v1
+ type: chat
+ max_input_tokens: 128000
+ input_price: 0.035
+ output_price: 0.14
+ max_output_tokens: 5120
+ - name: perplexity/sonar-pro
+ type: chat
+ max_input_tokens: 200000
+ input_price: 3.0
+ output_price: 15.0
+ - name: perplexity/sonar
+ type: chat
+ max_input_tokens: 127072
+ input_price: 1.0
+ output_price: 1.0
+ - name: perplexity/sonar-reasoning-pro
+ type: chat
+ max_input_tokens: 128000
+ input_price: 2.0
+ output_price: 8.0
+ patch:
+ body:
+ include_reasoning: true
+ - name: perplexity/sonar-reasoning
+ type: chat
+ max_input_tokens: 127000
+ input_price: 1.0
+ output_price: 5.0
+ patch:
+ body:
+ include_reasoning: true
+ - name: perplexity/sonar-deep-research
+ type: chat
+ max_input_tokens: 200000
+ input_price: 2.0
+ output_price: 8.0
+ patch:
+ body:
+ include_reasoning: true
+ - name: perplexity/r1-1776
+ type: chat
+ max_input_tokens: 127000
+ input_price: 2.0
+ output_price: 8.0
+ patch:
+ body:
+ include_reasoning: true
+ - name: minimax/minimax-01
+ type: chat
+ max_input_tokens: 1000192
+ input_price: 0.2
+ output_price: 1.1
+- provider: github
+ models:
+ - name: gpt-4o
+ type: chat
+ max_input_tokens: 128000
+ supports_function_calling: true
+ - name: gpt-4o-mini
+ type: chat
+ max_input_tokens: 128000
+ supports_function_calling: true
+ - name: o3-mini
+ type: chat
+ max_input_tokens: 200000
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: o3-mini-high
+ type: chat
+ real_name: o3-mini
+ max_input_tokens: 200000
+ patch:
+ body:
+ reasoning_effort: high
+ max_tokens: null
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: o1
+ type: chat
+ max_input_tokens: 200000
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ supports_vision: true
+ supports_function_calling: true
+ system_prompt_prefix: Formatting re-enabled
+ - name: o1-preview
+ type: chat
+ max_input_tokens: 128000
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ no_stream: true
+ no_system_message: true
+ - name: o1-mini
+ type: chat
+ max_input_tokens: 128000
+ patch:
+ body:
+ max_tokens: null
+ temperature: null
+ top_p: null
+ no_stream: true
+ no_system_message: true
+ - name: text-embedding-3-large
+ type: embedding
+ max_tokens_per_chunk: 8191
+ default_chunk_size: 2000
+ max_batch_size: 100
+ - name: text-embedding-3-small
+ type: embedding
+ max_tokens_per_chunk: 8191
+ default_chunk_size: 2000
+ max_batch_size: 100
+ - name: llama-4-maverick-17b-128e-instruct-fp8
+ type: chat
+ max_input_tokens: 1048576
+ supports_vision: true
+ - name: llama-4-scout-17b-16e-instruct
+ type: chat
+ max_input_tokens: 327680
+ supports_vision: true
+ - name: llama-3.3-70b-instruct
+ type: chat
+ max_input_tokens: 131072
+ - name: meta-llama-3.1-405b-instruct
+ type: chat
+ max_input_tokens: 131072
+ - name: meta-llama-3.1-70b-instruct
+ type: chat
+ max_input_tokens: 131072
+ - name: meta-llama-3.1-8b-instruct
+ type: chat
+ max_input_tokens: 131072
+ - name: llama-3.2-90b-vision-instruct
+ type: chat
+ max_input_tokens: 131072
+ supports_vision: true
+ - name: llama-3.2-11b-vision-instruct
+ type: chat
+ max_input_tokens: 131072
+ supports_vision: true
+ - name: mistral-large-2411
+ type: chat
+ max_input_tokens: 128000
+ supports_function_calling: true
+ - name: mistral-small-2503
+ type: chat
+ max_input_tokens: 128000
+ supports_function_calling: true
+ - name: codestral-2501
+ type: chat
+ max_input_tokens: 256000
+ supports_function_calling: true
+ - name: mistral-nemo
+ type: chat
+ max_input_tokens: 128000
+ supports_function_calling: true
+ - name: cohere-command-r-plus-08-2024
+ type: chat
+ max_input_tokens: 128000
+ supports_function_calling: true
+ - name: cohere-command-r-08-2024
+ type: chat
+ max_input_tokens: 128000
+ supports_function_calling: true
+ - name: cohere-embed-v3-english
+ type: embedding
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 96
+ - name: cohere-embed-v3-multilingual
+ type: embedding
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 96
+ - name: ai21-jamba-1.5-large
+ type: chat
+ max_input_tokens: 256000
+ supports_function_calling: true
+ - name: ai21-jamba-1.5-mini
+ type: chat
+ max_input_tokens: 256000
+ supports_function_calling: true
+ - name: deepseek-r1
+ type: chat
+ max_input_tokens: 163840
+ - name: deepseek-v3-0324
+ type: chat
+ max_input_tokens: 163840
+ - name: phi-4
+ type: chat
+ max_input_tokens: 16384
+ - name: phi-4-mini-instruct
+ type: chat
+ max_input_tokens: 128000
+- provider: deepinfra
+ models:
+ - name: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
+ type: chat
+ max_input_tokens: 1048576
+ input_price: 0.18
+ output_price: 0.6
+ supports_vision: true
+ - name: meta-llama/Llama-4-Scout-17B-16E-Instruct
+ type: chat
+ max_input_tokens: 327680
+ input_price: 0.08
+ output_price: 0.3
+ supports_vision: true
+ - name: meta-llama/Llama-3.3-70B-Instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.23
+ output_price: 0.4
+ - name: meta-llama/Meta-Llama-3.1-405B-Instruct
+ type: chat
+ max_input_tokens: 32768
+ input_price: 0.8
+ output_price: 0.8
+ supports_function_calling: true
+ - name: meta-llama/Meta-Llama-3.1-70B-Instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.23
+ output_price: 0.4
+ supports_function_calling: true
+ - name: meta-llama/Meta-Llama-3.1-8B-Instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.03
+ output_price: 0.05
+ supports_function_calling: true
+ - name: meta-llama/Llama-3.2-90B-Vision-Instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.35
+ output_price: 0.4
+ - name: meta-llama/Llama-3.2-11B-Vision-Instruct
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.055
+ output_price: 0.055
+ - name: Qwen/Qwen2.5-72B-Instruct
+ type: chat
+ max_input_tokens: 32768
+ input_price: 0.23
+ output_price: 0.4
+ supports_function_calling: true
+ - name: Qwen/QwQ-32B
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.12
+ output_price: 0.18
+ - name: Qwen/Qwen2.5-Coder-32B-Instruct
+ type: chat
+ max_input_tokens: 32768
+ input_price: 0.07
+ output_price: 0.16
+ - name: deepseek-ai/DeepSeek-V3-0324
+ type: chat
+ max_input_tokens: 163840
+ input_price: 0.4
+ output_price: 0.89
+ - name: deepseek-ai/DeepSeek-R1
+ type: chat
+ max_input_tokens: 65536
+ input_price: 0.75
+ output_price: 2.4
+ - name: google/gemma-3-27b-it
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.1
+ output_price: 0.2
+ - name: mistralai/Mistral-Small-24B-Instruct-2501
+ type: chat
+ max_input_tokens: 32768
+ input_price: 0.07
+ output_price: 0.14
+ - name: mistralai/Mistral-Nemo-Instruct-2407
+ type: chat
+ max_input_tokens: 131072
+ input_price: 0.035
+ output_price: 0.08
+ - name: BAAI/bge-large-en-v1.5
+ type: embedding
+ input_price: 0.01
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 100
+ - name: BAAI/bge-m3
+ type: embedding
+ input_price: 0.01
+ max_tokens_per_chunk: 8192
+ default_chunk_size: 2000
+ max_batch_size: 100
+ - name: intfloat/e5-large-v2
+ type: embedding
+ input_price: 0.01
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 100
+ - name: intfloat/multilingual-e5-large
+ type: embedding
+ input_price: 0.01
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 100
+ - name: thenlper/gte-large
+ type: embedding
+ input_price: 0.01
+ max_tokens_per_chunk: 512
+ default_chunk_size: 1000
+ max_batch_size: 100
+- provider: jina
+ models:
+ - name: jina-embeddings-v3
+ type: embedding
+ input_price: 0.0
+ max_tokens_per_chunk: 8192
+ default_chunk_size: 2000
+ max_batch_size: 100
+ - name: jina-colbert-v2
+ type: embedding
+ input_price: 0.0
+ max_tokens_per_chunk: 8192
+ default_chunk_size: 1500
+ max_batch_size: 100
+ - name: jina-clip-v2
+ type: embedding
+ input_price: 0.0
+ max_tokens_per_chunk: 8192
+ default_chunk_size: 1500
+ max_batch_size: 100
+ - name: jina-colbert-v2
+ type: reranker
+ max_input_tokens: 8192
+ input_price: 0.0
+ - name: jina-reranker-v2-base-multilingual
+ type: reranker
+ max_input_tokens: 8192
+ input_price: 0.0
+- provider: voyageai
+ models:
+ - name: voyage-3-large
+ type: embedding
+ max_input_tokens: 120000
+ input_price: 0.18
+ max_tokens_per_chunk: 32000
+ default_chunk_size: 2000
+ max_batch_size: 128
+ - name: voyage-3
+ type: embedding
+ max_input_tokens: 320000
+ input_price: 0.06
+ max_tokens_per_chunk: 32000
+ default_chunk_size: 2000
+ max_batch_size: 128
+ - name: voyage-3-lite
+ type: embedding
+ max_input_tokens: 1000000
+ input_price: 0.02
+ max_tokens_per_chunk: 32000
+ default_chunk_size: 1000
+ max_batch_size: 128
+ - name: rerank-2
+ type: reranker
+ max_input_tokens: 16000
+ input_price: 0.05
+ - name: rerank-2-lite
+ type: reranker
+ max_input_tokens: 8000
+ input_price: 0.02
home/common/dev/aichat.yaml
@@ -1,11 +1,11 @@
-model: gemini:gemini-2.5-flash-preview-04-17
+model: gemini:gemini-2.5-flash-lite-preview-09-2025
wrap: 150
save_session: true
clients:
- type: gemini
name: gemini
api_base: https://generativelanguage.googleapis.com/v1beta
- api_key: "passage::ai/gemini/api_key"
+ api_key: "passage::redhat/google/osp/vdeemest-api-key"
patch:
chat_completions:
".*":
@@ -28,7 +28,7 @@ clients:
- type: openai-compatible
name: groq
api_base: https://api.groq.com/openai/v1
- api_key: "passage::ai/groq/api_key"
+ api_key: "passage::ai/groq/wakasu"
# See https://platform.deepseek.com/api-docs/
- type: openai-compatible
@@ -41,11 +41,11 @@ clients:
api_base: https://openrouter.ai/api/v1
api_key: "passage::ai/openroute/api_key"
- # - type: openai-compatible
- # name: redhat-maas-deepseek
- # api_base: https://deepseek-r1-distill-qwen-14b-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com/v1
- # api_key: "passage::ai/gemini/api_key"
- # models:
- # - name: deepseek-r1-distill-qwen-14b
- # description: DeepSeek R1 Distill Qwen 14B
+ - type: openai-compatible
+ name: redhat-maas-deepseek
+ api_base: https://deepseek-r1-distill-qwen-14b-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com/v1
+ api_key: "passage::redhat/maas.deepseek.distill.api.key"
+ models:
+ - name: deepseek-r1-distill-qwen-14b
+ description: DeepSeek R1 Distill Qwen 14B
home/common/dev/genconf.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env -S uv run --script
+# /// script
+# dependencies = [
+# "requests",
+# "PyYAML",
+# "google-generativeai",
+#
+# ]
+# ///
+
+import os.path
+import subprocess
+import socket
+import sys
+from typing import Any, Dict, List, Optional
+
+import requests
+import yaml # pip install pyyaml types-pyyaml
+import urllib.parse as urlparse
+import google.generativeai as genai
+
+
def debug(msg: str) -> None:
    """Write a ``[DEBUG]``-prefixed diagnostic line to standard error.

    Diagnostics are sent to stderr; ``main`` in this script prints the
    generated YAML on stdout, so the two streams stay separate.
    """
    line = f"[DEBUG] {msg}"
    print(line, file=sys.stderr)
+
+
def check_running(api_base, timeout=0.5) -> bool:
    """Return whether a TCP connection to the endpoint of ``api_base`` succeeds.

    Args:
        api_base: Base URL of the API endpoint. When the URL carries no
            explicit port, 80 is used for ``http`` and 443 for every other
            scheme.
        timeout: Socket timeout in seconds for the connection attempt.

    Returns:
        ``True`` when ``connect_ex`` reports success (0), ``False`` otherwise.

    Raises:
        Exception: Any error raised while creating or connecting the socket
            is logged through ``debug`` and re-raised unchanged.
    """
    url = urlparse.urlparse(api_base)
    port = url.port or (80 if url.scheme == "http" else 443)
    debug(f"Checking if {url.hostname}:{port} is running (api_base={api_base})")
    try:
        # The context manager closes the socket on every path, including
        # when settimeout/connect_ex raise.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(timeout)
            result = sock.connect_ex((url.hostname, port))
        debug(f"Socket connect_ex result for {url.hostname}:{port}: {result}")
        return result == 0
    except Exception as e:
        debug(f"Exception in check_running: {e}")
        # Bare raise keeps the original traceback intact.
        raise
+
+
def load_config(config_path: str) -> Dict[str, Any]:
    """Parse the YAML file at ``config_path`` and return its content.

    The file is decoded as UTF-8 regardless of the process locale.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The parsed document. An empty document (for which ``yaml.safe_load``
        returns ``None``) is returned as an empty dict so that callers can
        always use mapping operations on the result.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    debug(f"Loading config from {config_path}")
    with open(config_path, "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)
    debug(f"Loaded config: {config}")
    return {} if config is None else config
+
+
def get_models(api_base: str, api_key_config: str | None) -> List[Dict[str, str]]:
    """Query ``<api_base>/models`` and return the advertised models.

    Args:
        api_base: Base URL of the API; a trailing slash is added when missing.
        api_key_config: Either a literal API key, a ``passage::<path>``
            reference resolved through ``get_passageword``, or ``None`` /
            empty for an unauthenticated request.

    Returns:
        One ``{"name": id, "description": id}`` dict per entry of the
        response's ``data`` list. An empty list is returned when the request
        or the decoding of its response fails; the error is reported on
        stderr.
    """
    # A passage reference is only a lookup path and may be logged verbatim;
    # anything else is a literal secret and must never reach the log.
    if api_key_config and not api_key_config.startswith("passage::"):
        logged_key = "***"
    else:
        logged_key = api_key_config
    debug(f"get_models called with api_base={api_base}, api_key_config={logged_key}")

    actual_api_key = None
    if api_key_config:
        if api_key_config.startswith("passage::"):
            passage_path = api_key_config.split("::", 1)[1]
            debug(f"Retrieving API key from passage at {passage_path}")
            actual_api_key = get_passageword(passage_path)
            if not actual_api_key:
                print(
                    f"Could not retrieve API key from passage for path: {passage_path}",
                    file=sys.stderr,
                )
                debug(f"Failed to retrieve API key from passage for {passage_path}")
                # Proceed without a key; the request below may then fail
                # and is handled by the except clause.
        else:
            actual_api_key = api_key_config  # Use the key directly if not a passage path
            debug("Using API key directly from config")

    headers = {}
    if actual_api_key:
        headers["Authorization"] = f"Bearer {actual_api_key}"
        debug("Authorization header set")

    # Ensure the URL is properly formatted
    if not api_base.endswith("/"):
        api_base = api_base + "/"
        debug(f"api_base adjusted to {api_base}")

    models_url = f"{api_base}models"
    debug(f"Querying models endpoint: {models_url}")

    try:
        response = requests.get(models_url, headers=headers, timeout=10)
        debug(f"HTTP GET {models_url} status_code={response.status_code}")
        response.raise_for_status()
        data = response.json()
        debug(f"Response JSON: {data}")

        # Extract models from response
        models = data.get("data", [])
        debug(f"Extracted models: {models}")
        return [
            {"name": model.get("id"), "description": model.get("id")}
            for model in models
        ]
    except Exception as e:
        # Deliberately broad: a failing endpoint must not abort the caller,
        # it simply yields no models.
        print(f"Error querying {api_base}: {e}", file=sys.stderr)
        debug(f"Exception in get_models: {e}")
        return []
+
+
def get_gemini_models(api_key: Optional[str]) -> List[Dict[str, str]]:
    """List the models whose name contains "gemini" via ``genai.list_models``.

    Args:
        api_key: Key passed to ``genai.configure``; when falsy nothing is
            queried.

    Returns:
        One dict per matching model with ``name`` (the last ``/``-separated
        segment of the model name) and ``description``. An empty list is
        returned when no key is given or when any step of the query raises.
    """
    debug(f"get_gemini_models called with api_key={'***' if api_key else None}")
    if not api_key:
        print("Error: API key is required for Google Gemini API", file=sys.stderr)
        debug("No API key provided to get_gemini_models")
        return []

    gemini_models = []
    try:
        # Configure the client library with the provided key
        debug("Configuring genai with provided API key")
        genai.configure(api_key=api_key)

        # Ask the API for everything it offers
        debug("Listing models from genai")
        models_list = genai.list_models()
        debug(f"Models list: {models_list}")

        # Keep only the entries whose name mentions "gemini"
        for model in models_list:
            if "gemini" not in model.name.lower():
                continue
            short_name = model.name.split("/")[-1]
            gemini_models.append(
                {"name": short_name, "description": model.description}
            )
        debug(f"Filtered Gemini models: {gemini_models}")
    except Exception as e:
        print(f"Error querying Google Gemini API: {str(e)}", file=sys.stderr)
        debug(f"Exception in get_gemini_models: {e}")
        return []

    return gemini_models
+
+
def get_passageword(passage_path: str) -> str | None:
    """Return the first output line of ``passage show <passage_path>``.

    Args:
        passage_path: Entry path handed to the ``passage`` command.

    Returns:
        The first line of the command's stdout, or ``None`` when the
        ``passage`` executable is missing, the command exits with a non-zero
        status, or it prints nothing. Each failure is reported on stderr.
    """
    debug(f"get_passageword called for passage_path={passage_path}")
    command = ["passage", "show", passage_path]
    try:
        completed = subprocess.run(
            command, capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Error running passage show {passage_path}: {e.stderr}", file=sys.stderr)
        debug(f"subprocess.CalledProcessError in get_passageword: {e}")
        return None
    except FileNotFoundError:
        print(
            "Error: 'passage' command not found. Is passage installed and in your PATH?",
            file=sys.stderr,
        )
        debug("'passage' command not found")
        return None

    output_lines = completed.stdout.splitlines()
    if not output_lines:
        # Nothing was printed, so there is no first line to return
        print(f"Error: 'passage show {passage_path}' returned empty output.", file=sys.stderr)
        debug(f"IndexError: passage show {passage_path} returned empty output")
        return None

    # Only the first line is used; the trailing newline is already stripped
    passageword = output_lines[0]
    debug(f"Passageword retrieved from passage for {passage_path}")
    return passageword
+
+
def main():
    """Expand ``~/.config/aichat/config.yaml.in`` and print it as YAML on stdout.

    For every entry under ``clients`` (each entry is shallow-copied first):

    * ``api_key`` values starting with ``passage::`` are replaced by the
      result of ``get_passageword``; any other value is kept unchanged.
    * An ``openai-compatible`` client without models is dropped when
      ``check_running`` fails for its ``api_base``; otherwise its ``models``
      become the result of ``get_models`` (or an empty list).
    * A ``gemini`` client without models gets the result of
      ``get_gemini_models`` (or an empty list).
    """
    config_path = os.path.expanduser("~/.config/aichat/config.yaml.in")
    debug(f"main: config_path={config_path}")
    config_data = load_config(config_path)

    if "clients" in config_data:
        debug(f"main: found {len(config_data.get('clients', []))} clients")
        resolved_clients = []
        for source_client in config_data.get("clients", []):
            # Work on a copy so the loaded entry itself is left untouched
            client = source_client.copy()
            debug(f"Processing client: {client}")

            # Resolve the API key, going through passage when requested
            key_setting = client.get("api_key")
            if key_setting and key_setting.startswith("passage::"):
                passage_path = key_setting.split("::", 1)[1]
                debug(f"main: retrieving api_key from passage for {passage_path}")
                client["api_key"] = get_passageword(passage_path)
            else:
                client["api_key"] = key_setting
            debug("main: actual_api_key set for client")

            client_type = client.get("type")
            # Models are only discovered when none are configured explicitly
            models_missing = not client.get("models")

            if client_type == "openai-compatible" and models_missing:
                api_base = client.get("api_base")
                api_key = client.get("api_key")
                debug(
                    f"main: openai-compatible client, api_base={api_base}, api_key={'***' if api_key else None}"
                )
                if not api_base:
                    # Nothing to query without a base URL
                    client["models"] = []
                    debug(
                        "main: openai-compatible client with no api_base, set empty models"
                    )
                elif not check_running(api_base):
                    # Endpoint is not accepting connections: omit the client
                    debug(f"main: {api_base} not running, skipping client")
                    continue
                else:
                    fetched_models = get_models(api_base, api_key)
                    if fetched_models:
                        client["models"] = fetched_models
                        debug("main: models fetched and set for client")
                    else:
                        client["models"] = []
                        debug("main: no models fetched, set empty list")
            elif client_type == "gemini" and models_missing:
                api_key = client.get("api_key")
                debug(f"main: gemini client, api_key={'***' if api_key else None}")
                fetched_models = get_gemini_models(api_key)
                if fetched_models:
                    client["models"] = fetched_models
                    debug("main: gemini models fetched and set for client")
                else:
                    client["models"] = []
                    debug("main: no gemini models fetched, set empty list")

            resolved_clients.append(client)
            debug("main: client processed and added to updated_clients")

        # Swap in the processed entries
        config_data["clients"] = resolved_clients
        debug("main: updated_clients set in config_data")

    # Emit the whole (possibly updated) configuration, keeping key order
    debug("main: dumping config_data as YAML")
    print(yaml.dump(config_data, default_flow_style=False, sort_keys=False))


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()