diff --git a/models/moonshot/kimi-k2.6-subscription.yaml b/models/moonshot/kimi-k2.6-subscription.yaml new file mode 100644 index 0000000..0a407fe --- /dev/null +++ b/models/moonshot/kimi-k2.6-subscription.yaml @@ -0,0 +1,32 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: moonshot +authType: subscription +model: kimi-k2.6 +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the chat completion. + range: + min: 1 + group: generation_length + - path: thinking.type + type: enum + label: Thinking mode + description: >- + Controls whether Kimi reasons step by step before answering. Thinking is enabled by default; + set disabled to respond directly. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/moonshot/kimi-k2.7-code-highspeed-subscription.yaml b/models/moonshot/kimi-k2.7-code-highspeed-subscription.yaml new file mode 100644 index 0000000..4e0ea36 --- /dev/null +++ b/models/moonshot/kimi-k2.7-code-highspeed-subscription.yaml @@ -0,0 +1,21 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: moonshot +authType: subscription +model: kimi-k2.7-code-highspeed +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the chat completion, covering both thinking and the final answer. + range: + min: 1 + group: generation_length + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/moonshot/kimi-k2.7-code-subscription.yaml b/models/moonshot/kimi-k2.7-code-subscription.yaml new file mode 100644 index 0000000..8cb377e --- /dev/null +++ b/models/moonshot/kimi-k2.7-code-subscription.yaml @@ -0,0 +1,21 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: moonshot +authType: subscription +model: kimi-k2.7-code +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the chat completion, covering both thinking and the final answer. + range: + min: 1 + group: generation_length + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/nvidia/nemotron-3-ultra-subscription.yaml b/models/nvidia/nemotron-3-ultra-subscription.yaml new file mode 100644 index 0000000..ba1e2dc --- /dev/null +++ b/models/nvidia/nemotron-3-ultra-subscription.yaml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: subscription +model: nemotron-3-ultra +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning. + default: high + values: + - none + - medium + - high + group: reasoning + - path: reasoning_budget + type: integer + label: Reasoning budget + description: Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement. + default: 16384 + range: + min: -1 + max: 32768 + group: reasoning + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/xiaomi/mimo-v2.5-pro.yaml b/models/xiaomi/mimo-v2.5-pro.yaml new file mode 100644 index 0000000..a1e5e2b --- /dev/null +++ b/models/xiaomi/mimo-v2.5-pro.yaml @@ -0,0 +1,86 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: xiaomi +authType: api_key +model: mimo-v2.5-pro +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate, covering both the thinking trace and the final answer. + range: + min: 1 + group: generation_length + - path: thinking.type + type: enum + label: Thinking mode + description: >- + Controls whether MiMo reasons step by step before answering. Enabled by default; + set disabled to respond directly. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: temperature + type: number + label: Temperature + description: >- + Controls randomness. Lower values are more focused; higher values are more varied. + Ignored while thinking is enabled, where it is forced to 1.0. + default: 1 + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + applicability: + except: + thinking.type: enabled + - path: top_p + type: number + label: Top P + description: >- + Nucleus sampling cutoff. Ignored while thinking is enabled, where it is forced to 0.95. + default: 0.95 + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + thinking.type: enabled + - path: presence_penalty + type: number + label: Presence penalty + description: Penalizes tokens that have already appeared, encouraging the model to introduce new topics. + default: 0 + range: + min: -2 + max: 2 + step: 0.1 + group: sampling + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes tokens in proportion to how often they have appeared, reducing verbatim repetition. + default: 0 + range: + min: -2 + max: 2 + step: 0.1 + group: sampling + - path: stop + type: string + label: Stop sequences + description: Up to a few sequences where generation stops; the stop text is not included in the output. + group: generation_length + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/xiaomi/mimo-v2.5-subscription.yaml b/models/xiaomi/mimo-v2.5-subscription.yaml new file mode 100644 index 0000000..0322f67 --- /dev/null +++ b/models/xiaomi/mimo-v2.5-subscription.yaml @@ -0,0 +1,61 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: xiaomi +authType: subscription +model: mimo-v2.5 +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate, covering both the thinking trace and the final answer. + range: + min: 1 + group: generation_length + - path: thinking.type + type: enum + label: Thinking mode + description: >- + Controls whether MiMo reasons step by step before answering. Enabled by default; + set disabled to respond directly. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: temperature + type: number + label: Temperature + description: >- + Controls randomness. Lower values are more focused; higher values are more varied. + Ignored while thinking is enabled, where it is forced to 1.0. + default: 1 + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + applicability: + except: + thinking.type: enabled + - path: top_p + type: number + label: Top P + description: >- + Nucleus sampling cutoff. Ignored while thinking is enabled, where it is forced to 0.95. + default: 0.95 + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + thinking.type: enabled + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/packages/modelparams/src/generated/data.ts b/packages/modelparams/src/generated/data.ts index c10a067..6241116 100644 --- a/packages/modelparams/src/generated/data.ts +++ b/packages/modelparams/src/generated/data.ts @@ -9638,6 +9638,105 @@ export const CATALOG = [ } ] }, + { + "provider": "moonshot", + "authType": "subscription", + "model": "kimi-k2.6", + "params": [ + { + "path": "max_completion_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate in the chat completion.", + "group": "generation_length", + "type": "integer", + "range": { + "min": 1 + } + }, + { + "path": "thinking.type", + "label": "Thinking mode", + "description": "Controls whether Kimi reasons step by step before answering. Thinking is enabled by default; set disabled to respond directly.", + "group": "reasoning", + "type": "enum", + "default": "enabled", + "values": [ + "enabled", + "disabled" + ] + }, + { + "path": "response_format.type", + "label": "Response format", + "description": "Forces the response into plain text or a JSON object.", + "group": "output_format", + "type": "enum", + "default": "text", + "values": [ + "text", + "json_object" + ] + } + ] + }, + { + "provider": "moonshot", + "authType": "subscription", + "model": "kimi-k2.7-code-highspeed", + "params": [ + { + "path": "max_completion_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate in the chat completion, covering both thinking and the final answer.", + "group": "generation_length", + "type": "integer", + "range": { + "min": 1 + } + }, + { + "path": "response_format.type", + "label": "Response format", + "description": "Forces the response into plain text or a JSON object.", + "group": "output_format", + "type": "enum", + "default": "text", + "values": [ + "text", + "json_object" + ] + } + ] + }, + { + "provider": "moonshot", + "authType": "subscription", + "model": "kimi-k2.7-code", + "params": [ + { + "path": "max_completion_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate in the chat completion, covering both thinking and the final answer.", + "group": "generation_length", + "type": "integer", + "range": { + "min": 1 + } + }, + { + "path": "response_format.type", + "label": "Response format", + "description": "Forces the response into plain text or a JSON object.", + "group": "output_format", + "type": "enum", + "default": "text", + "values": [ + "text", + "json_object" + ] + } + ] + }, { "provider": "moonshot", "authType": "api_key", @@ -10632,6 +10731,79 @@ export const CATALOG = [ } ] }, + { + "provider": "nvidia", + "authType": "subscription", + "model": "nemotron-3-ultra", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.95, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 16384, + "range": { + "min": 1, + "max": 32768 + } + }, + { + "path": "reasoning_effort", + "label": "Reasoning effort", + "description": "Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning.", + "group": "reasoning", + "type": "enum", + "default": "high", + "values": [ + "none", + "medium", + "high" + ] + }, + { + "path": "reasoning_budget", + "label": "Reasoning budget", + "description": "Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement.", + "group": "reasoning", + "type": "integer", + "default": 16384, + "range": { + "min": -1, + "max": 32768 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, { "provider": "nvidia", "authType": "api_key", @@ -13424,6 +13596,193 @@ export const CATALOG = [ } ] }, + { + "provider": "xiaomi", + "authType": "api_key", + "model": "mimo-v2.5-pro", + "params": [ + { + "path": "max_completion_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate, covering both the thinking trace and the final answer.", + "group": "generation_length", + "type": "integer", + "range": { + "min": 1 + } + }, + { + "path": "thinking.type", + "label": "Thinking mode", + "description": "Controls whether MiMo reasons step by step before answering. Enabled by default; set disabled to respond directly.", + "group": "reasoning", + "type": "enum", + "default": "enabled", + "values": [ + "enabled", + "disabled" + ] + }, + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values are more focused; higher values are more varied. Ignored while thinking is enabled, where it is forced to 1.0.", + "group": "sampling", + "applicability": { + "except": { + "thinking.type": "enabled" + } + }, + "type": "number", + "default": 1, + "range": { + "min": 0, + "max": 2, + "step": 0.1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Nucleus sampling cutoff. Ignored while thinking is enabled, where it is forced to 0.95.", + "group": "sampling", + "applicability": { + "except": { + "thinking.type": "enabled" + } + }, + "type": "number", + "default": 0.95, + "range": { + "min": 0, + "max": 1, + "step": 0.01 + } + }, + { + "path": "presence_penalty", + "label": "Presence penalty", + "description": "Penalizes tokens that have already appeared, encouraging the model to introduce new topics.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2, + "step": 0.1 + } + }, + { + "path": "frequency_penalty", + "label": "Frequency penalty", + "description": "Penalizes tokens in proportion to how often they have appeared, reducing verbatim repetition.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2, + "step": 0.1 + } + }, + { + "path": "stop", + "label": "Stop sequences", + "description": "Up to a few sequences where generation stops; the stop text is not included in the output.", + "group": "generation_length", + "type": "string" + }, + { + "path": "response_format.type", + "label": "Response format", + "description": "Forces the response into plain text or a JSON object.", + "group": "output_format", + "type": "enum", + "default": "text", + "values": [ + "text", + "json_object" + ] + } + ] + }, + { + "provider": "xiaomi", + "authType": "subscription", + "model": "mimo-v2.5", + "params": [ + { + "path": "max_completion_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate, covering both the thinking trace and the final answer.", + "group": "generation_length", + "type": "integer", + "range": { + "min": 1 + } + }, + { + "path": "thinking.type", + "label": "Thinking mode", + "description": "Controls whether MiMo reasons step by step before answering. Enabled by default; set disabled to respond directly.", + "group": "reasoning", + "type": "enum", + "default": "enabled", + "values": [ + "enabled", + "disabled" + ] + }, + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values are more focused; higher values are more varied. Ignored while thinking is enabled, where it is forced to 1.0.", + "group": "sampling", + "applicability": { + "except": { + "thinking.type": "enabled" + } + }, + "type": "number", + "default": 1, + "range": { + "min": 0, + "max": 2, + "step": 0.1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Nucleus sampling cutoff. Ignored while thinking is enabled, where it is forced to 0.95.", + "group": "sampling", + "applicability": { + "except": { + "thinking.type": "enabled" + } + }, + "type": "number", + "default": 0.95, + "range": { + "min": 0, + "max": 1, + "step": 0.01 + } + }, + { + "path": "response_format.type", + "label": "Response format", + "description": "Forces the response into plain text or a JSON object.", + "group": "output_format", + "type": "enum", + "default": "text", + "values": [ + "text", + "json_object" + ] + } + ] + }, { "provider": "z-ai", "authType": "api_key", diff --git a/packages/modelparams/src/generated/defaults.ts b/packages/modelparams/src/generated/defaults.ts index a81eeaf..d71d778 100644 --- a/packages/modelparams/src/generated/defaults.ts +++ b/packages/modelparams/src/generated/defaults.ts @@ -706,6 +706,16 @@ export const DEFAULTS = { "thinking.type": "enabled", "response_format.type": "text", }, + "moonshot/kimi-k2.6-subscription": { + "thinking.type": "enabled", + "response_format.type": "text", + }, + "moonshot/kimi-k2.7-code-highspeed-subscription": { + "response_format.type": "text", + }, + "moonshot/kimi-k2.7-code-subscription": { + "response_format.type": "text", + }, "moonshot/moonshot-v1-128k": { temperature: 0.3, top_p: 1, @@ -798,6 +808,13 @@ export const DEFAULTS = { reasoning_effort: "high", reasoning_budget: 16384, }, + "nvidia/nemotron-3-ultra-subscription": { + temperature: 1, + top_p: 0.95, + max_tokens: 16384, + reasoning_effort: "high", + reasoning_budget: 16384, + }, "nvidia/nemotron-content-safety-reasoning-4b": { temperature: 1, top_p: 1, @@ -1062,6 +1079,20 @@ export const DEFAULTS = { top_p: 0.95, "response_format.type": "text", }, + "xiaomi/mimo-v2.5-pro": { + "thinking.type": "enabled", + temperature: 1, + top_p: 0.95, + presence_penalty: 0, + frequency_penalty: 0, + "response_format.type": "text", + }, + "xiaomi/mimo-v2.5-subscription": { + "thinking.type": "enabled", + temperature: 1, + top_p: 0.95, + "response_format.type": "text", + }, "z-ai/glm-4.5": { temperature: 0.6, top_p: 0.95, diff --git a/packages/modelparams/src/generated/model-ids.ts b/packages/modelparams/src/generated/model-ids.ts index 8bc59d0..50a12e0 100644 --- a/packages/modelparams/src/generated/model-ids.ts +++ b/packages/modelparams/src/generated/model-ids.ts @@ -106,6 +106,9 @@ export const MODEL_IDS = [ "mistral/open-mistral-nemo", "moonshot/kimi-k2.5", "moonshot/kimi-k2.6", + "moonshot/kimi-k2.6-subscription", + "moonshot/kimi-k2.7-code-highspeed-subscription", + "moonshot/kimi-k2.7-code-subscription", "moonshot/moonshot-v1-128k", "moonshot/moonshot-v1-32k", "moonshot/moonshot-v1-8k", @@ -120,6 +123,7 @@ export const MODEL_IDS = [ "nvidia/nemotron-3-nano-30b-a3b", "nvidia/nemotron-3-super-120b-a12b", "nvidia/nemotron-3-ultra-550b-a55b", + "nvidia/nemotron-3-ultra-subscription", "nvidia/nemotron-content-safety-reasoning-4b", "nvidia/nemotron-mini-4b-instruct", "nvidia/riva-translate-4b-instruct-v1.1", @@ -175,6 +179,8 @@ export const MODEL_IDS = [ "xai/grok-4.3", "xai/grok-build-0.1", "xiaomi/mimo-v2.5", + "xiaomi/mimo-v2.5-pro", + "xiaomi/mimo-v2.5-subscription", "z-ai/glm-4.5", "z-ai/glm-4.5-air", "z-ai/glm-4.5-air-subscription", diff --git a/packages/modelparams/src/generated/params-by-id.ts b/packages/modelparams/src/generated/params-by-id.ts index 702e49f..064bf9e 100644 --- a/packages/modelparams/src/generated/params-by-id.ts +++ b/packages/modelparams/src/generated/params-by-id.ts @@ -886,6 +886,19 @@ export type ParamsById = { "thinking.type": "enabled" | "disabled"; "response_format.type": "text" | "json_object"; }; + "moonshot/kimi-k2.6-subscription": { + max_completion_tokens: number; + "thinking.type": "enabled" | "disabled"; + "response_format.type": "text" | "json_object"; + }; + "moonshot/kimi-k2.7-code-highspeed-subscription": { + max_completion_tokens: number; + "response_format.type": "text" | "json_object"; + }; + "moonshot/kimi-k2.7-code-subscription": { + max_completion_tokens: number; + "response_format.type": "text" | "json_object"; + }; "moonshot/moonshot-v1-128k": { max_completion_tokens: number; temperature: number; @@ -992,6 +1005,14 @@ export type ParamsById = { seed: number; stop: string; }; + "nvidia/nemotron-3-ultra-subscription": { + temperature: number; + top_p: number; + max_tokens: number; + reasoning_effort: "none" | "medium" | "high"; + reasoning_budget: number; + stop: string; + }; "nvidia/nemotron-content-safety-reasoning-4b": { temperature: number; top_p: number; @@ -1305,6 +1326,23 @@ export type ParamsById = { top_p: number; "response_format.type": "text" | "json_object"; }; + "xiaomi/mimo-v2.5-pro": { + max_completion_tokens: number; + "thinking.type": "enabled" | "disabled"; + temperature: number; + top_p: number; + presence_penalty: number; + frequency_penalty: number; + stop: string; + "response_format.type": "text" | "json_object"; + }; + "xiaomi/mimo-v2.5-subscription": { + max_completion_tokens: number; + "thinking.type": "enabled" | "disabled"; + temperature: number; + top_p: number; + "response_format.type": "text" | "json_object"; + }; "z-ai/glm-4.5": { max_tokens: number; temperature: number;