mnfst · guillaumegay13 · Jun 29, 2026 · Jun 29, 2026
diff --git a/models/moonshot/kimi-k2.6-subscription.yaml b/models/moonshot/kimi-k2.6-subscription.yaml
@@ -0,0 +1,34 @@
+# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json
+# Parameter definitions for model kimi-k2.6 (provider: moonshot, authType: subscription).
+provider: moonshot
+authType: subscription
+model: kimi-k2.6
+params:
+  # Generation length: lower bound of 1, no upper bound declared.
+  - path: max_completion_tokens
+    type: integer
+    label: Max tokens
+    description: >-
+      Maximum number of tokens to generate in the chat completion.
+    range:
+      min: 1
+    group: generation_length
+  # Reasoning toggle; default is enabled.
+  - path: thinking.type
+    type: enum
+    label: Thinking mode
+    description: >-
+      Controls whether Kimi reasons step by step before answering.
+      Thinking is enabled by default; set disabled to respond directly.
+    default: enabled
+    values: [enabled, disabled]
+    group: reasoning
+  # Output format selector; default is text.
+  - path: response_format.type
+    type: enum
+    label: Response format
+    description: >-
+      Forces the response into plain text or a JSON object.
+    default: text
+    values: [text, json_object]
+    group: output_format
diff --git a/models/moonshot/kimi-k2.7-code-highspeed-subscription.yaml b/models/moonshot/kimi-k2.7-code-highspeed-subscription.yaml
@@ -0,0 +1,25 @@
+# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json
+# Parameter definitions for model kimi-k2.7-code-highspeed (provider: moonshot, authType: subscription).
+provider: moonshot
+authType: subscription
+model: kimi-k2.7-code-highspeed
+params:
+  # Generation length: lower bound of 1, no upper bound declared.
+  - path: max_completion_tokens
+    type: integer
+    label: Max tokens
+    description: >-
+      Maximum number of tokens to generate in the chat completion,
+      covering both thinking and the final answer.
+    range:
+      min: 1
+    group: generation_length
+  # Output format selector; default is text.
+  - path: response_format.type
+    type: enum
+    label: Response format
+    description: >-
+      Forces the response into plain text or a JSON object.
+    default: text
+    values: [text, json_object]
+    group: output_format
diff --git a/models/moonshot/kimi-k2.7-code-subscription.yaml b/models/moonshot/kimi-k2.7-code-subscription.yaml
@@ -0,0 +1,25 @@
+# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json
+# Parameter definitions for model kimi-k2.7-code (provider: moonshot, authType: subscription).
+provider: moonshot
+authType: subscription
+model: kimi-k2.7-code
+params:
+  # Generation length: lower bound of 1, no upper bound declared.
+  - path: max_completion_tokens
+    type: integer
+    label: Max tokens
+    description: >-
+      Maximum number of tokens to generate in the chat completion,
+      covering both thinking and the final answer.
+    range:
+      min: 1
+    group: generation_length
+  # Output format selector; default is text.
+  - path: response_format.type
+    type: enum
+    label: Response format
+    description: >-
+      Forces the response into plain text or a JSON object.
+    default: text
+    values: [text, json_object]
+    group: output_format
diff --git a/models/nvidia/nemotron-3-ultra-subscription.yaml b/models/nvidia/nemotron-3-ultra-subscription.yaml
@@ -0,0 +1,78 @@
+# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json
+# Parameter definitions for model nemotron-3-ultra (provider: nvidia, authType: subscription).
+provider: nvidia
+authType: subscription
+model: nemotron-3-ultra
+params:
+  # Sampling controls. Both ranges are bounded below at 0 so that negative
+  # values cannot be entered.
+  - path: temperature
+    type: number
+    label: Temperature
+    description: >-
+      Controls randomness. Lower values make outputs more focused; higher
+      values make them more varied. Not recommended to modify both
+      temperature and top_p in the same call.
+    default: 1
+    range:
+      min: 0
+      max: 1
+    group: sampling
+  - path: top_p
+    type: number
+    label: Top P
+    description: >-
+      Controls nucleus sampling by limiting generation to tokens within the
+      selected cumulative probability. Not recommended to modify both
+      temperature and top_p in the same call.
+    default: 0.95
+    range:
+      min: 0
+      max: 1
+    group: sampling
+  - path: max_tokens
+    type: integer
+    label: Max tokens
+    description: >-
+      Maximum number of tokens to generate. Generation stops when this limit
+      is reached.
+    default: 16384
+    range:
+      min: 1
+      max: 32768
+    group: generation_length
+  # Reasoning controls: an effort level plus a token budget for the trace.
+  - path: reasoning_effort
+    type: enum
+    label: Reasoning effort
+    description: >-
+      Controls the reasoning mode. 'none' disables reasoning tokens, 'medium'
+      enables efficient reasoning, and 'high' enables full reasoning.
+    default: high
+    values:
+      - none
+      - medium
+      - high
+    group: reasoning
+  # -1 is a sentinel meaning "no budget enforcement", hence the range minimum.
+  - path: reasoning_budget
+    type: integer
+    label: Reasoning budget
+    description: >-
+      Maximum number of tokens the model may use for internal reasoning before
+      being forced to end the reasoning trace. Use -1 to disable budget
+      enforcement.
+    default: 16384
+    range:
+      min: -1
+      max: 32768
+    group: reasoning
+  # NOTE(review): the description allows a list of strings, but the declared
+  # type is a single string — confirm whether the schema can express a list.
+  - path: stop
+    type: string
+    label: Stop
+    description: >-
+      A string or list of strings where the API will stop generating further
+      tokens. The returned text will not contain the stop sequence.
+    group: generation_length
diff --git a/models/xiaomi/mimo-v2.5-pro.yaml b/models/xiaomi/mimo-v2.5-pro.yaml
@@ -0,0 +1,101 @@
+# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json
+# Parameter definitions for model mimo-v2.5-pro (provider: xiaomi, authType: api_key).
+provider: xiaomi
+authType: api_key
+model: mimo-v2.5-pro
+params:
+  # Generation length: lower bound of 1, no upper bound declared.
+  - path: max_completion_tokens
+    type: integer
+    label: Max tokens
+    description: >-
+      Maximum number of tokens to generate, covering both the thinking trace
+      and the final answer.
+    range:
+      min: 1
+    group: generation_length
+  # Reasoning toggle; default is enabled.
+  - path: thinking.type
+    type: enum
+    label: Thinking mode
+    description: >-
+      Controls whether MiMo reasons step by step before answering.
+      Enabled by default; set disabled to respond directly.
+    default: enabled
+    values: [enabled, disabled]
+    group: reasoning
+  # temperature and top_p each carry an applicability `except` rule keyed on
+  # thinking.type: enabled, matching the "ignored while thinking" wording.
+  - path: temperature
+    type: number
+    label: Temperature
+    description: >-
+      Controls randomness. Lower values are more focused; higher values
+      are more varied. Ignored while thinking is enabled, where it is
+      forced to 1.0.
+    default: 1
+    range:
+      min: 0
+      max: 2
+      step: 0.1
+    group: sampling
+    applicability:
+      except:
+        thinking.type: enabled
+  - path: top_p
+    type: number
+    label: Top P
+    description: >-
+      Nucleus sampling cutoff. Ignored while thinking is enabled, where it is
+      forced to 0.95.
+    default: 0.95
+    range:
+      min: 0
+      max: 1
+      step: 0.01
+    group: sampling
+    applicability:
+      except:
+        thinking.type: enabled
+  # Repetition penalties; both span -2 to 2 and default to 0.
+  - path: presence_penalty
+    type: number
+    label: Presence penalty
+    description: >-
+      Penalizes tokens that have already appeared, encouraging the model to
+      introduce new topics.
+    default: 0
+    range:
+      min: -2
+      max: 2
+      step: 0.1
+    group: sampling
+  - path: frequency_penalty
+    type: number
+    label: Frequency penalty
+    description: >-
+      Penalizes tokens in proportion to how often they have appeared, reducing
+      verbatim repetition.
+    default: 0
+    range:
+      min: -2
+      max: 2
+      step: 0.1
+    group: sampling
+  # Stop sequences are declared here as a single string value.
+  - path: stop
+    type: string
+    label: Stop sequences
+    description: >-
+      Up to a few sequences where generation stops; the stop text is not
+      included in the output.
+    group: generation_length
+  # Output format selector; default is text.
+  - path: response_format.type
+    type: enum
+    label: Response format
+    description: >-
+      Forces the response into plain text or a JSON object.
+    default: text
+    values: [text, json_object]
+    group: output_format
diff --git a/models/xiaomi/mimo-v2.5-subscription.yaml b/models/xiaomi/mimo-v2.5-subscription.yaml
@@ -0,0 +1,68 @@
+# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json
+# Parameter definitions for model mimo-v2.5 (provider: xiaomi, authType: subscription).
+provider: xiaomi
+authType: subscription
+model: mimo-v2.5
+params:
+  # Generation length: lower bound of 1, no upper bound declared.
+  - path: max_completion_tokens
+    type: integer
+    label: Max tokens
+    description: >-
+      Maximum number of tokens to generate, covering both the thinking trace
+      and the final answer.
+    range:
+      min: 1
+    group: generation_length
+  # Reasoning toggle; default is enabled.
+  - path: thinking.type
+    type: enum
+    label: Thinking mode
+    description: >-
+      Controls whether MiMo reasons step by step before answering.
+      Enabled by default; set disabled to respond directly.
+    default: enabled
+    values: [enabled, disabled]
+    group: reasoning
+  # temperature and top_p each carry an applicability `except` rule keyed on
+  # thinking.type: enabled, matching the "ignored while thinking" wording.
+  - path: temperature
+    type: number
+    label: Temperature
+    description: >-
+      Controls randomness. Lower values are more focused; higher values
+      are more varied. Ignored while thinking is enabled, where it is
+      forced to 1.0.
+    default: 1
+    range:
+      min: 0
+      max: 2
+      step: 0.1
+    group: sampling
+    applicability:
+      except:
+        thinking.type: enabled
+  - path: top_p
+    type: number
+    label: Top P
+    description: >-
+      Nucleus sampling cutoff. Ignored while thinking is enabled, where it is
+      forced to 0.95.
+    default: 0.95
+    range:
+      min: 0
+      max: 1
+      step: 0.01
+    group: sampling
+    applicability:
+      except:
+        thinking.type: enabled
+  # Output format selector; default is text.
+  - path: response_format.type
+    type: enum
+    label: Response format
+    description: >-
+      Forces the response into plain text or a JSON object.
+    default: text
+    values: [text, json_object]
+    group: output_format