From 26785c196aa23e88f50c19c390dcdb2eaaccf283 Mon Sep 17 00:00:00 2001 From: Guillaume Gay Date: Mon, 29 Jun 2026 11:18:37 +0200 Subject: [PATCH] Add NVIDIA Nemotron-3-Ultra (subscription) parameters --- .../nvidia/nemotron-3-ultra-subscription.yaml | 54 ++++++++++++++ packages/modelparams/src/generated/data.ts | 73 +++++++++++++++++++ .../modelparams/src/generated/defaults.ts | 7 ++ .../modelparams/src/generated/model-ids.ts | 1 + .../modelparams/src/generated/params-by-id.ts | 8 ++ 5 files changed, 143 insertions(+) create mode 100644 models/nvidia/nemotron-3-ultra-subscription.yaml diff --git a/models/nvidia/nemotron-3-ultra-subscription.yaml b/models/nvidia/nemotron-3-ultra-subscription.yaml new file mode 100644 index 0000000..ba1e2dc --- /dev/null +++ b/models/nvidia/nemotron-3-ultra-subscription.yaml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: subscription +model: nemotron-3-ultra +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning. + default: high + values: + - none + - medium + - high + group: reasoning + - path: reasoning_budget + type: integer + label: Reasoning budget + description: Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement. + default: 16384 + range: + min: -1 + max: 32768 + group: reasoning + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/packages/modelparams/src/generated/data.ts b/packages/modelparams/src/generated/data.ts index c10a067..02a0194 100644 --- a/packages/modelparams/src/generated/data.ts +++ b/packages/modelparams/src/generated/data.ts @@ -10632,6 +10632,79 @@ export const CATALOG = [ } ] }, + { + "provider": "nvidia", + "authType": "subscription", + "model": "nemotron-3-ultra", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.95, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 16384, + "range": { + "min": 1, + "max": 32768 + } + }, + { + "path": "reasoning_effort", + "label": "Reasoning effort", + "description": "Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning.", + "group": "reasoning", + "type": "enum", + "default": "high", + "values": [ + "none", + "medium", + "high" + ] + }, + { + "path": "reasoning_budget", + "label": "Reasoning budget", + "description": "Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement.", + "group": "reasoning", + "type": "integer", + "default": 16384, + "range": { + "min": -1, + "max": 32768 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, { "provider": "nvidia", "authType": "api_key", diff --git a/packages/modelparams/src/generated/defaults.ts b/packages/modelparams/src/generated/defaults.ts index a81eeaf..4139bd4 100644 --- a/packages/modelparams/src/generated/defaults.ts +++ b/packages/modelparams/src/generated/defaults.ts @@ -798,6 +798,13 @@ export const DEFAULTS = { reasoning_effort: "high", reasoning_budget: 16384, }, + "nvidia/nemotron-3-ultra-subscription": { + temperature: 1, + top_p: 0.95, + max_tokens: 16384, + reasoning_effort: "high", + reasoning_budget: 16384, + }, "nvidia/nemotron-content-safety-reasoning-4b": { temperature: 1, top_p: 1, diff --git a/packages/modelparams/src/generated/model-ids.ts b/packages/modelparams/src/generated/model-ids.ts index 8bc59d0..d2bcc42 100644 --- a/packages/modelparams/src/generated/model-ids.ts +++ b/packages/modelparams/src/generated/model-ids.ts @@ -120,6 +120,7 @@ export const MODEL_IDS = [ "nvidia/nemotron-3-nano-30b-a3b", "nvidia/nemotron-3-super-120b-a12b", "nvidia/nemotron-3-ultra-550b-a55b", + "nvidia/nemotron-3-ultra-subscription", "nvidia/nemotron-content-safety-reasoning-4b", "nvidia/nemotron-mini-4b-instruct", "nvidia/riva-translate-4b-instruct-v1.1", diff --git a/packages/modelparams/src/generated/params-by-id.ts b/packages/modelparams/src/generated/params-by-id.ts index 702e49f..d089c70 100644 --- a/packages/modelparams/src/generated/params-by-id.ts +++ b/packages/modelparams/src/generated/params-by-id.ts @@ -992,6 +992,14 @@ export type ParamsById = { seed: number; stop: string; }; + "nvidia/nemotron-3-ultra-subscription": { + temperature: number; + top_p: number; + max_tokens: number; + reasoning_effort: "none" | "medium" | "high"; + reasoning_budget: number; + stop: string; + }; "nvidia/nemotron-content-safety-reasoning-4b": { temperature: number; top_p: number;