Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions models/nvidia/nemotron-3-ultra-subscription.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json
# Parameter catalog entry for the nvidia provider's nemotron-3-ultra model
# under the "subscription" auth type. Each item in `params` declares one
# request parameter: its path, value type, UI label, description, optional
# default, optional range/allowed values, and the group it is listed under.
provider: nvidia
authType: subscription
model: nemotron-3-ultra
params:
# Sampling temperature. Only an upper bound is declared here.
# NOTE(review): no `min` is given — confirm whether a lower bound (e.g. 0)
# should be declared or whether the schema applies one implicitly.
- path: temperature
type: number
label: Temperature
description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.
default: 1
range:
max: 1
group: sampling
# Nucleus-sampling cutoff. Like temperature, only `max` is declared.
# NOTE(review): no `min` is given — confirm the intended lower bound.
- path: top_p
type: number
label: Top P
description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.
default: 0.95
range:
max: 1
group: sampling
# Output length cap: integer in 1..32768, defaulting to 16384.
- path: max_tokens
type: integer
label: Max tokens
description: Maximum number of tokens to generate. Generation stops when this limit is reached.
default: 16384
range:
min: 1
max: 32768
group: generation_length
# Reasoning mode selector; exactly the three values listed under `values`
# are allowed, with `high` as the default.
- path: reasoning_effort
type: enum
label: Reasoning effort
description: Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning.
default: high
values:
- none
- medium
- high
group: reasoning
# Token budget for the reasoning trace. `min` is -1 because the description
# defines -1 as the "budget enforcement disabled" sentinel.
# NOTE(review): the range as written also admits 0 — confirm whether a
# zero budget is meaningful for this model.
- path: reasoning_budget
type: integer
label: Reasoning budget
description: Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement.
default: 16384
range:
min: -1
max: 32768
group: reasoning
# Stop sequence(s). No default is declared for this parameter.
# NOTE(review): the description says "a string or list of strings", but the
# declared type is `string` only — confirm whether the list form should be
# representable in this entry.
- path: stop
type: string
label: Stop
description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.
group: generation_length
73 changes: 73 additions & 0 deletions packages/modelparams/src/generated/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10632,6 +10632,79 @@ export const CATALOG = [
}
]
},
{
"provider": "nvidia",
"authType": "subscription",
"model": "nemotron-3-ultra",
"params": [
{
"path": "temperature",
"label": "Temperature",
"description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.",
"group": "sampling",
"type": "number",
"default": 1,
"range": {
"max": 1
}
},
{
"path": "top_p",
"label": "Top P",
"description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.",
"group": "sampling",
"type": "number",
"default": 0.95,
"range": {
"max": 1
}
},
{
"path": "max_tokens",
"label": "Max tokens",
"description": "Maximum number of tokens to generate. Generation stops when this limit is reached.",
"group": "generation_length",
"type": "integer",
"default": 16384,
"range": {
"min": 1,
"max": 32768
}
},
{
"path": "reasoning_effort",
"label": "Reasoning effort",
"description": "Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning.",
"group": "reasoning",
"type": "enum",
"default": "high",
"values": [
"none",
"medium",
"high"
]
},
{
"path": "reasoning_budget",
"label": "Reasoning budget",
"description": "Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement.",
"group": "reasoning",
"type": "integer",
"default": 16384,
"range": {
"min": -1,
"max": 32768
}
},
{
"path": "stop",
"label": "Stop",
"description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.",
"group": "generation_length",
"type": "string"
}
]
},
{
"provider": "nvidia",
"authType": "api_key",
Expand Down
7 changes: 7 additions & 0 deletions packages/modelparams/src/generated/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,13 @@ export const DEFAULTS = {
reasoning_effort: "high",
reasoning_budget: 16384,
},
"nvidia/nemotron-3-ultra-subscription": {
temperature: 1,
top_p: 0.95,
max_tokens: 16384,
reasoning_effort: "high",
reasoning_budget: 16384,
},
"nvidia/nemotron-content-safety-reasoning-4b": {
temperature: 1,
top_p: 1,
Expand Down
1 change: 1 addition & 0 deletions packages/modelparams/src/generated/model-ids.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ export const MODEL_IDS = [
"nvidia/nemotron-3-nano-30b-a3b",
"nvidia/nemotron-3-super-120b-a12b",
"nvidia/nemotron-3-ultra-550b-a55b",
"nvidia/nemotron-3-ultra-subscription",
"nvidia/nemotron-content-safety-reasoning-4b",
"nvidia/nemotron-mini-4b-instruct",
"nvidia/riva-translate-4b-instruct-v1.1",
Expand Down
8 changes: 8 additions & 0 deletions packages/modelparams/src/generated/params-by-id.ts
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,14 @@ export type ParamsById = {
seed: number;
stop: string;
};
"nvidia/nemotron-3-ultra-subscription": {
temperature: number;
top_p: number;
max_tokens: number;
reasoning_effort: "none" | "medium" | "high";
reasoning_budget: number;
stop: string;
};
"nvidia/nemotron-content-safety-reasoning-4b": {
temperature: number;
top_p: number;
Expand Down
Loading