From 26785c196aa23e88f50c19c390dcdb2eaaccf283 Mon Sep 17 00:00:00 2001
From: Guillaume Gay <guillaume.gay@protonmail.com>
Date: Mon, 29 Jun 2026 11:18:37 +0200
Subject: [PATCH] Add NVIDIA Nemotron-3-Ultra (subscription) parameters

---
 .../nvidia/nemotron-3-ultra-subscription.yaml | 54 ++++++++++++++
 packages/modelparams/src/generated/data.ts    | 73 +++++++++++++++++++
 .../modelparams/src/generated/defaults.ts     |  7 ++
 .../modelparams/src/generated/model-ids.ts    |  1 +
 .../modelparams/src/generated/params-by-id.ts |  8 ++
 5 files changed, 143 insertions(+)
 create mode 100644 models/nvidia/nemotron-3-ultra-subscription.yaml

diff --git a/models/nvidia/nemotron-3-ultra-subscription.yaml b/models/nvidia/nemotron-3-ultra-subscription.yaml
new file mode 100644
index 0000000..ba1e2dc
--- /dev/null
+++ b/models/nvidia/nemotron-3-ultra-subscription.yaml
@@ -0,0 +1,54 @@
+# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json
+provider: nvidia
+authType: subscription
+model: nemotron-3-ultra
+params:
+  - path: temperature
+    type: number
+    label: Temperature
+    description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.
+    default: 1
+    range:
+      max: 1
+    group: sampling
+  - path: top_p
+    type: number
+    label: Top P
+    description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.
+    default: 0.95
+    range:
+      max: 1
+    group: sampling
+  - path: max_tokens
+    type: integer
+    label: Max tokens
+    description: Maximum number of tokens to generate. Generation stops when this limit is reached.
+    default: 16384
+    range:
+      min: 1
+      max: 32768
+    group: generation_length
+  - path: reasoning_effort
+    type: enum
+    label: Reasoning effort
+    description: Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning.
+    default: high
+    values:
+      - none
+      - medium
+      - high
+    group: reasoning
+  - path: reasoning_budget
+    type: integer
+    label: Reasoning budget
+    description: Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement.
+    default: 16384
+    range:
+      min: -1
+      max: 32768
+    group: reasoning
+  - path: stop
+    type: string
+    label: Stop
+    description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.
+    group: generation_length
diff --git a/packages/modelparams/src/generated/data.ts b/packages/modelparams/src/generated/data.ts
index c10a067..02a0194 100644
--- a/packages/modelparams/src/generated/data.ts
+++ b/packages/modelparams/src/generated/data.ts
@@ -10632,6 +10632,79 @@ export const CATALOG = [
       }
     ]
   },
+  {
+    "provider": "nvidia",
+    "authType": "subscription",
+    "model": "nemotron-3-ultra",
+    "params": [
+      {
+        "path": "temperature",
+        "label": "Temperature",
+        "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.",
+        "group": "sampling",
+        "type": "number",
+        "default": 1,
+        "range": {
+          "max": 1
+        }
+      },
+      {
+        "path": "top_p",
+        "label": "Top P",
+        "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.",
+        "group": "sampling",
+        "type": "number",
+        "default": 0.95,
+        "range": {
+          "max": 1
+        }
+      },
+      {
+        "path": "max_tokens",
+        "label": "Max tokens",
+        "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.",
+        "group": "generation_length",
+        "type": "integer",
+        "default": 16384,
+        "range": {
+          "min": 1,
+          "max": 32768
+        }
+      },
+      {
+        "path": "reasoning_effort",
+        "label": "Reasoning effort",
+        "description": "Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning.",
+        "group": "reasoning",
+        "type": "enum",
+        "default": "high",
+        "values": [
+          "none",
+          "medium",
+          "high"
+        ]
+      },
+      {
+        "path": "reasoning_budget",
+        "label": "Reasoning budget",
+        "description": "Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement.",
+        "group": "reasoning",
+        "type": "integer",
+        "default": 16384,
+        "range": {
+          "min": -1,
+          "max": 32768
+        }
+      },
+      {
+        "path": "stop",
+        "label": "Stop",
+        "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.",
+        "group": "generation_length",
+        "type": "string"
+      }
+    ]
+  },
   {
     "provider": "nvidia",
     "authType": "api_key",
diff --git a/packages/modelparams/src/generated/defaults.ts b/packages/modelparams/src/generated/defaults.ts
index a81eeaf..4139bd4 100644
--- a/packages/modelparams/src/generated/defaults.ts
+++ b/packages/modelparams/src/generated/defaults.ts
@@ -798,6 +798,13 @@ export const DEFAULTS = {
     reasoning_effort: "high",
     reasoning_budget: 16384,
   },
+  "nvidia/nemotron-3-ultra-subscription": {
+    temperature: 1,
+    top_p: 0.95,
+    max_tokens: 16384,
+    reasoning_effort: "high",
+    reasoning_budget: 16384,
+  },
   "nvidia/nemotron-content-safety-reasoning-4b": {
     temperature: 1,
     top_p: 1,
diff --git a/packages/modelparams/src/generated/model-ids.ts b/packages/modelparams/src/generated/model-ids.ts
index 8bc59d0..d2bcc42 100644
--- a/packages/modelparams/src/generated/model-ids.ts
+++ b/packages/modelparams/src/generated/model-ids.ts
@@ -120,6 +120,7 @@ export const MODEL_IDS = [
   "nvidia/nemotron-3-nano-30b-a3b",
   "nvidia/nemotron-3-super-120b-a12b",
   "nvidia/nemotron-3-ultra-550b-a55b",
+  "nvidia/nemotron-3-ultra-subscription",
   "nvidia/nemotron-content-safety-reasoning-4b",
   "nvidia/nemotron-mini-4b-instruct",
   "nvidia/riva-translate-4b-instruct-v1.1",
diff --git a/packages/modelparams/src/generated/params-by-id.ts b/packages/modelparams/src/generated/params-by-id.ts
index 702e49f..d089c70 100644
--- a/packages/modelparams/src/generated/params-by-id.ts
+++ b/packages/modelparams/src/generated/params-by-id.ts
@@ -992,6 +992,14 @@ export type ParamsById = {
     seed: number;
     stop: string;
   };
+  "nvidia/nemotron-3-ultra-subscription": {
+    temperature: number;
+    top_p: number;
+    max_tokens: number;
+    reasoning_effort: "none" | "medium" | "high";
+    reasoning_budget: number;
+    stop: string;
+  };
   "nvidia/nemotron-content-safety-reasoning-4b": {
     temperature: number;
     top_p: number;