ServiceNow · jlamypoirier · May 19, 2026 · May 19, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -1,5 +1,5 @@
 # syntax=docker/dockerfile:1.7-labs
-FROM nvcr.io/nvidia/pytorch:25.11-py3
+FROM nvcr.io/nvidia/pytorch:26.02-py3
 
 # Install dependencies.
 RUN apt-get update \
@@ -25,7 +25,7 @@ ENV PIP_CONSTRAINT=""
 # We need to compile from the repo because of https://github.com/state-spaces/mamba/issues/720 (same for causal-conv1d)
 # We set the number of workers to avoid OOM when compiling on laptop. (TODO: Can we make it configurable?)
 RUN MAX_JOBS=2 pip install --no-build-isolation "causal-conv1d @ git+https://github.com/Dao-AILab/[email protected]"
-RUN MAX_JOBS=2 pip install --no-build-isolation mamba-ssm==2.2.6.post3
+RUN MAX_JOBS=2 pip install --no-build-isolation mamba-ssm==2.3.2.post1
 RUN MAX_JOBS=2 pip install --no-build-isolation "flash-linear-attention @ git+https://github.com/fla-org/flash-linear-attention@67eee20c8503cd19eeb52aa1b99821308e9260c5"
 # Copy dependency files. Source files end up root-owned and read-only for non-root processes;
 # /app itself stays writable so the runtime UID can create new files (logs, __pycache__, etc.).
@@ -35,7 +35,7 @@ COPY ./fast_llm/__init__.py fast_llm/
 COPY ./fast_llm/csrc/ fast_llm/csrc/
 
 # Install dependencies within the virtual environment.
-RUN pip install --no-cache-dir --no-build-isolation -e ".[CORE,OPTIONAL,HUGGINGFACE,SSM,VISION,GENERATION,STREAMING,DEV]" triton==3.5.1 "transformers>=5.0.0,<6.0.0"
+RUN pip install --no-cache-dir --no-build-isolation -e ".[CORE,OPTIONAL,HUGGINGFACE,SSM,VISION,GENERATION,STREAMING,DEV]" "transformers>=5.0.0,<6.0.0"
 
 # Copy the remaining source code (read-only for non-root, see the note on dependency files above).
 COPY ./Megatron-LM Megatron-LM

diff --git a/setup.cfg b/setup.cfg
@@ -9,7 +9,7 @@ include_package_data = True
 python_requires = >=3.12
 install_requires =
     requests>=2.33.0
-    PyYAML>=6.0.3
+    PyYAML>=6.0.1
     pybind11>=3.0.1
     packaging>=25.0
 
@@ -25,10 +25,10 @@ CORE =
     # Used for checkpoints
     safetensors>=0.6.2
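-    # Update the base image (version fixed to ensure there is a wheel for the base image), may need --no-build-isolation
+    # Update the base image (version range restricted to ensure there is a wheel for the base image), may need --no-build-isolation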
-    flash-attn==2.7.4.post1
+    flash-attn>=2.7.4,<2.8.0
     # Dropless MoE kernel is broken with triton >= 3.2.0 and needs a rewrite (also limited to 32 experts).
     # Not pinning triton here as it breaks cpu-only installs and pip dependency resolution.
-    # triton==3.5.1
+    # triton==3.7.0
 
 
 # Small packages required for some optional features and tools.
@@ -53,7 +53,7 @@ HUGGINGFACE =
 # To install on cpu environment (ex. for IDE support):
 #   MAMBA_FORCE_BUILD=TRUE CAUSAL_CONV1D_FORCE_BUILD=TRUE CAUSAL_CONV1D_SKIP_CUDA_BUILD=TRUE pip install -e ".[CORE,SSM]" --no-build-isolation
 SSM =
-    mamba_ssm[causal-conv1d]==2.2.6.post3
+    mamba_ssm[causal-conv1d]==2.3.2.post1
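-    # TODO: This is required for varlen mamba, but fails to compile in nvcr.io/nvidia/pytorch:25.11-py3.
+    # TODO: This is required for varlen mamba, but failed to compile in nvcr.io/nvidia/pytorch:25.11-py3; re-check with nvcr.io/nvidia/pytorch:26.02-py3.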
     # mamba_ssm[causal-conv1d] @ git+https://github.com/jxiw/varlen_mamba.git@varlen_mamba
     flash-linear-attention @ git+https://github.com/fla-org/flash-linear-attention@67eee20c8503cd19eeb52aa1b99821308e9260c5