From db6100bc411d6b0771cd028c21860bdb5b680241 Mon Sep 17 00:00:00 2001 From: Joel Lamy-Poirier Date: Tue, 19 May 2026 16:35:05 -0400 Subject: [PATCH] Bump base image to PyTorch 26.02-py3 and dependent packages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - nvcr.io/nvidia/pytorch: 25.11-py3 → 26.02-py3 - mamba-ssm: 2.2.6.post3 → 2.3.2.post1 - flash-attn: pinned 2.7.4.post1 → range >=2.7.4,<2.8.0 - triton: drop the 3.5.1 pin from the Dockerfile (already not pinned in setup.cfg); the commented-out reference version in setup.cfg is updated from 3.5.1 to 3.7.0 - PyYAML: relax >=6.0.3 → >=6.0.1 (no 6.0.3-specific API is needed) Full test suite passes 2622/2622 on both the previous image (PyTorch 25.11) and the new image — verified end-to-end on the EAI cluster. Co-Authored-By: Claude Opus 4.7 (1M context) --- Dockerfile | 6 +++--- setup.cfg | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index c85fdf5d3..93e53e683 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1.7-labs -FROM nvcr.io/nvidia/pytorch:25.11-py3 +FROM nvcr.io/nvidia/pytorch:26.02-py3 # Install dependencies. RUN apt-get update \ @@ -25,7 +25,7 @@ ENV PIP_CONSTRAINT="" # We need to compile from the repo because of https://github.com/state-spaces/mamba/issues/720 (same for causal-conv1d) # We set the number of workers to avoid OOM when compiling on laptop. (TODO: Can we make it configurable?) RUN MAX_JOBS=2 pip install --no-build-isolation "causal-conv1d @ git+https://github.com/Dao-AILab/causal-conv1d@v1.5.4" -RUN MAX_JOBS=2 pip install --no-build-isolation mamba-ssm==2.2.6.post3 +RUN MAX_JOBS=2 pip install --no-build-isolation mamba-ssm==2.3.2.post1 RUN MAX_JOBS=2 pip install --no-build-isolation "flash-linear-attention @ git+https://github.com/fla-org/flash-linear-attention@67eee20c8503cd19eeb52aa1b99821308e9260c5" # Copy dependency files. 
Source files end up root-owned and read-only for non-root processes; # /app itself stays writable so the runtime UID can create new files (logs, __pycache__, etc.). @@ -35,7 +35,7 @@ COPY ./fast_llm/__init__.py fast_llm/ COPY ./fast_llm/csrc/ fast_llm/csrc/ # Install dependencies within the virtual environment. -RUN pip install --no-cache-dir --no-build-isolation -e ".[CORE,OPTIONAL,HUGGINGFACE,SSM,VISION,GENERATION,STREAMING,DEV]" triton==3.5.1 "transformers>=5.0.0,<6.0.0" +RUN pip install --no-cache-dir --no-build-isolation -e ".[CORE,OPTIONAL,HUGGINGFACE,SSM,VISION,GENERATION,STREAMING,DEV]" "transformers>=5.0.0,<6.0.0" # Copy the remaining source code (read-only for non-root, see the note on dependency files above). COPY ./Megatron-LM Megatron-LM diff --git a/setup.cfg b/setup.cfg index f3a571b5c..bab3eebff 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,7 +9,7 @@ include_package_data = True python_requires = >=3.12 install_requires = requests>=2.33.0 - PyYAML>=6.0.3 + PyYAML>=6.0.1 pybind11>=3.0.1 packaging>=25.0 @@ -25,10 +25,10 @@ CORE = # Used for checkpoints safetensors>=0.6.2 # Update the base image (version fixed to ensure there is a wheel for the base image), may need --no-build-isolation - flash-attn==2.7.4.post1 + flash-attn>=2.7.4,<2.8.0 # Dropless MoE kernel is broken with triton >= 3.2.0 and needs a rewrite (also limited to 32 experts). # Not pinning triton here as it breaks cpu-only installs and pip dependency resolution. - # triton==3.5.1 + # triton==3.7.0 # Small packages required for some optional features and tools. @@ -53,7 +53,7 @@ HUGGINGFACE = # To install on cpu environment (ex. for IDE support): # MAMBA_FORCE_BUILD=TRUE CAUSAL_CONV1D_FORCE_BUILD=TRUE CAUSAL_CONV1D_SKIP_CUDA_BUILD=TRUE pip install -e ".[CORE,SSM]" --no-build-isolation SSM = - mamba_ssm[causal-conv1d]==2.2.6.post3 + mamba_ssm[causal-conv1d]==2.3.2.post1 # TODO: This is required for varlen mamba, but fails to compile in nvcr.io/nvidia/pytorch:25.11-py3. 
# mamba_ssm[causal-conv1d] @ git+https://github.com/jxiw/varlen_mamba.git@varlen_mamba flash-linear-attention @ git+https://github.com/fla-org/flash-linear-attention@67eee20c8503cd19eeb52aa1b99821308e9260c5