Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# syntax=docker/dockerfile:1.7-labs
FROM nvcr.io/nvidia/pytorch:25.11-py3
FROM nvcr.io/nvidia/pytorch:26.02-py3

# Install dependencies.
RUN apt-get update \
Expand All @@ -25,7 +25,7 @@ ENV PIP_CONSTRAINT=""
# We need to compile from the repo because of https://github.com/state-spaces/mamba/issues/720 (same for causal-conv1d)
# We limit the number of parallel build jobs (MAX_JOBS) to avoid OOM when compiling on a laptop. (TODO: Can we make it configurable?)
RUN MAX_JOBS=2 pip install --no-build-isolation "causal-conv1d @ git+https://github.com/Dao-AILab/[email protected]"
RUN MAX_JOBS=2 pip install --no-build-isolation mamba-ssm==2.2.6.post3
RUN MAX_JOBS=2 pip install --no-build-isolation mamba-ssm==2.3.2.post1
RUN MAX_JOBS=2 pip install --no-build-isolation "flash-linear-attention @ git+https://github.com/fla-org/flash-linear-attention@67eee20c8503cd19eeb52aa1b99821308e9260c5"
# Copy dependency files. Source files end up root-owned and read-only for non-root processes;
# /app itself stays writable so the runtime UID can create new files (logs, __pycache__, etc.).
Expand All @@ -35,7 +35,7 @@ COPY ./fast_llm/__init__.py fast_llm/
COPY ./fast_llm/csrc/ fast_llm/csrc/

# Install dependencies within the virtual environment.
RUN pip install --no-cache-dir --no-build-isolation -e ".[CORE,OPTIONAL,HUGGINGFACE,SSM,VISION,GENERATION,STREAMING,DEV]" triton==3.5.1 "transformers>=5.0.0,<6.0.0"
RUN pip install --no-cache-dir --no-build-isolation -e ".[CORE,OPTIONAL,HUGGINGFACE,SSM,VISION,GENERATION,STREAMING,DEV]" "transformers>=5.0.0,<6.0.0"

# Copy the remaining source code (read-only for non-root, see the note on dependency files above).
COPY ./Megatron-LM Megatron-LM
Expand Down
8 changes: 4 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ include_package_data = True
python_requires = >=3.12
install_requires =
requests>=2.33.0
PyYAML>=6.0.3
PyYAML>=6.0.1
pybind11>=3.0.1
packaging>=25.0

Expand All @@ -25,10 +25,10 @@ CORE =
# Used for checkpoints
safetensors>=0.6.2
# Update alongside the base image (version constrained to ensure there is a wheel for the base image); may need --no-build-isolation.
flash-attn==2.7.4.post1
flash-attn>=2.7.4,<2.8.0
# Dropless MoE kernel is broken with triton >= 3.2.0 and needs a rewrite (also limited to 32 experts).
# Not pinning triton here as it breaks cpu-only installs and pip dependency resolution.
# triton==3.5.1
# triton==3.7.0


# Small packages required for some optional features and tools.
Expand All @@ -53,7 +53,7 @@ HUGGINGFACE =
# To install on cpu environment (ex. for IDE support):
# MAMBA_FORCE_BUILD=TRUE CAUSAL_CONV1D_FORCE_BUILD=TRUE CAUSAL_CONV1D_SKIP_CUDA_BUILD=TRUE pip install -e ".[CORE,SSM]" --no-build-isolation
SSM =
mamba_ssm[causal-conv1d]==2.2.6.post3
mamba_ssm[causal-conv1d]==2.3.2.post1
# TODO: This is required for varlen mamba, but fails to compile in nvcr.io/nvidia/pytorch:25.11-py3.
# mamba_ssm[causal-conv1d] @ git+https://github.com/jxiw/varlen_mamba.git@varlen_mamba
flash-linear-attention @ git+https://github.com/fla-org/flash-linear-attention@67eee20c8503cd19eeb52aa1b99821308e9260c5
Expand Down
Loading