# Megatron-LM training image built on the NVIDIA NGC PyTorch container.
# Base image: NVIDIA NGC PyTorch container, pinned to the 25.10-py3 tag.
FROM nvcr.io/nvidia/pytorch:25.10-py3

# Proxy settings are supplied at build time with --build-arg
# (HTTP_PROXY, HTTPS_PROXY, http_proxy, https_proxy). These names are
# predefined build arguments, so every RUN step receives them without an ARG
# line. They are intentionally not redeclared: an explicit ARG would record
# the proxy URL (which may embed credentials) in `docker history` and make it
# part of the build-cache key.

# Build-time only: keeps apt/debconf from prompting during the RUN steps
# without leaving the setting in the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#   PYTHONUNBUFFERED  - Python writes stdout/stderr without buffering.
#   PIP_NO_CACHE_DIR  - pip does not keep a download/wheel cache in the image.
#   MAX_JOBS          - NOTE(review): presumably caps parallel compile jobs
#                       for the source builds later in this file; confirm.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    MAX_JOBS=8

# Deliberately set to the empty string.
# NOTE(review): presumably clears a pip constraints file inherited from the
# base image so the installs below are not held to it; confirm.
ENV PIP_CONSTRAINT=

# Put the Megatron-LM checkout on the import path. Any existing PYTHONPATH is
# appended only when it is non-empty, so an unset variable does not leave a
# trailing ':' (an empty entry, which Python resolves to the current directory).
ENV PYTHONPATH=/opt/Megatron-LM${PYTHONPATH:+:${PYTHONPATH}}

# System packages: interactive developer tools plus the native build
# toolchain. --no-install-recommends keeps optional "recommended" packages
# out of the image, and the apt package lists are removed in the same layer
# so they never persist.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        bzip2 \
        ca-certificates \
        cmake \
        curl \
        git \
        htop \
        ninja-build \
        pkg-config \
        rsync \
        tmux \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Package index that pip is configured to use, for the rest of the build and
# inside the resulting image. Defaults to the TUNA mirror; override with
#   --build-arg PYPI_INDEX_URL=<url> --build-arg PYPI_TRUSTED_HOST=<host>
ARG PYPI_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
ARG PYPI_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn

# Write the index settings into pip's configuration, then upgrade the
# packaging toolchain itself.
RUN python -m pip config set global.index-url "${PYPI_INDEX_URL}" && \
    python -m pip config set global.trusted-host "${PYPI_TRUSTED_HOST}" && \
    python -m pip install -U pip setuptools wheel packaging

# Python libraries installed on top of the base image in a single layer.
# Versions are not pinned here, so pip resolves whatever is current on the
# configured index at build time.
RUN python -m pip install \
        transformers \
        datasets \
        tokenizers \
        sentencepiece \
        accelerate \
        numpy \
        pandas \
        pyarrow \
        fastparquet \
        zstandard \
        jsonlines \
        tqdm \
        rich \
        einops \
        regex \
        tensorboard \
        wandb \
        evaluate \
        lm-eval \
        omegaconf \
        hydra-core \
        nltk \
        ftfy \
        six \
        psutil \
        pydantic

# The NGC PyTorch image normally ships with Transformer Engine already
# installed. This step only reports what it finds: an import failure is
# printed to the build log and does not fail the build. Reinstall TE only if
# the log shows the import failing.
RUN python - <<'PY'
import torch

print("torch:", torch.__version__, "cuda:", torch.version.cuda)

try:
    import transformer_engine
except Exception as exc:
    print("transformer_engine import failed:", exc)
else:
    print("transformer_engine: OK")
PY

# Megatron-LM source checkout at /opt/Megatron-LM, installed in editable mode.
# MEGATRON_REF selects the branch, tag, or commit to check out. The default
# follows the main branch; pass a tag or commit SHA with
#   --build-arg MEGATRON_REF=<ref>
# to get a reproducible image.
ARG MEGATRON_REF=main

# setuptools is held to the >=77,<80 range before the editable install, which
# runs without build isolation and therefore uses the setuptools installed in
# the image.
RUN git clone https://github.com/NVIDIA/Megatron-LM.git /opt/Megatron-LM && \
    git -C /opt/Megatron-LM checkout "${MEGATRON_REF}" && \
    python -m pip install -U "setuptools<80.0.0,>=77.0.0" packaging && \
    python -m pip install --no-build-isolation -e /opt/Megatron-LM

# Optional Apex: only needed when you rely on Apex's fused optimizers.
# Many modern Megatron paths rely more on Transformer Engine / fused kernels.
# Installed by default; skip the clone and the extension build with
#   --build-arg INSTALL_APEX=0
ARG INSTALL_APEX=1

# Builds Apex from source with its C++ and CUDA extensions enabled. A failure
# in the clone or the build still fails the image build.
RUN if [ "${INSTALL_APEX}" = "1" ]; then \
        git clone https://github.com/NVIDIA/apex.git /opt/apex && \
        python -m pip install -v --disable-pip-version-check --no-build-isolation \
            --config-settings "--build-option=--cpp_ext" \
            --config-settings "--build-option=--cuda_ext" \
            /opt/apex; \
    fi

# Containers start in /workspace (created by WORKDIR if it does not exist).
WORKDIR /workspace

# Default to an interactive shell. Exec (JSON array) form, so bash is started
# directly rather than through `/bin/sh -c`.
CMD ["/bin/bash"]