Initial Commit
This commit is contained in:
57
scripts/Dockerfile
Normal file
57
scripts/Dockerfile
Normal file
@@ -0,0 +1,57 @@
|
||||
# Base image: NVIDIA NGC PyTorch container, tag 25.10-py3.
# Every instruction below is layered on top of this image.
FROM nvcr.io/nvidia/pytorch:25.10-py3
|
||||
|
||||
# Proxy settings: HTTP_PROXY, HTTPS_PROXY, http_proxy and https_proxy are
# predefined build arguments. `docker build --build-arg HTTP_PROXY=...` keeps
# working and the values still reach every RUN step without declaring them.
# They are intentionally NOT re-declared with ARG: an explicit declaration
# would record the proxy URL (which may embed credentials) in `docker history`
# and would invalidate the build cache whenever the proxy value changes.
|
||||
|
||||
# Build-time only: suppress interactive debconf prompts during `apt-get install`
# in later RUN steps without leaving the setting in the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment (also visible to every later build step):
#   PYTHONUNBUFFERED=1 - Python writes stdout/stderr unbuffered.
#   PIP_NO_CACHE_DIR=1 - pip does not keep a download/wheel cache.
#   MAX_JOBS=8         - presumably caps parallel compile jobs for the source
#                        builds further down (TODO confirm which builds read it).
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    MAX_JOBS=8

# Set PIP_CONSTRAINT to the empty string. Presumably this clears a constraints
# file configured by the base image so the pip installs below are not pinned by
# it (TODO confirm against the base image).
ENV PIP_CONSTRAINT=

# Put the Megatron-LM checkout on the Python import path. Any existing
# PYTHONPATH is appended; when it is unset no trailing ':' is emitted, which
# would otherwise add an empty entry to the path.
ENV PYTHONPATH=/opt/Megatron-LM${PYTHONPATH:+:${PYTHONPATH}}
|
||||
|
||||
# OS packages: developer tools plus the native build toolchain.
# --no-install-recommends keeps apt from pulling in optional extras, and the
# package index is removed in the same layer so it never lands in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        bzip2 \
        ca-certificates \
        cmake \
        curl \
        git \
        htop \
        ninja-build \
        pkg-config \
        rsync \
        tmux \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Package index used by pip for the rest of the build and inside the container.
# Defaults to the Tsinghua (TUNA) PyPI mirror; override with
#   --build-arg PYPI_INDEX_URL=... --build-arg PYPI_TRUSTED_HOST=...
# to build against a different index.
ARG PYPI_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
ARG PYPI_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn

# Persist the index settings in pip's global config, then upgrade the core
# packaging tools before anything else is installed.
RUN python -m pip config set global.index-url "${PYPI_INDEX_URL}" && \
    python -m pip config set global.trusted-host "${PYPI_TRUSTED_HOST}" && \
    python -m pip install -U pip setuptools wheel packaging
|
||||
|
||||
# Python libraries installed on top of the base image in a single pip
# invocation (no versions are pinned here). One package per line keeps
# future diffs readable.
RUN python -m pip install \
        transformers \
        datasets \
        tokenizers \
        sentencepiece \
        accelerate \
        numpy \
        pandas \
        pyarrow \
        fastparquet \
        zstandard \
        jsonlines \
        tqdm \
        rich \
        einops \
        regex \
        tensorboard \
        wandb \
        evaluate \
        lm-eval \
        omegaconf \
        hydra-core \
        nltk \
        ftfy \
        six \
        psutil \
        pydantic
|
||||
|
||||
# Build-log sanity check: print the torch / CUDA versions and report whether
# transformer_engine can be imported. The NGC PyTorch image usually ships
# Transformer Engine already. This step only reports: an import failure is
# printed, not fatal. Keep the check, and reinstall TE only if the log shows
# the import failing.
RUN python - <<'PY'
import importlib

import torch

print("torch:", torch.__version__, "cuda:", torch.version.cuda)

try:
    importlib.import_module("transformer_engine")
except Exception as err:
    print("transformer_engine import failed:", err)
else:
    print("transformer_engine: OK")
PY
|
||||
|
||||
# Optional git ref (branch, tag or commit) of Megatron-LM to check out.
# Empty (the default) keeps whatever the repository's default branch points at;
# pass --build-arg MEGATRON_LM_REF=<ref> for a reproducible build.
ARG MEGATRON_LM_REF=

# Clone Megatron-LM into /opt/Megatron-LM and install it in editable mode.
# setuptools is first moved into the >=77,<80 range (presumably required by the
# Megatron-LM build; TODO confirm). --no-build-isolation makes the build use the
# packages already installed in the image instead of a fresh isolated env.
RUN git clone https://github.com/NVIDIA/Megatron-LM.git /opt/Megatron-LM && \
    if [ -n "${MEGATRON_LM_REF}" ]; then \
        git -C /opt/Megatron-LM checkout "${MEGATRON_LM_REF}"; \
    fi && \
    python -m pip install -U "setuptools<80.0.0,>=77.0.0" packaging && \
    python -m pip install --no-build-isolation -e /opt/Megatron-LM
|
||||
|
||||
# Optional Apex: only needed if you really want the fused optimizers from Apex.
# Many modern Megatron paths rely more on Transformer Engine / fused kernels.
# Installed by default (INSTALL_APEX=1); build with --build-arg INSTALL_APEX=0
# to skip the clone and the C++/CUDA extension build entirely.
ARG INSTALL_APEX=1

# Clone Apex into /opt/apex and build it with the C++ and CUDA extensions
# against the packages already in the image (--no-build-isolation).
# A failing clone or build still fails this step.
RUN if [ "${INSTALL_APEX}" = "1" ]; then \
        git clone https://github.com/NVIDIA/apex.git /opt/apex && \
        python -m pip install -v --disable-pip-version-check --no-build-isolation \
            --config-settings "--build-option=--cpp_ext" \
            --config-settings "--build-option=--cuda_ext" \
            /opt/apex; \
    fi
|
||||
|
||||
# Working directory for any later instructions and for processes started in
# the container.
WORKDIR /workspace
|
||||
# Default command when the container is started without one: launch bash
# (exec form, so bash is run directly rather than through `/bin/sh -c`).
CMD ["/bin/bash"]
|
||||
Reference in New Issue
Block a user