Initial Commit

This commit is contained in:
2026-05-06 15:06:07 +08:00
parent b5ac2c8ed5
commit f154c1611d
29 changed files with 1068 additions and 0 deletions

57
scripts/Dockerfile Normal file
View File

@@ -0,0 +1,57 @@
# Base image: NVIDIA NGC PyTorch container, pinned to the 25.10 release tag.
FROM nvcr.io/nvidia/pytorch:25.10-py3

# Proxy settings forwarded from `docker build --build-arg ...` to the RUN steps.
# NOTE(review): Docker already predefines these four ARGs. Declaring them
# explicitly causes their values to be recorded in `docker history` and to take
# part in the build-cache key; confirm the proxy URLs carry no credentials.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ARG http_proxy
ARG https_proxy

# Build-time only: suppresses interactive prompts during package installation
# in the RUN steps of this build without leaking the setting into the
# environment of running containers (which an ENV would do).
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1
# Set to the empty string on purpose.
# NOTE(review): presumably clears a constraints file configured by the base
# image so the pip installs in this build are not restricted by it; confirm.
ENV PIP_CONSTRAINT=""
# NOTE(review): presumably caps parallel compile jobs for the native extension
# builds performed later in this build; confirm against available build RAM.
ENV MAX_JOBS=8
# Prepend the Megatron-LM checkout to the module search path. The `:+` form only
# appends ":<old value>" when PYTHONPATH is already non-empty, so an unset
# variable no longer yields a trailing ":" (an empty entry that Python treats as
# the current working directory).
ENV PYTHONPATH=/opt/Megatron-LM${PYTHONPATH:+:${PYTHONPATH}}
# OS packages: download/VCS utilities, interactive helpers, and the native build
# toolchain. `update` and `install` share one layer so the package index is
# never stale, `--no-install-recommends` keeps optional extras out of the image,
# and the apt lists are removed in the same layer so they do not persist.
# Packages are listed one per line in alphabetical order for easy diffing.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        bzip2 \
        ca-certificates \
        cmake \
        curl \
        git \
        htop \
        ninja-build \
        pkg-config \
        rsync \
        tmux \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Package index written into pip's global config, so it applies to every later
# pip invocation in this image. The defaults reproduce the original TUNA mirror;
# override at build time with
#   --build-arg PYPI_INDEX_URL=<url> --build-arg PYPI_TRUSTED_HOST=<host>
# NOTE(review): `trusted-host` tells pip to accept this host even without valid
# HTTPS; confirm it is still required for an https:// mirror.
ARG PYPI_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
ARG PYPI_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn
RUN python -m pip config set global.index-url "${PYPI_INDEX_URL}" && \
    python -m pip config set global.trusted-host "${PYPI_TRUSTED_HOST}" && \
    python -m pip install -U pip setuptools wheel packaging
# Python libraries installed on top of the base image: model/tokenizer stacks,
# data-format and tabular tooling, logging/evaluation, and configuration helpers.
# One package per line, alphabetically sorted, so additions and removals produce
# minimal diffs. Versions are not pinned here.
RUN python -m pip install \
        accelerate \
        datasets \
        einops \
        evaluate \
        fastparquet \
        ftfy \
        hydra-core \
        jsonlines \
        lm-eval \
        nltk \
        numpy \
        omegaconf \
        pandas \
        psutil \
        pyarrow \
        pydantic \
        regex \
        rich \
        sentencepiece \
        six \
        tensorboard \
        tokenizers \
        tqdm \
        transformers \
        wandb \
        zstandard
# NGC PyTorch usually already includes Transformer Engine.
# Keep this check; do not reinstall TE unless it fails.
# The probe is informational only: a failed transformer_engine import is printed
# rather than raised, so the build continues. A failed torch import is not
# caught and aborts the build. The heredoc delimiter is quoted so the script is
# passed to Python verbatim.
RUN python - <<'PY'
import torch

print("torch:", torch.__version__, "cuda:", torch.version.cuda)
try:
    import transformer_engine
    print("transformer_engine: OK")
except Exception as e:
    print("transformer_engine import failed:", e)
PY
# Megatron-LM, installed in editable mode so /opt/Megatron-LM is the live source
# tree used by the interpreter.
#
# MEGATRON_REF: optional branch or tag to check out. Empty (the default) clones
# the repository's default branch, as before; set it to a release tag with
#   --build-arg MEGATRON_REF=<tag>
# for reproducible images.
#
# `--depth 1` fetches only the selected revision to keep the layer small.
# setuptools is pinned before the install because `--no-build-isolation` makes
# pip build with the setuptools already present in the environment.
# NOTE(review): the specific <80,>=77 range is presumably what Megatron-LM's
# build requires; confirm against its pyproject.toml.
ARG MEGATRON_REF=""
RUN git clone --depth 1 ${MEGATRON_REF:+--branch "${MEGATRON_REF}"} \
        https://github.com/NVIDIA/Megatron-LM.git /opt/Megatron-LM && \
    python -m pip install -U "setuptools<80.0.0,>=77.0.0" packaging && \
    python -m pip install --no-build-isolation -e /opt/Megatron-LM
# Optional Apex: only install if you really need fused optimizers from Apex.
# Many modern Megatron paths rely more on Transformer Engine / fused kernels.
#
# INSTALL_APEX: set to anything other than "1" to skip this step, e.g.
#   --build-arg INSTALL_APEX=0
# The default ("1") keeps the previous behaviour of always building Apex with
# its C++ and CUDA extensions.
#
# `--depth 1` fetches only the latest revision to keep the layer small. If the
# clone or the build fails, the `if` propagates the non-zero status and the
# image build stops.
ARG INSTALL_APEX=1
RUN if [ "${INSTALL_APEX}" = "1" ]; then \
        git clone --depth 1 https://github.com/NVIDIA/apex.git /opt/apex && \
        python -m pip install -v --disable-pip-version-check --no-build-isolation \
            --config-settings "--build-option=--cpp_ext" \
            --config-settings "--build-option=--cuda_ext" \
            /opt/apex; \
    fi
# Default working directory for containers started from this image.
WORKDIR /workspace
# Default command (exec form): start a bash shell. Can be overridden at
# `docker run` time.
CMD ["/bin/bash"]