# syntax=docker/dockerfile:1
FROM nvcr.io/nvidia/pytorch:25.10-py3

# Build-time proxy settings (both upper- and lower-case forms, since tools
# differ in which they read). Supplied via --build-arg; as ARGs they do not
# leak into the runtime environment of containers.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ARG http_proxy
ARG https_proxy

# Build-time only: keep apt non-interactive during image build without
# baking DEBIAN_FRONTEND into the runtime environment (ARG, not ENV).
ARG DEBIAN_FRONTEND=noninteractive

ENV PYTHONUNBUFFERED=1
# Never keep pip's download cache inside image layers.
ENV PIP_NO_CACHE_DIR=1
# NGC images ship a pip constraint file pinning many packages; clearing
# PIP_CONSTRAINT lets the installs below upgrade past those pins.
# NOTE(review): assumed intent — confirm this is deliberate.
ENV PIP_CONSTRAINT=
# Cap parallel compile jobs for from-source builds (e.g. Apex CUDA extensions).
ENV MAX_JOBS=8
# Make the editable Megatron-LM checkout importable everywhere.
ENV PYTHONPATH=/opt/Megatron-LM:$PYTHONPATH

# OS-level build and developer tooling, in a single layer: update+install
# together (avoids the stale-index bug), skip recommended packages (DL3015),
# and remove the apt lists in the same layer so they never persist.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential bzip2 ca-certificates cmake curl git htop \
    ninja-build pkg-config rsync tmux vim wget \
    && rm -rf /var/lib/apt/lists/*

# Configure the pip index. Defaults preserve the original behavior (Tsinghua
# mirror for fast installs inside China); override both ARGs via --build-arg
# when building elsewhere. Then refresh the core Python build tooling.
ARG PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
ARG PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn
RUN python -m pip config set global.index-url "${PIP_INDEX_URL}" && \
    python -m pip config set global.trusted-host "${PIP_TRUSTED_HOST}" && \
    python -m pip install -U pip setuptools wheel packaging

# Python training / data / evaluation stack.
# NOTE(review): versions are intentionally unpinned (hadolint DL3013), so
# rebuilds are not reproducible — consider a pinned requirements.txt or a
# constraint file once the stack stabilizes.
RUN python -m pip install \
    transformers datasets tokenizers sentencepiece accelerate \
    numpy pandas pyarrow fastparquet zstandard jsonlines tqdm rich einops regex \
    tensorboard wandb evaluate lm-eval \
    omegaconf hydra-core nltk ftfy six psutil pydantic

# NGC PyTorch usually already includes Transformer Engine.
# Keep this check; do not reinstall TE unless it fails.
# Deliberately non-fatal: an import failure is printed but the Python script
# still exits 0, so the build continues either way (the output in the build
# log is the signal).
RUN python - <<'PY'
import torch
print("torch:", torch.__version__, "cuda:", torch.version.cuda)
try:
    import transformer_engine
    print("transformer_engine: OK")
except Exception as e:
    print("transformer_engine import failed:", e)
PY

# Megatron-LM, installed editable so both PYTHONPATH and pip metadata resolve
# the /opt checkout. MEGATRON_REF defaults to the default-branch tip (the
# previous behavior); pass a tag or branch via --build-arg for reproducible
# builds. setuptools is pinned to the range Megatron's build currently needs,
# and --no-build-isolation keeps the build against the image's torch.
ARG MEGATRON_REF=main
RUN git clone --branch "${MEGATRON_REF}" https://github.com/NVIDIA/Megatron-LM.git /opt/Megatron-LM && \
    python -m pip install -U "setuptools<80.0.0,>=77.0.0" packaging && \
    python -m pip install --no-build-isolation -e /opt/Megatron-LM

# Optional Apex: only install if you really need fused optimizers from Apex.
# Many modern Megatron paths rely more on Transformer Engine / fused kernels.
# APEX_REF defaults to the default-branch tip (the previous behavior); pass a
# tag or branch via --build-arg for reproducible builds. Compiles the C++ and
# CUDA extensions from source (parallelism bounded by MAX_JOBS), so this
# layer is slow; installing from the path avoids `cd` inside RUN (DL3003).
ARG APEX_REF=master
RUN git clone --branch "${APEX_REF}" https://github.com/NVIDIA/apex.git /opt/apex && \
    python -m pip install -v --disable-pip-version-check --no-build-isolation \
      --config-settings "--build-option=--cpp_ext" \
      --config-settings "--build-option=--cuda_ext" \
      /opt/apex

# Default working directory for mounted jobs; interactive shell as the
# default command (exec-form CMD, so `docker run <image> <cmd>` overrides
# it cleanly).
# NOTE(review): the image runs as root, which is typical for NGC training
# images — add a USER directive if your cluster requires non-root.
WORKDIR /workspace
CMD ["/bin/bash"]