chore: update README and model inference testing scripts

This commit is contained in:
2026-05-06 18:35:53 +08:00
parent 404f1b85aa
commit 056df3b6ca
3 changed files with 105 additions and 22 deletions

View File

@@ -24,27 +24,6 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1
# pip install flask-restful
# torchrun $DISTRIBUTED_ARGS $MEGATRON_PATH/tools/run_text_generation_server.py \
# --tensor-model-parallel-size 1 \
# --pipeline-model-parallel-size 1 \
# --num-layers 12 \
# --hidden-size 3072 \
# --load ${CHECKPOINT} \
# --num-attention-heads 8 \
# --num-query-groups 4 \
# --max-position-embeddings 4096 \
# --fp16 \
# --micro-batch-size 1 \
# --seq-length 1024 \
# --temperature 1.0 \
# --top_p 0.9 \
# --seed 42 \
# --tokenizer-type GPT2BPETokenizer
# --vocab-file $VOCAB_FILE \
# --merge-file $MERGE_FILE \
torchrun $DISTRIBUTED_ARGS $MEGATRON_PATH/tools/run_text_generation_server.py \
--load $CHECKPOINT \
--tensor-model-parallel-size 1 \

View File

@@ -0,0 +1,50 @@
#!/bin/bash
# Serve a Qwen3-1.7B checkpoint with Megatron-LM's text generation server
# (single node, single GPU). Endpoint comes up on the torchrun master port's
# host; see Megatron-LM tools/run_text_generation_server.py for the HTTP API.
set -euo pipefail

# torchrun launcher arguments. An array keeps each flag a separate word
# without relying on unquoted word-splitting of a string.
DISTRIBUTED_ARGS=(
  --nproc_per_node 1
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

# <Path to checkpoint (e.g /345m)>
CHECKPOINT=/apps/yi/model_training/artifacts/checkpoints/qwen3_1p7b_smoke_yi
# <Path to vocab.json (e.g. /gpt2-vocab.json)>
# NOTE(review): VOCAB_FILE/MERGE_FILE are unused below — the server is started
# with HuggingFaceTokenizer, which only needs TOKENIZER_PATH. Kept for
# reference when switching back to GPT2BPETokenizer.
VOCAB_FILE=/apps/yi/model_training/data/tokenizer/vocab.json
# <Path to merges.txt (e.g. /gpt2-merges.txt)>
MERGE_FILE=/apps/yi/model_training/data/tokenizer/merges.txt
# <Path to tokenizer>
TOKENIZER_PATH=/apps/yi/model_training/data/tokenizer
MEGATRON_PATH=/apps/yi/model_training/Megatron-LM

# Serialize CUDA kernel launches per device connection, as Megatron-LM requires.
export CUDA_DEVICE_MAX_CONNECTIONS=1

# Server dependency: pip install flask-restful
torchrun "${DISTRIBUTED_ARGS[@]}" "$MEGATRON_PATH/tools/run_text_generation_server.py" \
    --load "$CHECKPOINT" \
    --tensor-model-parallel-size 1 \
    --pipeline-model-parallel-size 1 \
    --num-layers 28 \
    --hidden-size 2048 \
    --ffn-hidden-size 6144 \
    --num-attention-heads 16 \
    --num-query-groups 8 \
    --group-query-attention \
    --seq-length 4096 \
    --max-position-embeddings 4096 \
    --position-embedding-type rope \
    --rotary-base 10000 \
    --swiglu \
    --disable-bias-linear \
    --normalization RMSNorm \
    --untie-embeddings-and-output-weights \
    --tokenizer-type HuggingFaceTokenizer \
    --tokenizer-model "$TOKENIZER_PATH" \
    --bf16 \
    --micro-batch-size 1 \
    --inference-max-requests 1