chore: update README and model inference testing scripts
This commit is contained in:
50
scripts/kaiyuan2b-training/eval_smoke_qwen3_1p7b.sh
Normal file
50
scripts/kaiyuan2b-training/eval_smoke_qwen3_1p7b.sh
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash
# Smoke-test inference server for the Qwen3 1.7B checkpoint.
# Launches Megatron-LM's text-generation REST server on a single GPU.
# Requires: pip install flask-restful
set -euo pipefail

# Single-node, single-process launch configuration for torchrun.
# Kept as an array so each flag/value survives quoting intact.
DISTRIBUTED_ARGS=(
  --nproc_per_node 1
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

# Megatron checkpoint directory to serve.
CHECKPOINT=/apps/yi/model_training/artifacts/checkpoints/qwen3_1p7b_smoke_yi

# Tokenizer assets.
# NOTE(review): VOCAB_FILE / MERGE_FILE are defined but never passed to the
# server (HuggingFaceTokenizer only takes --tokenizer-model); kept for parity
# with sibling scripts — confirm before removing.
VOCAB_FILE=/apps/yi/model_training/data/tokenizer/vocab.json
MERGE_FILE=/apps/yi/model_training/data/tokenizer/merges.txt
TOKENIZER_PATH=/apps/yi/model_training/data/tokenizer

MEGATRON_PATH=/apps/yi/model_training/Megatron-LM

# Megatron-LM expects this env var; serializes CUDA kernel launch queues.
export CUDA_DEVICE_MAX_CONNECTIONS=1

# Qwen3-1.7B architecture: 28 layers, hidden 2048, FFN 6144, 16 attention
# heads with 8 KV groups (GQA), RoPE, SwiGLU, RMSNorm, untied embeddings.
torchrun "${DISTRIBUTED_ARGS[@]}" "$MEGATRON_PATH/tools/run_text_generation_server.py" \
  --load "$CHECKPOINT" \
  --tensor-model-parallel-size 1 \
  --pipeline-model-parallel-size 1 \
  --num-layers 28 \
  --hidden-size 2048 \
  --ffn-hidden-size 6144 \
  --num-attention-heads 16 \
  --num-query-groups 8 \
  --group-query-attention \
  --seq-length 4096 \
  --max-position-embeddings 4096 \
  --position-embedding-type rope \
  --rotary-base 10000 \
  --swiglu \
  --disable-bias-linear \
  --normalization RMSNorm \
  --untie-embeddings-and-output-weights \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model "$TOKENIZER_PATH" \
  --bf16 \
  --micro-batch-size 1 \
  --inference-max-requests 1
|
||||
Reference in New Issue
Block a user