chore: update README and model inference testing scripts
This commit is contained in:
@@ -24,27 +24,6 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1

# pip install flask-restful

# torchrun $DISTRIBUTED_ARGS $MEGATRON_PATH/tools/run_text_generation_server.py \
#     --tensor-model-parallel-size 1 \
#     --pipeline-model-parallel-size 1 \
#     --num-layers 12 \
#     --hidden-size 3072 \
#     --load ${CHECKPOINT} \
#     --num-attention-heads 8 \
#     --num-query-groups 4 \
#     --max-position-embeddings 4096 \
#     --fp16 \
#     --micro-batch-size 1 \
#     --seq-length 1024 \
#     --temperature 1.0 \
#     --top_p 0.9 \
#     --seed 42 \
#     --tokenizer-type GPT2BPETokenizer \
#     --vocab-file $VOCAB_FILE \
#     --merge-file $MERGE_FILE

torchrun $DISTRIBUTED_ARGS $MEGATRON_PATH/tools/run_text_generation_server.py \
    --load $CHECKPOINT \
    --tensor-model-parallel-size 1 \

Reference in New Issue
Block a user