Files
pretrain_kaiyuan2b/scripts/kaiyuan2b-training/params/qwen3_1p7b/hparams.sh

13 lines
245 B
Bash

# Hyperparameter flags for this model configuration, gathered into HPARAMS as
# one newline-separated string (one "--flag value" pair per line). The value
# begins and ends with a newline. Presumably expanded into a training
# launcher's command line by the script that sources this file -- confirm
# against the caller.

# Batch sizing.
HPARAMS="
--micro-batch-size 4
--global-batch-size 2048
"

# Run length plus evaluation / checkpoint / logging cadence.
HPARAMS="${HPARAMS}--train-iters 87000
--eval-iters 10
--eval-interval 1000
--save-interval 1000
--log-interval 1
"

# Learning-rate settings: --lr and --min-lr carry the same value and the
# decay style is "constant"; a 10-iteration warmup is requested.
HPARAMS="${HPARAMS}--lr 5e-3
--min-lr 5e-3
--lr-decay-style constant
--lr-warmup-iters 10
"