Files
pretrain_kaiyuan2b/scripts/kaiyuan2b-training/params/optim_common.sh
2026-05-06 15:06:07 +08:00

10 lines
213 B
Bash

# Optimizer command-line flags shared by the training scripts.
#
# Note: decoupled_weight_decay is True by default, so selecting the "adam"
# optimizer here effectively behaves as AdamW.
#
# The resulting string starts with a newline and carries one "flag value"
# pair per line, each terminated by a newline (the empty first argument
# below produces the leading newline).
# NOTE(review): presumably expanded unquoted by the launcher so that it
# word-splits into separate arguments -- confirm against the caller.
printf -v OPTIM_ARGS '%s\n' \
  '' \
  '--optimizer adam' \
  '--adam-beta1 0.9' \
  '--adam-beta2 0.95' \
  '--adam-eps 1e-8' \
  '--weight-decay 0.1' \
  '--clip-grad 1.0'