10 lines
213 B
Bash
10 lines
213 B
Bash
# Optimizer-related command-line flags, gathered into a single string.
# Expand it unquoted (e.g. `some_command $OPTIM_ARGS`) so that word splitting
# turns every flag and every value into its own argument.
# NOTE(review): the command that consumes these flags is not shown here —
# confirm the flag names and defaults against that tool's argument parser.
# note: by default decoupled_weight_decay is True, so the adam optimizer acts as AdamW
OPTIM_ARGS="
--optimizer adam
--adam-beta1 0.9
--adam-beta2 0.95
--adam-eps 1e-8
--weight-decay 0.1
--clip-grad 1.0
"