# Note: by default decoupled_weight_decay is True, so the "adam" optimizer acts as AdamW.
# Optimizer command-line flags: Adam betas/epsilon, weight decay, and gradient clipping.
OPTIM_ARGS=" --optimizer adam --adam-beta1 0.9 --adam-beta2 0.95 --adam-eps 1e-8 --weight-decay 0.1 --clip-grad 1.0 "