Initial Commit
This commit is contained in:
19
scripts/kaiyuan2b-training/params/qwen3_1p7b/model.sh
Normal file
19
scripts/kaiyuan2b-training/params/qwen3_1p7b/model.sh
Normal file
@@ -0,0 +1,19 @@
|
||||
# Note: the official Qwen3 training recipe uses QK norm, but Megatron has no official support for it, so it is omitted from the arguments below.
|
||||
|
||||
# Model-architecture command-line flags, kept as one whitespace-separated
# string. Stray "|" / "||||" lines (diff-table extraction residue) previously
# sat between the flags inside the quotes and would have been passed along as
# literal arguments; they are removed here. Flags and values are unchanged.
# NOTE(review): presumably expanded unquoted (word-split) by the script that
# sources this file and forwarded to a Megatron training entry point — confirm
# against the caller.
MODEL_ARGS="
    --seq-length 4096
    --hidden-size 2048
    --ffn-hidden-size 6144
    --num-layers 28
    --num-attention-heads 16
    --num-query-groups 8
    --rotary-base 10000
    --init-method-std 0.018
    --group-query-attention
    --max-position-embeddings 4096
    --position-embedding-type rope
    --swiglu
    --disable-bias-linear
    --normalization RMSNorm
    --untie-embeddings-and-output-weights
"
|
||||
Reference in New Issue
Block a user