chore: adapt path and training hparams
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
|
||||
DATA_DIR=/ssd/yi/converted_data/megatron_phase1
|
||||
DATA_DIR=/ssd1/yi/converted_data/phase1
|
||||
|
||||
START=0
|
||||
END=210
|
||||
END=0
|
||||
|
||||
DATA_PATHS=""
|
||||
for idx in $(seq -f "%05g" $START $END); do
|
||||
@@ -13,7 +13,7 @@ DATA_ARGS="
|
||||
--data-path ${DATA_PATHS}
|
||||
--split 999,1,0
|
||||
--tokenizer-type HuggingFaceTokenizer
|
||||
--tokenizer-model /apps/yi/model_training/data/tokenizer
|
||||
--tokenizer-model /ssd1/yi/data/tokenizer
|
||||
"
|
||||
|
||||
# --vocab-file /apps/yi/model_training/data/tokenizer/vocab.json
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
HPARAMS="
|
||||
--micro-batch-size 16
|
||||
--micro-batch-size 4
|
||||
--global-batch-size 2048
|
||||
--train-iters 87000
|
||||
--eval-iters 10
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
ARTIFACT_ROOT=${ARTIFACT_ROOT:-/apps/yi/model_training/artifacts}
|
||||
ARTIFACT_ROOT=${ARTIFACT_ROOT:-/ssd1/yi/artifacts}
|
||||
RUN_STATE_DIR="${ARTIFACT_ROOT}/run_state"
|
||||
LOG_DIR="${ARTIFACT_ROOT}/logs"
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1
|
||||
MODE=${1:-qwen3_1p7b_smoke_yi}
|
||||
TRAIN_NAME=${2:-qwen3_1p7b_smoke_yi}
|
||||
|
||||
MEGATRON_PATH=/apps/yi/model_training/Megatron-LM
|
||||
ARTIFACT_ROOT=/apps/yi/model_training/artifacts
|
||||
SCRIPT_DIR=/apps/yi/model_training/scripts/kaiyuan2b-training
|
||||
MEGATRON_PATH=/ssd1/yi/pretrain_kaiyuan2b/Megatron-LM
|
||||
ARTIFACT_ROOT=/ssd1/yi/artifacts
|
||||
SCRIPT_DIR=/ssd1/yi/pretrain_kaiyuan2b/scripts/kaiyuan2b-training
|
||||
PARAMS_DIR="${SCRIPT_DIR}/params"
|
||||
TB_DIR="${ARTIFACT_ROOT}/tb_logs/${TRAIN_NAME}"
|
||||
CKPT_DIR="${ARTIFACT_ROOT}/checkpoints/${TRAIN_NAME}"
|
||||
|
||||
Reference in New Issue
Block a user