#!/usr/bin/env bash
#
# Evaluation launcher for LLaDA checkpoints (dllm/pipelines/llada/eval.py).
#
# Usage:
#   bash eval.sh [--model_name_or_path PATH] [--instruct True|False] [--num_gpu N]
#
# Fail on unset variables and broken pipelines. `-e` is deliberately NOT set:
# the benchmark launches below are independent, and one failing task should
# not abort the remaining ones.
set -uo pipefail

# ===== Mandatory for proper import and evaluation =====
# Put the repo root first on PYTHONPATH; `${PYTHONPATH:-}` tolerates an
# unset PYTHONPATH under `set -u` (the unguarded form would abort).
export PYTHONPATH=.:${PYTHONPATH:-}
export HF_ALLOW_CODE_EVAL=1                 # Allow code evaluation (needed by the humaneval/mbpp tasks below)
export HF_DATASETS_TRUST_REMOTE_CODE=True   # For cmmlu dataset

# ===== Optional but recommended for stability and debugging =====
export PYTHONBREAKPOINT=0               # Disable interactive breakpoints
export NCCL_ASYNC_ERROR_HANDLING=1      # Enable async error handling for multi-GPU communication to avoid deadlocks
export NCCL_DEBUG=warn                  # Show NCCL warnings for better diagnosis without flooding logs
export TORCH_DISTRIBUTED_DEBUG=DETAIL   # Provide detailed logging for PyTorch distributed debugging

# ===== Input Arguments (defaults; overridable via the CLI flags parsed below) =====
model_name_or_path="GSAI-ML/LLaDA-8B-Instruct"
instruct=True
num_gpu=4
# ===== Argument parsing =====
# Overrides the defaults defined above from command-line flags.
# parse_args FLAG VALUE ...
#   Recognized flags: --model_name_or_path, --instruct, --num_gpu.
#   Writes globals: model_name_or_path, instruct, num_gpu.
# The `*)` arm is essential: the original open-ended `case` left an
# unrecognized `$1` unconsumed, so the while-loop spun forever.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model_name_or_path)
        model_name_or_path="$2"; shift 2 ;;
      --instruct)
        instruct="$2"; shift 2 ;;
      --num_gpu)
        num_gpu="$2"; shift 2 ;;
      *)
        echo "Unknown option: $1" >&2
        exit 2 ;;
    esac
  done
}
parse_args "$@"
# ===== Conditional Configurations =====
# Instruct checkpoints need the chat template applied; base checkpoints
# use the plain model arguments.
common_args="--model llada"
case "$instruct" in
  True)
    echo ">>> Running in INSTRUCT mode"
    common_args="${common_args} --apply_chat_template"
    ;;
  *)
    echo ">>> Running in BASE mode"
    ;;
esac
# =======================
# Generation Tasks
# =======================

# Every generation benchmark shares the same sampler configuration; only the
# task name and few-shot count differ between runs.
gen_model_args="pretrained=${model_name_or_path},is_check_greedy=False,mc_num=1,max_new_tokens=1024,steps=1024,block_length=32,cfg=0.0"

# run_gen_task TASK NUM_FEWSHOT — launch one generation benchmark.
run_gen_task() {
  accelerate launch --num_processes ${num_gpu} dllm/pipelines/llada/eval.py \
    --tasks "$1" --num_fewshot "$2" ${common_args} \
    --model_args "${gen_model_args}"
}

if [ "$instruct" = "True" ]; then
  # Instruct Generation Tasks
  run_gen_task gsm8k_cot 8
  run_gen_task bbh 3
  run_gen_task minerva_math 4
  run_gen_task humaneval_instruct 0
  run_gen_task mbpp_llada_instruct 3
else
  # Base Generation Tasks
  run_gen_task gsm8k 8
  run_gen_task bbh 3
  run_gen_task minerva_math 4
  run_gen_task humaneval 0
  run_gen_task mbpp 3
fi
# =======================
# Likelihood Tasks
# =======================

# run_lik_task TASK NUM_FEWSHOT SAMPLER_ARGS — launch one likelihood-style
# benchmark; SAMPLER_ARGS carries the per-task mc_num/length/cfg settings.
run_lik_task() {
  accelerate launch --num_processes ${num_gpu} dllm/pipelines/llada/eval.py \
    --tasks "$1" --num_fewshot "$2" ${common_args} \
    --model_args "pretrained=${model_name_or_path},is_check_greedy=False,$3"
}

if [ "$instruct" = "True" ]; then
  run_lik_task mmlu_generative 0 "mc_num=1,max_new_tokens=3,steps=3,block_length=3,cfg=0.0"
  run_lik_task mmlu_pro 0 "mc_num=1,max_new_tokens=256,steps=256,block_length=256,cfg=0.0"
  run_lik_task hellaswag_gen 0 "mc_num=1,max_new_tokens=3,steps=3,block_length=3,cfg=0.0"
  run_lik_task arc_challenge_chat 0 "mc_num=1,max_new_tokens=5,steps=5,block_length=5,cfg=0.0"
  run_lik_task gpqa_n_shot_gen 5 "mc_num=1,max_new_tokens=32,steps=32,block_length=32,cfg=0.0"
else
  run_lik_task gpqa_main_n_shot 5 "mc_num=128,max_new_tokens=1024,steps=1024,block_length=1024,cfg=0.5"
  run_lik_task truthfulqa_mc2 0 "mc_num=128,max_new_tokens=1024,steps=1024,block_length=1024,cfg=2.0"
  run_lik_task arc_challenge 0 "mc_num=128,max_new_tokens=1024,steps=1024,block_length=1024,cfg=0.5"
  run_lik_task hellaswag 0 "mc_num=128,max_new_tokens=1024,steps=1024,block_length=1024,cfg=0.5"
  run_lik_task winogrande 5 "mc_num=128,max_new_tokens=1024,steps=1024,block_length=1024,cfg=0.0"
  run_lik_task piqa 0 "mc_num=128,max_new_tokens=1024,steps=1024,block_length=1024,cfg=0.5"
  run_lik_task mmlu 5 "mc_num=1,max_new_tokens=1024,steps=1024,block_length=1024,cfg=0.0"
  run_lik_task cmmlu 5 "mc_num=1,max_new_tokens=1024,steps=1024,block_length=1024,cfg=0.0"
  run_lik_task ceval-valid 5 "mc_num=1,max_new_tokens=1024,steps=1024,block_length=1024,cfg=0.0"
fi