evaluate_bigbench.sh 777 B

12345678910111213141516171819202122232425262728
  1. #!/bin/bash
  2. NUM_WORKERS=1
  3. NUM_GPUS_PER_WORKER=8
  4. HOST_FILE_PATH="<your hostfile>"
  5. OPTIONS_NCCL="NCCL_IB_DISABLE=0 NCCL_NET_GDR_LEVEL=2 CUDA_LAUNCH_BLOCKING=0"
  6. script_path=$(realpath $0)
  7. script_dir=$(dirname $script_path)
  8. main_dir=$(dirname $script_dir)
  9. source "${main_dir}/configs/model_glm_130b.sh"
  10. DATA_PATH="/thudm/LargeScale/data/zeroshot"
  11. ARGS="${main_dir}/evaluate_bigbench.py \
  12. --mode inference \
  13. --json-shots 0 1 2 3 5 \
  14. --task $* \
  15. $MODEL_ARGS"
  16. TIMESTAMP=$(date +'%Y.%m.%d-%H:%M:%S')
  17. EXP_NAME=${TIMESTAMP}
  18. mkdir -p logs
  19. run_cmd="TF_FORCE_GPU_ALLOW_GROWTH=true ${OPTIONS_NCCL} deepspeed --num_nodes ${NUM_WORKERS} --num_gpus ${NUM_GPUS_PER_WORKER} --hostfile ${HOST_FILE_PATH} ${ARGS}"
  20. eval ${run_cmd} 2>&1 | tee logs/${EXP_NAME}.log