From 506b3fcfdfca378c5fd851ff719fb4e271ec8bff Mon Sep 17 00:00:00 2001 From: sunshun-yuan Date: Thu, 14 Aug 2025 11:31:49 +0800 Subject: [PATCH] expose "batch_size" and "num_npu" in README of bevfusion model --- model_examples/BEVFusion/README.md | 24 ++++- .../nnodes_train_performance_16p_base_fp32.sh | 37 ++++--- model_examples/BEVFusion/test/parse_args.sh | 102 ++++++++++++++++++ .../BEVFusion/test/train_full_8p_base_fp32.sh | 28 +++-- .../test/train_performance_8p_base_fp32.sh | 31 +++--- 5 files changed, 182 insertions(+), 40 deletions(-) create mode 100644 model_examples/BEVFusion/test/parse_args.sh diff --git a/model_examples/BEVFusion/README.md b/model_examples/BEVFusion/README.md index 9863b82b..cbb0a934 100644 --- a/model_examples/BEVFusion/README.md +++ b/model_examples/BEVFusion/README.md @@ -129,15 +129,31 @@ cd ../ ``` - 单机8卡训练 + + 运行脚本支持命令行参数(支持默认值+关键字参数+位置参数) + - `--batch-size`:每卡batch size大小,默认值4; + - `--num-npu`:每节点NPU卡数,默认值8; ```shell - bash test/train_full_8p_base_fp32.sh # 8卡训练,默认训练6个epochs - bash test/train_performance_8p_base_fp32.sh # 8卡性能,默认训练1个epochs + # 精度测试拉起脚本,默认训练6个epochs + bash test/train_full_8p_base_fp32.sh --batch-size=4 --num-npu=8 # batch-size 和 num-npu 可不指定直接使用默认值,下同 + # 性能测试拉起脚本,默认训练1个epochs + bash test/train_performance_8p_base_fp32.sh --batch-size=4 --num-npu=8 ``` - 双机16卡性能 + + 运行脚本支持命令行参数(支持默认值+关键字参数+位置参数) + - `--batch-size`:每卡batch size大小,默认值4; + - `--num-npu`:每节点NPU卡数,默认值8; + - `--nnodes`:节点总数,默认值2; + - `--node-rank`:当前节点编号(0 ~ nnodes-1),默认为主节点0; + - `--port`:通信端口号,默认值29500; + - `--master-addr`:主节点IP地址; ```shell - bash test/nnodes_train_performance_16p_base_fp32.sh 2 0 port master_addr # 主节点,默认训练1个epochs - bash test/nnodes_train_performance_16p_base_fp32.sh 2 1 port master_addr # 副节点 + # 主节点拉起脚本,默认训练1个epochs + bash test/nnodes_train_performance_16p_base_fp32.sh --batch-size=4 --num-npu=8 --nnodes=2 --node-rank=0 --port=port --master-addr=master_addr # master-addr 必须指定,其余可省略以使用默认值 + # 副节点拉起脚本,默认训练1个epochs + bash
test/nnodes_train_performance_16p_base_fp32.sh --batch-size=4 --num-npu=8 --nnodes=2 --node-rank=1 --port=port --master-addr=master_addr # node-rank,master-addr 必须指定,其余可省略以使用默认值 ``` # 训练结果 diff --git a/model_examples/BEVFusion/test/nnodes_train_performance_16p_base_fp32.sh b/model_examples/BEVFusion/test/nnodes_train_performance_16p_base_fp32.sh index aa465b99..c69af40f 100644 --- a/model_examples/BEVFusion/test/nnodes_train_performance_16p_base_fp32.sh +++ b/model_examples/BEVFusion/test/nnodes_train_performance_16p_base_fp32.sh @@ -1,15 +1,16 @@ +#!/bin/bash + # 网络名称,同目录名称,需要模型审视修改 Network="BEVFusion" -batch_size=4 -gpus=8 - -NNODES=$1 -NODE_RANK=$2 -PORT=$3 -MASTER_ADDR=$4 -world_size=$((NNODES * gpus)) export PERFORMANCE_MODE=1 # Performance-Testing mode +batch_size=4 # per-NPU batch size +num_npu=8 # NPUs per node +nnodes=2 # total number of nodes +node_rank=0 # rank of this node (0 ~ nnodes-1); defaults to the master node +port=29500 # communication port +master_addr="" # master node IP address + # cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 cur_path=$(pwd) cur_path_last_dirname=${cur_path##*/} @@ -22,6 +23,13 @@ else fi source ${test_path_dir}/env_npu.sh +# Parse command-line arguments; they override the defaults set above. +source ${test_path_dir}/parse_args.sh +declare_required_params batch_size num_npu nnodes node_rank port master_addr # positional-argument order +parse_common_args "$@" + +base_batch_size=$(($batch_size * $num_npu)) +world_size=$((nnodes * num_npu)) # derived after parsing so --nnodes/--num-npu overrides are honoured #创建DeviceID输出目录,不需要修改 output_path=${cur_path}/test/output/ @@ -35,9 +43,10 @@ sed -i "s|max_epochs=6|max_epochs=1|g" projects/BEVFusion/configs/bevfusion_lida #训练开始时间,不需要修改 start_time=$(date +%s) bash tools/nnodes_dist_train.sh \ - projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${gpus} ${NNODES} ${NODE_RANK} ${PORT} ${MASTER_ADDR}\ - --cfg-options load_from=pretrained/bevfusion_lidar_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-2628f933.pth model.img_backbone.init_cfg.checkpoint=pretrained/swint-nuimages-pretrained.pth \ - > ${test_path_dir}/output/train_performance_8p_base_fp32.log 2>&1 & +
projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${num_npu} ${nnodes} ${node_rank} ${port} ${master_addr}\ + --cfg-options train_dataloader.batch_size=${batch_size} auto_scale_lr.base_batch_size=${base_batch_size} \ + load_from=pretrained/bevfusion_lidar_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-2628f933.pth model.img_backbone.init_cfg.checkpoint=pretrained/swint-nuimages-pretrained.pth \ + > ${test_path_dir}/output/train_performance_${num_npu}p_base_fp32.log 2>&1 & wait #训练结束时间,不需要修改 @@ -49,13 +58,13 @@ sed -i "s|max_epochs=1|max_epochs=6|g" projects/BEVFusion/configs/bevfusion_lida cd .. # 主节点打印性能数据 -if [ "$NODE_RANK" -eq 0 ]; then +if [ "$node_rank" -eq 0 ]; then #结果打印,不需要修改 echo "------------------ Final result ------------------" #获取性能数据,不需要修改 #单迭代训练时长,不需要修改 - TrainingTime=$(grep -v val ${test_path_dir}/output/train_performance_8p_base_fp32.log | grep -o " time: [0-9.]*" | tail -n +200 | grep -o "[0-9.]*" | awk '{sum += $1} END {print sum/NR}') + TrainingTime=$(grep -v val ${test_path_dir}/output/train_performance_${num_npu}p_base_fp32.log | grep -o " time: [0-9.]*" | tail -n +200 | grep -o "[0-9.]*" | awk '{sum += $1} END {print sum/NR}') #吞吐量 ActualFPS=$(awk BEGIN'{print ('$batch_size' * '$world_size') / '$TrainingTime'}') @@ -64,7 +73,7 @@ if [ "$NODE_RANK" -eq 0 ]; then echo "Final Performance images/sec : $ActualFPS" #loss值,不需要修改 - ActualLoss=$(grep -o "loss: [0-9.]*" ${test_path_dir}/output/train_performance_8p_base_fp32.log | awk 'END {print $NF}') + ActualLoss=$(grep -o "loss: [0-9.]*" ${test_path_dir}/output/train_performance_${num_npu}p_base_fp32.log | awk 'END {print $NF}') #打印,不需要修改 echo "Final Train Loss : ${ActualLoss}" diff --git a/model_examples/BEVFusion/test/parse_args.sh b/model_examples/BEVFusion/test/parse_args.sh new file mode 100644 index 00000000..eb3e1af4 --- /dev/null +++ b/model_examples/BEVFusion/test/parse_args.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# ================================ 
+# Shared argument parser: caller defaults first, then positional values, then --key=value options. +# ================================ + +# Usage: declare_required_params batch_size num_npu (names are listed in positional-argument order). +declare_required_params() { + local params=("$@") + REQUIRED_PARAMS=("${params[@]}") +} + +# Re-assigns every declared variable to its current value, so unset names become empty strings. +setup_defaults() { + local param + for param in "${REQUIRED_PARAMS[@]}"; do + printf -v "$param" '%s' "${!param}" + done +} + +# Prints usage examples to stdout. +show_help() { + cat << EOF +用法: $0 [参数] 或 $0 [选项] +示例: + a.sh 4 8 + a.sh --batch-size=4 --num-npu=8 + b.sh 4 8 --nnodes=2 --node-rank=1 --port=30000 --master-addr=192.168.1.100 +EOF +} + +# Parses "$@" into the declared variables; exits 1 on unknown, malformed or missing arguments. +parse_common_args() { + # Step 1: make sure declare_required_params has been called. + if [[ -z "${REQUIRED_PARAMS+set}" ]]; then + echo "错误:未调用 declare_required_params,请先声明所需参数!" >&2 + exit 1 + fi + + # Step 2: keep the caller's defaults for anything not passed on the command line. + setup_defaults + + # Step 3: split the arguments into positional values and long options. + local arg args=("$@") + local pos_args=() + local key_value_args=() + + for arg in "${args[@]}"; do + if [[ "$arg" == --* ]]; then + key_value_args+=("$arg") # long option (starts with --) + else + pos_args+=("$arg") # positional value + fi + done + + # Step 4: assign positional values in REQUIRED_PARAMS order and build the option-name map. + local idx=0 param opt_name + declare -A required_param_map + for param in "${REQUIRED_PARAMS[@]}"; do + opt_name="${param//_/-}" + required_param_map["$opt_name"]="$param" + if [[ $idx -lt ${#pos_args[@]} ]] && [[ -n "${pos_args[$idx]}" ]]; then + printf -v "$param" '%s' "${pos_args[$idx]}" + fi + idx=$((idx + 1)) + done + + # Step 5: apply long options; they are processed last, so they override positional values. + for arg in "${key_value_args[@]}"; do + # Reject --key without =value; it would otherwise store the key name as the value. + [[ "$arg" == --*=* ]] || { echo "参数格式错误,应为 --key=value: $arg" >&2; exit 1; } + + # Split --param=value into the option name and its value. + local param_key="${arg#--}" # drop the leading -- + local param_name="${param_key%%=*}" # e.g. batch-size + local value="${param_key#*=}" # e.g. 8 + + # Look the option name up in the map; an empty name is never a valid subscript. + if [[ -n "$param_name" && -n "${required_param_map[$param_name]}" ]]; then + local var_name="${required_param_map[$param_name]}" + printf -v "$var_name" '%s' "$value" + else + echo "未知参数: $arg" >&2 + exit 1 + fi + done + + # Step 6: every declared parameter must be non-empty by now. + for param in "${REQUIRED_PARAMS[@]}"; do + if [[ -z "${!param}" ]]; then + echo
"缺少必要参数: --${param//_/-}=" >&2 +show_help + exit 1 + fi + done + + # Step 7: report the final values. + echo "参数解析成功!" + for param in "${REQUIRED_PARAMS[@]}"; do + local name="$param" + echo "$name: ${!name}" + done +} \ No newline at end of file diff --git a/model_examples/BEVFusion/test/train_full_8p_base_fp32.sh b/model_examples/BEVFusion/test/train_full_8p_base_fp32.sh index a382fcf6..b9c8e979 100644 --- a/model_examples/BEVFusion/test/train_full_8p_base_fp32.sh +++ b/model_examples/BEVFusion/test/train_full_8p_base_fp32.sh @@ -1,7 +1,8 @@ +#!/bin/bash # 网络名称,同目录名称,需要模型审视修改 Network="BEVFusion" batch_size=4 -world_size=8 +num_npu=8 epochs=6 # cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 @@ -16,6 +17,12 @@ else fi source ${test_path_dir}/env_npu.sh +# 解析参数 +source ${test_path_dir}/parse_args.sh +declare_required_params batch_size num_npu # 接收参数顺序 +parse_common_args "$@" + +base_batch_size=$(($batch_size * $num_npu)) #创建DeviceID输出目录,不需要修改 output_path=${cur_path}/test/output/ @@ -34,9 +41,10 @@ cd mmdetection3d #训练开始时间,不需要修改 start_time=$(date +%s) bash tools/dist_train.sh \ - projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${world_size} \ - --cfg-options load_from=pretrained/bevfusion_lidar_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-2628f933.pth model.img_backbone.init_cfg.checkpoint=pretrained/swint-nuimages-pretrained.pth \ - > ${test_path_dir}/output/train_full_8p_base_fp32.log 2>&1 & + projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${num_npu} \ + --cfg-options train_dataloader.batch_size=${batch_size} auto_scale_lr.base_batch_size=${base_batch_size} \ + load_from=pretrained/bevfusion_lidar_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-2628f933.pth model.img_backbone.init_cfg.checkpoint=pretrained/swint-nuimages-pretrained.pth \ + > ${test_path_dir}/output/train_full_${num_npu}p_base_fp32.log 2>&1 & wait #训练结束时间,不需要修改 @@ -50,22 +58,22 @@ echo
"------------------ Final result ------------------" #获取性能数据,不需要修改 #单迭代训练时长,不需要修改 -TrainingTime=$(grep -v val ${test_path_dir}/output/train_full_8p_base_fp32.log | grep -o " time: [0-9.]*" | tail -n +200 | grep -o "[0-9.]*" | awk '{sum += $1} END {print sum/NR}') +TrainingTime=$(grep -v val ${test_path_dir}/output/train_full_${num_npu}p_base_fp32.log | grep -o " time: [0-9.]*" | tail -n +200 | grep -o "[0-9.]*" | awk '{sum += $1} END {print sum/NR}') #吞吐量 -ActualFPS=$(awk BEGIN'{print ('$batch_size' * '$world_size') / '$TrainingTime'}') +ActualFPS=$(awk BEGIN'{print ('$batch_size' * '$num_npu') / '$TrainingTime'}') #打印,不需要修改 echo "Final Performance images/sec : $ActualFPS" #loss值,不需要修改 -ActualLoss=$(grep -o "loss: [0-9.]*" ${test_path_dir}/output/train_full_8p_base_fp32.log | awk 'END {print $NF}') +ActualLoss=$(grep -o "loss: [0-9.]*" ${test_path_dir}/output/train_full_${num_npu}p_base_fp32.log | awk 'END {print $NF}') #NDS值 -NDS=$(grep -o "pred_instances_3d_NuScenes/NDS: [0-9.]*" ${test_path_dir}/output/train_full_8p_base_fp32.log | awk 'END {print $NF}') +NDS=$(grep -o "pred_instances_3d_NuScenes/NDS: [0-9.]*" ${test_path_dir}/output/train_full_${num_npu}p_base_fp32.log | awk 'END {print $NF}') #mAP值 -mAP=$(grep -o "pred_instances_3d_NuScenes/mAP: [0-9.]*" ${test_path_dir}/output/train_full_8p_base_fp32.log | awk 'END {print $NF}') +mAP=$(grep -o "pred_instances_3d_NuScenes/mAP: [0-9.]*" ${test_path_dir}/output/train_full_${num_npu}p_base_fp32.log | awk 'END {print $NF}') #打印,不需要修改 echo "Final Train Loss : ${ActualLoss}" @@ -76,7 +84,7 @@ echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 #训练用例信息,不需要修改 BatchSize=${batch_size} -WORLD_SIZE=${world_size} +WORLD_SIZE=${num_npu} DeviceType=$(uname -m) CaseName=${Network}_bs${BatchSize}_${WORLD_SIZE}'p'_'acc' diff --git a/model_examples/BEVFusion/test/train_performance_8p_base_fp32.sh b/model_examples/BEVFusion/test/train_performance_8p_base_fp32.sh index f59bc6c2..cf116f03 100644 --- 
a/model_examples/BEVFusion/test/train_performance_8p_base_fp32.sh +++ b/model_examples/BEVFusion/test/train_performance_8p_base_fp32.sh @@ -1,9 +1,9 @@ +#!/bin/bash # 网络名称,同目录名称,需要模型审视修改 Network="BEVFusion" -batch_size=4 -world_size=8 - export PERFORMANCE_MODE=1 # Performance-Testing mode +batch_size=4 +num_npu=8 # cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 cur_path=$(pwd) @@ -17,6 +17,12 @@ else fi source ${test_path_dir}/env_npu.sh +# 解析参数 +source ${test_path_dir}/parse_args.sh +declare_required_params batch_size num_npu # 接收参数顺序 +parse_common_args "$@" + +base_batch_size=$(($batch_size * $num_npu)) #创建DeviceID输出目录,不需要修改 output_path=${cur_path}/test/output/ @@ -30,9 +36,10 @@ sed -i "s|max_epochs=6|max_epochs=1|g" projects/BEVFusion/configs/bevfusion_lida #训练开始时间,不需要修改 start_time=$(date +%s) bash tools/dist_train.sh \ - projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${world_size} \ - --cfg-options load_from=pretrained/bevfusion_lidar_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-2628f933.pth model.img_backbone.init_cfg.checkpoint=pretrained/swint-nuimages-pretrained.pth \ - > ${test_path_dir}/output/train_performance_8p_base_fp32.log 2>&1 & + projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${num_npu} \ + --cfg-options train_dataloader.batch_size=${batch_size} auto_scale_lr.base_batch_size=${base_batch_size} \ + load_from=pretrained/bevfusion_lidar_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-2628f933.pth model.img_backbone.init_cfg.checkpoint=pretrained/swint-nuimages-pretrained.pth \ + > ${test_path_dir}/output/train_performance_${num_npu}p_base_fp32.log 2>&1 & wait #训练结束时间,不需要修改 @@ -48,22 +55,22 @@ echo "------------------ Final result ------------------" #获取性能数据,不需要修改 #单迭代训练时长,不需要修改 -TrainingTime=$(grep -v val ${test_path_dir}/output/train_performance_8p_base_fp32.log | grep -o " time: [0-9.]*" | tail -n +200 | grep -o "[0-9.]*" | awk '{sum += $1} 
END {print sum/NR}') +TrainingTime=$(grep -v val ${test_path_dir}/output/train_performance_${num_npu}p_base_fp32.log | grep -o " time: [0-9.]*" | tail -n +200 | grep -o "[0-9.]*" | awk '{sum += $1} END {print sum/NR}') #吞吐量 -ActualFPS=$(awk BEGIN'{print ('$batch_size' * '$world_size') / '$TrainingTime'}') +ActualFPS=$(awk BEGIN'{print ('$batch_size' * '$num_npu') / '$TrainingTime'}') #打印,不需要修改 echo "Final Performance images/sec : $ActualFPS" #loss值,不需要修改 -ActualLoss=$(grep -o "loss: [0-9.]*" ${test_path_dir}/output/train_performance_8p_base_fp32.log | awk 'END {print $NF}') +ActualLoss=$(grep -o "loss: [0-9.]*" ${test_path_dir}/output/train_performance_${num_npu}p_base_fp32.log | awk 'END {print $NF}') #NDS值 -NDS=$(grep -o "pred_instances_3d_NuScenes/NDS: [0-9.]*" ${test_path_dir}/output/train_performance_8p_base_fp32.log | awk 'END {print $NF}') +NDS=$(grep -o "pred_instances_3d_NuScenes/NDS: [0-9.]*" ${test_path_dir}/output/train_performance_${num_npu}p_base_fp32.log | awk 'END {print $NF}') #mAP值 -mAP=$(grep -o "pred_instances_3d_NuScenes/mAP: [0-9.]*" ${test_path_dir}/output/train_performance_8p_base_fp32.log | awk 'END {print $NF}') +mAP=$(grep -o "pred_instances_3d_NuScenes/mAP: [0-9.]*" ${test_path_dir}/output/train_performance_${num_npu}p_base_fp32.log | awk 'END {print $NF}') #打印,不需要修改 echo "Final Train Loss : ${ActualLoss}" @@ -74,7 +81,7 @@ echo "E2E Training Duration sec : $e2e_time" #性能看护结果汇总 #训练用例信息,不需要修改 BatchSize=${batch_size} -WORLD_SIZE=${world_size} +WORLD_SIZE=${num_npu} DeviceType=$(uname -m) CaseName=${Network}_bs${BatchSize}_${WORLD_SIZE}'p'_'performance' -- Gitee