diff --git a/model_examples/BEVDet4D/BEVDet.patch b/model_examples/BEVDet4D/BEVDet.patch new file mode 100644 index 0000000000000000000000000000000000000000..a15b0bf6e5ed29a38813fde6b79d55ace6f5e0e9 --- /dev/null +++ b/model_examples/BEVDet4D/BEVDet.patch @@ -0,0 +1,502 @@ +diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py +index 5fc198b..576738c 100644 +--- a/configs/_base_/default_runtime.py ++++ b/configs/_base_/default_runtime.py +@@ -10,7 +10,7 @@ log_config = dict( + dict(type='TensorboardLoggerHook') + ]) + # yapf:enable +-dist_params = dict(backend='nccl') ++dist_params = dict(backend='hccl') + log_level = 'INFO' + work_dir = None + load_from = None +diff --git a/configs/bevdet/bevdet-stbase-4d-stereo-512x1408-cbgs.py b/configs/bevdet/bevdet-stbase-4d-stereo-512x1408-cbgs.py +index 51f526b..ecb1cb6 100644 +--- a/configs/bevdet/bevdet-stbase-4d-stereo-512x1408-cbgs.py ++++ b/configs/bevdet/bevdet-stbase-4d-stereo-512x1408-cbgs.py +@@ -271,7 +271,7 @@ test_data_config = dict( + ann_file=data_root + 'bevdetv3-nuscenes_infos_val.pkl') + + data = dict( +- samples_per_gpu=2, # with 32 GPU ++ samples_per_gpu=8, # with 32 GPU + workers_per_gpu=4, + train=dict( + type='CBGSDataset', +@@ -303,6 +303,13 @@ lr_config = dict( + step=[20,]) + runner = dict(type='EpochBasedRunner', max_epochs=20) + ++log_config = dict( ++ interval=1, ++ hooks=[dict(type='TextLoggerHook'), ++ dict(type='TensorboardLoggerHook')]) ++dist_params = dict(backend='hccl') ++log_level = 'INFO' ++ + custom_hooks = [ + dict( + type='MEGVIIEMAHook', +diff --git a/mmdet3d/__init__.py b/mmdet3d/__init__.py +index 643c39c..0cf030f 100644 +--- a/mmdet3d/__init__.py ++++ b/mmdet3d/__init__.py +@@ -2,7 +2,6 @@ + import mmcv + + import mmdet +-import mmseg + from .version import __version__, short_version + + +@@ -19,7 +18,7 @@ def digit_version(version_str): + + + mmcv_minimum_version = '1.5.2' +-mmcv_maximum_version = '1.7.0' ++mmcv_maximum_version = '1.7.2' + mmcv_version 
= digit_version(mmcv.__version__) + + +@@ -39,11 +38,5 @@ assert (mmdet_version >= digit_version(mmdet_minimum_version) + + mmseg_minimum_version = '0.20.0' + mmseg_maximum_version = '1.0.0' +-mmseg_version = digit_version(mmseg.__version__) +-assert (mmseg_version >= digit_version(mmseg_minimum_version) +- and mmseg_version <= digit_version(mmseg_maximum_version)), \ +- f'MMSEG=={mmseg.__version__} is used but incompatible. ' \ +- f'Please install mmseg>={mmseg_minimum_version}, ' \ +- f'<={mmseg_maximum_version}.' + + __all__ = ['__version__', 'short_version'] +diff --git a/mmdet3d/apis/train.py b/mmdet3d/apis/train.py +index 4d97026..be10ecd 100644 +--- a/mmdet3d/apis/train.py ++++ b/mmdet3d/apis/train.py +@@ -4,7 +4,7 @@ import warnings + + import numpy as np + import torch +-from mmcv.parallel import MMDataParallel, MMDistributedDataParallel ++from mmcv.device.npu import NPUDataParallel, NPUDistributedDataParallel + from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, + Fp16OptimizerHook, OptimizerHook, build_optimizer, + build_runner, get_dist_info) +@@ -103,13 +103,13 @@ def train_segmentor(model, + find_unused_parameters = cfg.get('find_unused_parameters', False) + # Sets the `find_unused_parameters` parameter in + # torch.nn.parallel.DistributedDataParallel +- model = MMDistributedDataParallel( ++ model = NPUDistributedDataParallel( + model.cuda(), + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False, + find_unused_parameters=find_unused_parameters) + else: +- model = MMDataParallel( ++ model = NPUDataParallel( + model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) + + # build runner +@@ -223,13 +223,13 @@ def train_detector(model, + find_unused_parameters = cfg.get('find_unused_parameters', False) + # Sets the `find_unused_parameters` parameter in + # torch.nn.parallel.DistributedDataParallel +- model = MMDistributedDataParallel( ++ model = NPUDistributedDataParallel( + model.cuda(), + 
device_ids=[torch.cuda.current_device()], + broadcast_buffers=False, + find_unused_parameters=find_unused_parameters) + else: +- model = MMDataParallel( ++ model = NPUDataParallel( + model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) + + # build runner +diff --git a/mmdet3d/datasets/pipelines/loading.py b/mmdet3d/datasets/pipelines/loading.py +index b9357ff..3f23888 100644 +--- a/mmdet3d/datasets/pipelines/loading.py ++++ b/mmdet3d/datasets/pipelines/loading.py +@@ -1163,7 +1163,7 @@ class LoadAnnotations(object): + + def __call__(self, results): + gt_boxes, gt_labels = results['ann_infos'] +- gt_boxes, gt_labels = torch.Tensor(gt_boxes), torch.tensor(gt_labels) ++ gt_boxes, gt_labels = torch.Tensor(np.array(gt_boxes)), torch.tensor(np.array(gt_labels)) + if len(gt_boxes) == 0: + gt_boxes = torch.zeros(0, 9) + results['gt_bboxes_3d'] = \ +diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py +index a960dd3..a52a1d8 100644 +--- a/mmdet3d/datasets/pipelines/transforms_3d.py ++++ b/mmdet3d/datasets/pipelines/transforms_3d.py +@@ -519,7 +519,7 @@ class ObjectSample(object): + input_dict['img'] = sampled_dict['img'] + gt_bboxes_ignore = np.ones_like(gt_labels_3d) + gt_bboxes_ignore[num_exist:] = 0 +- gt_bboxes_ignore = gt_bboxes_ignore.astype(np.bool) ++ gt_bboxes_ignore = gt_bboxes_ignore.astype(np.bool_) + input_dict['gt_bboxes_ignore'] = gt_bboxes_ignore + input_dict['gt_bboxes_3d'] = gt_bboxes_3d + input_dict['gt_labels_3d'] = gt_labels_3d.astype(np.int64) +@@ -924,14 +924,14 @@ class ObjectRangeFilter(object): + + if 'gt_bboxes_ignore' in input_dict: + gt_bboxes_ignore = input_dict['gt_bboxes_ignore'] +- gt_bboxes_ignore = gt_bboxes_ignore[mask.numpy().astype(np.bool)] ++ gt_bboxes_ignore = gt_bboxes_ignore[mask.numpy().astype(np.bool_)] + input_dict['gt_bboxes_ignore'] = gt_bboxes_ignore + gt_bboxes_3d = gt_bboxes_3d[mask] + # mask is a torch tensor but gt_labels_3d is still numpy array + # using mask to index 
gt_labels_3d will cause bug when + # len(gt_labels_3d) == 1, where mask=1 will be interpreted + # as gt_labels_3d[1] and cause out of index error +- gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool)] ++ gt_labels_3d = gt_labels_3d[mask.cpu().numpy().astype(np.bool_)] + + # limit rad to [-pi, pi] + gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi) +diff --git a/mmdet3d/models/detectors/__init__.py b/mmdet3d/models/detectors/__init__.py +index afc800c..3bf7046 100644 +--- a/mmdet3d/models/detectors/__init__.py ++++ b/mmdet3d/models/detectors/__init__.py +@@ -1,9 +1,8 @@ + # Copyright (c) OpenMMLab. All rights reserved. + from .base import Base3DDetector +-from .bevdet import BEVDepth4D, BEVDet, BEVDet4D, BEVDetTRT, BEVStereo4D ++from .bevdet import BEVDepth4D, BEVDet, BEVDet4D, BEVStereo4D + from .bevdet_occ import BEVStereo4DOCC + from .centerpoint import CenterPoint +-from .dal import DAL + from .dynamic_voxelnet import DynamicVoxelNet + from .fcos_mono3d import FCOSMono3D + from .groupfree3dnet import GroupFree3DNet +@@ -28,5 +27,5 @@ __all__ = [ + 'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector', + 'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D', + 'MinkSingleStage3DDetector', 'SASSD', 'BEVDet', 'BEVDet4D', 'BEVDepth4D', +- 'BEVDetTRT', 'BEVStereo4D', 'BEVStereo4DOCC' ++ 'BEVStereo4D', 'BEVStereo4DOCC' + ] +diff --git a/mmdet3d/models/detectors/bevdet.py b/mmdet3d/models/detectors/bevdet.py +index ad1154e..34d4286 100644 +--- a/mmdet3d/models/detectors/bevdet.py ++++ b/mmdet3d/models/detectors/bevdet.py +@@ -3,7 +3,6 @@ import torch + import torch.nn.functional as F + from mmcv.runner import force_fp32 + +-from mmdet3d.ops.bev_pool_v2.bev_pool import TRTBEVPoolv2 + from mmdet.models import DETECTORS + from .. 
import builder + from .centerpoint import CenterPoint +@@ -215,58 +214,6 @@ class BEVDet(CenterPoint): + return outs + + +-@DETECTORS.register_module() +-class BEVDetTRT(BEVDet): +- +- def result_serialize(self, outs): +- outs_ = [] +- for out in outs: +- for key in ['reg', 'height', 'dim', 'rot', 'vel', 'heatmap']: +- outs_.append(out[0][key]) +- return outs_ +- +- def result_deserialize(self, outs): +- outs_ = [] +- keys = ['reg', 'height', 'dim', 'rot', 'vel', 'heatmap'] +- for head_id in range(len(outs) // 6): +- outs_head = [dict()] +- for kid, key in enumerate(keys): +- outs_head[0][key] = outs[head_id * 6 + kid] +- outs_.append(outs_head) +- return outs_ +- +- def forward( +- self, +- img, +- ranks_depth, +- ranks_feat, +- ranks_bev, +- interval_starts, +- interval_lengths, +- ): +- x = self.img_backbone(img) +- x = self.img_neck(x) +- x = self.img_view_transformer.depth_net(x) +- depth = x[:, :self.img_view_transformer.D].softmax(dim=1) +- tran_feat = x[:, self.img_view_transformer.D:( +- self.img_view_transformer.D + +- self.img_view_transformer.out_channels)] +- tran_feat = tran_feat.permute(0, 2, 3, 1) +- x = TRTBEVPoolv2.apply(depth.contiguous(), tran_feat.contiguous(), +- ranks_depth, ranks_feat, ranks_bev, +- interval_starts, interval_lengths) +- x = x.permute(0, 3, 1, 2).contiguous() +- bev_feat = self.bev_encoder(x) +- outs = self.pts_bbox_head([bev_feat]) +- outs = self.result_serialize(outs) +- return outs +- +- def get_bev_pool_input(self, input): +- input = self.prepare_inputs(input) +- coor = self.img_view_transformer.get_lidar_coor(*input[1:7]) +- return self.img_view_transformer.voxel_pooling_prepare_v2(coor) +- +- + @DETECTORS.register_module() + class BEVDet4D(BEVDet): + r"""BEVDet4D paradigm for multi-camera 3D object detection. 
+diff --git a/mmdet3d/models/necks/view_transformer.py b/mmdet3d/models/necks/view_transformer.py +index ec03722..dfe5961 100644 +--- a/mmdet3d/models/necks/view_transformer.py ++++ b/mmdet3d/models/necks/view_transformer.py +@@ -7,12 +7,10 @@ from mmcv.runner import BaseModule, force_fp32 + from torch.cuda.amp.autocast_mode import autocast + from torch.utils.checkpoint import checkpoint + +-from mmdet3d.ops.bev_pool_v2.bev_pool import bev_pool_v2 ++from mx_driving.point import bev_pool_v3 + from mmdet.models.backbones.resnet import BasicBlock + from ..builder import NECKS + +-from torch.utils.checkpoint import checkpoint +- + + @NECKS.register_module() + class LSSViewTransformer(BaseModule): +@@ -190,20 +188,14 @@ class LSSViewTransformer(BaseModule): + (B, N_cams, D, H, W, C). + """ + +- ranks_bev, ranks_depth, ranks_feat, \ +- interval_starts, interval_lengths = \ +- self.voxel_pooling_prepare_v2(coor) ++ ranks_bev, ranks_depth, ranks_feat = self.voxel_pooling_prepare_v2(coor) + + self.ranks_bev = ranks_bev.int().contiguous() + self.ranks_feat = ranks_feat.int().contiguous() + self.ranks_depth = ranks_depth.int().contiguous() +- self.interval_starts = interval_starts.int().contiguous() +- self.interval_lengths = interval_lengths.int().contiguous() + + def voxel_pooling_v2(self, coor, depth, feat): +- ranks_bev, ranks_depth, ranks_feat, \ +- interval_starts, interval_lengths = \ +- self.voxel_pooling_prepare_v2(coor) ++ ranks_bev, ranks_depth, ranks_feat = self.voxel_pooling_prepare_v2(coor) + if ranks_feat is None: + print('warning ---> no points within the predefined ' + 'bev receptive field') +@@ -219,9 +211,8 @@ class LSSViewTransformer(BaseModule): + bev_feat_shape = (depth.shape[0], int(self.grid_size[2]), + int(self.grid_size[1]), int(self.grid_size[0]), + feat.shape[-1]) # (B, Z, Y, X, C) +- bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, +- bev_feat_shape, interval_starts, +- interval_lengths) ++ bev_feat = bev_pool_v3(depth, 
feat, ranks_depth, ranks_feat, ++ ranks_bev, bev_feat_shape) + # collapse Z + if self.collapse_z: + bev_feat = torch.cat(bev_feat.unbind(dim=2), 1) +@@ -270,22 +261,8 @@ class LSSViewTransformer(BaseModule): + self.grid_size[2] * self.grid_size[1] * self.grid_size[0]) + ranks_bev += coor[:, 2] * (self.grid_size[1] * self.grid_size[0]) + ranks_bev += coor[:, 1] * self.grid_size[0] + coor[:, 0] +- order = ranks_bev.argsort() +- ranks_bev, ranks_depth, ranks_feat = \ +- ranks_bev[order], ranks_depth[order], ranks_feat[order] +- +- kept = torch.ones( +- ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool) +- kept[1:] = ranks_bev[1:] != ranks_bev[:-1] +- interval_starts = torch.where(kept)[0].int() +- if len(interval_starts) == 0: +- return None, None, None, None, None +- interval_lengths = torch.zeros_like(interval_starts) +- interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1] +- interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1] + return ranks_bev.int().contiguous(), ranks_depth.int().contiguous( +- ), ranks_feat.int().contiguous(), interval_starts.int().contiguous( +- ), interval_lengths.int().contiguous() ++ ), ranks_feat.int().contiguous() + + def pre_compute(self, input): + if self.initial_flag: +@@ -304,10 +281,9 @@ class LSSViewTransformer(BaseModule): + bev_feat_shape = (depth.shape[0], int(self.grid_size[2]), + int(self.grid_size[1]), int(self.grid_size[0]), + feat.shape[-1]) # (B, Z, Y, X, C) +- bev_feat = bev_pool_v2(depth, feat, self.ranks_depth, ++ bev_feat = bev_pool_v3(depth, feat, self.ranks_depth, + self.ranks_feat, self.ranks_bev, +- bev_feat_shape, self.interval_starts, +- self.interval_lengths) ++ bev_feat_shape) + + bev_feat = bev_feat.squeeze(2) + else: +diff --git a/mmdet3d/ops/paconv/paconv.py b/mmdet3d/ops/paconv/paconv.py +index bda8bfe..9015b9e 100644 +--- a/mmdet3d/ops/paconv/paconv.py ++++ b/mmdet3d/ops/paconv/paconv.py +@@ -97,8 +97,6 @@ class ScoreNet(nn.Module): + scores = F.softmax(scores / 
self.temp_factor, dim=1) + elif self.score_norm == 'sigmoid': + scores = torch.sigmoid(scores / self.temp_factor) +- else: # 'identity' +- scores = scores + + scores = scores.permute(0, 2, 3, 1) # (B, N, K, M) + +diff --git a/tests/test_utils/test_box3d.py b/tests/test_utils/test_box3d.py +index 69d8b31..5149884 100644 +--- a/tests/test_utils/test_box3d.py ++++ b/tests/test_utils/test_box3d.py +@@ -1197,7 +1197,7 @@ def test_depth_boxes3d(): + # Test init with numpy array + np_boxes = np.array( + [[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601], +- [2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971]], ++ [2.3262, 3.3065, -0.44255, 0.8234, 0.5325, 1.0099, 2.9971]], + dtype=np.float32) + boxes_1 = DepthInstance3DBoxes(np_boxes) + assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes)) +diff --git a/tools/deployment/mmdet3d_handler.py b/tools/deployment/mmdet3d_handler.py +index 8b526cd..dd5699f 100644 +--- a/tools/deployment/mmdet3d_handler.py ++++ b/tools/deployment/mmdet3d_handler.py +@@ -30,10 +30,9 @@ class MMdet3dHandler(BaseHandler): + pertaining to the model artifacts parameters. + """ + properties = context.system_properties +- self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' ++ self.map_location = 'npu' + self.device = torch.device(self.map_location + ':' + +- str(properties.get('gpu_id')) if torch.cuda. 
+- is_available() else self.map_location) ++ str(properties.get('gpu_id'))) + self.manifest = context.manifest + + model_dir = properties.get('model_dir') +diff --git a/tools/dist_test.sh b/tools/dist_test.sh +index dea131b..6dafb49 100755 +--- a/tools/dist_test.sh ++++ b/tools/dist_test.sh +@@ -9,7 +9,7 @@ PORT=${PORT:-29500} + MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + + PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +-python -m torch.distributed.launch \ ++python -m torch.distributed.run \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ +diff --git a/tools/dist_train.sh b/tools/dist_train.sh +index aa71bf4..49753e8 100755 +--- a/tools/dist_train.sh ++++ b/tools/dist_train.sh +@@ -8,7 +8,8 @@ PORT=${PORT:-29500} + MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + + PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +-python -m torch.distributed.launch \ ++# OMP_NUM_THREADS=16 MKL_NUM_THREADS=16 ++MKL_NUM_THREADS=1 python -m torch.distributed.run \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ +diff --git a/tools/test.py b/tools/test.py +index c669247..904623e 100644 +--- a/tools/test.py ++++ b/tools/test.py +@@ -7,7 +7,7 @@ import mmcv + import torch + from mmcv import Config, DictAction + from mmcv.cnn import fuse_conv_bn +-from mmcv.parallel import MMDataParallel, MMDistributedDataParallel ++from mmcv.device.npu import NPUDataParallel, NPUDistributedDataParallel + from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, + wrap_fp16_model) + +@@ -18,6 +18,9 @@ from mmdet3d.models import build_model + from mmdet.apis import multi_gpu_test, set_random_seed + from mmdet.datasets import replace_ImageToTensor + ++import torch_npu ++from torch_npu.contrib import transfer_to_npu ++ + if mmdet.__version__ > '2.23.0': + # If mmdet version > 2.23.0, setup_multi_processes would be imported and + # used from mmdet instead of mmdet3d. 
+@@ -116,6 +119,7 @@ def parse_args(): + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') ++ parser.add_argument('--local-rank', type=int, default=0) + parser.add_argument('--local_rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: +@@ -235,10 +239,10 @@ def main(): + model.PALETTE = dataset.PALETTE + + if not distributed: +- model = MMDataParallel(model, device_ids=cfg.gpu_ids) ++ model = NPUDataParallel(model.npu(), device_ids=cfg.gpu_ids) + outputs = single_gpu_test(model, data_loader, args.show, args.show_dir) + else: +- model = MMDistributedDataParallel( ++ model = NPUDistributedDataParallel( + model.cuda(), + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False) +diff --git a/tools/train.py b/tools/train.py +index 72a1579..8be5ffb 100644 +--- a/tools/train.py ++++ b/tools/train.py +@@ -12,6 +12,8 @@ import torch + import torch.distributed as dist + from mmcv import Config, DictAction + from mmcv.runner import get_dist_info, init_dist ++from mx_driving.patcher.patcher import PatcherBuilder, Patch ++from mx_driving.patcher.tensor import batch_matmul + + from mmdet import __version__ as mmdet_version + from mmdet3d import __version__ as mmdet3d_version +@@ -22,6 +24,10 @@ from mmdet3d.utils import collect_env, get_root_logger + from mmdet.apis import set_random_seed + from mmseg import __version__ as mmseg_version + ++import torch_npu ++from torch_npu.contrib import transfer_to_npu ++from msprobe.pytorch import seed_all ++ + try: + # If mmdet version > 2.20.0, setup_multi_processes would be imported and + # used from mmdet instead of mmdet3d. 
+@@ -93,6 +99,7 @@ def parse_args(): + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') ++ parser.add_argument('--local-rank', type=int, default=0) + parser.add_argument('--local_rank', type=int, default=0) + parser.add_argument( + '--autoscale-lr', +@@ -274,4 +281,10 @@ def main(): + + + if __name__ == '__main__': +- main() ++ ++ from bevdet4d_patch import generate_patcher_builder ++ ++ bevdet4d_patcher_builder = generate_patcher_builder() ++ with bevdet4d_patcher_builder.build(): ++ main() ++ diff --git a/model_examples/BEVDet4D/README.md b/model_examples/BEVDet4D/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b169ba02d96f9e852b920be7984cc4bf989aa5e --- /dev/null +++ b/model_examples/BEVDet4D/README.md @@ -0,0 +1,188 @@ +# BEVDet4D + +# 目录 +- [BEVDet4D](#bevdet) +- [目录](#目录) +- [简介](#简介) + - [模型介绍](#模型介绍) + - [代码实现](#代码实现) +- [准备训练环境](#准备训练环境) + - [安装昇腾环境](#安装昇腾环境) + - [安装模型环境](#安装模型环境) +- [准备数据集](#准备数据集) + - [预训练数据集](#预训练数据集) + - [获取预训练权重](#获取预训练权重) +- [快速开始](#快速开始) + - [模型训练](#模型训练) + - [模型验证](#模型验证) + - [训练结果](#训练结果) +- [变更说明](#变更说明) + - [FAQ](#faq) + + +# 简介 + +## 模型介绍 + +BEVDet4D 是一种将 BEVDet 从仅空间的 3D 扩展到时空 4D 工作空间的多相机三维目标检测范式。它通过融合前后帧特征,以极小的计算成本获取时间线索,将速度预测任务简化为位置偏移预测,在 nuScenes 基准测试中取得了优异的成绩。 + +## 代码实现 +- 参考实现: + + ``` + url=https://github.com/HuangJunJie2017/BEVDet.git + commit_id=58c2587a8f89a1927926f0bdb6cde2917c91a9a5 + ``` + +- 适配昇腾 AI 处理器的实现: + + ``` + url=https://gitee.com/ascend/DrivingSDK.git + code_path=model_examples/BEVDet4D + ``` + +# 准备训练环境 +## 安装昇腾环境 +请参考昇腾社区中《[Pytorch框架训练环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes)》文档搭建昇腾环境。本仓已支持表1中软件版本。 + + **表 1** 昇腾软件版本支持表 + + | 软件类型 | 支持版本 | + |:------------------:|:--------:| + | FrameworkPTAdapter | 7.1.0 | + | CANN | 8.2.RC1 | + +## 安装模型环境 + + 当前模型支持的 PyTorch 版本和已知三方库依赖如下表所示。 + + **表 2** 版本支持表 + + | 三方库 | 支持版本 | + |:--------------:|:------:| + | PyTorch | 2.1 | + | mmcv | 1.x | + | mmdet | 
2.28.2 | + | mmsegmentation | 0.30.0 | + +- 安装Driving SDK + + 请参考昇腾[Driving SDK](https://gitee.com/ascend/DrivingSDK)代码仓说明编译安装Driving SDK + >【注意】请使用7.1.RC1及之后的Driving SDK + +- 安装基础依赖 + + 在模型源码包根目录下执行命令,安装模型需要的依赖。 + + ```shell + pip install -r requirements.txt + ``` + +- 源码安装mmcv + + ```shell + git clone -b 1.x https://github.com/open-mmlab/mmcv.git + cp mmcv.patch mmcv + cd mmcv + git apply mmcv.patch + MMCV_WITH_OPS=1 FORCE_NPU=1 python setup.py install + cd .. + ``` + +- 模型代码更新 + ```shell + git clone https://github.com/HuangJunJie2017/BEVDet.git + cp -r test BEVDet + cp BEVDet.patch BEVDet + cp bevdet4d_patch.py BEVDet/tools + cd BEVDet + git checkout 58c2587a8f89a1927926f0bdb6cde2917c91a9a5 + git apply BEVDet.patch + ``` + +# 准备数据集 + +## 预训练数据集 +用户自行获取*nuscenes*数据集,在源码目录创建软连接`data/nuscenes`指向解压后的nuscenes数据目录 + +运行数据预处理脚本生成BEVDet模型训练需要的pkl文件 + ```shell + python tools/create_data_bevdet.py + ``` + + 整理好的数据集目录如下: + +``` +BEVDet/data + nuscenes + lidarseg + maps + samples + sweeps + v1.0-trainval + nuscenes_infos_train.pkl + nuscenes_infos_val.pkl + bevdetv3-nuscenes_infos_train.pkl + bevdetv3-nuscenes_infos_val.pkl +``` +## 获取预训练权重 +联网情况下,预训练权重会自动下载。 + +# 快速开始 + +## 模型训练 + +1. 进入源码根目录 + + ```shell + cd /${模型文件夹名称} + ``` + +2. 
单机8卡训练

+- 8卡性能
+  ```shell
+  bash test/train_performance_8p.sh --py_config=configs/bevdet/bevdet-stbase-4d-stereo-512x1408-cbgs.py
+  ```
+- 8卡精度
+  ```shell
+  bash test/train_full_8p.sh --py_config=configs/bevdet/bevdet-stbase-4d-stereo-512x1408-cbgs.py --test=1
+  ```
+
+  模型训练脚本参数说明如下。
+
+  ```
+  公共参数:
+  --py_config                       //不同类型任务配置文件
+  --test                            //--test=1固定随机性用于测试精度,默认不开启
+  --work_dir                        //输出路径包括日志和训练参数
+  ```
+
+  训练完成后,权重文件保存在当前路径下,并输出模型训练精度和性能信息。
+
+
+
+## 训练结果
+
+**表 3** 训练结果展示表
+
+| 芯片 | 精度 | 卡数 | Global Batchsize | Loss | FPS | 平均step耗时 | Max epochs |
+|:-------------:|:------:|:----:|:----:|:----:|:----:|:----------:|:----------:|
+| 竞品A |FP32 |8p | 64 |19.513 | 5.59 | 11.44秒 | 3 |
+| Atlas 800T A2 |FP32 |8p | 64 | 19.402 | 7.04 | 9.09秒| 3 |
+| Atlas 800T A2 |FP16 |8p | 64 | 19.371 | 8.82 | 7.26秒| 3 |
+
+# 变更说明
+- 2025.8.8:首次发布。
+
+## FAQ
+Q: 在无网络或设有防火墙的环境下如何下载预训练权重?
+
+A: 无网络情况下,用户可以自行下载 *SwinTransformer* 预训练权重 [*swin_base_patch4_window12_384_22k.pth*](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth)。将下载好的权重拷贝至以下目录,其中 ${torch_hub} 替换为实际下载位置,默认为 ~/.cache/torch/hub
+```
+${torch_hub}/checkpoints/swin_base_patch4_window12_384_22k.pth
+```
+
+
+
diff --git a/model_examples/BEVDet4D/bevdet4d_patch.py b/model_examples/BEVDet4D/bevdet4d_patch.py
new file mode 100644
index 0000000000000000000000000000000000000000..73fde071454edf6eb2fc8ffa42ac76d612cacffe
--- /dev/null
+++ b/model_examples/BEVDet4D/bevdet4d_patch.py
@@ -0,0 +1,127 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+# Copyright (c) OpenMMLab. All rights reserved.
+import os +import sys +from types import ModuleType +from typing import Dict +import torch +import torch_npu + +import mx_driving +from mx_driving.patcher import PatcherBuilder, Patch +from mx_driving.patcher import batch_matmul + +sys.path.append("..") + + + +def lssview(view_transform: ModuleType, options: Dict): + + def get_lidar_coor_(self, sensor2ego, ego2global, cam2imgs, post_rots, post_trans, + bda): + B, N, _, _ = sensor2ego.shape + + # post-transformation + # B x N x D x H x W x 3 + points = self.frustum.to(sensor2ego) - post_trans.view(B, N, 1, 1, 1, 3) + B, N, D, H, W, _ = points.shape + points = points.view(B, N, D * H * W, 3, 1) + points = torch.inverse(post_rots).view(B, N, 1, 3, 3).matmul(points) + + # cam_to_ego + points = torch.cat((points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]), 3) + combine = sensor2ego[:, :, :3, :3].matmul(torch.inverse(cam2imgs)) + points = combine.view(B, N, 1, 3, 3).matmul(points).squeeze(-1) + points += sensor2ego[:, :, :3, 3].view(B, N, 1, 3) + points = bda[:, :3, :3].view(B, 1, 1, 3, 3).matmul( + points.unsqueeze(-1)).squeeze(-1) + points += bda[:, :3, 3].view(B, 1, 1, 3) + return points.view(B, N, D, H, W, 3) + + def gen_grid_(self, metas, B, N, D, H, W, hi, wi): + frustum = metas['frustum'] + points = frustum - metas['post_trans'].view(B, N, 1, 1, 1, 3) + ori_shape = points.shape + points = points.view(B, N, -1, 3) + points = torch.inverse(metas['post_rots']).view(B, N, 1, 3, 3) \ + .matmul(points.unsqueeze(-1)) + points = torch.cat( + (points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]), 3) + + rots = metas['k2s_sensor'][:, :, :3, :3].contiguous() + trans = metas['k2s_sensor'][:, :, :3, 3].contiguous() + combine = rots.matmul(torch.inverse(metas['intrins'])) + + points = combine.view(B, N, 1, 3, 3).matmul(points) + points += trans.view(B, N, 1, 3, 1) + neg_mask = (points.view(ori_shape))[..., 2, 0] < 1e-3 + points = metas['intrins'].view(B, N, 1, 3, 3).matmul(points) + points = points[..., 
:2, :] / points[..., 2:3, :] + + points = metas['post_rots'][..., :2, :2].view(B, N, 1, 2, 2).matmul( + points).squeeze(-1) + points += metas['post_trans'][..., :2].view(B, N, 1, 2) + + new_shape = list(ori_shape) + new_shape[-1] = 2 + points = points.view(new_shape) + px = points[..., 0] / (wi - 1.0) * 2.0 - 1.0 + py = points[..., 1] / (hi - 1.0) * 2.0 - 1.0 + px[neg_mask] = -2 + py[neg_mask] = -2 + grid = torch.stack([px, py], dim=-1) + grid = grid.view(B * N, D * H, W, 2) + return grid + + if hasattr(view_transform, "LSSViewTransformer"): + view_transform.LSSViewTransformer.get_lidar_coor = get_lidar_coor_ + if hasattr(view_transform, "DepthNet"): + view_transform.DepthNet.gen_grid = gen_grid_ + + +def swin_fp16(swin: ModuleType, options: Dict): + + def forward_fp16(self, x): + # change the code to support fp16 + with torch.autocast(device_type="npu", dtype=torch.float16): + x = self.patch_embed(x) + + hw_shape = (self.patch_embed.DH, self.patch_embed.DW) + if self.use_abs_pos_embed: + x = x + self.absolute_pos_embed + x = self.drop_after_pos(x) + + outs = [] + for i, stage in enumerate(self.stages): + x, hw_shape, out, out_hw_shape = stage(x, hw_shape) + if i == 0 and self.return_stereo_feat: + out = out.view(-1, *out_hw_shape, + self.num_features[i]).permute(0, 3, 1, + 2).contiguous() + outs.append(out) + if i in self.out_indices: + norm_layer = getattr(self, f'norm{i}') + out = norm_layer(out) + out = out.view(-1, *out_hw_shape, + self.num_features[i]).permute(0, 3, 1, + 2).contiguous() + outs.append(out) + elif self.output_missing_index_as_none: + outs.append(None) + return [out.float() for out in outs] + + if hasattr(swin, "SwinTransformer"): + swin.SwinTransformer.forward = forward_fp16 + + +def generate_patcher_builder(): + bevdet4d_patcher_builder = ( + PatcherBuilder() + .add_module_patch("torch", Patch(batch_matmul)) + .add_module_patch("mmdet3d.models.necks.view_transformer", Patch(lssview)) + ) + if os.environ.get("BEVDET4D_FP16"): + 
bevdet4d_patcher_builder.add_module_patch("mmdet3d.models.backbones.swin", Patch(swin_fp16)) + if os.environ.get("BEVDET4D_PERFORMANCE_FLAG"): + bevdet4d_patcher_builder.brake_at(250) + return bevdet4d_patcher_builder diff --git a/model_examples/BEVDet4D/mmcv.patch b/model_examples/BEVDet4D/mmcv.patch new file mode 100644 index 0000000000000000000000000000000000000000..379d9106635ac3d9a9b8cfed68f7515b79aaef81 --- /dev/null +++ b/model_examples/BEVDet4D/mmcv.patch @@ -0,0 +1,14 @@ +diff --git a/mmcv/parallel/distributed.py b/mmcv/parallel/distributed.py +index bf34cb59..f0dfecc9 100644 +--- a/mmcv/parallel/distributed.py ++++ b/mmcv/parallel/distributed.py +@@ -156,8 +156,7 @@ class MMDistributedDataParallel(DistributedDataParallel): + Returns: + Any: Forward result of :attr:`module`. + """ +- module_to_run = self._replicated_tensor_module if \ +- self._use_replicated_tensor_module else self.module ++ module_to_run = self.module + + if self.device_ids: + inputs, kwargs = self.to_kwargs( # type: ignore diff --git a/model_examples/BEVDet4D/requirements.txt b/model_examples/BEVDet4D/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3bf249c9727a1865bfb403f982f329d09f7ac961 --- /dev/null +++ b/model_examples/BEVDet4D/requirements.txt @@ -0,0 +1,27 @@ +setuptools==65.7.0 +torchvision==0.16.0 +nuscenes-devkit==1.1.11 +numba==0.58.1 +numpy==1.23.1 +lyft_dataset_sdk +nuscenes-devkit==1.1.11 +scikit-image +trimesh==2.35.39 +tensorboard +networkx +attrs +decorator +sympy +cffi +pyyaml +pathlib2 +psutil +protobuf==4.25.0 +scipy +requests +absl-py +yapf +mmdet==2.28.2 +mmsegmentation==0.30.0 +ninja +mindstudio-probe diff --git a/model_examples/BEVDet4D/test/env_npu.sh b/model_examples/BEVDet4D/test/env_npu.sh new file mode 100644 index 0000000000000000000000000000000000000000..6ce6385116bb14c6eb4c4be33b51525c44d07f1c --- /dev/null +++ b/model_examples/BEVDet4D/test/env_npu.sh @@ -0,0 +1,50 @@ +#!/bin/bash 
+CANN_INSTALL_PATH_CONF='/etc/Ascend/ascend_cann_install.info' + +if [ -f $CANN_INSTALL_PATH_CONF ]; then + CANN_INSTALL_PATH=$(cat $CANN_INSTALL_PATH_CONF | grep Install_Path | cut -d "=" -f 2) +else + CANN_INSTALL_PATH="/usr/local/Ascend" +fi + +if [ -d ${CANN_INSTALL_PATH}/ascend-toolkit/latest ]; then + source ${CANN_INSTALL_PATH}/ascend-toolkit/set_env.sh +else + source ${CANN_INSTALL_PATH}/nnae/set_env.sh +fi + + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=2 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=1 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 +#配置HCCL的初始化root通信网卡IP +export HCCL_IF_IP=$(hostname -I |awk '{print $1}') + +#设置是否开启均匀绑核,0-关闭/1-开启粗粒度绑核/2-开启细粒度绑核 +export CPU_AFFINITY_CONF=1 + +export OMP_NUM_THREADS=16 +export MKL_NUM_THREADS=16 + +#设置device侧日志登记为error +msnpureport -g error -d 0 +msnpureport -g error -d 1 +msnpureport -g error -d 2 +msnpureport -g error -d 3 +msnpureport -g error -d 4 +msnpureport -g error -d 5 +msnpureport -g error -d 6 +msnpureport -g error -d 7 +#关闭Device侧Event日志 +msnpureport -e disable diff --git a/model_examples/BEVDet4D/test/train_full_8p.sh b/model_examples/BEVDet4D/test/train_full_8p.sh new file mode 100644 index 0000000000000000000000000000000000000000..0947cf54c738610158e23cf71a8bb29fdf65516c --- /dev/null +++ b/model_examples/BEVDet4D/test/train_full_8p.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +#当前路径 +cur_path=`pwd` + +#集合通信参数 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + +test=0 + +#基础参数 +batch_size=8 +#训练step +max_epochs=24 + +# 帮助信息 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_8p.sh " + echo " " + echo "parameter explain: + --py_config train config + --test switch to test mode when != 0 + --work_dir set output 
dir for training + -h/--help show help message + " + exit 1 +fi + +#参数校验 +for para in $* +do + if [[ $para == --py_config* ]];then + py_config=`echo ${para#*=}` + elif [[ $para == --test* ]];then + test=`echo ${para#*=}` + elif [[ $para == --work_dir* ]];then + work_dir=`echo ${para#*=}` + fi +done + +if (($test!=0)); then + max_epochs=3 + sed -i.bak '/if __name__ == '"'"'__main__'"'"':/a\ seed_all(seed=1234, mode=True)' tools/train.py +fi + +#校验是否传入py_config +if [[ $py_config == "" ]];then + echo "[Error] para \"py_config\" must be config" + exit 1 +fi + +#配置名称 +config_name=`echo $py_config | awk -F "/" '{print $NF}' | awk -F "." '{print $1}'` +#网络名称,同配置名称 +Network=$config_name + +if [[ $work_dir == "" ]];then + work_dir="output/train_8p/$config_name" +else + work_dir="${work_dir}/train_8p/$config_name" +fi + +test_path_dir=$cur_path +ASCEND_DEVICE_ID=0 + +if [ ! -d ${test_path_dir}/output ];then + mkdir ${test_path_dir}/output +fi +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt +fi + +#训练开始时间 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/test/env_npu.sh +fi + +#设置环境变量 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID +export WORLD_SIZE=8 +export BEVDET4D_FP16=1 + +bash ./tools/dist_train.sh ${py_config} ${WORLD_SIZE} \ +--work-dir ${work_dir} \ +--cfg-options runner.max_epochs=$max_epochs data.samples_per_gpu=$batch_size + +#训练结束时间 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +if (($test!=0)); then + mv tools/train.py.bak tools/train.py +fi + +log_file=`find ${work_dir} -regex ".*\.log" | sort -r | head -n 1` + +#结果打印 +echo "------------------ Final result ------------------" + +#性能看护结果汇总 +#训练用例信息 
+DeviceType=`uname -m` +CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'loss' + +##获取性能数据 +#吞吐量 +ActualFPS=${FPS} + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中 +grep "Epoch " ${log_file}|awk -F "loss: " '!/Epoch \[1\]\[1/ {print $NF}' | awk -F "," '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#获取loss值 +ActualLoss=$(tail -n 100 $log_file | grep -oP 'loss: \K[\d.]+' | awk '{sum+=$1;count++} END {printf "%.3f", sum/count}') +echo "Average Loss: ${ActualLoss}" + +#关键信息打印到${CaseName}.log中 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/model_examples/BEVDet4D/test/train_performance_8p.sh b/model_examples/BEVDet4D/test/train_performance_8p.sh new file mode 100644 index 0000000000000000000000000000000000000000..16e04da2451be74d9cb463143562aca997c36131 --- /dev/null +++ b/model_examples/BEVDet4D/test/train_performance_8p.sh @@ -0,0 +1,123 @@ +#!/bin/bash + +#当前路径 +cur_path=`pwd` + +#集合通信参数 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + +#基础参数 +batch_size=8 +#训练step +max_epochs=24 + +# 帮助信息 +if [[ $1 == --help || $1 == -h ]];then + echo "usage:./train_performance_8p.sh " + echo " " + echo "parameter explain: + --py_config train config + --test switch to test mode when != 0 + --work_dir set output dir for training + -h/--help show help message + " + exit 1 +fi + +#参数校验 +for para in $* +do + if 
[[ $para == --py_config* ]];then + py_config=`echo ${para#*=}` + elif [[ $para == --test* ]];then + test=`echo ${para#*=}` + elif [[ $para == --work_dir* ]];then + work_dir=`echo ${para#*=}` + fi +done + +#校验是否传入py_config +if [[ $py_config == "" ]];then + echo "[Error] para \"py_config\" must be config" + exit 1 +fi + +#配置名称 +config_name=`echo $py_config | awk -F "/" '{print $NF}' | awk -F "." '{print $1}'` +#网络名称,同配置名称 +Network=$config_name + +if [[ $work_dir == "" ]];then + work_dir="output/train_8p/$config_name" +else + work_dir="${work_dir}/train_8p/$config_name" +fi + +test_path_dir=$cur_path +ASCEND_DEVICE_ID=0 + +if [ ! -d ${test_path_dir}/output ];then + mkdir ${test_path_dir}/output +fi +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt +fi + +#训练开始时间 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/test/env_npu.sh +fi + +#设置环境变量 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID +export WORLD_SIZE=8 +export BEVDET4D_PERFORMANCE_FLAG=1 +export BEVDET4D_FP16=1 + +bash ./tools/dist_train.sh ${py_config} ${WORLD_SIZE} \ +--work-dir ${work_dir} \ +--cfg-options runner.max_epochs=$max_epochs data.samples_per_gpu=$batch_size + +#训练结束时间 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +log_file=`find ${work_dir} -regex ".*\.log" | sort -r | head -n 1` + +#结果打印 +echo "------------------ Final result ------------------" +#输出性能FPS +TrainingTime=`grep -a 'Epoch ' ${log_file}|awk -F " time: " '!/Epoch \[1\]\[1/ {print $NF}'|awk -F " " '{print $1}' | awk '{ sum += $0; n++} END { if (n > 0) print sum / n;}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'* '${WORLD_SIZE}'/'${TrainingTime}'}'` +#打印 +echo "Final 
Performance images/sec : $FPS" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息 +DeviceType=`uname -m` +CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p' + +##获取性能数据 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${batch_size}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log