From dcc3ecaf4593f5da6d96bb3b9950f616168cccc0 Mon Sep 17 00:00:00 2001 From: zhttjd Date: Tue, 12 Aug 2025 17:52:47 +0800 Subject: [PATCH] Fix CVE 7.0.RC1 --- model_examples/TPVFormer/TPVFormer.patch | 966 +++++++++++------------ 1 file changed, 483 insertions(+), 483 deletions(-) diff --git a/model_examples/TPVFormer/TPVFormer.patch b/model_examples/TPVFormer/TPVFormer.patch index e4ab8748..b324f784 100644 --- a/model_examples/TPVFormer/TPVFormer.patch +++ b/model_examples/TPVFormer/TPVFormer.patch @@ -1,169 +1,169 @@ -diff --git a/config/_base_/optimizer.py b/config/_base_/optimizer.py -index 90beb1b..ff4b69d 100644 ---- a/config/_base_/optimizer.py -+++ b/config/_base_/optimizer.py -@@ -1,5 +1,5 @@ - optimizer = dict( -- type='AdamW', -+ type='NpuFusedAdamW', - lr=2e-4, - paramwise_cfg=dict( - custom_keys={ -diff --git a/config/tpv_lidarseg.py b/config/tpv_lidarseg.py -index ef6adca..871c6d2 100644 ---- a/config/tpv_lidarseg.py -+++ b/config/tpv_lidarseg.py -@@ -35,6 +35,9 @@ nbr_class = 17 - - self_cross_layer = dict( - type='TPVFormerLayer', -+ tpv_h=tpv_h_, -+ tpv_w=tpv_w_, -+ tpv_z=tpv_z_, - attn_cfgs=[ - dict( - type='TPVCrossViewHybridAttention', -@@ -76,6 +79,9 @@ self_cross_layer = dict( - - self_layer = dict( - type='TPVFormerLayer', -+ tpv_h=tpv_h_, -+ tpv_w=tpv_w_, -+ tpv_z=tpv_z_, - attn_cfgs=[ - dict( - type='TPVCrossViewHybridAttention', -diff --git a/config/tpv_lidarseg_dim64.py b/config/tpv_lidarseg_dim64.py -index 797e334..534c8b8 100644 ---- a/config/tpv_lidarseg_dim64.py -+++ b/config/tpv_lidarseg_dim64.py -@@ -35,6 +35,9 @@ nbr_class = 17 - - self_cross_layer = dict( - type='TPVFormerLayer', -+ tpv_h=tpv_h_, -+ tpv_w=tpv_w_, -+ tpv_z=tpv_z_, - attn_cfgs=[ - dict( - type='TPVCrossViewHybridAttention', -@@ -76,6 +79,9 @@ self_cross_layer = dict( - - self_layer = dict( - type='TPVFormerLayer', -+ tpv_h=tpv_h_, -+ tpv_w=tpv_w_, -+ tpv_z=tpv_z_, - attn_cfgs=[ - dict( - type='TPVCrossViewHybridAttention', -diff --git a/config/tpv_lidarseg_dim96.py b/config/tpv_lidarseg_dim96.py -index c34389b..8aa84c7 100644 ---- a/config/tpv_lidarseg_dim96.py -+++ b/config/tpv_lidarseg_dim96.py -@@ -35,6 +35,9 @@ nbr_class = 17 - - self_cross_layer = dict( - type='TPVFormerLayer', -+ tpv_h=tpv_h_, -+ tpv_w=tpv_w_, -+ tpv_z=tpv_z_, - attn_cfgs=[ - dict( - type='TPVCrossViewHybridAttention', -@@ -76,6 +79,9 @@ self_cross_layer = dict( - - self_layer = dict( - type='TPVFormerLayer', -+ tpv_h=tpv_h_, -+ tpv_w=tpv_w_, -+ tpv_z=tpv_z_, - attn_cfgs=[ - dict( - type='TPVCrossViewHybridAttention', -diff --git a/eval.py b/eval.py -index adf2c6a..3676fac 100644 ---- a/eval.py -+++ b/eval.py -@@ -1,6 +1,9 @@ - - import os, time, argparse, os.path as osp, numpy as np - import torch -+import torch.nn as nn -+import torch_npu -+from torch_npu.contrib import transfer_to_npu - import torch.distributed as dist - - from utils.metric_util import MeanIoU -@@ -20,7 +23,7 @@ def pass_print(*args, **kwargs): - - def main(local_rank, args): - # global settings -- torch.backends.cudnn.benchmark = True -+ #torch.backends.cudnn.benchmark = True - - # load config - cfg = Config.fromfile(args.py_config) -@@ -41,14 +44,14 @@ def main(local_rank, args): - port = os.environ.get("MASTER_PORT", "20506") - hosts = int(os.environ.get("WORLD_SIZE", 1)) # number of nodes - rank = int(os.environ.get("RANK", 0)) # node id -- gpus = torch.cuda.device_count() # gpus per node -+ npus = torch.cuda.device_count() # npus per node - print(f"tcp://{ip}:{port}") - dist.init_process_group( -- backend="nccl", 
init_method=f"tcp://{ip}:{port}", -- world_size=hosts * gpus, rank=rank * gpus + local_rank -+ backend="hccl", init_method=f"tcp://{ip}:{port}", -+ world_size=hosts * npus, rank=rank * npus + local_rank - ) - world_size = dist.get_world_size() -- cfg.gpu_ids = range(world_size) -+ cfg.npu_ids = range(world_size) - torch.cuda.set_device(local_rank) - - if dist.get_rank() != 0: -@@ -156,11 +159,11 @@ def main(local_rank, args): - - predict_labels_pts = predict_labels_pts.squeeze(-1).squeeze(-1) - predict_labels_pts = torch.argmax(predict_labels_pts, dim=1) # bs, n -- predict_labels_pts = predict_labels_pts.detach().cpu() -- val_pt_labs = val_pt_labs.squeeze(-1).cpu() -+ predict_labels_pts = predict_labels_pts.detach().npu() -+ val_pt_labs = val_pt_labs.squeeze(-1).npu() - - predict_labels_vox = torch.argmax(predict_labels_vox, dim=1) -- predict_labels_vox = predict_labels_vox.detach().cpu() -+ predict_labels_vox = predict_labels_vox.detach().npu() - for count in range(len(val_grid_int)): - CalMeanIou_pts._after_step(predict_labels_pts[count], val_pt_labs[count]) - CalMeanIou_vox._after_step( -@@ -168,7 +171,7 @@ def main(local_rank, args): - val_pt_labs[count]) - val_loss_list.append(loss.detach().cpu().numpy()) - if i_iter_val % print_freq == 0 and dist.get_rank() == 0: -- logger.info('[EVAL] Iter %5d: Loss: %.3f (%.3f)'%( -+ logger.info('[EVAL] Iter %5d: Loss: %.3f (%.3f)' % ( - i_iter_val, loss.item(), np.mean(val_loss_list))) - - val_miou_pts = CalMeanIou_pts._after_epoch() -@@ -187,8 +190,8 @@ if __name__ == '__main__': - - args = parser.parse_args() - -- ngpus = torch.cuda.device_count() -- args.gpus = ngpus -+ npus = torch.cuda.device_count() -+ args.npus = npus - print(args) - -- torch.multiprocessing.spawn(main, args=(args,), nprocs=args.gpus) -+ torch.multiprocessing.spawn(main, args=(args,), nprocs=args.npus) -diff --git a/kitti_ssc/kitti_ssc/tpvformer10/modules/temporal_self_attention.py b/kitti_ssc/kitti_ssc/tpvformer10/modules/temporal_self_attention.py -index 3e95234..8e7a0f9 100644 ---- a/kitti_ssc/kitti_ssc/tpvformer10/modules/temporal_self_attention.py -+++ b/kitti_ssc/kitti_ssc/tpvformer10/modules/temporal_self_attention.py -@@ -85,7 +85,7 @@ class TPVTemporalSelfAttention(BaseModule): +diff --git a/config/_base_/optimizer.py b/config/_base_/optimizer.py +index 90beb1b..ff4b69d 100644 +--- a/config/_base_/optimizer.py ++++ b/config/_base_/optimizer.py +@@ -1,5 +1,5 @@ + optimizer = dict( +- type='AdamW', ++ type='NpuFusedAdamW', + lr=2e-4, + paramwise_cfg=dict( + custom_keys={ +diff --git a/config/tpv_lidarseg.py b/config/tpv_lidarseg.py +index ef6adca..871c6d2 100644 +--- a/config/tpv_lidarseg.py ++++ b/config/tpv_lidarseg.py +@@ -35,6 +35,9 @@ nbr_class = 17 + + self_cross_layer = dict( + type='TPVFormerLayer', ++ tpv_h=tpv_h_, ++ tpv_w=tpv_w_, ++ tpv_z=tpv_z_, + attn_cfgs=[ + dict( + type='TPVCrossViewHybridAttention', +@@ -76,6 +79,9 @@ self_cross_layer = dict( + + self_layer = dict( + type='TPVFormerLayer', ++ tpv_h=tpv_h_, ++ tpv_w=tpv_w_, ++ tpv_z=tpv_z_, + attn_cfgs=[ + dict( + type='TPVCrossViewHybridAttention', +diff --git a/config/tpv_lidarseg_dim64.py b/config/tpv_lidarseg_dim64.py +index 797e334..534c8b8 100644 +--- a/config/tpv_lidarseg_dim64.py ++++ b/config/tpv_lidarseg_dim64.py +@@ -35,6 +35,9 @@ nbr_class = 17 + + self_cross_layer = dict( + type='TPVFormerLayer', ++ tpv_h=tpv_h_, ++ tpv_w=tpv_w_, ++ tpv_z=tpv_z_, + attn_cfgs=[ + dict( + type='TPVCrossViewHybridAttention', +@@ -76,6 +79,9 @@ self_cross_layer = dict( + + self_layer = dict( + 
type='TPVFormerLayer', ++ tpv_h=tpv_h_, ++ tpv_w=tpv_w_, ++ tpv_z=tpv_z_, + attn_cfgs=[ + dict( + type='TPVCrossViewHybridAttention', +diff --git a/config/tpv_lidarseg_dim96.py b/config/tpv_lidarseg_dim96.py +index c34389b..8aa84c7 100644 +--- a/config/tpv_lidarseg_dim96.py ++++ b/config/tpv_lidarseg_dim96.py +@@ -35,6 +35,9 @@ nbr_class = 17 + + self_cross_layer = dict( + type='TPVFormerLayer', ++ tpv_h=tpv_h_, ++ tpv_w=tpv_w_, ++ tpv_z=tpv_z_, + attn_cfgs=[ + dict( + type='TPVCrossViewHybridAttention', +@@ -76,6 +79,9 @@ self_cross_layer = dict( + + self_layer = dict( + type='TPVFormerLayer', ++ tpv_h=tpv_h_, ++ tpv_w=tpv_w_, ++ tpv_z=tpv_z_, + attn_cfgs=[ + dict( + type='TPVCrossViewHybridAttention', +diff --git a/eval.py b/eval.py +index adf2c6a..3676fac 100644 +--- a/eval.py ++++ b/eval.py +@@ -1,6 +1,9 @@ + + import os, time, argparse, os.path as osp, numpy as np + import torch ++import torch.nn as nn ++import torch_npu ++from torch_npu.contrib import transfer_to_npu + import torch.distributed as dist + + from utils.metric_util import MeanIoU +@@ -20,7 +23,7 @@ def pass_print(*args, **kwargs): + + def main(local_rank, args): + # global settings +- torch.backends.cudnn.benchmark = True ++ #torch.backends.cudnn.benchmark = True + + # load config + cfg = Config.fromfile(args.py_config) +@@ -41,14 +44,14 @@ def main(local_rank, args): + port = os.environ.get("MASTER_PORT", "20506") + hosts = int(os.environ.get("WORLD_SIZE", 1)) # number of nodes + rank = int(os.environ.get("RANK", 0)) # node id +- gpus = torch.cuda.device_count() # gpus per node ++ npus = torch.cuda.device_count() # npus per node + print(f"tcp://{ip}:{port}") + dist.init_process_group( +- backend="nccl", init_method=f"tcp://{ip}:{port}", +- world_size=hosts * gpus, rank=rank * gpus + local_rank ++ backend="hccl", init_method=f"tcp://{ip}:{port}", ++ world_size=hosts * npus, rank=rank * npus + local_rank + ) + world_size = dist.get_world_size() +- cfg.gpu_ids = range(world_size) ++ cfg.npu_ids = range(world_size) + torch.cuda.set_device(local_rank) + + if dist.get_rank() != 0: +@@ -156,11 +159,11 @@ def main(local_rank, args): + + predict_labels_pts = predict_labels_pts.squeeze(-1).squeeze(-1) + predict_labels_pts = torch.argmax(predict_labels_pts, dim=1) # bs, n +- predict_labels_pts = predict_labels_pts.detach().cpu() +- val_pt_labs = val_pt_labs.squeeze(-1).cpu() ++ predict_labels_pts = predict_labels_pts.detach().npu() ++ val_pt_labs = val_pt_labs.squeeze(-1).npu() + + predict_labels_vox = torch.argmax(predict_labels_vox, dim=1) +- predict_labels_vox = predict_labels_vox.detach().cpu() ++ predict_labels_vox = predict_labels_vox.detach().npu() + for count in range(len(val_grid_int)): + CalMeanIou_pts._after_step(predict_labels_pts[count], val_pt_labs[count]) + CalMeanIou_vox._after_step( +@@ -168,7 +171,7 @@ def main(local_rank, args): + val_pt_labs[count]) + val_loss_list.append(loss.detach().cpu().numpy()) + if i_iter_val % print_freq == 0 and dist.get_rank() == 0: +- logger.info('[EVAL] Iter %5d: Loss: %.3f (%.3f)'%( ++ logger.info('[EVAL] Iter %5d: Loss: %.3f (%.3f)' % ( + i_iter_val, loss.item(), np.mean(val_loss_list))) + + val_miou_pts = CalMeanIou_pts._after_epoch() +@@ -187,8 +190,8 @@ if __name__ == '__main__': + + args = parser.parse_args() + +- ngpus = torch.cuda.device_count() +- args.gpus = ngpus ++ npus = torch.cuda.device_count() ++ args.npus = npus + print(args) + +- torch.multiprocessing.spawn(main, args=(args,), nprocs=args.gpus) ++ torch.multiprocessing.spawn(main, args=(args,), nprocs=args.npus) 
+diff --git a/kitti_ssc/kitti_ssc/tpvformer10/modules/temporal_self_attention.py b/kitti_ssc/kitti_ssc/tpvformer10/modules/temporal_self_attention.py +index 3e95234..8e7a0f9 100644 +--- a/kitti_ssc/kitti_ssc/tpvformer10/modules/temporal_self_attention.py ++++ b/kitti_ssc/kitti_ssc/tpvformer10/modules/temporal_self_attention.py +@@ -85,7 +85,7 @@ class TPVTemporalSelfAttention(BaseModule): [1, 0, 0], [-1, 0, 0] ] @@ -172,11 +172,11 @@ index 3e95234..8e7a0f9 100644 grid_hw = xyz[:, [0, 1]] # H, 2 grid_zh = xyz[:, [2, 0]] -diff --git a/kitti_ssc/kitti_ssc/tpvformer10/modules/transformer.py b/kitti_ssc/kitti_ssc/tpvformer10/modules/transformer.py -index bd560e4..fb40ee2 100644 ---- a/kitti_ssc/kitti_ssc/tpvformer10/modules/transformer.py -+++ b/kitti_ssc/kitti_ssc/tpvformer10/modules/transformer.py -@@ -67,9 +67,9 @@ class TPVPerceptionTransformer(BaseModule): +diff --git a/kitti_ssc/kitti_ssc/tpvformer10/modules/transformer.py b/kitti_ssc/kitti_ssc/tpvformer10/modules/transformer.py +index bd560e4..fb40ee2 100644 +--- a/kitti_ssc/kitti_ssc/tpvformer10/modules/transformer.py ++++ b/kitti_ssc/kitti_ssc/tpvformer10/modules/transformer.py +@@ -67,9 +67,9 @@ class TPVPerceptionTransformer(BaseModule): def init_layers(self): """Initialize layers of the Detr3DTransformer.""" self.level_embeds = nn.Parameter(torch.Tensor( @@ -188,12 +188,12 @@ index bd560e4..fb40ee2 100644 def init_weights(self): """Initialize the transformer weights.""" -diff --git a/mmcv_need/distributed.py b/mmcv_need/distributed.py -new file mode 100644 -index 0000000..aa840c0 ---- /dev/null -+++ b/mmcv_need/distributed.py -@@ -0,0 +1,165 @@ +diff --git a/mmcv_need/distributed.py b/mmcv_need/distributed.py +new file mode 100644 +index 0000000..aa840c0 +--- /dev/null ++++ b/mmcv_need/distributed.py +@@ -0,0 +1,165 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Copyright 2024 Huawei Technologies Co., Ltd +from typing import Any, List, Tuple @@ -359,12 +359,12 @@ index 0000000..aa840c0 + return module_to_run(*inputs[0], **kwargs[0]) # type: ignore + else: + return module_to_run(*inputs, **kwargs) -diff --git a/mmcv_need/modulated_deform_conv.py b/mmcv_need/modulated_deform_conv.py -new file mode 100644 -index 0000000..3612e60 ---- /dev/null -+++ b/mmcv_need/modulated_deform_conv.py -@@ -0,0 +1,154 @@ +diff --git a/mmcv_need/modulated_deform_conv.py b/mmcv_need/modulated_deform_conv.py +new file mode 100644 +index 0000000..3612e60 +--- /dev/null ++++ b/mmcv_need/modulated_deform_conv.py +@@ -0,0 +1,154 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Copyright 2024 Huawei Technologies Co., Ltd +import math @@ -519,12 +519,12 @@ index 0000000..3612e60 + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) -diff --git a/mmcv_need/optimizer.py b/mmcv_need/optimizer.py -new file mode 100644 -index 0000000..2f56f94 ---- /dev/null -+++ b/mmcv_need/optimizer.py -@@ -0,0 +1,555 @@ +diff --git a/mmcv_need/optimizer.py b/mmcv_need/optimizer.py +new file mode 100644 +index 0000000..2f56f94 +--- /dev/null ++++ b/mmcv_need/optimizer.py +@@ -0,0 +1,555 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+# Copyright 2024 Huawei Technologies Co., Ltd +# Modified: Replaced the fused optimizer by clip_grad_norm_fused_ @@ -1080,12 +1080,12 @@ index 0000000..2f56f94 + # clear grads + runner.model.zero_grad() + runner.optimizer.zero_grad() -diff --git a/mmdet_need/resnet.py b/mmdet_need/resnet.py -new file mode 100644 -index 0000000..a22b825 ---- /dev/null -+++ b/mmdet_need/resnet.py -@@ -0,0 +1,672 @@ +diff --git a/mmdet_need/resnet.py b/mmdet_need/resnet.py +new file mode 100644 +index 0000000..a22b825 +--- /dev/null ++++ b/mmdet_need/resnet.py +@@ -0,0 +1,672 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Copyright 2024 Huawei Technologies Co., Ltd +import warnings @@ -1758,45 +1758,45 @@ index 0000000..a22b825 + def __init__(self, **kwargs): + super(ResNetV1d, self).__init__( + deep_stem=True, avg_down=True, **kwargs) -diff --git a/requirements.txt b/requirements.txt -index 507cfeb..d551a42 100644 ---- a/requirements.txt -+++ b/requirements.txt -@@ -1,10 +1,20 @@ --mmcv_full==1.4.0 --mmdet==2.14.0 --# mmdet3d==0.18.1 --mmsegmentation==0.14.1 --numba==0.48.0 -+setuptools==65.7.0 -+torchvision==0.16.0 -+opencv-python-headless==4.5.3.56 -+nuscenes-devkit==1.1.11 -+numba==0.58.1 - numpy==1.23.1 --Pillow==9.3.0 --PyYAML==6.0 -+pillow==10.2.0 - timm==0.4.12 --torch==1.10.0 -\ No newline at end of file -+attrs -+decorator -+sympy -+cffi -+pyyaml -+pathlib2 -+psutil -+protobuf==3.19.6 -+scipy -+requests -+absl-py -+ -diff --git a/tpvformer04/modules/cross_view_hybrid_attention.py b/tpvformer04/modules/cross_view_hybrid_attention.py -index 3b18d5c..3788851 100644 ---- a/tpvformer04/modules/cross_view_hybrid_attention.py -+++ b/tpvformer04/modules/cross_view_hybrid_attention.py -@@ -3,6 +3,8 @@ from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFuncti +diff --git a/requirements.txt b/requirements.txt +index 507cfeb..d551a42 100644 +--- a/requirements.txt ++++ b/requirements.txt +@@ -1,10 +1,20 @@ +-mmcv_full==1.4.0 +-mmdet==2.14.0 +-# mmdet3d==0.18.1 +-mmsegmentation==0.14.1 +-numba==0.48.0 ++setuptools==65.7.0 ++torchvision==0.16.0 ++opencv-python-headless==4.5.3.56 ++nuscenes-devkit==1.1.11 ++numba==0.58.1 + numpy==1.23.1 +-Pillow==9.3.0 +-PyYAML==6.0 ++pillow==10.2.0 + timm==0.4.12 +-torch==1.10.0 +\ No newline at end of file ++attrs ++decorator ++sympy ++cffi ++pyyaml ++pathlib2 ++psutil ++protobuf==4.25.8 ++scipy ++requests ++absl-py ++ +diff --git a/tpvformer04/modules/cross_view_hybrid_attention.py b/tpvformer04/modules/cross_view_hybrid_attention.py +index 3b18d5c..3788851 100644 +--- a/tpvformer04/modules/cross_view_hybrid_attention.py ++++ b/tpvformer04/modules/cross_view_hybrid_attention.py +@@ -3,6 +3,8 @@ from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFuncti from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch import warnings import torch @@ -1805,7 +1805,7 @@ index 3b18d5c..3788851 100644 import torch.nn as nn from mmcv.cnn import xavier_init, constant_init from mmcv.cnn.bricks.registry import ATTENTION -@@ -213,19 +215,9 @@ class TPVCrossViewHybridAttention(BaseModule): +@@ -213,19 +215,9 @@ class TPVCrossViewHybridAttention(BaseModule): raise ValueError( f'Last dim of reference_points must be' f' 2 or 4, but get {reference_points.shape[-1]} instead.') @@ -1828,11 +1828,11 @@ index 3b18d5c..3788851 100644 # output shape (bs*num_tpv_queue, num_query, embed_dims) # (bs*num_tpv_queue, num_query, embed_dims)-> (num_query, embed_dims, bs*num_tpv_queue) output = output.permute(1, 2, 0) -diff --git 
a/tpvformer04/modules/image_cross_attention.py b/tpvformer04/modules/image_cross_attention.py -index a784336..c737446 100644 ---- a/tpvformer04/modules/image_cross_attention.py -+++ b/tpvformer04/modules/image_cross_attention.py -@@ -2,6 +2,8 @@ +diff --git a/tpvformer04/modules/image_cross_attention.py b/tpvformer04/modules/image_cross_attention.py +index a784336..c737446 100644 +--- a/tpvformer04/modules/image_cross_attention.py ++++ b/tpvformer04/modules/image_cross_attention.py +@@ -2,6 +2,8 @@ from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch import warnings import torch @@ -1841,7 +1841,7 @@ index a784336..c737446 100644 import torch.nn as nn from mmcv.cnn import xavier_init, constant_init from mmcv.cnn.bricks.registry import ATTENTION -@@ -427,17 +429,8 @@ class TPVMSDeformableAttention3D(BaseModule): +@@ -427,17 +429,8 @@ class TPVMSDeformableAttention3D(BaseModule): # sampling_locations.shape: bs, num_query, num_heads, num_levels, num_all_points, 2 # attention_weights.shape: bs, num_query, num_heads, num_levels, num_all_points @@ -1861,11 +1861,11 @@ index a784336..c737446 100644 output = self.reshape_output(output, query_lens) if not self.batch_first: -diff --git a/tpvformer10/modules/cross_view_hybrid_attention.py b/tpvformer10/modules/cross_view_hybrid_attention.py -index 1e3316b..e6c17b8 100644 ---- a/tpvformer10/modules/cross_view_hybrid_attention.py -+++ b/tpvformer10/modules/cross_view_hybrid_attention.py -@@ -3,6 +3,8 @@ from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFuncti +diff --git a/tpvformer10/modules/cross_view_hybrid_attention.py b/tpvformer10/modules/cross_view_hybrid_attention.py +index 1e3316b..e6c17b8 100644 +--- a/tpvformer10/modules/cross_view_hybrid_attention.py ++++ b/tpvformer10/modules/cross_view_hybrid_attention.py +@@ -3,6 +3,8 @@ from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFuncti from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch import math import torch @@ -1874,7 +1874,7 @@ index 1e3316b..e6c17b8 100644 import torch.nn as nn from mmcv.cnn import xavier_init, constant_init from mmcv.cnn.bricks.registry import ATTENTION -@@ -85,7 +87,7 @@ class TPVCrossViewHybridAttention(BaseModule): +@@ -85,7 +87,7 @@ class TPVCrossViewHybridAttention(BaseModule): [1, 0, 0], [-1, 0, 0] ] @@ -1883,7 +1883,7 @@ index 1e3316b..e6c17b8 100644 grid_hw = xyz[:, [0, 1]] # H, 2 grid_zh = xyz[:, [2, 0]] -@@ -186,17 +188,8 @@ class TPVCrossViewHybridAttention(BaseModule): +@@ -186,17 +188,8 @@ class TPVCrossViewHybridAttention(BaseModule): f'Last dim of reference_points must be' f' 2, but get {reference_points.shape[-1]} instead.') @@ -1903,11 +1903,11 @@ index 1e3316b..e6c17b8 100644 outputs = self.reshape_output(output, query_lens) -diff --git a/tpvformer10/modules/encoder.py b/tpvformer10/modules/encoder.py -index d76cb72..dfa571f 100644 ---- a/tpvformer10/modules/encoder.py -+++ b/tpvformer10/modules/encoder.py -@@ -193,8 +193,8 @@ class TPVFormerEncoder(TransformerLayerSequence): +diff --git a/tpvformer10/modules/encoder.py b/tpvformer10/modules/encoder.py +index d76cb72..dfa571f 100644 +--- a/tpvformer10/modules/encoder.py ++++ b/tpvformer10/modules/encoder.py +@@ -193,8 +193,8 @@ class TPVFormerEncoder(TransformerLayerSequence): lidar2img = lidar2img.view( 1, B, num_cam, 1, 4, 4).repeat(D, 1, 1, num_query, 1, 1) @@ -1918,11 +1918,11 @@ index d76cb72..dfa571f 100644 eps = 1e-5 tpv_mask = (reference_points_cam[..., 2:3] > eps) -diff --git 
a/tpvformer10/modules/image_cross_attention.py b/tpvformer10/modules/image_cross_attention.py -index 00c7087..1d549fa 100644 ---- a/tpvformer10/modules/image_cross_attention.py -+++ b/tpvformer10/modules/image_cross_attention.py -@@ -2,6 +2,8 @@ +diff --git a/tpvformer10/modules/image_cross_attention.py b/tpvformer10/modules/image_cross_attention.py +index 00c7087..1d549fa 100644 +--- a/tpvformer10/modules/image_cross_attention.py ++++ b/tpvformer10/modules/image_cross_attention.py +@@ -2,6 +2,8 @@ from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch import warnings import torch @@ -1931,7 +1931,7 @@ index 00c7087..1d549fa 100644 import torch.nn as nn from mmcv.cnn import xavier_init, constant_init from mmcv.cnn.bricks.registry import ATTENTION -@@ -409,17 +411,8 @@ class TPVMSDeformableAttention3D(BaseModule): +@@ -409,17 +411,8 @@ class TPVMSDeformableAttention3D(BaseModule): # sampling_locations.shape: bs, num_query, num_heads, num_levels, num_all_points, 2 # attention_weights.shape: bs, num_query, num_heads, num_levels, num_all_points @@ -1951,11 +1951,11 @@ index 00c7087..1d549fa 100644 output = self.reshape_output(output, query_lens) if not self.batch_first: -diff --git a/tpvformer10/modules/tpvformer_layer.py b/tpvformer10/modules/tpvformer_layer.py -index e57e4a2..1288e11 100644 ---- a/tpvformer10/modules/tpvformer_layer.py -+++ b/tpvformer10/modules/tpvformer_layer.py -@@ -46,6 +46,9 @@ class TPVFormerLayer(BaseModule): +diff --git a/tpvformer10/modules/tpvformer_layer.py b/tpvformer10/modules/tpvformer_layer.py +index e57e4a2..1288e11 100644 +--- a/tpvformer10/modules/tpvformer_layer.py ++++ b/tpvformer10/modules/tpvformer_layer.py +@@ -46,6 +46,9 @@ class TPVFormerLayer(BaseModule): """ def __init__(self, @@ -1965,7 +1965,7 @@ index e57e4a2..1288e11 100644 attn_cfgs=None, ffn_cfgs=dict( type='FFN', -@@ -128,6 +131,15 @@ class TPVFormerLayer(BaseModule): +@@ -128,6 +131,15 @@ class TPVFormerLayer(BaseModule): num_norms = operation_order.count('norm') for _ in range(num_norms): self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1]) @@ -1981,7 +1981,7 @@ index e57e4a2..1288e11 100644 def forward(self, query, -@@ -169,15 +181,6 @@ class TPVFormerLayer(BaseModule): +@@ -169,15 +181,6 @@ class TPVFormerLayer(BaseModule): for layer in self.operation_order: # cross view hybrid attention if layer == 'self_attn': @@ -1997,7 +1997,7 @@ index e57e4a2..1288e11 100644 if not isinstance(query, (list, tuple)): query = torch.split( query, [tpv_h*tpv_w, tpv_z*tpv_h, tpv_w*tpv_z], dim=1) -@@ -187,8 +190,8 @@ class TPVFormerLayer(BaseModule): +@@ -187,8 +190,8 @@ class TPVFormerLayer(BaseModule): identity if self.pre_norm else None, query_pos=tpv_pos, reference_points=ref_2d, @@ -2008,219 +2008,219 @@ index e57e4a2..1288e11 100644 **kwargs) attn_index += 1 query = torch.cat(query, dim=1) -diff --git a/tpvformer10/tpv_head.py b/tpvformer10/tpv_head.py -index 193d909..824cf9e 100644 ---- a/tpvformer10/tpv_head.py -+++ b/tpvformer10/tpv_head.py -@@ -47,9 +47,9 @@ class TPVFormerHead(BaseModule): - # transformer layers - self.encoder = build_transformer_layer_sequence(encoder) - self.level_embeds = nn.Parameter( -- torch.Tensor(self.num_feature_levels, self.embed_dims)) -+ torch.Tensor(self.num_feature_levels, self.embed_dims).npu()) - self.cams_embeds = nn.Parameter( -- torch.Tensor(self.num_cams, self.embed_dims)) -+ torch.Tensor(self.num_cams, self.embed_dims).npu()) - self.tpv_embedding_hw = nn.Embedding(self.tpv_h * self.tpv_w, self.embed_dims) - 
self.tpv_embedding_zh = nn.Embedding(self.tpv_z * self.tpv_h, self.embed_dims) - self.tpv_embedding_wz = nn.Embedding(self.tpv_w * self.tpv_z, self.embed_dims) -diff --git a/train.py b/train.py -index e53a80c..cb736d2 100644 ---- a/train.py -+++ b/train.py -@@ -1,6 +1,9 @@ - - import os, time, argparse, os.path as osp, numpy as np - import torch -+import torch.nn as nn -+import torch_npu -+from torch_npu.contrib import transfer_to_npu - import torch.distributed as dist - - from utils.metric_util import MeanIoU -@@ -14,6 +17,9 @@ from mmcv.runner import build_optimizer - from mmseg.utils import get_root_logger - from timm.scheduler import CosineLRScheduler - -+torch_npu.npu.set_compile_mode(jit_compile=False) -+torch.npu.config.allow_internal_format = False -+ - import warnings - warnings.filterwarnings("ignore") - -@@ -23,7 +29,7 @@ def pass_print(*args, **kwargs): - - def main(local_rank, args): - # global settings -- torch.backends.cudnn.benchmark = True -+ # torch.backends.cudnn.benchmark = True - - # load config - cfg = Config.fromfile(args.py_config) -@@ -35,7 +41,10 @@ def main(local_rank, args): - train_dataloader_config = cfg.train_data_loader - val_dataloader_config = cfg.val_data_loader - -- max_num_epochs = cfg.max_epochs -+ if args.max_epochs != 0: -+ max_num_epochs = args.max_epochs -+ else: -+ max_num_epochs = cfg.max_epochs - grid_size = cfg.grid_size - - # init DDP -@@ -44,14 +53,14 @@ def main(local_rank, args): - port = os.environ.get("MASTER_PORT", "20506") - hosts = int(os.environ.get("WORLD_SIZE", 1)) # number of nodes - rank = int(os.environ.get("RANK", 0)) # node id -- gpus = torch.cuda.device_count() # gpus per node -+ npus = torch.cuda.device_count() # gpus per node - print(f"tcp://{ip}:{port}") - dist.init_process_group( -- backend="nccl", init_method=f"tcp://{ip}:{port}", -- world_size=hosts * gpus, rank=rank * gpus + local_rank -+ backend="hccl", init_method=f"tcp://{ip}:{port}", -+ world_size=hosts * npus, rank=rank * npus + local_rank - ) - world_size = dist.get_world_size() -- cfg.gpu_ids = range(world_size) -+ cfg.npu_ids = range(world_size) - torch.cuda.set_device(local_rank) - - if dist.get_rank() != 0: -@@ -113,7 +122,7 @@ def main(local_rank, args): - loss_builder.build(ignore_label=ignore_label) - scheduler = CosineLRScheduler( - optimizer, -- t_initial=len(train_dataset_loader)*max_num_epochs, -+ t_initial=len(train_dataset_loader) * max_num_epochs, - lr_min=1e-6, - warmup_t=500, - warmup_lr_init=1e-5, -@@ -176,7 +185,7 @@ def main(local_rank, args): - data_time_s = time.time() - time_s = time.time() - for i_iter, (imgs, img_metas, train_vox_label, train_grid, train_pt_labs) in enumerate(train_dataset_loader): -- -+ - imgs = imgs.cuda() - train_grid = train_grid.to(torch.float32).cuda() - if cfg.lovasz_input == 'voxel' or cfg.ce_input == 'voxel': -@@ -200,7 +209,7 @@ def main(local_rank, args): - ce_label = train_pt_labs.squeeze(-1) - - loss = lovasz_softmax( -- torch.nn.functional.softmax(lovasz_input, dim=1), -+ torch.nn.functional.softmax(lovasz_input, dim=1), - lovasz_label, ignore=ignore_label - ) + loss_func(ce_input, ce_label) - -@@ -215,14 +224,14 @@ def main(local_rank, args): - global_iter += 1 - if i_iter % print_freq == 0 and dist.get_rank() == 0: - lr = optimizer.param_groups[0]['lr'] -- logger.info('[TRAIN] Epoch %d Iter %5d/%d: Loss: %.3f (%.3f), grad_norm: %.1f, lr: %.7f, time: %.3f (%.3f)'%( -- epoch, i_iter, len(train_dataset_loader), -- loss.item(), np.mean(loss_list), grad_norm, lr, -+ logger.info('[TRAIN] Epoch %d Iter %5d/%d: 
Loss: %.3f, grad_norm: %.1f, lr: %.7f, time: %.3f (%.3f)' % (
-+ epoch, i_iter, len(train_dataset_loader),
-+ loss.item(), grad_norm, lr,
- time_e - time_s, data_time_e - data_time_s
- ))
- data_time_s = time.time()
- time_s = time.time()
--
-+
- # save checkpoint
- if dist.get_rank() == 0:
- dict_to_save = {
-@@ -240,7 +249,11 @@ def main(local_rank, args):
- mmcv.symlink(save_file_name, dst_file)
-
- epoch += 1
--
-+
-+ # Skip eval for performance test runs
-+ if max_num_epochs == 1:
-+ return
-+
- # eval
- my_model.eval()
- val_loss_list = []
-@@ -278,11 +291,11 @@ def main(local_rank, args):
-
- predict_labels_pts = predict_labels_pts.squeeze(-1).squeeze(-1)
- predict_labels_pts = torch.argmax(predict_labels_pts, dim=1) # bs, n
-- predict_labels_pts = predict_labels_pts.detach().cpu()
-- val_pt_labs = val_pt_labs.squeeze(-1).cpu()
-+ predict_labels_pts = predict_labels_pts.detach().npu()
-+ val_pt_labs = val_pt_labs.squeeze(-1).npu()
-
- predict_labels_vox = torch.argmax(predict_labels_vox, dim=1)
-- predict_labels_vox = predict_labels_vox.detach().cpu()
-+ predict_labels_vox = predict_labels_vox.detach().npu()
- for count in range(len(val_grid_int)):
- CalMeanIou_pts._after_step(predict_labels_pts[count], val_pt_labs[count])
- CalMeanIou_vox._after_step(
-@@ -294,7 +307,7 @@ def main(local_rank, args):
- val_pt_labs[count])
- val_loss_list.append(loss.detach().cpu().numpy())
- if i_iter_val % print_freq == 0 and dist.get_rank() == 0:
-- logger.info('[EVAL] Epoch %d Iter %5d: Loss: %.3f (%.3f)'%(
-+ logger.info('[EVAL] Epoch %d Iter %5d: Loss: %.3f (%.3f)' % (
- epoch, i_iter_val, loss.item(), np.mean(val_loss_list)))
-
- val_miou_pts = CalMeanIou_pts._after_epoch()
-@@ -319,11 +332,13 @@ if __name__ == '__main__':
- parser.add_argument('--py-config', default='config/tpv_lidarseg.py')
- parser.add_argument('--work-dir', type=str, default='./out/tpv_lidarseg')
- parser.add_argument('--resume-from', type=str, default='')
-+ parser.add_argument('--max-epochs', type=int, default=0, metavar='N',
-+ help='number of epochs to train')
-
- args = parser.parse_args()
--
-- ngpus = torch.cuda.device_count()
-- args.gpus = ngpus
-+
-+ npus = torch.cuda.device_count()
-+ args.npus = npus
- print(args)
-
-- torch.multiprocessing.spawn(main, args=(args,), nprocs=args.gpus)
-+ torch.multiprocessing.spawn(main, args=(args,), nprocs=args.npus)
-diff --git a/utils/lovasz_losses.py b/utils/lovasz_losses.py
-index c686914..d6e6019 100644
---- a/utils/lovasz_losses.py
-+++ b/utils/lovasz_losses.py
-@@ -295,7 +295,7 @@ def hinge_jaccard_loss(probas, labels,ignore=None, classes = 'present', hinge =
-
- # --------------------------- HELPER FUNCTIONS ---------------------------
- def isnan(x):
-- return x != x
-+ return np.isnan(x)
-
-
- def mean(l, ignore_nan=False, empty=0):
-diff --git a/utils/metric_util.py b/utils/metric_util.py
-index 9832f88..cb28c83 100644
---- a/utils/metric_util.py
-+++ b/utils/metric_util.py
-@@ -42,13 +42,10 @@ class MeanIoU:
- ious = []
-
- for i in range(self.num_classes):
-- if self.total_seen[i] == 0:
-- ious.append(1)
-- else:
-- cur_iou = self.total_correct[i] / (self.total_seen[i]
-- + self.total_positive[i]
-- - self.total_correct[i])
-- ious.append(cur_iou.item())
-+ cur_iou = self.total_correct[i] / (self.total_seen[i]
-+ + self.total_positive[i]
-+ - self.total_correct[i])
-+ ious.append(cur_iou.item())
-
- miou = np.mean(ious)
- logger = get_root_logger()
+diff --git a/tpvformer10/tpv_head.py b/tpvformer10/tpv_head.py
+index 193d909..824cf9e 100644
+--- a/tpvformer10/tpv_head.py
++++ 
b/tpvformer10/tpv_head.py
+@@ -47,9 +47,9 @@ class TPVFormerHead(BaseModule):
+ # transformer layers
+ self.encoder = build_transformer_layer_sequence(encoder)
+ self.level_embeds = nn.Parameter(
+- torch.Tensor(self.num_feature_levels, self.embed_dims))
++ torch.Tensor(self.num_feature_levels, self.embed_dims).npu())
+ self.cams_embeds = nn.Parameter(
+- torch.Tensor(self.num_cams, self.embed_dims))
++ torch.Tensor(self.num_cams, self.embed_dims).npu())
+ self.tpv_embedding_hw = nn.Embedding(self.tpv_h * self.tpv_w, self.embed_dims)
+ self.tpv_embedding_zh = nn.Embedding(self.tpv_z * self.tpv_h, self.embed_dims)
+ self.tpv_embedding_wz = nn.Embedding(self.tpv_w * self.tpv_z, self.embed_dims)
+diff --git a/train.py b/train.py
+index e53a80c..cb736d2 100644
+--- a/train.py
++++ b/train.py
+@@ -1,6 +1,9 @@
+
+ import os, time, argparse, os.path as osp, numpy as np
+ import torch
++import torch.nn as nn
++import torch_npu
++from torch_npu.contrib import transfer_to_npu
+ import torch.distributed as dist
+
+ from utils.metric_util import MeanIoU
+@@ -14,6 +17,9 @@ from mmcv.runner import build_optimizer
+ from mmseg.utils import get_root_logger
+ from timm.scheduler import CosineLRScheduler
+
++torch_npu.npu.set_compile_mode(jit_compile=False)
++torch.npu.config.allow_internal_format = False
++
+ import warnings
+ warnings.filterwarnings("ignore")
+
+@@ -23,7 +29,7 @@ def pass_print(*args, **kwargs):
+
+ def main(local_rank, args):
+ # global settings
+- torch.backends.cudnn.benchmark = True
++ # torch.backends.cudnn.benchmark = True
+
+ # load config
+ cfg = Config.fromfile(args.py_config)
+@@ -35,7 +41,10 @@ def main(local_rank, args):
+ train_dataloader_config = cfg.train_data_loader
+ val_dataloader_config = cfg.val_data_loader
+
+- max_num_epochs = cfg.max_epochs
++ if args.max_epochs != 0:
++ max_num_epochs = args.max_epochs
++ else:
++ max_num_epochs = cfg.max_epochs
+ grid_size = cfg.grid_size
+
+ # init DDP
+@@ -44,14 +53,14 @@ def main(local_rank, args):
+ port = os.environ.get("MASTER_PORT", "20506")
+ hosts = int(os.environ.get("WORLD_SIZE", 1)) # number of nodes
+ rank = int(os.environ.get("RANK", 0)) # node id
+- gpus = torch.cuda.device_count() # gpus per node
++ npus = torch.cuda.device_count() # npus per node
+ print(f"tcp://{ip}:{port}")
+ dist.init_process_group(
+- backend="nccl", init_method=f"tcp://{ip}:{port}",
+- world_size=hosts * gpus, rank=rank * gpus + local_rank
++ backend="hccl", init_method=f"tcp://{ip}:{port}",
++ world_size=hosts * npus, rank=rank * npus + local_rank
+ )
+ world_size = dist.get_world_size()
+- cfg.gpu_ids = range(world_size)
++ cfg.npu_ids = range(world_size)
+ torch.cuda.set_device(local_rank)
+
+ if dist.get_rank() != 0:
+@@ -113,7 +122,7 @@ def main(local_rank, args):
+ loss_builder.build(ignore_label=ignore_label)
+ scheduler = CosineLRScheduler(
+ optimizer,
+- t_initial=len(train_dataset_loader)*max_num_epochs,
++ t_initial=len(train_dataset_loader) * max_num_epochs,
+ lr_min=1e-6,
+ warmup_t=500,
+ warmup_lr_init=1e-5,
+@@ -176,7 +185,7 @@ def main(local_rank, args):
+ data_time_s = time.time()
+ time_s = time.time()
+ for i_iter, (imgs, img_metas, train_vox_label, train_grid, train_pt_labs) in enumerate(train_dataset_loader):
+-
++
+ imgs = imgs.cuda()
+ train_grid = train_grid.to(torch.float32).cuda()
+ if cfg.lovasz_input == 'voxel' or cfg.ce_input == 'voxel':
+@@ -200,7 +209,7 @@ def main(local_rank, args):
+ ce_label = train_pt_labs.squeeze(-1)
+
+ loss = lovasz_softmax(
+- torch.nn.functional.softmax(lovasz_input, dim=1),
++ 
torch.nn.functional.softmax(lovasz_input, dim=1),
+ lovasz_label, ignore=ignore_label
+ ) + loss_func(ce_input, ce_label)
+
+@@ -215,14 +224,14 @@ def main(local_rank, args):
+ global_iter += 1
+ if i_iter % print_freq == 0 and dist.get_rank() == 0:
+ lr = optimizer.param_groups[0]['lr']
+- logger.info('[TRAIN] Epoch %d Iter %5d/%d: Loss: %.3f (%.3f), grad_norm: %.1f, lr: %.7f, time: %.3f (%.3f)'%(
+- epoch, i_iter, len(train_dataset_loader),
+- loss.item(), np.mean(loss_list), grad_norm, lr,
++ logger.info('[TRAIN] Epoch %d Iter %5d/%d: Loss: %.3f, grad_norm: %.1f, lr: %.7f, time: %.3f (%.3f)' % (
++ epoch, i_iter, len(train_dataset_loader),
++ loss.item(), grad_norm, lr,
+ time_e - time_s, data_time_e - data_time_s
+ ))
+ data_time_s = time.time()
+ time_s = time.time()
+-
++
+ # save checkpoint
+ if dist.get_rank() == 0:
+ dict_to_save = {
+@@ -240,7 +249,11 @@ def main(local_rank, args):
+ mmcv.symlink(save_file_name, dst_file)
+
+ epoch += 1
+-
++
++ # Skip eval for performance test runs
++ if max_num_epochs == 1:
++ return
++
+ # eval
+ my_model.eval()
+ val_loss_list = []
+@@ -278,11 +291,11 @@ def main(local_rank, args):
+
+ predict_labels_pts = predict_labels_pts.squeeze(-1).squeeze(-1)
+ predict_labels_pts = torch.argmax(predict_labels_pts, dim=1) # bs, n
+- predict_labels_pts = predict_labels_pts.detach().cpu()
+- val_pt_labs = val_pt_labs.squeeze(-1).cpu()
++ predict_labels_pts = predict_labels_pts.detach().npu()
++ val_pt_labs = val_pt_labs.squeeze(-1).npu()
+
+ predict_labels_vox = torch.argmax(predict_labels_vox, dim=1)
+- predict_labels_vox = predict_labels_vox.detach().cpu()
++ predict_labels_vox = predict_labels_vox.detach().npu()
+ for count in range(len(val_grid_int)):
+ CalMeanIou_pts._after_step(predict_labels_pts[count], val_pt_labs[count])
+ CalMeanIou_vox._after_step(
+@@ -294,7 +307,7 @@ def main(local_rank, args):
+ val_pt_labs[count])
+ val_loss_list.append(loss.detach().cpu().numpy())
+ if i_iter_val % print_freq == 0 and dist.get_rank() == 0:
+- logger.info('[EVAL] Epoch %d Iter %5d: Loss: %.3f (%.3f)'%(
++ logger.info('[EVAL] Epoch %d Iter %5d: Loss: %.3f (%.3f)' % (
+ epoch, i_iter_val, loss.item(), np.mean(val_loss_list)))
+
+ val_miou_pts = CalMeanIou_pts._after_epoch()
+@@ -319,11 +332,13 @@ if __name__ == '__main__':
+ parser.add_argument('--py-config', default='config/tpv_lidarseg.py')
+ parser.add_argument('--work-dir', type=str, default='./out/tpv_lidarseg')
+ parser.add_argument('--resume-from', type=str, default='')
++ parser.add_argument('--max-epochs', type=int, default=0, metavar='N',
++ help='number of epochs to train')
+
+ args = parser.parse_args()
+-
+- ngpus = torch.cuda.device_count()
+- args.gpus = ngpus
++
++ npus = torch.cuda.device_count()
++ args.npus = npus
+ print(args)
+
+- torch.multiprocessing.spawn(main, args=(args,), nprocs=args.gpus)
++ torch.multiprocessing.spawn(main, args=(args,), nprocs=args.npus)
+diff --git a/utils/lovasz_losses.py b/utils/lovasz_losses.py
+index c686914..d6e6019 100644
+--- a/utils/lovasz_losses.py
++++ b/utils/lovasz_losses.py
+@@ -295,7 +295,7 @@ def hinge_jaccard_loss(probas, labels,ignore=None, classes = 'present', hinge =
+
+ # --------------------------- HELPER FUNCTIONS ---------------------------
+ def isnan(x):
+- return x != x
++ return np.isnan(x)
+
+
+ def mean(l, ignore_nan=False, empty=0):
+diff --git a/utils/metric_util.py b/utils/metric_util.py
+index 9832f88..cb28c83 100644
+--- a/utils/metric_util.py
++++ b/utils/metric_util.py
+@@ -42,13 +42,10 @@ class MeanIoU:
+ ious = []
+
+ for i in 
range(self.num_classes): +- if self.total_seen[i] == 0: +- ious.append(1) +- else: +- cur_iou = self.total_correct[i] / (self.total_seen[i] +- + self.total_positive[i] +- - self.total_correct[i]) +- ious.append(cur_iou.item()) ++ cur_iou = self.total_correct[i] / (self.total_seen[i] ++ + self.total_positive[i] ++ - self.total_correct[i]) ++ ious.append(cur_iou.item()) + + miou = np.mean(ious) + logger = get_root_logger() -- Gitee