diff --git a/ads/common/__init__.py b/ads/common/__init__.py
index 021418f12d15c708b799729a39fd351db043dd69..d13daf5c50776ae33ae2045fb0fc64e1f0ac9bd0 100644
--- a/ads/common/__init__.py
+++ b/ads/common/__init__.py
@@ -22,3 +22,4 @@ from .ops.npu_batch_nms import npu_batch_nms
 from .ops.npu_confusion_transpose import npu_confusion_transpose
 from .ops.npu_broadcast import npu_broadcast
 from .ops.npu_moe_tutel import npu_moe_tutel
+from .ops.iou import npu_iou
diff --git a/ads/common/ops/csrc/IouKernelNpu.cpp b/ads/common/ops/csrc/IouKernelNpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4ff33d4f54dc6788e6530d3f3c071e896872b385
--- /dev/null
+++ b/ads/common/ops/csrc/IouKernelNpu.cpp
@@ -0,0 +1,59 @@
+// Copyright (c) 2023 Huawei Technologies Co., Ltd
+// Copyright (c) 2019, Facebook CORPORATION.
+// All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "torch_npu/csrc/framework/utils/OpAdapter.h"
+#include "torch_npu/csrc/aten/NPUNativeFunctions.h"
+#include "torch_npu/csrc/core/npu/NPUFormat.h"
+#include "functions.h"
+
+at::Tensor npu_iou(
+    const at::Tensor& bboxes,
+    const at::Tensor& gtboxes,
+    int64_t mode)
+{
+    at::Tensor bboxes_fp16 = bboxes;
+    if (bboxes.scalar_type() != at::ScalarType::Half) {
+        bboxes_fp16 = bboxes.to(at::kHalf);
+    }
+    at::Tensor gtboxes_fp16 = gtboxes;
+    if (gtboxes.scalar_type() != at::ScalarType::Half) {
+        gtboxes_fp16 = gtboxes.to(at::kHalf);
+    }
+
+    auto output_size = {gtboxes.size(0), bboxes.size(0)};
+    at::Tensor overlap = at_npu::native::empty_with_format(
+        output_size,
+        bboxes_fp16.options(),
+        at_npu::native::get_npu_format(bboxes));
+    string mode_str = "iou";
+    if (mode == 1) {
+        mode_str = "iof";
+    }
+
+    at_npu::native::OpCommand cmd;
+    cmd.Name("Iou")
+        .Input(bboxes_fp16)
+        .Input(gtboxes_fp16)
+        .Output(overlap)
+        .Attr("mode", mode_str)
+        .Attr("eps", static_cast<float>(0.01))
+        .Run();
+
+    if (overlap.scalar_type() != bboxes.scalar_type()) {
+        overlap = overlap.to(bboxes.scalar_type());
+    }
+    return overlap;
+}
diff --git a/ads/common/ops/csrc/functions.h b/ads/common/ops/csrc/functions.h
index f713b98400c6d2168727ff675f61b2c73a086b74..b18b6c5d70e65af6bfff043cfe7b753890759390 100644
--- a/ads/common/ops/csrc/functions.h
+++ b/ads/common/ops/csrc/functions.h
@@ -132,5 +132,6 @@ at::Tensor npu_moe_tutel_gate_backward(
     const at::Tensor &y_grad,
     const at::Tensor &indices,
     const at::Tensor &locations);
+at::Tensor npu_iou(const at::Tensor& bboxes, const at::Tensor& gtboxes, int64_t mode);
 
 #endif // __FUNCTIONS_H__
diff --git a/ads/common/ops/csrc/pybind.cpp b/ads/common/ops/csrc/pybind.cpp
index c91e4093ea2c93a19f213e43557b3cdd6d25cb1e..1a5670cd8ee1c7570ae1bcaef2e64d30cd1e1e9c 100644
--- a/ads/common/ops/csrc/pybind.cpp
+++ b/ads/common/ops/csrc/pybind.cpp
@@ -71,4 +71,7 @@ void init_common(pybind11::module &m)
     m.def("npu_moe_tutel", &npu_moe_tutel, "npu_moe_tutel NPU version");
     m.def("npu_moe_tutel_data_backward", &npu_moe_tutel_data_backward, "npu_moe_tutel_data_backward NPU version");
     m.def("npu_moe_tutel_gate_backward", &npu_moe_tutel_gate_backward, "npu_moe_tutel_gate_backward NPU version");
+
+    // iou
+    m.def("npu_iou", &npu_iou);
 }
diff --git a/ads/common/ops/iou.py b/ads/common/ops/iou.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e95086f7507e4ed69c2ad00078b74e7c0e5ab14
--- /dev/null
+++ b/ads/common/ops/iou.py
@@ -0,0 +1,5 @@
+import torch
+import torch_npu
+import ads_c
+
+npu_iou = ads_c.npu_iou
diff --git a/tests/test_npu_iou.py b/tests/test_npu_iou.py
new file mode 100644
index 0000000000000000000000000000000000000000..150d8a35d37b9aca5e2c6dfcc9f1aae23ca74c07
--- /dev/null
+++ b/tests/test_npu_iou.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2020, Huawei Technologies. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import torch
+
+import torch_npu
+from torch_npu.testing.testcase import TestCase, run_tests
+import ads.common
+
+
+class TestNpuIou(TestCase):
+    def ads_op_exec(self, bboxes, gtboxes, mode=0):
+        output = ads.common.npu_iou(bboxes, gtboxes, mode)
+        output = output.to("cpu")
+        output = output.numpy()
+        return output
+
+    def npu_op_exec(self, bboxes, gtboxes, mode=0):
+        output = torch_npu.npu_iou(bboxes, gtboxes, mode)
+        output = output.to("cpu")
+        output = output.numpy()
+        return output
+
+    def test_iou_fp16(self):
+        bboxes = torch.tensor([[0, 0, 10, 10],
+                               [10, 10, 20, 20],
+                               [32, 32, 38, 42]], dtype=torch.float16).to("npu")
+        gtboxes = torch.tensor([[0, 0, 10, 20],
+                                [0, 10, 10, 10],
+                                [10, 10, 20, 20]], dtype=torch.float16).to("npu")
+
+        output_npu = self.npu_op_exec(bboxes, gtboxes, 1)
+        output_ads = self.ads_op_exec(bboxes, gtboxes, 1)
+        self.assertRtolEqual(output_npu, output_ads)
+
+        output_npu = self.npu_op_exec(bboxes, gtboxes)
+        output_ads = self.ads_op_exec(bboxes, gtboxes)
+        self.assertRtolEqual(output_npu, output_ads)
+
+    def test_iou_fp16_pt(self):
+        bboxes = torch.tensor([[1, 2, 3, 4],
+                               [5, 6, 7, 8],
+                               [9, 10, 11, 12],
+                               [13, 14, 15, 16]], dtype=torch.float16).npu()
+        gtboxes = torch.tensor([[1, 2, 3, 4],
+                                [5, 6, 7, 8]], dtype=torch.float16).npu()
+
+        output_npu = self.npu_op_exec(bboxes, gtboxes, 1)
+        output_ads = self.ads_op_exec(bboxes, gtboxes, 1)
+        self.assertRtolEqual(output_npu, output_ads)
+
+        output_npu = self.npu_op_exec(bboxes, gtboxes)
+        output_ads = self.ads_op_exec(bboxes, gtboxes)
+        self.assertRtolEqual(output_npu, output_ads)
+
+
+if __name__ == "__main__":
+    run_tests()