diff --git a/patch/npu.patch b/patch/npu.patch index 14c9a2b015f9a880197f7275d29f92be65b92dc7..14c25d237195c40be1fdeb3639feecafe729d89f 100644 --- a/patch/npu.patch +++ b/patch/npu.patch @@ -1,6 +1,6 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/CMakeLists.txt pytorch-develop/aten/CMakeLists.txt --- pytorch-v1.5.0/aten/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/CMakeLists.txt 2021-07-13 15:30:57.594267657 +0800 ++++ pytorch-develop/aten/CMakeLists.txt 2021-07-15 20:52:26.641436929 +0800 @@ -22,8 +22,10 @@ set(ATen_CPU_INCLUDE) set(ATen_THIRD_PARTY_INCLUDE) @@ -51,7 +51,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt pytorch-develop/aten/src/ATen/CMakeLists.txt --- pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-07-13 15:30:57.594267657 +0800 ++++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-07-15 20:52:26.645437073 +0800 @@ -67,6 +67,9 @@ FILE(GLOB native_quantized_h "native/quantized/*.h" "native/quantized/cpu/*.h") FILE(GLOB native_cpu_h "native/cpu/*.h") @@ -129,7 +129,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h --- pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-07-13 15:30:57.602267943 +0800 ++++ pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-07-15 20:52:26.649437216 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -170,7 +170,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/function_wrapper.py pytorch-develop/aten/src/ATen/function_wrapper.py --- pytorch-v1.5.0/aten/src/ATen/function_wrapper.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-07-13 15:30:57.610268230 +0800 ++++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-07-15 20:52:26.657437502 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -354,7 +354,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= for option in declaration['options']: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/gen.py pytorch-develop/aten/src/ATen/gen.py --- pytorch-v1.5.0/aten/src/ATen/gen.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/gen.py 2021-07-13 15:30:57.610268230 +0800 ++++ pytorch-develop/aten/src/ATen/gen.py 2021-07-15 20:52:26.657437502 +0800 @@ -1,3 +1,18 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -512,7 +512,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= generate_outputs() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp --- pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-07-13 15:30:57.622268661 +0800 ++++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-07-15 20:52:26.669437932 +0800 @@ -339,20 +339,20 @@ void hardsigmoid_backward_kernel(TensorIterator& iter) { @@ -540,7 +540,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= }); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp pytorch-develop/aten/src/ATen/native/Memory.cpp --- pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-07-13 15:30:57.614268374 +0800 ++++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-07-15 20:52:26.665437789 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -595,7 +595,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= detail::computeStorageSize(self.sizes(), self.strides()), diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml pytorch-develop/aten/src/ATen/native/native_functions.yaml --- pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-07-13 15:30:57.634269091 +0800 ++++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-07-15 20:52:26.685438507 +0800 @@ -1,6 +1,5 @@ # See README.md in this directory for more guidance @@ -5916,24 +5916,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: max_unpool2d.out(Tensor self, Tensor indices, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6118,12 +7584,16 @@ - dispatch: - CPU: reflection_pad2d_out_cpu - CUDA: reflection_pad2d_out_cuda -+ npu_dispatch: -+ NPU: reflection_pad2d_out_npu - - - func: reflection_pad2d(Tensor self, int[4] padding) -> Tensor - python_module: nn - dispatch: - CPU: reflection_pad2d_cpu - CUDA: reflection_pad2d_cuda -+ npu_dispatch: -+ NPU: reflection_pad2d_npu - - - func: reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) - python_module: nn -@@ -6166,12 +7636,16 @@ +@@ -6166,12 +7632,16 @@ dispatch: CPU: replication_pad2d_out_cpu CUDA: replication_pad2d_out_cuda @@ -5950,7 +5933,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: replication_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6214,12 +7688,16 @@ +@@ -6214,12 +7684,16 @@ dispatch: CPU: upsample_linear1d_out_cpu CUDA: upsample_linear1d_out_cuda @@ -5967,7 +5950,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_linear1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, bool align_corners, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6232,12 +7710,16 @@ +@@ -6232,12 +7706,16 @@ dispatch: CPU: upsample_linear1d_backward_cpu CUDA: upsample_linear1d_backward_cuda @@ -5984,7 +5967,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_bilinear2d(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6245,96 +7727,128 @@ +@@ -6245,96 +7723,128 @@ CPU: upsample_bilinear2d_cpu CUDA: upsample_bilinear2d_cuda QuantizedCPU: quantized_upsample_bilinear2d_cpu @@ -6113,7 +6096,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6342,24 +7856,32 @@ +@@ -6342,24 +7852,32 @@ CPU: upsample_nearest2d_cpu CUDA: upsample_nearest2d_cuda QuantizedCPU: quantized_upsample_nearest2d_cpu @@ -6146,7 +6129,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6367,38 +7889,52 @@ +@@ -6367,38 +7885,52 @@ CPU: upsample_nearest3d_cpu CUDA: upsample_nearest3d_cuda QuantizedCPU: quantized_upsample_nearest3d_cpu @@ -6199,7 +6182,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # What's a thnn_conv_ versus a slow_conv_? # -@@ -6423,24 +7959,32 @@ +@@ -6423,24 +7955,32 @@ dispatch: CPU: slow_conv_transpose2d_out_cpu CUDA: slow_conv_transpose2d_out_cuda @@ -6232,7 +6215,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6468,21 +8012,29 @@ +@@ -6468,21 +8008,29 @@ - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -6262,7 +6245,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: thnn_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!)? grad_input, Tensor(b!)? grad_weight, Tensor(c!)? grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) python_module: nn -@@ -6495,32 +8047,46 @@ +@@ -6495,32 +8043,46 @@ dispatch: CPU: slow_conv2d_backward_cpu CUDA: legacy::cuda::_thnn_conv2d_backward @@ -6309,7 +6292,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6553,12 +8119,16 @@ +@@ -6553,12 +8115,16 @@ dispatch: CPU: slow_conv_dilated2d_cpu CUDA: slow_conv_dilated2d_cuda @@ -6326,7 +6309,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor python_module: nn -@@ -6577,57 +8147,405 @@ +@@ -6577,57 +8143,413 @@ dispatch: CPU: col2im_out_cpu CUDA: col2im_out_cuda @@ -6732,10 +6715,18 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +- func: npu_bert_apply_adam(Tensor(a!) var, Tensor(b!) m, Tensor(c!) v, Scalar lr, Scalar beta1, Scalar beta2, Scalar epsilon, Tensor grad, Scalar max_grad_norm, Scalar global_grad_norm, Scalar weight_decay) -> (Tensor(a!), Tensor(b!), Tensor(c!)) + npu_dispatch_only: + NPU: bert_apply_adam_npu ++ ++- func: npu_giou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> Tensor ++ npu_dispatch_only: ++ NPU: giou_npu ++ ++- func: npu_giou_backward(Tensor grad, Tensor bboxes, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> (Tensor, Tensor) ++ npu_dispatch_only: ++ NPU: giou_backward_npu \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S --- pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-07-13 15:30:57.674270525 +0800 ++++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-07-15 20:52:26.725439941 +0800 @@ -659,14 +659,14 @@ SUB x1, x1, 4 @@ -6761,7 +6752,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CMP x1, 2 diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp pytorch-develop/aten/src/ATen/native/TensorCompare.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-07-13 15:30:57.618268517 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-07-15 20:52:26.669437932 +0800 @@ -64,7 +64,7 @@ Tensor isinf(const Tensor &self) { @@ -6773,7 +6764,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return AT_DISPATCH_FLOATING_TYPES_AND_HALF(self.scalar_type(), "isinf", [&]() { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp pytorch-develop/aten/src/ATen/native/TensorFactories.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-07-13 15:30:57.618268517 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-07-15 20:52:26.669437932 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -6818,7 +6809,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp pytorch-develop/aten/src/ATen/native/TensorProperties.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-07-13 15:30:57.618268517 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-07-15 20:52:26.669437932 +0800 @@ -87,6 +87,7 @@ if (self.is_contiguous(memory_format)) { return self; @@ -6829,7 +6820,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= "preserve memory format is unsupported by the contiguous operator"); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp --- pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-07-13 15:30:57.622268661 +0800 ++++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-07-15 20:52:26.669437932 +0800 @@ -26,7 +26,7 @@ const scalar_t* in = &idata[output_y * input_width + output_x]; scalar_t* out = &odata[output_y * output_width + output_x]; @@ -6841,7 +6832,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= out += output_width * output_height; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native_parse.py pytorch-develop/aten/src/ATen/native_parse.py --- pytorch-v1.5.0/aten/src/ATen/native_parse.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native_parse.py 2021-07-13 15:30:57.686270955 +0800 ++++ pytorch-develop/aten/src/ATen/native_parse.py 2021-07-15 20:52:26.737440371 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -6879,7 +6870,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= msg = '''Exception raised in processing function: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py pytorch-develop/aten/src/ATen/preprocess_declarations.py --- pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-07-13 15:30:57.690271099 +0800 ++++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-07-15 20:52:26.737440371 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -6911,7 +6902,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h pytorch-develop/aten/src/ATen/templates/TensorBody.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-07-13 15:30:57.690271099 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-07-15 20:52:26.741440515 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -6944,7 +6935,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h pytorch-develop/aten/src/ATen/templates/TensorMethods.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-07-13 15:30:57.690271099 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-07-15 20:52:26.741440515 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -6978,7 +6969,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/CMakeLists.txt pytorch-develop/aten/src/TH/CMakeLists.txt --- pytorch-v1.5.0/aten/src/TH/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-07-13 15:30:57.694271242 +0800 ++++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-07-15 20:52:26.741440515 +0800 @@ -48,6 +48,11 @@ ${CMAKE_CURRENT_SOURCE_DIR} PARENT_SCOPE) @@ -6993,7 +6984,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp pytorch-develop/aten/src/TH/generic/THStorage.cpp --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-07-13 15:30:57.694271242 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-07-15 20:52:26.745440658 +0800 @@ -1,9 +1,32 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7102,7 +7093,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.h pytorch-develop/aten/src/TH/generic/THStorage.h --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-07-13 15:30:57.694271242 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-07-15 20:52:26.745440658 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7141,7 +7132,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/CMakeLists.txt pytorch-develop/c10/CMakeLists.txt --- pytorch-v1.5.0/c10/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/CMakeLists.txt 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/CMakeLists.txt 2021-07-15 20:52:26.761441232 +0800 @@ -63,6 +63,14 @@ message(STATUS "don't use NUMA") endif() @@ -7170,7 +7161,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # not checked in diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Backend.h pytorch-develop/c10/core/Backend.h --- pytorch-v1.5.0/c10/core/Backend.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Backend.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/Backend.h 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7265,7 +7256,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.cpp pytorch-develop/c10/core/Device.cpp --- pytorch-v1.5.0/c10/core/Device.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.cpp 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/Device.cpp 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7305,7 +7296,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= types.begin(), diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.h pytorch-develop/c10/core/Device.h --- pytorch-v1.5.0/c10/core/Device.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/Device.h 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7340,7 +7331,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return type_ == DeviceType::CPU; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.cpp pytorch-develop/c10/core/DeviceType.cpp --- pytorch-v1.5.0/c10/core/DeviceType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.cpp 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/DeviceType.cpp 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7380,7 +7371,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return false; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.h pytorch-develop/c10/core/DeviceType.h --- pytorch-v1.5.0/c10/core/DeviceType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/DeviceType.h 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7423,7 +7414,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= constexpr DeviceType kXLA = DeviceType::XLA; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.cpp pytorch-develop/c10/core/DispatchKey.cpp --- pytorch-v1.5.0/c10/core/DispatchKey.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.cpp 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/DispatchKey.cpp 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7455,7 +7446,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= case DispatchKey::TESTING_ONLY_GenericModeTensorId: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.h pytorch-develop/c10/core/DispatchKey.h --- pytorch-v1.5.0/c10/core/DispatchKey.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/DispatchKey.h 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7487,7 +7478,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Storage.h pytorch-develop/c10/core/Storage.h --- pytorch-v1.5.0/c10/core/Storage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Storage.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/Storage.h 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7521,7 +7512,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= }; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/StorageImpl.h pytorch-develop/c10/core/StorageImpl.h --- pytorch-v1.5.0/c10/core/StorageImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/StorageImpl.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/StorageImpl.h 2021-07-15 20:52:26.761441232 +0800 @@ -1,12 +1,39 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7578,7 +7569,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorImpl.h pytorch-develop/c10/core/TensorImpl.h --- pytorch-v1.5.0/c10/core/TensorImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorImpl.h 2021-07-13 15:30:57.710271816 +0800 ++++ pytorch-develop/c10/core/TensorImpl.h 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7648,7 +7639,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorOptions.h pytorch-develop/c10/core/TensorOptions.h --- pytorch-v1.5.0/c10/core/TensorOptions.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorOptions.h 2021-07-13 15:30:57.710271816 +0800 ++++ pytorch-develop/c10/core/TensorOptions.h 2021-07-15 20:52:26.761441232 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7689,7 +7680,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/macros/Export.h pytorch-develop/c10/macros/Export.h --- pytorch-v1.5.0/c10/macros/Export.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/macros/Export.h 2021-07-13 15:30:57.710271816 +0800 ++++ pytorch-develop/c10/macros/Export.h 2021-07-15 20:52:26.765441375 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7816,7 +7807,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/caffe2/CMakeLists.txt pytorch-develop/caffe2/CMakeLists.txt --- pytorch-v1.5.0/caffe2/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/caffe2/CMakeLists.txt 2021-07-13 15:30:57.718272102 +0800 ++++ pytorch-develop/caffe2/CMakeLists.txt 2021-07-15 20:52:26.773441662 +0800 @@ -32,6 +32,7 @@ # Add source, includes, and libs to lists list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS}) @@ -7963,7 +7954,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs. diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.clang-format pytorch-develop/.clang-format --- pytorch-v1.5.0/.clang-format 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.clang-format 2021-07-13 15:30:57.586267370 +0800 ++++ pytorch-develop/.clang-format 2021-07-15 20:52:26.637436786 +0800 @@ -84,5 +84,4 @@ SpacesInSquareBrackets: false Standard: Cpp11 @@ -7974,7 +7965,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/BuildVariables.cmake pytorch-develop/cmake/BuildVariables.cmake --- pytorch-v1.5.0/cmake/BuildVariables.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/BuildVariables.cmake 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/BuildVariables.cmake 2021-07-15 20:52:26.913446682 +0800 @@ -11,6 +11,7 @@ # CMakeLists.txt files under each folder respectively. set(Caffe2_CPU_SRCS) @@ -8001,7 +7992,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # symbols. However, if the lib is whole linked in caffe2 lib, we don't want diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Codegen.cmake pytorch-develop/cmake/Codegen.cmake --- pytorch-v1.5.0/cmake/Codegen.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Codegen.cmake 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/Codegen.cmake 2021-07-15 20:52:26.913446682 +0800 @@ -191,13 +191,14 @@ file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt generated_cpp) file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt-cuda cuda_generated_cpp) @@ -8032,7 +8023,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Dependencies.cmake pytorch-develop/cmake/Dependencies.cmake --- pytorch-v1.5.0/cmake/Dependencies.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Dependencies.cmake 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/Dependencies.cmake 2021-07-15 20:52:26.917446825 +0800 @@ -1509,6 +1509,13 @@ ENDIF(NOT C_HAS_THREAD) endif() @@ -8049,7 +8040,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Summary.cmake pytorch-develop/cmake/Summary.cmake --- pytorch-v1.5.0/cmake/Summary.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Summary.cmake 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/Summary.cmake 2021-07-15 20:52:26.917446825 +0800 @@ -134,6 +134,7 @@ if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") @@ -8060,7 +8051,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endfunction() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/TorchConfig.cmake.in pytorch-develop/cmake/TorchConfig.cmake.in --- pytorch-v1.5.0/cmake/TorchConfig.cmake.in 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-07-15 20:52:26.917446825 +0800 @@ -112,6 +112,11 @@ list(APPEND TORCH_LIBRARIES ${TORCH_CUDA_LIBRARIES}) endif() @@ -8075,7 +8066,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=@GLIBCXX_USE_CXX11_ABI@") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/CMakeLists.txt pytorch-develop/CMakeLists.txt --- pytorch-v1.5.0/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/CMakeLists.txt 2021-07-13 15:30:57.590267513 +0800 ++++ pytorch-develop/CMakeLists.txt 2021-07-15 20:52:26.637436786 +0800 @@ -205,6 +205,10 @@ option(USE_TBB "Use TBB" OFF) option(ONNX_ML "Enable traditional ONNX ML API." ON) @@ -8142,7 +8133,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.dockerignore pytorch-develop/.dockerignore --- pytorch-v1.5.0/.dockerignore 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.dockerignore 2021-07-13 15:30:57.586267370 +0800 ++++ pytorch-develop/.dockerignore 2021-07-15 20:52:26.637436786 +0800 @@ -1,257 +1 @@ -# READ THIS BEFORE YOU REFACTOR ME -# @@ -8405,44 +8396,44 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/docs/make.bat pytorch-develop/docs/make.bat --- pytorch-v1.5.0/docs/make.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/docs/make.bat 2021-07-13 15:30:57.834276262 +0800 ++++ pytorch-develop/docs/make.bat 2021-07-15 20:52:26.925447111 +0800 @@ -1,36 +1,36 @@ --@ECHO OFF -- --pushd %~dp0 -- --REM Command file for Sphinx documentation -- --if "%SPHINXBUILD%" == "" ( -- set SPHINXBUILD=sphinx-build --) --set SOURCEDIR=source --set BUILDDIR=build --set SPHINXPROJ=PyTorch -- --if "%1" == "" goto help -- --%SPHINXBUILD% >NUL 2>NUL --if errorlevel 9009 ( -- echo. -- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx -- echo.installed, then set the SPHINXBUILD environment variable to point -- echo.to the full path of the 'sphinx-build' executable. Alternatively you -- echo.may add the Sphinx directory to PATH. -- echo. -- echo.If you don't have Sphinx installed, grab it from -- echo.http://sphinx-doc.org/ -- exit /b 1 --) -- --%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% --goto end -- --:help --%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -- --:end --popd +-@ECHO OFF +- +-pushd %~dp0 +- +-REM Command file for Sphinx documentation +- +-if "%SPHINXBUILD%" == "" ( +- set SPHINXBUILD=sphinx-build +-) +-set SOURCEDIR=source +-set BUILDDIR=build +-set SPHINXPROJ=PyTorch +- +-if "%1" == "" goto help +- +-%SPHINXBUILD% >NUL 2>NUL +-if errorlevel 9009 ( +- echo. +- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx +- echo.installed, then set the SPHINXBUILD environment variable to point +- echo.to the full path of the 'sphinx-build' executable. Alternatively you +- echo.may add the Sphinx directory to PATH. +- echo. +- echo.If you don't have Sphinx installed, grab it from +- echo.http://sphinx-doc.org/ +- exit /b 1 +-) +- +-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +-goto end +- +-:help +-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +- +-:end +-popd +@ECHO OFF + +pushd %~dp0 @@ -8494,7 +8485,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/requirements.txt pytorch-develop/requirements.txt --- pytorch-v1.5.0/requirements.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/requirements.txt 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/requirements.txt 2021-07-15 20:52:26.941447686 +0800 @@ -4,4 +4,12 @@ requests setuptools @@ -8513,18 +8504,18 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/appveyor/install.bat pytorch-develop/scripts/appveyor/install.bat --- pytorch-v1.5.0/scripts/appveyor/install.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/appveyor/install.bat 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/scripts/appveyor/install.bat 2021-07-15 20:52:26.941447686 +0800 @@ -1,10 +1,10 @@ --:: Installation scripts for appveyor. -- --@echo on -- --if "%USE_CUDA%" == "ON" call %~dp0%install_cuda.bat -- --:: Miniconda path for appveyor --set PATH=C:\Miniconda-x64;C:\Miniconda-x64\Scripts;%PATH% --:: Install numpy --conda install -y numpy +-:: Installation scripts for appveyor. +- +-@echo on +- +-if "%USE_CUDA%" == "ON" call %~dp0%install_cuda.bat +- +-:: Miniconda path for appveyor +-set PATH=C:\Miniconda-x64;C:\Miniconda-x64\Scripts;%PATH% +-:: Install numpy +-conda install -y numpy +:: Installation scripts for appveyor. + +@echo on @@ -8537,30 +8528,30 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +conda install -y numpy diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/appveyor/install_cuda.bat pytorch-develop/scripts/appveyor/install_cuda.bat --- pytorch-v1.5.0/scripts/appveyor/install_cuda.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/appveyor/install_cuda.bat 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/scripts/appveyor/install_cuda.bat 2021-07-15 20:52:26.941447686 +0800 @@ -1,22 +1,22 @@ --@echo on -- --appveyor DownloadFile ^ -- https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_windows-exe ^ -- -FileName cuda_8.0.44_windows.exe --appveyor Downloadfile ^ -- http://developer.download.nvidia.com/compute/redist/cudnn/v5.1/cudnn-8.0-windows10-x64-v5.1.zip ^ -- -FileName cudnn-8.0-windows10-x64-v5.1.zip -- --cuda_8.0.44_windows.exe -s compiler_8.0 cublas_8.0 cublas_dev_8.0 cudart_8.0 curand_8.0 curand_dev_8.0 nvrtc_8.0 nvrtc_dev_8.0 --set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\libnvvp;%PATH% -- --7z x cudnn-8.0-windows10-x64-v5.1.zip --copy cuda\include\cudnn.h ^ -- "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include\" --copy cuda\lib\x64\cudnn.lib ^ -- "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\x64\" --copy cuda\bin\cudnn64_5.dll ^ -- "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\" -- --:: Make sure that nvcc is working correctly. --nvcc -V || exit /b +-@echo on +- +-appveyor DownloadFile ^ +- https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_windows-exe ^ +- -FileName cuda_8.0.44_windows.exe +-appveyor Downloadfile ^ +- http://developer.download.nvidia.com/compute/redist/cudnn/v5.1/cudnn-8.0-windows10-x64-v5.1.zip ^ +- -FileName cudnn-8.0-windows10-x64-v5.1.zip +- +-cuda_8.0.44_windows.exe -s compiler_8.0 cublas_8.0 cublas_dev_8.0 cudart_8.0 curand_8.0 curand_dev_8.0 nvrtc_8.0 nvrtc_dev_8.0 +-set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\libnvvp;%PATH% +- +-7z x cudnn-8.0-windows10-x64-v5.1.zip +-copy cuda\include\cudnn.h ^ +- "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include\" +-copy cuda\lib\x64\cudnn.lib ^ +- "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\x64\" +-copy cuda\bin\cudnn64_5.dll ^ +- "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\" +- +-:: Make sure that nvcc is working correctly. +-nvcc -V || exit /b +@echo on + +appveyor DownloadFile ^ @@ -8585,92 +8576,92 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +nvcc -V || exit /b diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/build_windows.bat pytorch-develop/scripts/build_windows.bat --- pytorch-v1.5.0/scripts/build_windows.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/build_windows.bat 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/scripts/build_windows.bat 2021-07-15 20:52:26.941447686 +0800 @@ -1,84 +1,84 @@ --:: ############################################################################# --:: Example command to build on Windows. --:: ############################################################################# -- --:: This script shows how one can build a Caffe2 binary for windows. -- --@echo off --setlocal -- --SET ORIGINAL_DIR=%cd% --SET CAFFE2_ROOT=%~dp0%.. -- --if NOT DEFINED BUILD_BINARY ( -- set BUILD_BINARY=OFF --) -- --if NOT DEFINED BUILD_SHARED_LIBS ( -- :: On CI, we test with BUILD_SHARED_LIBS=OFF. -- :: By default, it will be BUILD_SHARED_LIBS=ON. -- if NOT DEFINED BUILD_ENVIRONMENT ( -- set BUILD_SHARED_LIBS=OFF -- ) --) -- --IF NOT DEFINED BUILDING_WITH_TORCH_LIBS ( -- set BUILDING_WITH_TORCH_LIBS=OFF --) -- --if NOT DEFINED CAFFE2_STATIC_LINK_CUDA ( -- set CAFFE2_STATIC_LINK_CUDA=OFF --) -- --if NOT DEFINED CMAKE_BUILD_TYPE ( -- set CMAKE_BUILD_TYPE=Release --) -- --if NOT DEFINED ONNX_NAMESPACE ( -- set ONNX_NAMESPACE=onnx_c2 --) -- --if NOT DEFINED TORCH_CUDA_ARCH_LIST ( -- set TORCH_CUDA_ARCH_LIST=5.0 --) -- --if NOT DEFINED USE_CUDA ( -- set USE_CUDA=OFF --) -- --if NOT DEFINED USE_OBSERVERS ( -- set USE_OBSERVERS=OFF --) -- --if NOT DEFINED MSVC_Z7_OVERRIDE ( -- set MSVC_Z7_OVERRIDE=OFF --) -- --if NOT DEFINED CMAKE_GENERATOR ( -- set CMAKE_GENERATOR=Ninja --) -- --set CMAKE_VERBOSE_MAKEFILE=1 -- --:: Install pyyaml for Aten codegen --pip install pyyaml ninja -- --echo CAFFE2_ROOT=%CAFFE2_ROOT% --echo CMAKE_GENERATOR=%CMAKE_GENERATOR% --echo CMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% -- --:: Set up cmake. We will skip building the test files right now. --pushd %CAFFE2_ROOT% --python tools\build_libtorch.py || goto :label_error --popd -- --echo "Caffe2 built successfully" --cd %ORIGINAL_DIR% --endlocal --exit /b 0 -- --:label_error --echo "Caffe2 building failed" --cd %ORIGINAL_DIR% --endlocal --exit /b 1 +-:: ############################################################################# +-:: Example command to build on Windows. +-:: ############################################################################# +- +-:: This script shows how one can build a Caffe2 binary for windows. +- +-@echo off +-setlocal +- +-SET ORIGINAL_DIR=%cd% +-SET CAFFE2_ROOT=%~dp0%.. +- +-if NOT DEFINED BUILD_BINARY ( +- set BUILD_BINARY=OFF +-) +- +-if NOT DEFINED BUILD_SHARED_LIBS ( +- :: On CI, we test with BUILD_SHARED_LIBS=OFF. +- :: By default, it will be BUILD_SHARED_LIBS=ON. +- if NOT DEFINED BUILD_ENVIRONMENT ( +- set BUILD_SHARED_LIBS=OFF +- ) +-) +- +-IF NOT DEFINED BUILDING_WITH_TORCH_LIBS ( +- set BUILDING_WITH_TORCH_LIBS=OFF +-) +- +-if NOT DEFINED CAFFE2_STATIC_LINK_CUDA ( +- set CAFFE2_STATIC_LINK_CUDA=OFF +-) +- +-if NOT DEFINED CMAKE_BUILD_TYPE ( +- set CMAKE_BUILD_TYPE=Release +-) +- +-if NOT DEFINED ONNX_NAMESPACE ( +- set ONNX_NAMESPACE=onnx_c2 +-) +- +-if NOT DEFINED TORCH_CUDA_ARCH_LIST ( +- set TORCH_CUDA_ARCH_LIST=5.0 +-) +- +-if NOT DEFINED USE_CUDA ( +- set USE_CUDA=OFF +-) +- +-if NOT DEFINED USE_OBSERVERS ( +- set USE_OBSERVERS=OFF +-) +- +-if NOT DEFINED MSVC_Z7_OVERRIDE ( +- set MSVC_Z7_OVERRIDE=OFF +-) +- +-if NOT DEFINED CMAKE_GENERATOR ( +- set CMAKE_GENERATOR=Ninja +-) +- +-set CMAKE_VERBOSE_MAKEFILE=1 +- +-:: Install pyyaml for Aten codegen +-pip install pyyaml ninja +- +-echo CAFFE2_ROOT=%CAFFE2_ROOT% +-echo CMAKE_GENERATOR=%CMAKE_GENERATOR% +-echo CMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% +- +-:: Set up cmake. We will skip building the test files right now. +-pushd %CAFFE2_ROOT% +-python tools\build_libtorch.py || goto :label_error +-popd +- +-echo "Caffe2 built successfully" +-cd %ORIGINAL_DIR% +-endlocal +-exit /b 0 +- +-:label_error +-echo "Caffe2 building failed" +-cd %ORIGINAL_DIR% +-endlocal +-exit /b 1 +:: ############################################################################# +:: Example command to build on Windows. +:: ############################################################################# @@ -8757,25 +8748,25 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +exit /b 1 diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/proto.ps1 pytorch-develop/scripts/proto.ps1 --- pytorch-v1.5.0/scripts/proto.ps1 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/proto.ps1 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/scripts/proto.ps1 2021-07-15 20:52:26.941447686 +0800 @@ -1,17 +1,17 @@ --param( -- [string]$protoc, -- [string]$srcdir, -- [string]$unprocessed, -- [string]$processed, -- [string]$out --) --$ErrorActionPreference = "Stop" --Get-Content $unprocessed | % {$_ -Replace "caffe2/proto/caffe2.proto", "caffe2.proto"} | Set-Content $processed --Add-Content -Path $processed -Value "option optimize_for = LITE_RUNTIME;`n" -NoNewline --$dir = (Get-Item $processed).DirectoryName -- --copy $srcdir/caffe2/proto/caffe2.proto $srcdir/caffe2.proto --Add-Content -Path $srcdir/caffe2.proto -Value "option optimize_for = LITE_RUNTIME;`n" -NoNewline -- --$cmd = "$protoc -I${dir} --cpp_out=$out $processed" --Invoke-Expression $cmd +-param( +- [string]$protoc, +- [string]$srcdir, +- [string]$unprocessed, +- [string]$processed, +- [string]$out +-) +-$ErrorActionPreference = "Stop" +-Get-Content $unprocessed | % {$_ -Replace "caffe2/proto/caffe2.proto", "caffe2.proto"} | Set-Content $processed +-Add-Content -Path $processed -Value "option optimize_for = LITE_RUNTIME;`n" -NoNewline +-$dir = (Get-Item $processed).DirectoryName +- +-copy $srcdir/caffe2/proto/caffe2.proto $srcdir/caffe2.proto +-Add-Content -Path $srcdir/caffe2.proto -Value "option optimize_for = LITE_RUNTIME;`n" -NoNewline +- +-$cmd = "$protoc -I${dir} --cpp_out=$out $processed" +-Invoke-Expression $cmd +param( + [string]$protoc, + [string]$srcdir, @@ -8795,7 +8786,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +Invoke-Expression $cmd diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/setup.py pytorch-develop/setup.py --- pytorch-v1.5.0/setup.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/setup.py 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/setup.py 2021-07-15 20:52:26.941447686 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -8894,7 +8885,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'python/serialized_test/data/operator_test/*.zip', diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/derivatives.yaml pytorch-develop/tools/autograd/derivatives.yaml --- pytorch-v1.5.0/tools/autograd/derivatives.yaml 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/derivatives.yaml 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/derivatives.yaml 2021-07-15 20:52:28.085488704 +0800 @@ -107,6 +107,10 @@ # # NB: The parameter names here MUST be consistent with the parameter names @@ -8951,7 +8942,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # The above backward definitions are equivalent to the definitions below. Why do we bundle # everything up? It's because it's more convenient to define double backwards # when there is a single function that manages everything. -@@ -1630,3 +1643,52 @@ +@@ -1630,3 +1643,55 @@ - name: nonzero(Tensor self) -> Tensor output_differentiability: [False] @@ -9004,11 +8995,14 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +- name: npu_linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor + input, weight: npu_linear_backward(grad, input, weight) + bias: maybe_multiply(grad, 1) ++ ++- name: npu_giou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> Tensor ++ self, gtboxes: npu_giou_backward(grad, self, gtboxes, trans, is_cross, mode) \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/dump_utils.py pytorch-develop/tools/autograd/dump_utils.py --- pytorch-v1.5.0/tools/autograd/dump_utils.py 1970-01-01 08:00:00.000000000 +0800 -+++ pytorch-develop/tools/autograd/dump_utils.py 2021-07-13 15:30:58.990317711 +0800 -@@ -0,0 +1,114 @@ ++++ pytorch-develop/tools/autograd/dump_utils.py 2021-07-15 20:52:28.085488704 +0800 +@@ -0,0 +1,115 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# All rights reserved. +# @@ -9121,11 +9115,12 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + "pin_memory", + "to_device", + "numpy_T", -+ "slice_Tensor" ++ "slice_Tensor", ++ "select_int" +] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py pytorch-develop/tools/autograd/gen_autograd_functions.py --- pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-07-15 20:52:28.085488704 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -9311,7 +9306,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_python_functions.py pytorch-develop/tools/autograd/gen_python_functions.py --- pytorch-v1.5.0/tools/autograd/gen_python_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-07-15 20:52:28.085488704 +0800 @@ -1,3 +1,20 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -9353,7 +9348,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'value': argname, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_variable_type.py pytorch-develop/tools/autograd/gen_variable_type.py --- pytorch-v1.5.0/tools/autograd/gen_variable_type.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-07-15 20:52:28.085488704 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -9526,7 +9521,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/Functions.cpp pytorch-develop/tools/autograd/templates/Functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/Functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-07-15 20:52:28.085488704 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9606,7 +9601,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto sparse = sparse_.coalesce(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp pytorch-develop/tools/autograd/templates/python_torch_functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-07-15 20:52:28.085488704 +0800 @@ -22,7 +22,7 @@ #include "torch/csrc/autograd/generated/variable_factories.h" #include "torch/csrc/utils/structseq.h" @@ -9690,7 +9685,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp pytorch-develop/tools/autograd/templates/python_variable_methods.cpp --- pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-07-15 20:52:28.085488704 +0800 @@ -15,7 +15,13 @@ #include "torch/csrc/cuda/Stream.h" #include "torch/csrc/cuda/Event.h" @@ -9777,7 +9772,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"has_names", (PyCFunction)THPVariable_has_names, METH_NOARGS, NULL}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp pytorch-develop/tools/autograd/templates/VariableType.cpp --- pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-07-15 20:52:28.085488704 +0800 @@ -1,7 +1,27 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9808,7 +9803,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.h pytorch-develop/tools/autograd/templates/VariableType.h --- pytorch-v1.5.0/tools/autograd/templates/VariableType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-07-15 20:52:28.085488704 +0800 @@ -1,3 +1,20 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9840,7 +9835,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= const at::Tensor & unpack(const Tensor & t, const char * name, int pos); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/build_variables.bzl pytorch-develop/tools/build_variables.bzl --- pytorch-v1.5.0/tools/build_variables.bzl 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/build_variables.bzl 2021-07-13 15:30:58.994317854 +0800 ++++ pytorch-develop/tools/build_variables.bzl 2021-07-15 20:52:28.085488704 +0800 @@ -46,6 +46,7 @@ "torch/csrc/autograd/functions/utils.cpp", "torch/csrc/autograd/input_buffer.cpp", @@ -9926,7 +9921,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def grad(outputs: _TensorOrTensors, inputs: _TensorOrTensors, grad_outputs: Optional[_TensorOrTensors]=..., retain_graph: Optional[bool]=..., create_graph: bool=..., only_inputs: bool=..., allow_unused: bool=...) -> Tuple[Tensor, ...]: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/autograd/profiler.py pytorch-develop/torch/autograd/profiler.py --- pytorch-v1.5.0/torch/autograd/profiler.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/autograd/profiler.py 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/autograd/profiler.py 2021-07-15 20:52:28.093488991 +0800 @@ -1,8 +1,25 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -10399,7 +10394,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return ''.join(result) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/CMakeLists.txt pytorch-develop/torch/CMakeLists.txt --- pytorch-v1.5.0/torch/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/CMakeLists.txt 2021-07-13 15:30:58.994317854 +0800 ++++ pytorch-develop/torch/CMakeLists.txt 2021-07-15 20:52:28.089488848 +0800 @@ -97,6 +97,7 @@ ${TORCH_SRC_DIR}/csrc/tensor/python_tensor.cpp ${TORCH_SRC_DIR}/csrc/utils.cpp @@ -10431,7 +10426,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/engine.cpp pytorch-develop/torch/csrc/autograd/engine.cpp --- pytorch-v1.5.0/torch/csrc/autograd/engine.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-07-15 20:52:28.101489278 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10554,7 +10549,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto event = c10::Event{c10::DeviceType::CUDA}; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp pytorch-develop/torch/csrc/autograd/functions/tensor.cpp --- pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-07-15 20:52:28.101489278 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10586,7 +10581,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= /*non_blocking=*/false, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/init.cpp pytorch-develop/torch/csrc/autograd/init.cpp --- pytorch-v1.5.0/torch/csrc/autograd/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-07-15 20:52:28.101489278 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10629,7 +10624,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= m.def("_enable_profiler", enableProfiler); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp pytorch-develop/torch/csrc/autograd/input_buffer.cpp --- pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-07-15 20:52:28.101489278 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10681,7 +10676,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto& old_var = buffer[pos]; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp pytorch-develop/torch/csrc/autograd/profiler.cpp --- pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-07-15 20:52:28.105489421 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10877,7 +10872,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CUDAStubs::~CUDAStubs() = default; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.h pytorch-develop/torch/csrc/autograd/profiler.h --- pytorch-v1.5.0/torch/csrc/autograd/profiler.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-07-15 20:52:28.105489421 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11002,7 +10997,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp pytorch-develop/torch/csrc/autograd/python_variable.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-07-15 20:52:28.105489421 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11056,7 +11051,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"is_complex", (getter)THPVariable_is_complex, nullptr, nullptr, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-07-15 20:52:28.105489421 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11097,7 +11092,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h --- pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-07-15 20:52:28.105489421 +0800 @@ -168,6 +168,45 @@ return r.release(); } @@ -11146,7 +11141,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!r) throw python_error(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp --- pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-07-13 15:30:59.006318284 +0800 ++++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-07-15 20:52:28.101489278 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11180,7 +11175,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!t.defined()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp pytorch-develop/torch/csrc/distributed/c10d/comm.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-07-13 15:30:59.014318571 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-07-15 20:52:28.105489421 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11286,7 +11281,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= while (!in_flight.empty()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp pytorch-develop/torch/csrc/distributed/c10d/init.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-07-13 15:30:59.014318571 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-07-15 20:52:28.109489564 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11343,7 +11338,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= .def("is_success", &::c10d::ProcessGroup::Work::isSuccess) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-07-13 15:30:59.014318571 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-07-15 20:52:28.109489564 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11468,7 +11463,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp pytorch-develop/torch/csrc/DynamicTypes.cpp --- pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-07-15 20:52:28.093488991 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11517,7 +11512,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return it->second; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Generator.cpp pytorch-develop/torch/csrc/Generator.cpp --- pytorch-v1.5.0/torch/csrc/Generator.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Generator.cpp 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/csrc/Generator.cpp 2021-07-15 20:52:28.093488991 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11585,7 +11580,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= #endif diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/serialization.cpp pytorch-develop/torch/csrc/generic/serialization.cpp --- pytorch-v1.5.0/torch/csrc/generic/serialization.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-07-13 15:30:59.018318714 +0800 ++++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-07-15 20:52:28.109489564 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11685,7 +11680,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/Storage.cpp pytorch-develop/torch/csrc/generic/Storage.cpp --- pytorch-v1.5.0/torch/csrc/generic/Storage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-07-13 15:30:59.018318714 +0800 ++++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-07-15 20:52:28.109489564 +0800 @@ -1,7 +1,25 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11764,7 +11759,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= for (Py_ssize_t i = 0; i < length; i++) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp pytorch-develop/torch/csrc/generic/StorageMethods.cpp --- pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-07-13 15:30:59.018318714 +0800 ++++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-07-15 20:52:28.109489564 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11812,7 +11807,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"_write_file", (PyCFunction)THPStorage_(writeFile), METH_VARARGS, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Module.cpp pytorch-develop/torch/csrc/Module.cpp --- pytorch-v1.5.0/torch/csrc/Module.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Module.cpp 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/csrc/Module.cpp 2021-07-15 20:52:28.093488991 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11956,7 +11951,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto set_module_attr = [&](const char* name, PyObject* v, bool incref = true) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp pytorch-develop/torch/csrc/tensor/python_tensor.cpp --- pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-07-15 20:52:28.133490425 +0800 @@ -1,18 +1,35 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12333,7 +12328,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +} // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.cpp pytorch-develop/torch/csrc/utils/init.cpp --- pytorch-v1.5.0/torch/csrc/utils/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/init.cpp 2021-07-15 20:52:28.133490425 +0800 @@ -1,6 +1,10 @@ #include #include @@ -12421,7 +12416,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.h pytorch-develop/torch/csrc/utils/init.h --- pytorch-v1.5.0/torch/csrc/utils/init.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.h 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/init.h 2021-07-15 20:52:28.133490425 +0800 @@ -8,4 +8,7 @@ void initThroughputBenchmarkBindings(PyObject* module); @@ -12432,7 +12427,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h pytorch-develop/torch/csrc/utils/python_arg_parser.h --- pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-07-15 20:52:28.133490425 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12467,7 +12462,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return at::Device(device_str); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp pytorch-develop/torch/csrc/utils/tensor_layouts.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-07-15 20:52:28.133490425 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12498,7 +12493,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= registerLayoutObject((THPLayout*)strided_layout, at::Backend::QuantizedCPU); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp pytorch-develop/torch/csrc/utils/tensor_new.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-07-15 20:52:28.133490425 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12634,7 +12629,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } else if(expected_layout == c10::kSparse) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp pytorch-develop/torch/csrc/utils/tensor_types.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-07-15 20:52:28.133490425 +0800 @@ -1,58 +1,91 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12847,7 +12842,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def get_rng_state(): ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/distributed/distributed_c10d.py pytorch-develop/torch/distributed/distributed_c10d.py --- pytorch-v1.5.0/torch/distributed/distributed_c10d.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-07-13 15:30:59.042319575 +0800 ++++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-07-15 20:52:28.133490425 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -12928,148 +12923,148 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/distributions/von_mises.py pytorch-develop/torch/distributions/von_mises.py --- pytorch-v1.5.0/torch/distributions/von_mises.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/distributions/von_mises.py 2021-07-13 15:30:59.042319575 +0800 ++++ pytorch-develop/torch/distributions/von_mises.py 2021-07-15 20:52:28.137490568 +0800 @@ -1,140 +1,140 @@ --from __future__ import absolute_import, division, print_function -- --import math -- --import torch --import torch.jit --from torch.distributions import constraints --from torch.distributions.distribution import Distribution --from torch.distributions.utils import broadcast_all, lazy_property -- -- --def _eval_poly(y, coef): -- coef = list(coef) -- result = coef.pop() -- while coef: -- result = coef.pop() + y * result -- return result -- -- --_I0_COEF_SMALL = [1.0, 3.5156229, 3.0899424, 1.2067492, 0.2659732, 0.360768e-1, 0.45813e-2] --_I0_COEF_LARGE = [0.39894228, 0.1328592e-1, 0.225319e-2, -0.157565e-2, 0.916281e-2, -- -0.2057706e-1, 0.2635537e-1, -0.1647633e-1, 0.392377e-2] --_I1_COEF_SMALL = [0.5, 0.87890594, 0.51498869, 0.15084934, 0.2658733e-1, 0.301532e-2, 0.32411e-3] --_I1_COEF_LARGE = [0.39894228, -0.3988024e-1, -0.362018e-2, 0.163801e-2, -0.1031555e-1, -- 0.2282967e-1, -0.2895312e-1, 0.1787654e-1, -0.420059e-2] -- --_COEF_SMALL = [_I0_COEF_SMALL, _I1_COEF_SMALL] --_COEF_LARGE = [_I0_COEF_LARGE, _I1_COEF_LARGE] -- -- --def _log_modified_bessel_fn(x, order=0): -- """ -- Returns ``log(I_order(x))`` for ``x > 0``, -- where `order` is either 0 or 1. -- """ -- assert order == 0 or order == 1 -- -- # compute small solution -- y = (x / 3.75) -- y = y * y -- small = _eval_poly(y, _COEF_SMALL[order]) -- if order == 1: -- small = x.abs() * small -- small = small.log() -- -- # compute large solution -- y = 3.75 / x -- large = x - 0.5 * x.log() + _eval_poly(y, _COEF_LARGE[order]).log() -- -- result = torch.where(x < 3.75, small, large) -- return result -- -- --@torch.jit.script --def _rejection_sample(loc, concentration, proposal_r, x): -- done = torch.zeros(x.shape, dtype=torch.bool, device=loc.device) -- while not done.all(): -- u = torch.rand((3,) + x.shape, dtype=loc.dtype, device=loc.device) -- u1, u2, u3 = u.unbind() -- z = torch.cos(math.pi * u1) -- f = (1 + proposal_r * z) / (proposal_r + z) -- c = concentration * (proposal_r - f) -- accept = ((c * (2 - c) - u2) > 0) | ((c / u2).log() + 1 - c >= 0) -- if accept.any(): -- x = torch.where(accept, (u3 - 0.5).sign() * f.acos(), x) -- done = done | accept -- return (x + math.pi + loc) % (2 * math.pi) - math.pi -- -- --class VonMises(Distribution): -- """ -- A circular von Mises distribution. -- -- This implementation uses polar coordinates. The ``loc`` and ``value`` args -- can be any real number (to facilitate unconstrained optimization), but are -- interpreted as angles modulo 2 pi. -- -- Example:: -- >>> m = dist.VonMises(torch.tensor([1.0]), torch.tensor([1.0])) -- >>> m.sample() # von Mises distributed with loc=1 and concentration=1 -- tensor([1.9777]) -- -- :param torch.Tensor loc: an angle in radians. -- :param torch.Tensor concentration: concentration parameter -- """ -- arg_constraints = {'loc': constraints.real, 'concentration': constraints.positive} -- support = constraints.real -- has_rsample = False -- -- def __init__(self, loc, concentration, validate_args=None): -- self.loc, self.concentration = broadcast_all(loc, concentration) -- batch_shape = self.loc.shape -- event_shape = torch.Size() -- -- # Parameters for sampling -- tau = 1 + (1 + 4 * self.concentration ** 2).sqrt() -- rho = (tau - (2 * tau).sqrt()) / (2 * self.concentration) -- self._proposal_r = (1 + rho ** 2) / (2 * rho) -- -- super(VonMises, self).__init__(batch_shape, event_shape, validate_args) -- -- def log_prob(self, value): -- log_prob = self.concentration * torch.cos(value - self.loc) -- log_prob = log_prob - math.log(2 * math.pi) - _log_modified_bessel_fn(self.concentration, order=0) -- return log_prob -- -- @torch.no_grad() -- def sample(self, sample_shape=torch.Size()): -- """ -- The sampling algorithm for the von Mises distribution is based on the following paper: -- Best, D. J., and Nicholas I. Fisher. -- "Efficient simulation of the von Mises distribution." Applied Statistics (1979): 152-157. -- """ -- shape = self._extended_shape(sample_shape) -- x = torch.empty(shape, dtype=self.loc.dtype, device=self.loc.device) -- return _rejection_sample(self.loc, self.concentration, self._proposal_r, x) -- -- def expand(self, batch_shape): -- try: -- return super(VonMises, self).expand(batch_shape) -- except NotImplementedError: -- validate_args = self.__dict__.get('_validate_args') -- loc = self.loc.expand(batch_shape) -- concentration = self.concentration.expand(batch_shape) -- return type(self)(loc, concentration, validate_args=validate_args) -- -- @property -- def mean(self): -- """ -- The provided mean is the circular one. -- """ -- return self.loc -- -- @lazy_property -- def variance(self): -- """ -- The provided variance is the circular one. -- """ -- return 1 - (_log_modified_bessel_fn(self.concentration, order=1) - -- _log_modified_bessel_fn(self.concentration, order=0)).exp() +-from __future__ import absolute_import, division, print_function +- +-import math +- +-import torch +-import torch.jit +-from torch.distributions import constraints +-from torch.distributions.distribution import Distribution +-from torch.distributions.utils import broadcast_all, lazy_property +- +- +-def _eval_poly(y, coef): +- coef = list(coef) +- result = coef.pop() +- while coef: +- result = coef.pop() + y * result +- return result +- +- +-_I0_COEF_SMALL = [1.0, 3.5156229, 3.0899424, 1.2067492, 0.2659732, 0.360768e-1, 0.45813e-2] +-_I0_COEF_LARGE = [0.39894228, 0.1328592e-1, 0.225319e-2, -0.157565e-2, 0.916281e-2, +- -0.2057706e-1, 0.2635537e-1, -0.1647633e-1, 0.392377e-2] +-_I1_COEF_SMALL = [0.5, 0.87890594, 0.51498869, 0.15084934, 0.2658733e-1, 0.301532e-2, 0.32411e-3] +-_I1_COEF_LARGE = [0.39894228, -0.3988024e-1, -0.362018e-2, 0.163801e-2, -0.1031555e-1, +- 0.2282967e-1, -0.2895312e-1, 0.1787654e-1, -0.420059e-2] +- +-_COEF_SMALL = [_I0_COEF_SMALL, _I1_COEF_SMALL] +-_COEF_LARGE = [_I0_COEF_LARGE, _I1_COEF_LARGE] +- +- +-def _log_modified_bessel_fn(x, order=0): +- """ +- Returns ``log(I_order(x))`` for ``x > 0``, +- where `order` is either 0 or 1. +- """ +- assert order == 0 or order == 1 +- +- # compute small solution +- y = (x / 3.75) +- y = y * y +- small = _eval_poly(y, _COEF_SMALL[order]) +- if order == 1: +- small = x.abs() * small +- small = small.log() +- +- # compute large solution +- y = 3.75 / x +- large = x - 0.5 * x.log() + _eval_poly(y, _COEF_LARGE[order]).log() +- +- result = torch.where(x < 3.75, small, large) +- return result +- +- +-@torch.jit.script +-def _rejection_sample(loc, concentration, proposal_r, x): +- done = torch.zeros(x.shape, dtype=torch.bool, device=loc.device) +- while not done.all(): +- u = torch.rand((3,) + x.shape, dtype=loc.dtype, device=loc.device) +- u1, u2, u3 = u.unbind() +- z = torch.cos(math.pi * u1) +- f = (1 + proposal_r * z) / (proposal_r + z) +- c = concentration * (proposal_r - f) +- accept = ((c * (2 - c) - u2) > 0) | ((c / u2).log() + 1 - c >= 0) +- if accept.any(): +- x = torch.where(accept, (u3 - 0.5).sign() * f.acos(), x) +- done = done | accept +- return (x + math.pi + loc) % (2 * math.pi) - math.pi +- +- +-class VonMises(Distribution): +- """ +- A circular von Mises distribution. +- +- This implementation uses polar coordinates. The ``loc`` and ``value`` args +- can be any real number (to facilitate unconstrained optimization), but are +- interpreted as angles modulo 2 pi. +- +- Example:: +- >>> m = dist.VonMises(torch.tensor([1.0]), torch.tensor([1.0])) +- >>> m.sample() # von Mises distributed with loc=1 and concentration=1 +- tensor([1.9777]) +- +- :param torch.Tensor loc: an angle in radians. +- :param torch.Tensor concentration: concentration parameter +- """ +- arg_constraints = {'loc': constraints.real, 'concentration': constraints.positive} +- support = constraints.real +- has_rsample = False +- +- def __init__(self, loc, concentration, validate_args=None): +- self.loc, self.concentration = broadcast_all(loc, concentration) +- batch_shape = self.loc.shape +- event_shape = torch.Size() +- +- # Parameters for sampling +- tau = 1 + (1 + 4 * self.concentration ** 2).sqrt() +- rho = (tau - (2 * tau).sqrt()) / (2 * self.concentration) +- self._proposal_r = (1 + rho ** 2) / (2 * rho) +- +- super(VonMises, self).__init__(batch_shape, event_shape, validate_args) +- +- def log_prob(self, value): +- log_prob = self.concentration * torch.cos(value - self.loc) +- log_prob = log_prob - math.log(2 * math.pi) - _log_modified_bessel_fn(self.concentration, order=0) +- return log_prob +- +- @torch.no_grad() +- def sample(self, sample_shape=torch.Size()): +- """ +- The sampling algorithm for the von Mises distribution is based on the following paper: +- Best, D. J., and Nicholas I. Fisher. +- "Efficient simulation of the von Mises distribution." Applied Statistics (1979): 152-157. +- """ +- shape = self._extended_shape(sample_shape) +- x = torch.empty(shape, dtype=self.loc.dtype, device=self.loc.device) +- return _rejection_sample(self.loc, self.concentration, self._proposal_r, x) +- +- def expand(self, batch_shape): +- try: +- return super(VonMises, self).expand(batch_shape) +- except NotImplementedError: +- validate_args = self.__dict__.get('_validate_args') +- loc = self.loc.expand(batch_shape) +- concentration = self.concentration.expand(batch_shape) +- return type(self)(loc, concentration, validate_args=validate_args) +- +- @property +- def mean(self): +- """ +- The provided mean is the circular one. +- """ +- return self.loc +- +- @lazy_property +- def variance(self): +- """ +- The provided variance is the circular one. +- """ +- return 1 - (_log_modified_bessel_fn(self.concentration, order=1) - +- _log_modified_bessel_fn(self.concentration, order=0)).exp() +from __future__ import absolute_import, division, print_function + +import math @@ -13212,7 +13207,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + _log_modified_bessel_fn(self.concentration, order=0)).exp() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/__init__.py pytorch-develop/torch/__init__.py --- pytorch-v1.5.0/torch/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/__init__.py 2021-07-13 15:30:58.994317854 +0800 ++++ pytorch-develop/torch/__init__.py 2021-07-15 20:52:28.089488848 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13255,7 +13250,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt pytorch-develop/torch/lib/c10d/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-07-13 15:30:59.046319718 +0800 ++++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-07-15 20:52:28.137490568 +0800 @@ -28,6 +28,10 @@ option(USE_C10D_NCCL "USE C10D NCCL" ON) endif() @@ -13308,7 +13303,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= copy_header(ProcessGroupMPI.hpp) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt pytorch-develop/torch/lib/libshm/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-07-13 15:30:59.046319718 +0800 ++++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-07-15 20:52:28.141490712 +0800 @@ -37,8 +37,11 @@ SET_TARGET_PROPERTIES(shm PROPERTIES PREFIX "lib" @@ -13365,7 +13360,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -_maybe_indices_t = _scalar_or_tuple_2_t[Tensor] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/functional.py pytorch-develop/torch/nn/functional.py --- pytorch-v1.5.0/torch/nn/functional.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/functional.py 2021-07-13 15:30:59.050319862 +0800 ++++ pytorch-develop/torch/nn/functional.py 2021-07-15 20:52:28.141490712 +0800 @@ -1611,7 +1611,7 @@ else: output = input.matmul(weight.t()) @@ -13388,7 +13383,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -from . import parallel as parallel diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/batchnorm.py pytorch-develop/torch/nn/modules/batchnorm.py --- pytorch-v1.5.0/torch/nn/modules/batchnorm.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-07-13 15:30:59.050319862 +0800 ++++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-07-15 20:52:28.145490855 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13420,7 +13415,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= self.register_parameter('running_var', None) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/module.py pytorch-develop/torch/nn/modules/module.py --- pytorch-v1.5.0/torch/nn/modules/module.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/module.py 2021-07-13 15:30:59.050319862 +0800 ++++ pytorch-develop/torch/nn/modules/module.py 2021-07-15 20:52:28.145490855 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13563,7 +13558,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return t.to(device, dtype if t.is_floating_point() else None, non_blocking, memory_format=convert_to_format) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/normalization.py pytorch-develop/torch/nn/modules/normalization.py --- pytorch-v1.5.0/torch/nn/modules/normalization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/normalization.py 2021-07-13 15:30:59.050319862 +0800 ++++ pytorch-develop/torch/nn/modules/normalization.py 2021-07-15 20:52:28.145490855 +0800 @@ -128,13 +128,14 @@ """ __constants__ = ['normalized_shape', 'eps', 'elementwise_affine'] @@ -13596,68 +13591,68 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return '{normalized_shape}, eps={eps}, ' \ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/transformer.pyi.in pytorch-develop/torch/nn/modules/transformer.pyi.in --- pytorch-v1.5.0/torch/nn/modules/transformer.pyi.in 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/transformer.pyi.in 2021-07-13 15:30:59.054320005 +0800 ++++ pytorch-develop/torch/nn/modules/transformer.pyi.in 2021-07-15 20:52:28.145490855 +0800 @@ -1,60 +1,60 @@ --from ..init import xavier_uniform_ --from .activation import MultiheadAttention --from .container import ModuleList --from .dropout import Dropout --from .linear import Linear --from .module import Module --from .normalization import LayerNorm --from typing import Any, Optional -- --class Transformer(Module): -- encoder: Any = ... -- decoder: Any = ... -- d_model: Any = ... -- nhead: Any = ... -- def __init__(self, d_model: int = ..., nhead: int = ..., num_encoder_layers: int = ..., num_decoder_layers: int = ..., dim_feedforward: int = ..., dropout: float = ..., activation: str = ..., custom_encoder: Optional[Any] = ..., custom_decoder: Optional[Any] = ...) -> None: ... -- def forward(self, src: Any, tgt: Any, src_mask: Optional[Any] = ..., tgt_mask: Optional[Any] = ..., memory_mask: Optional[Any] = ..., src_key_padding_mask: Optional[Any] = ..., tgt_key_padding_mask: Optional[Any] = ..., memory_key_padding_mask: Optional[Any] = ...): ... -- def generate_square_subsequent_mask(self, sz: Any): ... -- --class TransformerEncoder(Module): -- layers: Any = ... -- num_layers: Any = ... -- norm: Any = ... -- def __init__(self, encoder_layer: Any, num_layers: Any, norm: Optional[Any] = ...) -> None: ... -- def forward(self, src: Any, mask: Optional[Any] = ..., src_key_padding_mask: Optional[Any] = ...): ... -- --class TransformerDecoder(Module): -- layers: Any = ... -- num_layers: Any = ... -- norm: Any = ... -- def __init__(self, decoder_layer: Any, num_layers: Any, norm: Optional[Any] = ...) -> None: ... -- def forward(self, tgt: Any, memory: Any, tgt_mask: Optional[Any] = ..., memory_mask: Optional[Any] = ..., tgt_key_padding_mask: Optional[Any] = ..., memory_key_padding_mask: Optional[Any] = ...): ... -- --class TransformerEncoderLayer(Module): -- self_attn: Any = ... -- linear1: Any = ... -- dropout: Any = ... -- linear2: Any = ... -- norm1: Any = ... -- norm2: Any = ... -- dropout1: Any = ... -- dropout2: Any = ... -- activation: Any = ... -- def __init__(self, d_model: Any, nhead: Any, dim_feedforward: int = ..., dropout: float = ..., activation: str = ...) -> None: ... -- def forward(self, src: Any, src_mask: Optional[Any] = ..., src_key_padding_mask: Optional[Any] = ...): ... -- --class TransformerDecoderLayer(Module): -- self_attn: Any = ... -- multihead_attn: Any = ... -- linear1: Any = ... -- dropout: Any = ... -- linear2: Any = ... -- norm1: Any = ... -- norm2: Any = ... -- norm3: Any = ... -- dropout1: Any = ... -- dropout2: Any = ... -- dropout3: Any = ... -- activation: Any = ... -- def __init__(self, d_model: Any, nhead: Any, dim_feedforward: int = ..., dropout: float = ..., activation: str = ...) -> None: ... -- def forward(self, tgt: Any, memory: Any, tgt_mask: Optional[Any] = ..., memory_mask: Optional[Any] = ..., tgt_key_padding_mask: Optional[Any] = ..., memory_key_padding_mask: Optional[Any] = ...): ... +-from ..init import xavier_uniform_ +-from .activation import MultiheadAttention +-from .container import ModuleList +-from .dropout import Dropout +-from .linear import Linear +-from .module import Module +-from .normalization import LayerNorm +-from typing import Any, Optional +- +-class Transformer(Module): +- encoder: Any = ... +- decoder: Any = ... +- d_model: Any = ... +- nhead: Any = ... +- def __init__(self, d_model: int = ..., nhead: int = ..., num_encoder_layers: int = ..., num_decoder_layers: int = ..., dim_feedforward: int = ..., dropout: float = ..., activation: str = ..., custom_encoder: Optional[Any] = ..., custom_decoder: Optional[Any] = ...) -> None: ... +- def forward(self, src: Any, tgt: Any, src_mask: Optional[Any] = ..., tgt_mask: Optional[Any] = ..., memory_mask: Optional[Any] = ..., src_key_padding_mask: Optional[Any] = ..., tgt_key_padding_mask: Optional[Any] = ..., memory_key_padding_mask: Optional[Any] = ...): ... +- def generate_square_subsequent_mask(self, sz: Any): ... +- +-class TransformerEncoder(Module): +- layers: Any = ... +- num_layers: Any = ... +- norm: Any = ... +- def __init__(self, encoder_layer: Any, num_layers: Any, norm: Optional[Any] = ...) -> None: ... +- def forward(self, src: Any, mask: Optional[Any] = ..., src_key_padding_mask: Optional[Any] = ...): ... +- +-class TransformerDecoder(Module): +- layers: Any = ... +- num_layers: Any = ... +- norm: Any = ... +- def __init__(self, decoder_layer: Any, num_layers: Any, norm: Optional[Any] = ...) -> None: ... +- def forward(self, tgt: Any, memory: Any, tgt_mask: Optional[Any] = ..., memory_mask: Optional[Any] = ..., tgt_key_padding_mask: Optional[Any] = ..., memory_key_padding_mask: Optional[Any] = ...): ... +- +-class TransformerEncoderLayer(Module): +- self_attn: Any = ... +- linear1: Any = ... +- dropout: Any = ... +- linear2: Any = ... +- norm1: Any = ... +- norm2: Any = ... +- dropout1: Any = ... +- dropout2: Any = ... +- activation: Any = ... +- def __init__(self, d_model: Any, nhead: Any, dim_feedforward: int = ..., dropout: float = ..., activation: str = ...) -> None: ... +- def forward(self, src: Any, src_mask: Optional[Any] = ..., src_key_padding_mask: Optional[Any] = ...): ... +- +-class TransformerDecoderLayer(Module): +- self_attn: Any = ... +- multihead_attn: Any = ... +- linear1: Any = ... +- dropout: Any = ... +- linear2: Any = ... +- norm1: Any = ... +- norm2: Any = ... +- norm3: Any = ... +- dropout1: Any = ... +- dropout2: Any = ... +- dropout3: Any = ... +- activation: Any = ... +- def __init__(self, d_model: Any, nhead: Any, dim_feedforward: int = ..., dropout: float = ..., activation: str = ...) -> None: ... +- def forward(self, tgt: Any, memory: Any, tgt_mask: Optional[Any] = ..., memory_mask: Optional[Any] = ..., tgt_key_padding_mask: Optional[Any] = ..., memory_key_padding_mask: Optional[Any] = ...): ... +from ..init import xavier_uniform_ +from .activation import MultiheadAttention +from .container import ModuleList @@ -13756,7 +13751,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - module_kwargs: Optional[Any] = ...) -> Tensor: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/parallel/distributed.py pytorch-develop/torch/nn/parallel/distributed.py --- pytorch-v1.5.0/torch/nn/parallel/distributed.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/parallel/distributed.py 2021-07-13 15:30:59.054320005 +0800 ++++ pytorch-develop/torch/nn/parallel/distributed.py 2021-07-15 20:52:28.145490855 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14107,7 +14102,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def remove_weight_norm(module: T_module, name: str = ...) -> T_module: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/onnx/symbolic_opset9.py pytorch-develop/torch/onnx/symbolic_opset9.py --- pytorch-v1.5.0/torch/onnx/symbolic_opset9.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-07-13 15:30:59.054320005 +0800 ++++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-07-15 20:52:28.149490998 +0800 @@ -1621,14 +1621,23 @@ slices = [sym_help._slice_helper(g, w, axes=[0], starts=[x * n], ends=[y * n]) for x, y in intervals] return g.op('Concat', *slices, axis_i=0) @@ -14185,7 +14180,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., lr_decay: float=..., weight_decay: float=..., initial_accumulator_value: float=..., eps: float=...) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/optim/adamax.py pytorch-develop/torch/optim/adamax.py --- pytorch-v1.5.0/torch/optim/adamax.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/optim/adamax.py 2021-07-13 15:30:59.058320149 +0800 ++++ pytorch-develop/torch/optim/adamax.py 2021-07-15 20:52:28.149490998 +0800 @@ -80,8 +80,8 @@ exp_inf.mul_(beta2).unsqueeze(0), grad.abs().add_(eps).unsqueeze_(0) @@ -14362,7 +14357,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=...) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/serialization.py pytorch-develop/torch/serialization.py --- pytorch-v1.5.0/torch/serialization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/serialization.py 2021-07-13 15:30:59.058320149 +0800 ++++ pytorch-develop/torch/serialization.py 2021-07-15 20:52:28.153491142 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14446,7 +14441,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def location_tag(storage): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/storage.py pytorch-develop/torch/storage.py --- pytorch-v1.5.0/torch/storage.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/storage.py 2021-07-13 15:30:59.058320149 +0800 ++++ pytorch-develop/torch/storage.py 2021-07-15 20:52:28.153491142 +0800 @@ -7,6 +7,7 @@ class _StorageBase(object): @@ -14466,7 +14461,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= else: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/tensor.py pytorch-develop/torch/tensor.py --- pytorch-v1.5.0/torch/tensor.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/tensor.py 2021-07-13 15:30:59.058320149 +0800 ++++ pytorch-develop/torch/tensor.py 2021-07-15 20:52:28.153491142 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14528,7 +14523,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def __reversed__(self): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_tensor_str.py pytorch-develop/torch/_tensor_str.py --- pytorch-v1.5.0/torch/_tensor_str.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_tensor_str.py 2021-07-13 15:30:58.994317854 +0800 ++++ pytorch-develop/torch/_tensor_str.py 2021-07-15 20:52:28.089488848 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14582,7 +14577,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= has_default_dtype = self.dtype in (torch.get_default_dtype(), torch.int64, torch.bool) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/dataloader.py pytorch-develop/torch/utils/data/dataloader.py --- pytorch-v1.5.0/torch/utils/data/dataloader.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/dataloader.py 2021-07-13 15:30:59.062320292 +0800 ++++ pytorch-develop/torch/utils/data/dataloader.py 2021-07-15 20:52:28.157491286 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14791,7 +14786,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, sampler: Sampler[int], batch_size: int, drop_last: bool) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py pytorch-develop/torch/utils/data/_utils/pin_memory.py --- pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-07-13 15:30:59.062320292 +0800 ++++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-07-15 20:52:28.157491286 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14852,7 +14847,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/__init__.py pytorch-develop/torch/utils/__init__.py --- pytorch-v1.5.0/torch/utils/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/__init__.py 2021-07-13 15:30:59.062320292 +0800 ++++ pytorch-develop/torch/utils/__init__.py 2021-07-15 20:52:28.153491142 +0800 @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals @@ -14863,7 +14858,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def set_module(obj, mod): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_utils.py pytorch-develop/torch/_utils.py --- pytorch-v1.5.0/torch/_utils.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_utils.py 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/_utils.py 2021-07-15 20:52:28.089488848 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. diff --git a/src/aten/src/ATen/native/native_functions.yaml b/src/aten/src/ATen/native/native_functions.yaml index 30c7a8aeb19a82f8bffa37cd4947172f082db5be..74c22e5b3a4fcf502e8d595da222c51f34003de3 100644 --- a/src/aten/src/ATen/native/native_functions.yaml +++ b/src/aten/src/ATen/native/native_functions.yaml @@ -7584,16 +7584,12 @@ dispatch: CPU: reflection_pad2d_out_cpu CUDA: reflection_pad2d_out_cuda - npu_dispatch: - NPU: reflection_pad2d_out_npu - func: reflection_pad2d(Tensor self, int[4] padding) -> Tensor python_module: nn dispatch: CPU: reflection_pad2d_cpu CUDA: reflection_pad2d_cuda - npu_dispatch: - NPU: reflection_pad2d_npu - func: reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn @@ -8548,4 +8544,12 @@ - func: npu_bert_apply_adam(Tensor(a!) var, Tensor(b!) m, Tensor(c!) v, Scalar lr, Scalar beta1, Scalar beta2, Scalar epsilon, Tensor grad, Scalar max_grad_norm, Scalar global_grad_norm, Scalar weight_decay) -> (Tensor(a!), Tensor(b!), Tensor(c!)) npu_dispatch_only: - NPU: bert_apply_adam_npu \ No newline at end of file + NPU: bert_apply_adam_npu + +- func: npu_giou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> Tensor + npu_dispatch_only: + NPU: giou_npu + +- func: npu_giou_backward(Tensor grad, Tensor bboxes, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> (Tensor, Tensor) + npu_dispatch_only: + NPU: giou_backward_npu \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/AddmvKernelNpu.cpp b/src/aten/src/ATen/native/npu/AddmvKernelNpu.cpp index 3e11e9de5ac5307c7a8fc7ceaec6b0dfebeb132a..f2defe0dedee2c153329f45e800a7a78035edc49 100644 --- a/src/aten/src/ATen/native/npu/AddmvKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/AddmvKernelNpu.cpp @@ -1,96 +1,96 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/utils/NpuUtils.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -static void check_1d(const Tensor& t, const char* arg, const char* fn) { - TORCH_CHECK(t.dim() == 1, fn, ": Expected 1-D argument ", arg, ", but got ", t.dim(), "-D"); -} - -Tensor& addmv_out_npu( - Tensor& result, - const Tensor& self, - const Tensor& mat, - const Tensor& vec, - Scalar beta, - Scalar alpha) { - - check_1d(vec, "vec", "addmv"); - - Tensor mat1 = vec.unsqueeze(1); - - // matmul mat*alpha - Tensor mat_alpha = at::mul(mat, alpha); - - // matmul*alpha - Tensor mmMulResult = at::mm(mat_alpha, mat1); - - Tensor mmMulResult1 = mmMulResult.squeeze(); - - // calculate the output size - auto outputSize = addmv_npu_output_size(self, mat, vec, beta, alpha); - - if (!result.sizes().equals(outputSize)) { - result.resize_(outputSize); - } - // matmul*alpha+self*beta - at::add_out(result, mmMulResult1, self, beta); - - return result; -} - -Tensor addmv_npu( - const Tensor& self, - const Tensor& mat, - const Tensor& vec, - Scalar beta, - Scalar alpha) { - - check_1d(vec, "vec", "addmv"); - auto outputSize = addmv_npu_output_size(self, mat, vec, beta, alpha); - Tensor result = OpPreparation::ApplyTensor(self, outputSize); - addmv_out_npu(result, self, mat, vec, beta, alpha); - - return result; -} - -Tensor& addmv_npu_( - Tensor& self, - const Tensor& mat, - const Tensor& vec, - Scalar beta, - Scalar alpha) { - - check_1d(vec, "vec", "addmv"); - OpPreparation::CheckMemory({self, mat, vec}, {self}); - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(self); - Tensor result = - addmv_out_npu(contiguousSelf, contiguousSelf, mat, vec, beta, alpha); - NpuUtils::format_fresh_view(self, result); - } else { - addmv_out_npu(self, self, mat, vec, beta, alpha); - } - return self; -} - -} // namespace native -} // namespace at - - +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" +#include "ATen/native/npu/utils/NpuUtils.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +static void check_1d(const Tensor& t, const char* arg, const char* fn) { + TORCH_CHECK(t.dim() == 1, fn, ": Expected 1-D argument ", arg, ", but got ", t.dim(), "-D"); +} + +Tensor& addmv_out_npu( + Tensor& result, + const Tensor& self, + const Tensor& mat, + const Tensor& vec, + Scalar beta, + Scalar alpha) { + + check_1d(vec, "vec", "addmv"); + + Tensor mat1 = vec.unsqueeze(1); + + // matmul mat*alpha + Tensor mat_alpha = at::mul(mat, alpha); + + // matmul*alpha + Tensor mmMulResult = at::mm(mat_alpha, mat1); + + Tensor mmMulResult1 = mmMulResult.squeeze(); + + // calculate the output size + auto outputSize = addmv_npu_output_size(self, mat, vec, beta, alpha); + + if (!result.sizes().equals(outputSize)) { + result.resize_(outputSize); + } + // matmul*alpha+self*beta + at::add_out(result, mmMulResult1, self, beta); + + return result; +} + +Tensor addmv_npu( + const Tensor& self, + const Tensor& mat, + const Tensor& vec, + Scalar beta, + Scalar alpha) { + + check_1d(vec, "vec", "addmv"); + auto outputSize = addmv_npu_output_size(self, mat, vec, beta, alpha); + Tensor result = OpPreparation::ApplyTensor(self, outputSize); + addmv_out_npu(result, self, mat, vec, beta, alpha); + + return result; +} + +Tensor& addmv_npu_( + Tensor& self, + const Tensor& mat, + const Tensor& vec, + Scalar beta, + Scalar alpha) { + + check_1d(vec, "vec", "addmv"); + OpPreparation::CheckMemory({self, mat, vec}, {self}); + if (!NpuUtils::check_match(&self)) { + Tensor contiguousSelf = NpuUtils::format_contiguous(self); + Tensor result = + addmv_out_npu(contiguousSelf, contiguousSelf, mat, vec, beta, alpha); + NpuUtils::format_fresh_view(self, result); + } else { + addmv_out_npu(self, self, mat, vec, beta, alpha); + } + return self; +} + +} // namespace native +} // namespace at + + diff --git a/src/aten/src/ATen/native/npu/ArgminKernelNpu.cpp b/src/aten/src/ATen/native/npu/ArgminKernelNpu.cpp old mode 100644 new mode 100755 diff --git a/src/aten/src/ATen/native/npu/AtanKernelNpu.cpp b/src/aten/src/ATen/native/npu/AtanKernelNpu.cpp index a8e38f109b1a515823350f9b1e2d9e3c9124c466..1dbfae12a22a94e82a59109e5993c3e261cd9f04 100644 --- a/src/aten/src/ATen/native/npu/AtanKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/AtanKernelNpu.cpp @@ -1,52 +1,52 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& atan_out_npu(Tensor& result, const Tensor& self) { - OpCommand cmd; - cmd.Name("Atan") - .Input(self) - .Output(result) - .Run(); - return result; -} - -Tensor atan_npu(const Tensor& self) { - Tensor result = OpPreparation::ApplyTensor(self); - //calculate the output result of the NPU - atan_out_npu(result, self); - return result; -} - -Tensor& atan_npu_(Tensor& self) { - OpPreparation::CheckMemory({self}, {self}); - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(self); - Tensor result = atan_out_npu(contiguousSelf, contiguousSelf); - NpuUtils::format_fresh_view(self, result); - } else { - atan_out_npu(self, self); - } - return self; -} - +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& atan_out_npu(Tensor& result, const Tensor& self) { + OpCommand cmd; + cmd.Name("Atan") + .Input(self) + .Output(result) + .Run(); + return result; +} + +Tensor atan_npu(const Tensor& self) { + Tensor result = OpPreparation::ApplyTensor(self); + //calculate the output result of the NPU + atan_out_npu(result, self); + return result; +} + +Tensor& atan_npu_(Tensor& self) { + OpPreparation::CheckMemory({self}, {self}); + if (!NpuUtils::check_match(&self)) { + Tensor contiguousSelf = NpuUtils::format_contiguous(self); + Tensor result = atan_out_npu(contiguousSelf, contiguousSelf); + NpuUtils::format_fresh_view(self, result); + } else { + atan_out_npu(self, self); + } + return self; +} + }} // namespace at::native \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/BernoulliKernelNpu.cpp b/src/aten/src/ATen/native/npu/BernoulliKernelNpu.cpp index a01096cd57982596ce6166413db67531d2fc6258..cdb37c1ed487079d399f2c85bea72762e8723bae 100644 --- a/src/aten/src/ATen/native/npu/BernoulliKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/BernoulliKernelNpu.cpp @@ -1,101 +1,101 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& bernoulli_out_npu(Tensor& result, const Tensor& self, double p) { - OpCommand cmd; - cmd.Name("Bernoulli") - .Input(self) - .Input(p, ScalarType::Float) - .Output(result) - .Run(); - - return result; -} - -Tensor& bernoulli_out_npu(Tensor& result, const Tensor& self, const Tensor& p) { - OpCommand cmd; - cmd.Name("Bernoulli") - .Input(self) - .Input(p) - .Output(result) - .Run(); - - return result; -} - -Tensor& bernoulli_npu_(Tensor& self, double p, Generator* gen) { - OpPreparation::CheckMemory({self}, {self}); - ScalarType selfType = self.scalar_type(); - Tensor selfFp32 = self; - if (self.scalar_type() == ScalarType::Half) { - selfFp32 = self.to(ScalarType::Float); - } - - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(selfFp32); - Tensor result = bernoulli_out_npu(contiguousSelf, contiguousSelf, p); - NpuUtils::format_fresh_view(self, result); - } else { - bernoulli_out_npu(selfFp32, selfFp32, p); - self.copy_(selfFp32); - } - - if(self.scalar_type() != selfType){ - self = self.to(ScalarType::Half); - } - return self; -} - -Tensor& bernoulli_npu_(Tensor& self, const Tensor& p, Generator* gen) { - OpPreparation::CheckMemory({self}, {self}); - ScalarType selfType = self.scalar_type(); - Tensor selfFp32 = self; - Tensor pFp32 = OpPreparation::CastBackToOriFormat(p);; - if (self.scalar_type() == ScalarType::Half) { - selfFp32 = self.to(ScalarType::Float); - pFp32 = p.to(ScalarType::Float); - } - - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(selfFp32); - Tensor result = bernoulli_out_npu(contiguousSelf, contiguousSelf, pFp32); - NpuUtils::format_fresh_view(self, result); - } else { - bernoulli_out_npu(selfFp32, selfFp32, pFp32); - self.copy_(selfFp32); - } - - if(self.scalar_type() != selfType){ - self = self.to(ScalarType::Half); - } - return self; -} - -Tensor bernoulli_npu(const Tensor& self, Generator* gen) { - const Tensor p = self; - Tensor selfCopy = at::empty_with_format( - self.sizes(), self.options(), ACL_FORMAT_ND); - selfCopy.copy_(self); - return bernoulli_npu_(selfCopy, p, gen); -} -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& bernoulli_out_npu(Tensor& result, const Tensor& self, double p) { + OpCommand cmd; + cmd.Name("Bernoulli") + .Input(self) + .Input(p, ScalarType::Float) + .Output(result) + .Run(); + + return result; +} + +Tensor& bernoulli_out_npu(Tensor& result, const Tensor& self, const Tensor& p) { + OpCommand cmd; + cmd.Name("Bernoulli") + .Input(self) + .Input(p) + .Output(result) + .Run(); + + return result; +} + +Tensor& bernoulli_npu_(Tensor& self, double p, Generator* gen) { + OpPreparation::CheckMemory({self}, {self}); + ScalarType selfType = self.scalar_type(); + Tensor selfFp32 = self; + if (self.scalar_type() == ScalarType::Half) { + selfFp32 = self.to(ScalarType::Float); + } + + if (!NpuUtils::check_match(&self)) { + Tensor contiguousSelf = NpuUtils::format_contiguous(selfFp32); + Tensor result = bernoulli_out_npu(contiguousSelf, contiguousSelf, p); + NpuUtils::format_fresh_view(self, result); + } else { + bernoulli_out_npu(selfFp32, selfFp32, p); + self.copy_(selfFp32); + } + + if(self.scalar_type() != selfType){ + self = self.to(ScalarType::Half); + } + return self; +} + +Tensor& bernoulli_npu_(Tensor& self, const Tensor& p, Generator* gen) { + OpPreparation::CheckMemory({self}, {self}); + ScalarType selfType = self.scalar_type(); + Tensor selfFp32 = self; + Tensor pFp32 = OpPreparation::CastBackToOriFormat(p);; + if (self.scalar_type() == ScalarType::Half) { + selfFp32 = self.to(ScalarType::Float); + pFp32 = p.to(ScalarType::Float); + } + + if (!NpuUtils::check_match(&self)) { + Tensor contiguousSelf = NpuUtils::format_contiguous(selfFp32); + Tensor result = bernoulli_out_npu(contiguousSelf, contiguousSelf, pFp32); + NpuUtils::format_fresh_view(self, result); + } else { + bernoulli_out_npu(selfFp32, selfFp32, pFp32); + self.copy_(selfFp32); + } + + if(self.scalar_type() != selfType){ + self = self.to(ScalarType::Half); + } + return self; +} + +Tensor bernoulli_npu(const Tensor& self, Generator* gen) { + const Tensor p = self; + Tensor selfCopy = at::empty_with_format( + self.sizes(), self.options(), ACL_FORMAT_ND); + selfCopy.copy_(self); + return bernoulli_npu_(selfCopy, p, gen); +} +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/BertApplyAdamKernelNpu.cpp b/src/aten/src/ATen/native/npu/BertApplyAdamKernelNpu.cpp index 380f65f1aac08321d6a9b72e091454e8be042f0d..20a17421f33e4731220eee7dbbaa92ea178eb48b 100644 --- a/src/aten/src/ATen/native/npu/BertApplyAdamKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/BertApplyAdamKernelNpu.cpp @@ -1,108 +1,108 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -tuple bert_apply_adam_out_npu_nocheck( - Tensor& var_out, - Tensor& m_out, - Tensor& v_out, - const Tensor& var, - const Tensor& m, - const Tensor& v, - Scalar lr, - Scalar beta1, - Scalar beta2, - Scalar epsilon, - const Tensor& grad, - Scalar max_grad_norm, - Scalar global_grad_norm, - Scalar weight_decay) { - OpCommand cmd; - cmd.Name("ApplyAdamV2") - .Input(var) - .Input(m) - .Input(v) - .Input(lr, var.scalar_type()) - .Input(beta1, var.scalar_type()) - .Input(beta2, var.scalar_type()) - .Input(epsilon, var.scalar_type()) - .Input(grad) - .Input(max_grad_norm, var.scalar_type()) - .Input(global_grad_norm, var.scalar_type()) - .Input(weight_decay, var.scalar_type()) - .Output(var_out) - .Output(m_out) - .Output(v_out) - .Run(); - return std::tie(var_out, m_out, v_out); -} - -tuple bert_apply_adam_out_npu( - Tensor& var_out, - Tensor& m_out, - Tensor& v_out, - const Tensor& var, - const Tensor& m, - const Tensor& v, - Scalar lr, - Scalar beta1, - Scalar beta2, - Scalar epsilon, - const Tensor& grad, - Scalar max_grad_norm, - Scalar global_grad_norm, - Scalar weight_decay) { - OpPipeWithDefinedOut check; - check.CheckMemory({var, m, v, grad}, {var_out, m_out, v_out}); - - auto func = [&var, &m, &v, &lr, &beta1, &beta2, &epsilon, &grad, &max_grad_norm, &global_grad_norm, &weight_decay] ( - Tensor& var_out, - Tensor& m_out, - Tensor& v_out) { - bert_apply_adam_out_npu_nocheck(var_out, m_out, v_out, var, m, v, - lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay); - }; - - OpPipeWithMultiOut pipe(var_out, m_out, v_out); - return pipe.Call(func) - .ReturnRef(); -} - -tuple bert_apply_adam_npu( - Tensor& var, - Tensor& m, - Tensor& v, - Scalar lr, - Scalar beta1, - Scalar beta2, - Scalar epsilon, - const Tensor& grad, - Scalar max_grad_norm, - Scalar global_grad_norm, - Scalar weight_decay) { - bert_apply_adam_out_npu( - var, m, v, var, m, v, - lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay); - return std::tie(var, m, v); -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +tuple bert_apply_adam_out_npu_nocheck( + Tensor& var_out, + Tensor& m_out, + Tensor& v_out, + const Tensor& var, + const Tensor& m, + const Tensor& v, + Scalar lr, + Scalar beta1, + Scalar beta2, + Scalar epsilon, + const Tensor& grad, + Scalar max_grad_norm, + Scalar global_grad_norm, + Scalar weight_decay) { + OpCommand cmd; + cmd.Name("ApplyAdamV2") + .Input(var) + .Input(m) + .Input(v) + .Input(lr, var.scalar_type()) + .Input(beta1, var.scalar_type()) + .Input(beta2, var.scalar_type()) + .Input(epsilon, var.scalar_type()) + .Input(grad) + .Input(max_grad_norm, var.scalar_type()) + .Input(global_grad_norm, var.scalar_type()) + .Input(weight_decay, var.scalar_type()) + .Output(var_out) + .Output(m_out) + .Output(v_out) + .Run(); + return std::tie(var_out, m_out, v_out); +} + +tuple bert_apply_adam_out_npu( + Tensor& var_out, + Tensor& m_out, + Tensor& v_out, + const Tensor& var, + const Tensor& m, + const Tensor& v, + Scalar lr, + Scalar beta1, + Scalar beta2, + Scalar epsilon, + const Tensor& grad, + Scalar max_grad_norm, + Scalar global_grad_norm, + Scalar weight_decay) { + OpPipeWithDefinedOut check; + check.CheckMemory({var, m, v, grad}, {var_out, m_out, v_out}); + + auto func = [&var, &m, &v, &lr, &beta1, &beta2, &epsilon, &grad, &max_grad_norm, &global_grad_norm, &weight_decay] ( + Tensor& var_out, + Tensor& m_out, + Tensor& v_out) { + bert_apply_adam_out_npu_nocheck(var_out, m_out, v_out, var, m, v, + lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay); + }; + + OpPipeWithMultiOut pipe(var_out, m_out, v_out); + return pipe.Call(func) + .ReturnRef(); +} + +tuple bert_apply_adam_npu( + Tensor& var, + Tensor& m, + Tensor& v, + Scalar lr, + Scalar beta1, + Scalar beta2, + Scalar epsilon, + const Tensor& grad, + Scalar max_grad_norm, + Scalar global_grad_norm, + Scalar weight_decay) { + bert_apply_adam_out_npu( + var, m, v, var, m, v, + lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay); + return std::tie(var, m, v); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/BitwiseXorKernelNpu.cpp b/src/aten/src/ATen/native/npu/BitwiseXorKernelNpu.cpp index 818a660681db5b3a9053bb8a820d9977077342cb..0d2bd6fd64feb9383058280a67a44881a9bcec7d 100644 --- a/src/aten/src/ATen/native/npu/BitwiseXorKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/BitwiseXorKernelNpu.cpp @@ -1,164 +1,164 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/utils/CalcuOpUtil.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& bitwise_xor_out_npu_nocheck( - Tensor& result, - const Tensor& self, - const Scalar other) { - // executing the NPU operator - Tensor selfInput = (self.dtype() == at::ScalarType::Bool) ? self.to(at::ScalarType::Int) : self; - result = (result.dtype() == at::ScalarType::Bool) ? result.to(at::ScalarType::Int) : result; - - OpCommand cmd; - cmd.Name("BitwiseXor") - .Input(selfInput) - .Input(other, selfInput.scalar_type()) - .Output(result) - .Run(); - - return (result = (self.dtype() == at::ScalarType::Bool) ? result.to(at::ScalarType::Bool) : result); -} - -Tensor& bitwise_xor_out_npu( - Tensor& result, - const Tensor& self, - const Scalar other) { - OpPreparation::CheckOut( - {self}, - result, - self); - - bitwise_xor_out_npu_nocheck(result, self, other); - - return result; -} - -Tensor& bitwise_xor_out_npu_nocheck( - Tensor& result, - const Tensor& self, - const Tensor& other) { - auto unified_result = OpPreparation::binary_op_check(result, self, other, true); - - Tensor selfInput = (self.dtype() == at::ScalarType::Bool) ? self.to(at::ScalarType::Int) : self; - Tensor otherInput = (other.dtype() == at::ScalarType::Bool) ? other.to(at::ScalarType::Int) : other; - result = (result.dtype() == at::ScalarType::Bool) ? result.to(at::ScalarType::Int) : result; - - if (otherInput.dim() == 0 && !otherInput.is_npu()) { - bitwise_xor_out_npu(result, selfInput, otherInput.item()); - } else if (selfInput.dim() == 0 && !selfInput.is_npu()) { - bitwise_xor_out_npu(result, otherInput, selfInput.item()); - } else { - // executing the NPU operator - OpCommand cmd; - cmd.Name("BitwiseXor") - .Expect(unified_result) - .Input(selfInput) - .Input(otherInput) - .Output(result) - .Run(); - } - - return (result = (self.dtype() == at::ScalarType::Bool) ? result.to(at::ScalarType::Bool) : result); -} - -Tensor& bitwise_xor_out_npu( - Tensor& result, - const Tensor& self, - const Tensor& other) { - bool isSelfWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(self); - - Tensor outputTensor; - if (not isSelfWrapped) { - outputTensor = self; - } else { - outputTensor = other; - } - - auto outputSize = broadcast_ops_npu_output_size(self, other); - - OpPreparation::CheckOut( - {self}, - result, - CalcuOpUtil::get_tensor_npu_format(outputTensor), - outputTensor.scalar_type(), - outputSize); - - bitwise_xor_out_npu_nocheck(result, self, other); - - return result; -} - -Tensor bitwise_xor_npu(const Tensor& self, const Tensor& other) { - // calculate the output size - bool isSelfWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(self); - - Tensor outputTensor; - if (not isSelfWrapped) { - outputTensor = self; - } else { - outputTensor = other; - } - - auto outputSize = broadcast_ops_npu_output_size(self, other); - - // construct the output tensor of the NPU - Tensor result = OpPreparation::ApplyTensor(outputTensor, outputSize); - // calculate the output result of the NPU - bitwise_xor_out_npu_nocheck(result, self, other); - - return result; -} - -Tensor bitwise_xor_npu(const Tensor& self, Scalar other) { - Tensor result = OpPreparation::ApplyTensor(self); - // calculate the output result of the NPU - bitwise_xor_out_npu_nocheck(result, self, other); - - return result; -} - -Tensor& bitwise_xor_npu_(Tensor& self, const Tensor& other) { - OpPreparation::CheckMemory({self, other}, {self}); - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(self); - Tensor result = bitwise_xor_out_npu_nocheck(contiguousSelf, contiguousSelf, other); - NpuUtils::format_fresh_view(self, result); - } else { - bitwise_xor_out_npu_nocheck(self, self, other); - } - - return self; -} - -Tensor& bitwise_xor_npu_(Tensor& self, Scalar other) { - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(self); - Tensor result = bitwise_xor_out_npu_nocheck(contiguousSelf, contiguousSelf, other); - NpuUtils::format_fresh_view(self, result); - } else { - bitwise_xor_out_npu_nocheck(self, self, other); - } - - return self; -} - -} // namespace native +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" +#include "ATen/native/npu/utils/CalcuOpUtil.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& bitwise_xor_out_npu_nocheck( + Tensor& result, + const Tensor& self, + const Scalar other) { + // executing the NPU operator + Tensor selfInput = (self.dtype() == at::ScalarType::Bool) ? self.to(at::ScalarType::Int) : self; + result = (result.dtype() == at::ScalarType::Bool) ? result.to(at::ScalarType::Int) : result; + + OpCommand cmd; + cmd.Name("BitwiseXor") + .Input(selfInput) + .Input(other, selfInput.scalar_type()) + .Output(result) + .Run(); + + return (result = (self.dtype() == at::ScalarType::Bool) ? result.to(at::ScalarType::Bool) : result); +} + +Tensor& bitwise_xor_out_npu( + Tensor& result, + const Tensor& self, + const Scalar other) { + OpPreparation::CheckOut( + {self}, + result, + self); + + bitwise_xor_out_npu_nocheck(result, self, other); + + return result; +} + +Tensor& bitwise_xor_out_npu_nocheck( + Tensor& result, + const Tensor& self, + const Tensor& other) { + auto unified_result = OpPreparation::binary_op_check(result, self, other, true); + + Tensor selfInput = (self.dtype() == at::ScalarType::Bool) ? self.to(at::ScalarType::Int) : self; + Tensor otherInput = (other.dtype() == at::ScalarType::Bool) ? other.to(at::ScalarType::Int) : other; + result = (result.dtype() == at::ScalarType::Bool) ? result.to(at::ScalarType::Int) : result; + + if (otherInput.dim() == 0 && !otherInput.is_npu()) { + bitwise_xor_out_npu(result, selfInput, otherInput.item()); + } else if (selfInput.dim() == 0 && !selfInput.is_npu()) { + bitwise_xor_out_npu(result, otherInput, selfInput.item()); + } else { + // executing the NPU operator + OpCommand cmd; + cmd.Name("BitwiseXor") + .Expect(unified_result) + .Input(selfInput) + .Input(otherInput) + .Output(result) + .Run(); + } + + return (result = (self.dtype() == at::ScalarType::Bool) ? result.to(at::ScalarType::Bool) : result); +} + +Tensor& bitwise_xor_out_npu( + Tensor& result, + const Tensor& self, + const Tensor& other) { + bool isSelfWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(self); + + Tensor outputTensor; + if (not isSelfWrapped) { + outputTensor = self; + } else { + outputTensor = other; + } + + auto outputSize = broadcast_ops_npu_output_size(self, other); + + OpPreparation::CheckOut( + {self}, + result, + CalcuOpUtil::get_tensor_npu_format(outputTensor), + outputTensor.scalar_type(), + outputSize); + + bitwise_xor_out_npu_nocheck(result, self, other); + + return result; +} + +Tensor bitwise_xor_npu(const Tensor& self, const Tensor& other) { + // calculate the output size + bool isSelfWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(self); + + Tensor outputTensor; + if (not isSelfWrapped) { + outputTensor = self; + } else { + outputTensor = other; + } + + auto outputSize = broadcast_ops_npu_output_size(self, other); + + // construct the output tensor of the NPU + Tensor result = OpPreparation::ApplyTensor(outputTensor, outputSize); + // calculate the output result of the NPU + bitwise_xor_out_npu_nocheck(result, self, other); + + return result; +} + +Tensor bitwise_xor_npu(const Tensor& self, Scalar other) { + Tensor result = OpPreparation::ApplyTensor(self); + // calculate the output result of the NPU + bitwise_xor_out_npu_nocheck(result, self, other); + + return result; +} + +Tensor& bitwise_xor_npu_(Tensor& self, const Tensor& other) { + OpPreparation::CheckMemory({self, other}, {self}); + if (!NpuUtils::check_match(&self)) { + Tensor contiguousSelf = NpuUtils::format_contiguous(self); + Tensor result = bitwise_xor_out_npu_nocheck(contiguousSelf, contiguousSelf, other); + NpuUtils::format_fresh_view(self, result); + } else { + bitwise_xor_out_npu_nocheck(self, self, other); + } + + return self; +} + +Tensor& bitwise_xor_npu_(Tensor& self, Scalar other) { + if (!NpuUtils::check_match(&self)) { + Tensor contiguousSelf = NpuUtils::format_contiguous(self); + Tensor result = bitwise_xor_out_npu_nocheck(contiguousSelf, contiguousSelf, other); + NpuUtils::format_fresh_view(self, result); + } else { + bitwise_xor_out_npu_nocheck(self, self, other); + } + + return self; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/BoundingBoxDecodeKernelNpu.cpp b/src/aten/src/ATen/native/npu/BoundingBoxDecodeKernelNpu.cpp index fa2a86b0871c7d06d9199d4663ff3986736141b3..fb3745b091294a3f9d87906f7d775a18d7d0b3fc 100644 --- a/src/aten/src/ATen/native/npu/BoundingBoxDecodeKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/BoundingBoxDecodeKernelNpu.cpp @@ -1,81 +1,81 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/utils/CalcuOpUtil.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& bounding_box_decode_out_npu( - Tensor& result, - const Tensor& rois, - const Tensor& deltas, - SmallVector means, - SmallVector stds, - IntArrayRef max_shape, - double wh_ratio_clip) { - OpCommand cmd; - cmd.Name("BoundingBoxDecode") - .Input(rois) - .Input(deltas) - .Output(result) - .Attr("means", means) - .Attr("stds", stds) - .Attr("max_shape", max_shape) - .Attr("wh_ratio_clip", static_cast(wh_ratio_clip)) - .Run(); - - return result; -} - -Tensor bounding_box_decode_npu( - const Tensor& rois, - const Tensor& deltas, - double means0, - double means1, - double means2, - double means3, - double stds0, - double stds1, - double stds2, - double stds3, - IntArrayRef max_shape, - double wh_ratio_clip) { - SmallVector outputSize = {rois.size(0), 4}; - // construct the output tensor of the NPU - Tensor result = OpPreparation::ApplyTensor(rois, outputSize); - SmallVector means = { - static_cast(means0), - static_cast(means1), - static_cast(means2), - static_cast(means3)}; - SmallVector stds = { - static_cast(stds0), - static_cast(stds1), - static_cast(stds2), - static_cast(stds3)}; - - // calculate the output result of the NPU - bounding_box_decode_out_npu( - result, rois, deltas, means, stds, max_shape, wh_ratio_clip); - - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" +#include "ATen/native/npu/utils/CalcuOpUtil.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& bounding_box_decode_out_npu( + Tensor& result, + const Tensor& rois, + const Tensor& deltas, + SmallVector means, + SmallVector stds, + IntArrayRef max_shape, + double wh_ratio_clip) { + OpCommand cmd; + cmd.Name("BoundingBoxDecode") + .Input(rois) + .Input(deltas) + .Output(result) + .Attr("means", means) + .Attr("stds", stds) + .Attr("max_shape", max_shape) + .Attr("wh_ratio_clip", static_cast(wh_ratio_clip)) + .Run(); + + return result; +} + +Tensor bounding_box_decode_npu( + const Tensor& rois, + const Tensor& deltas, + double means0, + double means1, + double means2, + double means3, + double stds0, + double stds1, + double stds2, + double stds3, + IntArrayRef max_shape, + double wh_ratio_clip) { + SmallVector outputSize = {rois.size(0), 4}; + // construct the output tensor of the NPU + Tensor result = OpPreparation::ApplyTensor(rois, outputSize); + SmallVector means = { + static_cast(means0), + static_cast(means1), + static_cast(means2), + static_cast(means3)}; + SmallVector stds = { + static_cast(stds0), + static_cast(stds1), + static_cast(stds2), + static_cast(stds3)}; + + // calculate the output result of the NPU + bounding_box_decode_out_npu( + result, rois, deltas, means, stds, max_shape, wh_ratio_clip); + + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/BoundingBoxEncodeKernelNpu.cpp b/src/aten/src/ATen/native/npu/BoundingBoxEncodeKernelNpu.cpp index 3e02aad811f780301ab953efbb7f330a3d6ecfae..e4ea87d8d49337740c771c0566eddc4940afb297 100644 --- a/src/aten/src/ATen/native/npu/BoundingBoxEncodeKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/BoundingBoxEncodeKernelNpu.cpp @@ -1,73 +1,73 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& bounding_box_encode_out_npu( - Tensor& delats, - const Tensor& anchor_box, - const Tensor& ground_truth_box, - SmallVector means, - SmallVector stds) { - OpCommand cmd; - cmd.Name("BoundingBoxEncode") - .Input(anchor_box) - .Input(ground_truth_box) - .Output(delats) - .Attr("means", means) - .Attr("stds", stds) - .Run(); - - return delats; -} - -Tensor bounding_box_encode_npu( - const Tensor& anchor_box, - const Tensor& ground_truth_box, - double means0, - double means1, - double means2, - double means3, - double stds0, - double stds1, - double stds2, - double stds3) { - // construct the output tensor of the NPU - Tensor delats = OpPreparation::ApplyTensor(anchor_box, {anchor_box.size(0), 4}); - SmallVector means = { - static_cast(means0), - static_cast(means1), - static_cast(means2), - static_cast(means3)}; - SmallVector stds = { - static_cast(stds0), - static_cast(stds1), - static_cast(stds2), - static_cast(stds3)}; - - bounding_box_encode_out_npu( - delats, anchor_box, ground_truth_box, means, stds); - - return delats; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& bounding_box_encode_out_npu( + Tensor& delats, + const Tensor& anchor_box, + const Tensor& ground_truth_box, + SmallVector means, + SmallVector stds) { + OpCommand cmd; + cmd.Name("BoundingBoxEncode") + .Input(anchor_box) + .Input(ground_truth_box) + .Output(delats) + .Attr("means", means) + .Attr("stds", stds) + .Run(); + + return delats; +} + +Tensor bounding_box_encode_npu( + const Tensor& anchor_box, + const Tensor& ground_truth_box, + double means0, + double means1, + double means2, + double means3, + double stds0, + double stds1, + double stds2, + double stds3) { + // construct the output tensor of the NPU + Tensor delats = OpPreparation::ApplyTensor(anchor_box, {anchor_box.size(0), 4}); + SmallVector means = { + static_cast(means0), + static_cast(means1), + static_cast(means2), + static_cast(means3)}; + SmallVector stds = { + static_cast(stds0), + static_cast(stds1), + static_cast(stds2), + static_cast(stds3)}; + + bounding_box_encode_out_npu( + delats, anchor_box, ground_truth_box, means, stds); + + return delats; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/CatKernelNpu.cpp b/src/aten/src/ATen/native/npu/CatKernelNpu.cpp index 8c3ac876475bf2928430246cb85f1ab59a121e4e..4bc949120d755585d1fbd1ebd9f122f5a5ec7042 100644 --- a/src/aten/src/ATen/native/npu/CatKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/CatKernelNpu.cpp @@ -154,6 +154,20 @@ Tensor& _cat_out_npu(Tensor& result, TensorList tensors, int64_t dim) { } Tensor& cat_out_npu(Tensor& result, TensorList tensors, int64_t dim) { + SmallVector inputTensors = cat_dest_tensor_list(tensors); + + int64_t dim_post_expr = 0; + if (inputTensors.size() > 0) { + dim_post_expr = inputTensors[0].dim(); + } + dim = CalcuOpUtil::make_wrap_dim(dim, dim_post_expr); + auto outputSize = cat_npu_output_size(inputTensors, dim); + OpPreparation::CheckOut( + {tensors[0]}, + result, + ACL_FORMAT_ND, + tensors[0].scalar_type(), + outputSize); return at::_cat_out(result, tensors, dim); } diff --git a/src/aten/src/ATen/native/npu/CeluKernelNpu.cpp b/src/aten/src/ATen/native/npu/CeluKernelNpu.cpp index fc4602ea8554f5cd851623c72283df02dddf158d..c6bf9db470e132e51a8c263edab0209e3ddc03f2 100644 --- a/src/aten/src/ATen/native/npu/CeluKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/CeluKernelNpu.cpp @@ -1,59 +1,59 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor celu_out_npu_nocheck(Tensor& result, const Tensor& self, Scalar alpha) { - float alpha3 = 1.0; - OpCommand cmd; - cmd.Name("Celu") - .Input(self) - .Output(result) - .Attr("alpha1", alpha) - .Attr("alpha2", alpha) - .Attr("alpha3", alpha3) - .Run(); - return result; -} - -Tensor celu_out_npu(Tensor& result, const Tensor& self, Scalar alpha) { - OpPipeWithDefinedOut pipe; - return pipe.CheckMemory({self}, {result}) - .Func([&self, &alpha](Tensor& result){celu_out_npu_nocheck(result, self, alpha);}) - .Call(result); -} - -Tensor celu_npu(const Tensor& self, Scalar alpha) { - // construct the output tensor of the NPU - Tensor result = OpPreparation::ApplyTensor(self); - - // calculate the output result of the NPU - celu_out_npu(result, self, alpha); - - return result; -} - -Tensor& celu_npu_(Tensor& self, Scalar alpha) { - celu_out_npu(self, self, alpha); - return self; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor celu_out_npu_nocheck(Tensor& result, const Tensor& self, Scalar alpha) { + float alpha3 = 1.0; + OpCommand cmd; + cmd.Name("Celu") + .Input(self) + .Output(result) + .Attr("alpha1", alpha) + .Attr("alpha2", alpha) + .Attr("alpha3", alpha3) + .Run(); + return result; +} + +Tensor celu_out_npu(Tensor& result, const Tensor& self, Scalar alpha) { + OpPipeWithDefinedOut pipe; + return pipe.CheckMemory({self}, {result}) + .Func([&self, &alpha](Tensor& result){celu_out_npu_nocheck(result, self, alpha);}) + .Call(result); +} + +Tensor celu_npu(const Tensor& self, Scalar alpha) { + // construct the output tensor of the NPU + Tensor result = OpPreparation::ApplyTensor(self); + + // calculate the output result of the NPU + celu_out_npu(result, self, alpha); + + return result; +} + +Tensor& celu_npu_(Tensor& self, Scalar alpha) { + celu_out_npu(self, self, alpha); + return self; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/ConfusionTransposeBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/ConfusionTransposeBackwardKernelNpu.cpp index 0bbe872275761d0b02f68a61e790797fbd244100..776a0a76a18f1fca1fb52569e24cbd2aa915e412 100644 --- a/src/aten/src/ATen/native/npu/ConfusionTransposeBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/ConfusionTransposeBackwardKernelNpu.cpp @@ -1,60 +1,60 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor confusion_transpose_backward_npu( - const Tensor& grad, - IntArrayRef perm, - IntArrayRef shape, - bool transpose_first) { - SmallVector svec_shape; - if (transpose_first){ - svec_shape = array_to_small_vector(shape); - } else { - for (int i = 0; i < perm.size(); i++){ - svec_shape.emplace_back(shape[perm[i]]); - } - } - std::vector vec_perm; - int64_t perm_len = perm.size(); - int64_t temp_perm[perm_len] = {0}; - for (int64_t i = 0; i < perm_len; i++){ - temp_perm[perm[i]] = i; - } - vec_perm = std::vector(temp_perm, temp_perm+perm_len); - perm = IntArrayRef(vec_perm); - - Tensor result = OpPreparation::ApplyTensor(grad, shape); - - OpCommand cmd; - cmd.Name("ConfusionTransposeD") - .Input(grad) - .Output(result) - .Attr("perm", perm) - .Attr("shape", svec_shape) - .Attr("transpose_first", transpose_first) - .Run(); - - return result; -} - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor confusion_transpose_backward_npu( + const Tensor& grad, + IntArrayRef perm, + IntArrayRef shape, + bool transpose_first) { + SmallVector svec_shape; + if (transpose_first){ + svec_shape = array_to_small_vector(shape); + } else { + for (int i = 0; i < perm.size(); i++){ + svec_shape.emplace_back(shape[perm[i]]); + } + } + std::vector vec_perm; + int64_t perm_len = perm.size(); + int64_t temp_perm[perm_len] = {0}; + for (int64_t i = 0; i < perm_len; i++){ + temp_perm[perm[i]] = i; + } + vec_perm = std::vector(temp_perm, temp_perm+perm_len); + perm = IntArrayRef(vec_perm); + + Tensor result = OpPreparation::ApplyTensor(grad, shape); + + OpCommand cmd; + cmd.Name("ConfusionTransposeD") + .Input(grad) + .Output(result) + .Attr("perm", perm) + .Attr("shape", svec_shape) + .Attr("transpose_first", transpose_first) + .Run(); + + return result; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/ConfusionTransposeKernelNpu.cpp b/src/aten/src/ATen/native/npu/ConfusionTransposeKernelNpu.cpp index 12b36826dedf724a54e7c63b83900ad218cf1aba..fc6b602d1f531d1f3059279f9ef4c3af0845b50c 100644 --- a/src/aten/src/ATen/native/npu/ConfusionTransposeKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/ConfusionTransposeKernelNpu.cpp @@ -1,52 +1,52 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor confusion_transpose_npu( - const Tensor& self, - IntArrayRef perm, - IntArrayRef shape, - bool transpose_first) { - SmallVector output_size; - if (transpose_first){ - output_size = array_to_small_vector(shape); - } else { - for (int i = 0; i < perm.size(); i++){ - output_size.emplace_back(shape[perm[i]]); - } - } - - // construct the output tensor of the NPU - Tensor result = OpPreparation::ApplyTensor(self, output_size); - OpCommand cmd; - cmd.Name("ConfusionTransposeD") - .Input(self) - .Output(result) - .Attr("perm", perm) - .Attr("shape", shape) - .Attr("transpose_first", transpose_first) - .Run(); - - return result; -} - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor confusion_transpose_npu( + const Tensor& self, + IntArrayRef perm, + IntArrayRef shape, + bool transpose_first) { + SmallVector output_size; + if (transpose_first){ + output_size = array_to_small_vector(shape); + } else { + for (int i = 0; i < perm.size(); i++){ + output_size.emplace_back(shape[perm[i]]); + } + } + + // construct the output tensor of the NPU + Tensor result = OpPreparation::ApplyTensor(self, output_size); + OpCommand cmd; + cmd.Name("ConfusionTransposeD") + .Input(self) + .Output(result) + .Attr("perm", perm) + .Attr("shape", shape) + .Attr("transpose_first", transpose_first) + .Run(); + + return result; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/CrossKernelNpu.cpp b/src/aten/src/ATen/native/npu/CrossKernelNpu.cpp index 64ddb33c2eef56fe67d91c0e66759d34b34349c0..c7322bfec336b4ed6bb9324d760841949ebd230c 100644 --- a/src/aten/src/ATen/native/npu/CrossKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/CrossKernelNpu.cpp @@ -1,63 +1,63 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/utils/CalcuOpUtil.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor cross_dest_output(const Tensor& self, const Tensor& other) { - bool isSelfWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(self); - return isSelfWrapped ? other : self; -} - -int64_t cross_real_dim(optional dim) { - // -65530 is the default value of dim - return dim.has_value() ? dim.value() : -65530; -} - -Tensor& cross_out_npu( - Tensor& result, - const Tensor& self, - const Tensor& other, - optional dim) { - int64_t realDim = cross_real_dim(dim); - OpCommand cmd; - cmd.Name("Cross") - .Input(self) - .Input(other) - .Output(result) - .Attr("dim", realDim) - .Run(); - return result; -} - -Tensor cross_npu( - const Tensor& self, - const Tensor& other, - optional dim) { - auto outputSize = broadcast_ops_npu_output_size(self, other); - Tensor outputTensor = cross_dest_output(self, other); - Tensor result = at::empty_with_format( - outputSize, - self.options(), - CalcuOpUtil::get_tensor_npu_format(outputTensor)); - cross_out_npu(result, self, other, dim); - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" +#include "ATen/native/npu/utils/CalcuOpUtil.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor cross_dest_output(const Tensor& self, const Tensor& other) { + bool isSelfWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(self); + return isSelfWrapped ? other : self; +} + +int64_t cross_real_dim(optional dim) { + // -65530 is the default value of dim + return dim.has_value() ? dim.value() : -65530; +} + +Tensor& cross_out_npu( + Tensor& result, + const Tensor& self, + const Tensor& other, + optional dim) { + int64_t realDim = cross_real_dim(dim); + OpCommand cmd; + cmd.Name("Cross") + .Input(self) + .Input(other) + .Output(result) + .Attr("dim", realDim) + .Run(); + return result; +} + +Tensor cross_npu( + const Tensor& self, + const Tensor& other, + optional dim) { + auto outputSize = broadcast_ops_npu_output_size(self, other); + Tensor outputTensor = cross_dest_output(self, other); + Tensor result = at::empty_with_format( + outputSize, + self.options(), + CalcuOpUtil::get_tensor_npu_format(outputTensor)); + cross_out_npu(result, self, other, dim); + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/DropoutV2BackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/DropoutV2BackwardKernelNpu.cpp index 34c41be07f188d2ae1ed3400ebb97b6e1aa6d926..89b720b61bf338dc994cf0a4558a6916180e42e8 100644 --- a/src/aten/src/ATen/native/npu/DropoutV2BackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/DropoutV2BackwardKernelNpu.cpp @@ -1,64 +1,64 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - - -Tensor& dropout_v2_backward_out_npu( - Tensor& result, - const Tensor& self, - const Tensor& mask, - double p) { - - OpCommand cmd; - cmd.Name("MaskedScale") - .Input(self) - .Input(mask) - .Output(result) - .Attr("value",static_cast(1./(1-p))) - .Run(); - return result; -} - -Tensor dropout_v2_backward_npu(const Tensor& grad_output, const Tensor& mask, double p){ - TORCH_CHECK(grad_output.scalar_type() == ScalarType::Half || - grad_output.scalar_type() == ScalarType::Float, - "grad_output's dtype only support fp16 or fp32 current"); - TORCH_CHECK(mask.scalar_type() == ScalarType::Half || - mask.scalar_type() == ScalarType::Float || - mask.scalar_type() == ScalarType::Char || - mask.scalar_type() == ScalarType::Byte, - "mask's dtype should be float32, float16, or int8 and uint8" ); - TORCH_CHECK(grad_output.sizes() == mask.sizes(), - "grad_output must be the same shape with mask"); - - Tensor maskCopy = mask; - if (maskCopy.scalar_type() == ScalarType::Byte){ - maskCopy = maskCopy.to(ScalarType::Half); - } - auto result = OpPreparation::ApplyTensor(grad_output); - dropout_v2_backward_out_npu(result, grad_output, maskCopy, p); - - return result; - -} - -} // namespace native -} // namespace at - +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + + +Tensor& dropout_v2_backward_out_npu( + Tensor& result, + const Tensor& self, + const Tensor& mask, + double p) { + + OpCommand cmd; + cmd.Name("MaskedScale") + .Input(self) + .Input(mask) + .Output(result) + .Attr("value",static_cast(1./(1-p))) + .Run(); + return result; +} + +Tensor dropout_v2_backward_npu(const Tensor& grad_output, const Tensor& mask, double p){ + TORCH_CHECK(grad_output.scalar_type() == ScalarType::Half || + grad_output.scalar_type() == ScalarType::Float, + "grad_output's dtype only support fp16 or fp32 current"); + TORCH_CHECK(mask.scalar_type() == ScalarType::Half || + mask.scalar_type() == ScalarType::Float || + mask.scalar_type() == ScalarType::Char || + mask.scalar_type() == ScalarType::Byte, + "mask's dtype should be float32, float16, or int8 and uint8" ); + TORCH_CHECK(grad_output.sizes() == mask.sizes(), + "grad_output must be the same shape with mask"); + + Tensor maskCopy = mask; + if (maskCopy.scalar_type() == ScalarType::Byte){ + maskCopy = maskCopy.to(ScalarType::Half); + } + auto result = OpPreparation::ApplyTensor(grad_output); + dropout_v2_backward_out_npu(result, grad_output, maskCopy, p); + + return result; + +} + +} // namespace native +} // namespace at + diff --git a/src/aten/src/ATen/native/npu/DropoutV2KernelNpu.cpp b/src/aten/src/ATen/native/npu/DropoutV2KernelNpu.cpp index e4a435e77a4c8f9d479ae5a57b6ff43f2fa01eea..f03f5d88a6948908dde28df6f78ae4db39c498d7 100644 --- a/src/aten/src/ATen/native/npu/DropoutV2KernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/DropoutV2KernelNpu.cpp @@ -1,58 +1,58 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -tuple dropout_v2_out_npu( - Tensor& result, - Tensor& mask, - Tensor& new_seed, - const Tensor& self, - Tensor& seed, - double p) { - - OpCommand cmd; - cmd.Name("DropoutV2") - .Input(self) - .Input(seed) - .Output(result) - .Output(mask) - .Output(new_seed) - .Attr("p", static_cast(p)) - .Run(); - - return tuple(result, mask, new_seed); -} - -tuple dropout_v2_npu(const Tensor& self, Tensor& seed, double p) { - Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); - Tensor formatCastOfSeed = OpPreparation::CastBackToOriFormat(seed); - - Tensor result = OpPreparation::ApplyTensor(formatCastOfSelf); - Tensor mask = OpPreparation::ApplyTensor(formatCastOfSelf, formatCastOfSeed.options()); - dropout_v2_out_npu(result, mask, formatCastOfSeed, formatCastOfSelf, formatCastOfSeed, p); - NpuUtils::format_fresh_view(seed, formatCastOfSeed); - return std::tuple(result, mask, seed); -} - - -} // namespace native -} // namespace at - +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +tuple dropout_v2_out_npu( + Tensor& result, + Tensor& mask, + Tensor& new_seed, + const Tensor& self, + Tensor& seed, + double p) { + + OpCommand cmd; + cmd.Name("DropoutV2") + .Input(self) + .Input(seed) + .Output(result) + .Output(mask) + .Output(new_seed) + .Attr("p", static_cast(p)) + .Run(); + + return tuple(result, mask, new_seed); +} + +tuple dropout_v2_npu(const Tensor& self, Tensor& seed, double p) { + Tensor formatCastOfSelf = OpPreparation::CastBackToOriFormat(self); + Tensor formatCastOfSeed = OpPreparation::CastBackToOriFormat(seed); + + Tensor result = OpPreparation::ApplyTensor(formatCastOfSelf); + Tensor mask = OpPreparation::ApplyTensor(formatCastOfSelf, formatCastOfSeed.options()); + dropout_v2_out_npu(result, mask, formatCastOfSeed, formatCastOfSelf, formatCastOfSeed, p); + NpuUtils::format_fresh_view(seed, formatCastOfSeed); + return std::tuple(result, mask, seed); +} + + +} // namespace native +} // namespace at + diff --git a/src/aten/src/ATen/native/npu/EmbeddingBagKernelNpu.cpp b/src/aten/src/ATen/native/npu/EmbeddingBagKernelNpu.cpp index 67dfb8a3f9646902118b67184afdf15bfd6c1d60..c6c6b0517c8e069f705fe049eafe4ed18c002fed 100644 --- a/src/aten/src/ATen/native/npu/EmbeddingBagKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/EmbeddingBagKernelNpu.cpp @@ -1,96 +1,96 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -namespace { -SmallVector _embedding_bag_npu_output_size( - const Tensor& weight, - const Tensor& indices, - const Tensor& offsets) { - SmallVector outputSize = {}; - if (indices.dim() == 1) { - outputSize = {offsets.size(0), weight.size(1)}; - } else { - outputSize = {indices.size(0), weight.size(1)}; - } - return outputSize; -} // _embedding_bag_npu_output_size - -string get_mode_str(bool mode) { - string modeStr = "mean"; - if (mode == 0) { - modeStr = "sum"; - } else if (mode == 1) { - modeStr = "mean"; - } else { - modeStr = "max"; - } - return modeStr; -} // get_mode_str - -} // namespace - -tuple _embedding_bag_npu( - const Tensor& weight, - const Tensor& indices, - const Tensor& offsets, - bool scale_grad_by_freq, - int64_t mode, - bool sparse, - const Tensor& per_sample_weights, - bool include_last_offset) { - auto outputSize = _embedding_bag_npu_output_size(weight, indices, offsets); - - Tensor output = OpPreparation::ApplyTensorWithFormat(outputSize, weight.options(), ACL_FORMAT_ND); - - Tensor indicesCopy = indices; - if (!(indices.dtype() == at::kInt)) { - indicesCopy = indicesCopy.to(at::kInt); - } - - string modeStr = get_mode_str(mode); - - OpCommand cmd; - cmd.Name("EmbeddingBag") - .Input(weight) - .Input(indicesCopy); - if (offsets.defined()) { - Tensor offsetsCopy = offsets; - if (!(offsets.dtype() == at::kInt)) { - offsetsCopy = offsetsCopy.to(at::kInt); - } - cmd.Input(offsetsCopy); - } - if (per_sample_weights.defined()) { - cmd.Input(per_sample_weights); - } - cmd.Output(output) - .Attr("mode", modeStr) - .Attr("scale_grad_by_freq", scale_grad_by_freq) - .Attr("sparse", sparse) - .Attr("include_last_offset", include_last_offset) - .Run(); - - return std::tie(output, output, output, output); -} - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +namespace { +SmallVector _embedding_bag_npu_output_size( + const Tensor& weight, + const Tensor& indices, + const Tensor& offsets) { + SmallVector outputSize = {}; + if (indices.dim() == 1) { + outputSize = {offsets.size(0), weight.size(1)}; + } else { + outputSize = {indices.size(0), weight.size(1)}; + } + return outputSize; +} // _embedding_bag_npu_output_size + +string get_mode_str(bool mode) { + string modeStr = "mean"; + if (mode == 0) { + modeStr = "sum"; + } else if (mode == 1) { + modeStr = "mean"; + } else { + modeStr = "max"; + } + return modeStr; +} // get_mode_str + +} // namespace + +tuple _embedding_bag_npu( + const Tensor& weight, + const Tensor& indices, + const Tensor& offsets, + bool scale_grad_by_freq, + int64_t mode, + bool sparse, + const Tensor& per_sample_weights, + bool include_last_offset) { + auto outputSize = _embedding_bag_npu_output_size(weight, indices, offsets); + + Tensor output = OpPreparation::ApplyTensorWithFormat(outputSize, weight.options(), ACL_FORMAT_ND); + + Tensor indicesCopy = indices; + if (!(indices.dtype() == at::kInt)) { + indicesCopy = indicesCopy.to(at::kInt); + } + + string modeStr = get_mode_str(mode); + + OpCommand cmd; + cmd.Name("EmbeddingBag") + .Input(weight) + .Input(indicesCopy); + if (offsets.defined()) { + Tensor offsetsCopy = offsets; + if (!(offsets.dtype() == at::kInt)) { + offsetsCopy = offsetsCopy.to(at::kInt); + } + cmd.Input(offsetsCopy); + } + if (per_sample_weights.defined()) { + cmd.Input(per_sample_weights); + } + cmd.Output(output) + .Attr("mode", modeStr) + .Attr("scale_grad_by_freq", scale_grad_by_freq) + .Attr("sparse", sparse) + .Attr("include_last_offset", include_last_offset) + .Run(); + + return std::tie(output, output, output, output); +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/EmbeddingRenormKernelNpu.cpp b/src/aten/src/ATen/native/npu/EmbeddingRenormKernelNpu.cpp index 3a22c9157d07cec008c3c297ab515815d5349511..d10eb8bf02b61b9341ebe0cf812494a2124a1b61 100644 --- a/src/aten/src/ATen/native/npu/EmbeddingRenormKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/EmbeddingRenormKernelNpu.cpp @@ -1,168 +1,138 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector embedding_renorm_npu_input( - const SmallVector& inputTensor) { - return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); -} - -SmallVector embedding_renorm_npu_output( - const SmallVector& outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector embedding_renorm_npu_attr( - double max_norm, - double norm_type){ - int64_t dim = 0; - float max_norm_float = (float) max_norm; - float norm_type_float = (float) norm_type; - NPUAttrDesc npuAttrScalarP = NPUAttrDesc("p", norm_type_float); - NPUAttrDesc npuAttrScalarMaxnorm = NPUAttrDesc("maxnorm", max_norm_float); - NPUAttrDesc npuAttrDim = NPUAttrDesc("dim", dim); - SmallVector attrs = {npuAttrScalarP, npuAttrDim, npuAttrScalarMaxnorm}; - return attrs; -} -SmallVector embedding_gather2d_npu_attr() { - NPUAttrDesc npuAttrAxis = NPUAttrDesc("axis", (int64_t)0); - SmallVector attrs = {npuAttrAxis}; - return attrs; -} - -SmallVector embedding_renorm_scatter_update_npu_attr(){ - NPUAttrDesc npuAttrAxis = NPUAttrDesc("use_locking", false); - SmallVector attrs = {npuAttrAxis}; - return attrs; -} - -Tensor& embedding_renorm_gather2d_out_npu( - Tensor& result, - const Tensor& self, - const Tensor& indices - ){ -// execute the NPU operate GatherV2D - auto inputs = embedding_renorm_npu_input({self, indices}); - auto outputs = embedding_renorm_npu_output({result}); - auto attrs = embedding_gather2d_npu_attr(); - CalcuOpUtil::execute_npu_operate("GatherV2D", inputs, outputs, attrs); - return result; -} - -Tensor& embedding_renorm_execute_out_npu( - Tensor& result, - const Tensor& self, - double max_norm, - double norm_type){ -//execute the NPU operate Renorm - auto inputs = embedding_renorm_npu_input({self}); - auto outputs = embedding_renorm_npu_output({result}); - auto attrs = embedding_renorm_npu_attr(max_norm, norm_type); - CalcuOpUtil::execute_npu_operate("Renorm", inputs, outputs, attrs); - return result; -} - - -Tensor& embedding_renorm_scatter_update_out_npu( - Tensor& result, - const Tensor& self, - const Tensor& indices, - const Tensor& update){ - auto inputs = embedding_renorm_npu_input({self, indices, update}); - auto outputs = embedding_renorm_npu_output({result}); - auto attrs = embedding_renorm_scatter_update_npu_attr(); - CalcuOpUtil::execute_npu_operate("ScatterUpdate", inputs, outputs, attrs); - return result; -} - - -Tensor& embedding_renorm_out_npu( - Tensor& result, - const Tensor& self, - const Tensor& indices, - Tensor& mid_input, - Tensor& mid_output, - double max_norm, - double norm_type){ -// execute the NPU operate GatherV2D,generate new tensor by indices - embedding_renorm_gather2d_out_npu( - mid_input, - self, - indices); -//execute the NPU operate Renorm - embedding_renorm_execute_out_npu( - mid_output, - mid_input, - max_norm, - norm_type); -// executing the NPU operator ScatterUpdate - embedding_renorm_scatter_update_out_npu( - result, - self, - indices, - mid_output); - return result; -} - -Tensor& embedding_renorm_npu_( - Tensor& self, - const Tensor& indices, - double max_norm, - double norm_type) { - -//check dim and type - auto self_arg = TensorArg(self, "self", 1); - auto indices_arg = TensorArg(indices, "indices", 2); - checkDim("embedding_renorm_", self_arg, 2); - checkScalarType("embedding_renorm_", indices_arg, kLong); - -// indices must be int64 in pytorch, but npu can only support int32 - auto indices_int32 = indices.to("cpu"); - indices_int32 = indices_int32.to(at::kInt); - indices_int32 = indices_int32.to("npu"); - -//resize indices to 1D - Tensor indices_copy = indices.clone(); - auto num_indices = indices.numel(); - resize_npu_(indices_copy, num_indices); - - SmallVector inputs = {self}; - SmallVector outputs = {self}; - CalcuOpUtil::check_memory_over_laps(inputs, outputs); - -//get the outSize of GatherV2 , the middle tensor - auto midSize = embedding_renorm_mid_npu_output_size(self, indices_copy); - Tensor mid = at::empty_with_format(midSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - Tensor mid1 = at::empty_with_format(midSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - -//inplace operate - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(self); - Tensor result = embedding_renorm_out_npu(contiguousSelf, contiguousSelf, indices_copy, mid, mid1, max_norm, norm_type); - NpuUtils::format_fresh_view(self, result); - } else { - embedding_renorm_out_npu(self, self, indices_copy, mid, mid1, max_norm, norm_type); - } - return self; -} - -} +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& embedding_renorm_gather2d_out_npu( + Tensor& result, + const Tensor& self, + const Tensor& indices) { + OpCommand cmd; + cmd.Name("GatherV2D") + .Input(self) + .Input(indices) + .Output(result) + .Attr("axis", (int64_t)0) + .Run(); + return result; +} + +Tensor& embedding_renorm_execute_out_npu( + Tensor& result, + const Tensor& self, + double max_norm, + double norm_type) { + OpCommand cmd; + cmd.Name("Renorm") + .Input(self) + .Output(result) + .Attr("p", (float)norm_type) + .Attr("dim", (int64_t)0) + .Attr("maxnorm", (float)max_norm) + .Run(); + return result; +} + + +Tensor& embedding_renorm_scatter_update_out_npu( + Tensor& result, + const Tensor& self, + const Tensor& indices, + const Tensor& update) { + OpCommand cmd; + cmd.Name("ScatterUpdate") + .Input(self) + .Input(indices) + .Input(update) + .Output(result) + .Attr("use_locking", false) + .Run(); + return result; +} + +Tensor& embedding_renorm_out_npu( + Tensor& result, + const Tensor& self, + const Tensor& indices, + double max_norm, + double norm_type){ + + //get the outSize of GatherV2 , the middle tensor + SmallVector midSize = {indices.size(0), self.size(1)}; + Tensor mid_input = OpPreparation::ApplyTensor(self, midSize); + Tensor mid_output = OpPreparation::ApplyTensor(self, midSize); + + // execute the NPU operate GatherV2D, generate new tensor by indices + embedding_renorm_gather2d_out_npu(mid_input,self,indices); + + //execute the NPU operate Renorm + embedding_renorm_execute_out_npu(mid_output, mid_input, max_norm, norm_type); + + //execute the NPU operate ZerosLike or RangeD, generate new tensor by indices.numel() + Tensor mid_output_copy = mid_output.clone(); + auto num_indices = indices.numel(); + Tensor input_indices; + + // RangeD not support range(0,0) + if (num_indices - 1 == 0) { + input_indices = at::zeros({1}, self.options()).to(at::kLong); + } else { + input_indices = at::range(0, num_indices-1, self.options()).to(at::kLong); + } + + //execute the NPU operate MUL, generate change result + auto num_mid_output = mid_output.numel(); + resize_npu_(mid_output_copy, num_mid_output); + Tensor scalar_out = OpPreparation::ApplyTensor(self, {num_indices, 1}); + embedding_renorm_gather2d_out_npu(scalar_out, mid_output_copy, input_indices); + Tensor out_res = mid_input * scalar_out; + + // executing the NPU operator ScatterUpdate + embedding_renorm_scatter_update_out_npu(result, self, indices, out_res); + + return result; +} + +Tensor& embedding_renorm_npu_( + Tensor& self, + const Tensor& indices, + double max_norm, + double norm_type) { + + //check dim and type + auto self_arg = TensorArg(self, "self", 1); + auto indices_arg = TensorArg(indices, "indices", 2); + checkDim("embedding_renorm_", self_arg, 2); + checkScalarType("embedding_renorm_", indices_arg, kLong); + + //resize indices to 1D + Tensor indices_copy = indices.clone(); + auto num_indices = indices.numel(); + resize_npu_(indices_copy, num_indices); + + OpPipeWithDefinedOut pipe; + pipe.CheckMemory({self, indices_copy}, {self}) + .Func([&self, &indices_copy, max_norm, norm_type](Tensor& result){ + embedding_renorm_out_npu(self, self, indices_copy, max_norm, norm_type);}) + .Call(self); + + return self; +} + +} } \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/EqualKernelNpu.cpp b/src/aten/src/ATen/native/npu/EqualKernelNpu.cpp index 0d21a2f24546d62519b09ca46680a56a1c3d1ab3..b3dab641a20295f55483a0cd0af891504ea97dff 100644 --- a/src/aten/src/ATen/native/npu/EqualKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/EqualKernelNpu.cpp @@ -1,55 +1,55 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -bool equal_npu(const Tensor& self, const Tensor& other) { - //check the shape of self and other - if(self.sizes() != other.sizes()) { - return false; - } - - TORCH_CHECK( - self.scalar_type() == other.scalar_type(), - "Expected object of scalar type ", - self.scalar_type(), - ", but got ", - other.scalar_type(), - " for argument #2 'other' in call to equal_npu"); - - // construct the output tensor of the NPU - Tensor result = at::empty_with_format( - {1}, - self.options().dtype(kBool), - ACL_FORMAT_ND); - - // calculate the output result of the NPU - OpCommand cmd; - cmd.Name("TensorEqual") - .Input(self) - .Input(other) - .Output(result) - .Run(); - - return result.item().to(); -} -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +bool equal_npu(const Tensor& self, const Tensor& other) { + //check the shape of self and other + if(self.sizes() != other.sizes()) { + return false; + } + + TORCH_CHECK( + self.scalar_type() == other.scalar_type(), + "Expected object of scalar type ", + self.scalar_type(), + ", but got ", + other.scalar_type(), + " for argument #2 'other' in call to equal_npu"); + + // construct the output tensor of the NPU + Tensor result = at::empty_with_format( + {1}, + self.options().dtype(kBool), + ACL_FORMAT_ND); + + // calculate the output result of the NPU + OpCommand cmd; + cmd.Name("TensorEqual") + .Input(self) + .Input(other) + .Output(result) + .Run(); + + return result.item().to(); +} +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/FastGeluBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/FastGeluBackwardKernelNpu.cpp index 9a4cc7bdfbd5c1f0bd33bd9004c4f208f8bb7b08..d0a1b9968391ac26f090476339ffa913862603ee 100644 --- a/src/aten/src/ATen/native/npu/FastGeluBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/FastGeluBackwardKernelNpu.cpp @@ -1,60 +1,60 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - - -namespace { -Tensor& fast_gelu_backward_npu_nocheck( - Tensor& grad_input, - const Tensor& grad, - const Tensor& self) { - // constructs the input and output NPUTensorDesc - OpCommand cmd; - cmd.Name("FastGeluGrad") - .Input(grad) - .Input(self) - .Output(grad_input) - .Run(); - - return grad_input; -} - -} - -Tensor fast_gelu_backward_npu( - const Tensor& grad, - const Tensor& self) { - // calculate the output size - //Tensor outputTensor = self; - auto outputSize = input_same_output_size(self); - - // construct the output tensor of the NPU - Tensor grad_input = at::empty_with_format( - outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - - // calculate the output result of the NPU - fast_gelu_backward_npu_nocheck(grad_input, grad, self); - - return grad_input; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + + +namespace { +Tensor& fast_gelu_backward_npu_nocheck( + Tensor& grad_input, + const Tensor& grad, + const Tensor& self) { + // constructs the input and output NPUTensorDesc + OpCommand cmd; + cmd.Name("FastGeluGrad") + .Input(grad) + .Input(self) + .Output(grad_input) + .Run(); + + return grad_input; +} + +} + +Tensor fast_gelu_backward_npu( + const Tensor& grad, + const Tensor& self) { + // calculate the output size + //Tensor outputTensor = self; + auto outputSize = input_same_output_size(self); + + // construct the output tensor of the NPU + Tensor grad_input = at::empty_with_format( + outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); + + // calculate the output result of the NPU + fast_gelu_backward_npu_nocheck(grad_input, grad, self); + + return grad_input; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/FastGeluKernelNpu.cpp b/src/aten/src/ATen/native/npu/FastGeluKernelNpu.cpp index ff2f136fc06ce4d7833f431828efaa953740ff73..e90d3a5af0453e7ecbfeb9a21a57762d6ee8dae3 100644 --- a/src/aten/src/ATen/native/npu/FastGeluKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/FastGeluKernelNpu.cpp @@ -1,49 +1,49 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -namespace { -Tensor fast_gelu_npu_nocheck(Tensor& result, const Tensor& self) { - - OpCommand cmd; - cmd.Name("FastGelu") - .Input(self) - .Output(result) - .Run(); - - return result; -} - -} //namespace - -Tensor fast_gelu_npu(const Tensor& self) { - // calculate the output size - auto outputSize = input_same_output_size(self); - // construct the output tensor of the NPU - Tensor result = at::empty_with_format(outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - // calculate the output result of the NPU - fast_gelu_npu_nocheck(result, self); - - return result; -} - -} // namespace native -} // namespace at - +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +namespace { +Tensor fast_gelu_npu_nocheck(Tensor& result, const Tensor& self) { + + OpCommand cmd; + cmd.Name("FastGelu") + .Input(self) + .Output(result) + .Run(); + + return result; +} + +} //namespace + +Tensor fast_gelu_npu(const Tensor& self) { + // calculate the output size + auto outputSize = input_same_output_size(self); + // construct the output tensor of the NPU + Tensor result = at::empty_with_format(outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); + // calculate the output result of the NPU + fast_gelu_npu_nocheck(result, self); + + return result; +} + +} // namespace native +} // namespace at + diff --git a/src/aten/src/ATen/native/npu/GerKernelNpu.cpp b/src/aten/src/ATen/native/npu/GerKernelNpu.cpp index 8651b3e70493d9ddc86a869f2deb45154dde1f05..baac62e61a5292734160907efca70501fec08dc0 100644 --- a/src/aten/src/ATen/native/npu/GerKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/GerKernelNpu.cpp @@ -1,86 +1,86 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector ger_npu_output_size( - const Tensor& self, - const Tensor& vec2) { - int64_t outputsize_0 = self.size(0); - int64_t outputsize_1 = vec2.size(0); - SmallVector outputsize = {outputsize_0, outputsize_1}; - - return outputsize; -} - -Tensor& ger_out_npu_nocheck(Tensor& result, const Tensor& self , const Tensor& vec2) { - OpCommand cmd; - cmd.Name("Ger") - .Input(self) - .Input(vec2) - .Output(result) - .Run(); - - return result; -} - -Tensor& ger_out_npu(Tensor& result, const Tensor& self , const Tensor& vec2) { - // check shape - TORCH_CHECK( - self.dim() == 1, "Input1 must have only1 dims."); - TORCH_CHECK( - vec2.dim() == 1, "Input2 must have only1 dims."); - - // calculate the output size - auto outputSize = ger_npu_output_size(self, vec2); - - OpPreparation::CheckOut( - {self}, - result, - self, - outputSize); - - OpPipeWithDefinedOut pipe; - return pipe.CheckMemory({self, vec2}, {result}) - .Func([&self, &vec2](Tensor& result){ger_out_npu_nocheck(result, self, vec2);}) - .Call(result); -} - -Tensor ger_npu(const Tensor& self, const Tensor& vec2) { - // check shape - TORCH_CHECK( - self.dim() == 1, "Input1 must have only1 dims."); - TORCH_CHECK( - vec2.dim() == 1, "Input2 must have only1 dims."); - - // calculate the output size - auto outputSize = ger_npu_output_size(self, vec2); - - // construct the output tensor of the NPU - Tensor result = OpPreparation::ApplyTensor(self, outputSize); - - // calculate the output result of the NPU - ger_out_npu_nocheck(result, self, vec2); - - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector ger_npu_output_size( + const Tensor& self, + const Tensor& vec2) { + int64_t outputsize_0 = self.size(0); + int64_t outputsize_1 = vec2.size(0); + SmallVector outputsize = {outputsize_0, outputsize_1}; + + return outputsize; +} + +Tensor& ger_out_npu_nocheck(Tensor& result, const Tensor& self , const Tensor& vec2) { + OpCommand cmd; + cmd.Name("Ger") + .Input(self) + .Input(vec2) + .Output(result) + .Run(); + + return result; +} + +Tensor& ger_out_npu(Tensor& result, const Tensor& self , const Tensor& vec2) { + // check shape + TORCH_CHECK( + self.dim() == 1, "Input1 must have only1 dims."); + TORCH_CHECK( + vec2.dim() == 1, "Input2 must have only1 dims."); + + // calculate the output size + auto outputSize = ger_npu_output_size(self, vec2); + + OpPreparation::CheckOut( + {self}, + result, + self, + outputSize); + + OpPipeWithDefinedOut pipe; + return pipe.CheckMemory({self, vec2}, {result}) + .Func([&self, &vec2](Tensor& result){ger_out_npu_nocheck(result, self, vec2);}) + .Call(result); +} + +Tensor ger_npu(const Tensor& self, const Tensor& vec2) { + // check shape + TORCH_CHECK( + self.dim() == 1, "Input1 must have only1 dims."); + TORCH_CHECK( + vec2.dim() == 1, "Input2 must have only1 dims."); + + // calculate the output size + auto outputSize = ger_npu_output_size(self, vec2); + + // construct the output tensor of the NPU + Tensor result = OpPreparation::ApplyTensor(self, outputSize); + + // calculate the output result of the NPU + ger_out_npu_nocheck(result, self, vec2); + + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/GiouBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/GiouBackwardKernelNpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5672a390dfea63cd1e0842ab980b7b354e2519fb --- /dev/null +++ b/src/aten/src/ATen/native/npu/GiouBackwardKernelNpu.cpp @@ -0,0 +1,73 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +std::tuple +giou_backward_inner_out_npu( + Tensor& dbboxes, + Tensor& dgtboxes, + const Tensor& grad, + const Tensor& bboxes, + const Tensor& gtboxes, + bool trans, + bool is_cross, + int64_t mode){ + string mode_str = mode == 1 ? "iof" : "iou"; + + OpCommand cmd; + cmd.Name("GIoUGrad") + .Input(grad) + .Input(bboxes) + .Input(gtboxes) + .Output(dbboxes) + .Output(dgtboxes) + .Attr("trans", trans) + .Attr("is_cross", is_cross) + .Attr("mode", mode_str) + .Run(); + return std::tie(dbboxes, dgtboxes); +} + +std::tuple +giou_backward_npu( + const Tensor& grad, + const Tensor& bboxes, + const Tensor& gtboxes, + bool trans, + bool is_cross, + int64_t mode){ + TORCH_CHECK(!trans && !is_cross && mode == 0, + "giou backward only support trans==False, ", + "is_cross==False, ", + "mode==0('iou') current version ", + "if you need to back propagation, ", + "please ensure your parameter is correct!"); + // Op need form of [n] grad + Tensor gradCp = at::squeeze(grad, 0); + Tensor dbboxes = OpPreparation::ApplyTensor(bboxes); + Tensor dgtboxes = OpPreparation::ApplyTensor(gtboxes); + + giou_backward_inner_out_npu(dbboxes, dgtboxes, gradCp, bboxes, gtboxes, trans, is_cross, mode); + return std::tie(dbboxes, dgtboxes); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/GiouKernelNpu.cpp b/src/aten/src/ATen/native/npu/GiouKernelNpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5360ee39c8eff25fba13920109c001b77921ece6 --- /dev/null +++ b/src/aten/src/ATen/native/npu/GiouKernelNpu.cpp @@ -0,0 +1,87 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector giou_output_size( + const Tensor& self, + const Tensor& gtboxes, + bool is_cross){ + SmallVector output_size; + if(is_cross){ + output_size = {gtboxes.size(0), self.size(0)}; + } else { + output_size = {1, self.size(0)}; + } + return output_size; +} + +Tensor& giou_inner_out_npu( + Tensor& result, + const Tensor& self, + const Tensor& gtboxes, + bool trans, + bool is_cross, + int64_t mode){ + auto output_size = giou_output_size(self, gtboxes, is_cross); + OpPreparation::CheckOut( + {self}, + result, + self, + output_size); + string mode_str = mode == 1 ? "iof" : "iou"; + + OpCommand cmd; + cmd.Name("GIoU") + .Input(self) + .Input(gtboxes) + .Output(result) + .Attr("trans", trans) + .Attr("is_cross", is_cross) + .Attr("mode", mode_str) + .Run(); + return result; +} + +Tensor giou_npu( + const Tensor& self, + const Tensor& gtboxes, + bool trans, + bool is_cross, + int64_t mode){ + TORCH_CHECK(!trans && !is_cross && mode == 0, + "giou backward only support trans==False, ", + "is_cross==False, ", + "mode==0('iou') current version ", + "if you need to back propagation, ", + "please ensure your parameter is correct!"); + // Op need form of [n, 4], but pass should be [4, n]; + Tensor selfCp = self.permute({1, 0}); + Tensor gtboxesCp = gtboxes.permute({1, 0}); + auto output_size = giou_output_size(selfCp, gtboxesCp, is_cross); + Tensor result = OpPreparation::ApplyTensor(selfCp, output_size); + + giou_inner_out_npu(result, selfCp, gtboxesCp, trans, is_cross, mode); + result = result.permute({1, 0}); + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/GridSamplerKernelNpu.cpp b/src/aten/src/ATen/native/npu/GridSamplerKernelNpu.cpp index bd98c6d8f10c03224ca0ce36ec9d8507326ecd6f..57bfa62db880278ba44b2de265bbc70ca4ea1f85 100644 --- a/src/aten/src/ATen/native/npu/GridSamplerKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/GridSamplerKernelNpu.cpp @@ -1,47 +1,47 @@ -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor grid_sampler_npu(const Tensor& self, const Tensor& grid, -int64_t interpolation_mode, int64_t padding_mode, bool align_corners) { - Tensor formatCastOfSelf = self.npu_format_cast(ACL_FORMAT_ND); - Tensor formatCastOfGrid = grid.npu_format_cast(ACL_FORMAT_ND); - if (formatCastOfSelf.scalar_type() == ScalarType::Half) { - formatCastOfSelf = formatCastOfSelf.npu_dtype_cast(ScalarType::Float); - } - if (formatCastOfGrid.scalar_type() == ScalarType::Half) { - formatCastOfGrid = formatCastOfGrid.npu_dtype_cast(ScalarType::Float); - } - - // calculate the output size - SmallVector outputSize = {formatCastOfSelf.size(0), - formatCastOfSelf.size(1), - formatCastOfGrid.size(1), - formatCastOfGrid.size(2)}; - - // construct the output tensor of the NPU - Tensor result = at::empty_with_format( - outputSize, formatCastOfSelf.options(), ACL_FORMAT_ND); - - // calculate the output result of the NPU - OpCommand cmd; - cmd.Name("GridSampler2D") - .Input(formatCastOfSelf) - .Input(formatCastOfGrid) - .Output(result) - .Attr("interpolation_mode", interpolation_mode) - .Attr("padding_mode", padding_mode) - .Attr("align_corners", align_corners) - .Run(); - - if (result.scalar_type() != self.scalar_type()) { - result = result.npu_dtype_cast(ScalarType::Half); - } - - return result; -} -} // namespace native -} // namespace at +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor grid_sampler_npu(const Tensor& self, const Tensor& grid, +int64_t interpolation_mode, int64_t padding_mode, bool align_corners) { + Tensor formatCastOfSelf = self.npu_format_cast(ACL_FORMAT_ND); + Tensor formatCastOfGrid = grid.npu_format_cast(ACL_FORMAT_ND); + if (formatCastOfSelf.scalar_type() == ScalarType::Half) { + formatCastOfSelf = formatCastOfSelf.npu_dtype_cast(ScalarType::Float); + } + if (formatCastOfGrid.scalar_type() == ScalarType::Half) { + formatCastOfGrid = formatCastOfGrid.npu_dtype_cast(ScalarType::Float); + } + + // calculate the output size + SmallVector outputSize = {formatCastOfSelf.size(0), + formatCastOfSelf.size(1), + formatCastOfGrid.size(1), + formatCastOfGrid.size(2)}; + + // construct the output tensor of the NPU + Tensor result = at::empty_with_format( + outputSize, formatCastOfSelf.options(), ACL_FORMAT_ND); + + // calculate the output result of the NPU + OpCommand cmd; + cmd.Name("GridSampler2D") + .Input(formatCastOfSelf) + .Input(formatCastOfGrid) + .Output(result) + .Attr("interpolation_mode", interpolation_mode) + .Attr("padding_mode", padding_mode) + .Attr("align_corners", align_corners) + .Run(); + + if (result.scalar_type() != self.scalar_type()) { + result = result.npu_dtype_cast(ScalarType::Half); + } + + return result; +} +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/GruBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/GruBackwardKernelNpu.cpp index 6084597d4920e8c6a9946482323daa59c7ff9ba4..d6ab34e8b04744dfc128c02e889bfb505a37bd00 100644 --- a/src/aten/src/ATen/native/npu/GruBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/GruBackwardKernelNpu.cpp @@ -1,96 +1,96 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -std::tuple gru_backward_npu( - const Tensor& grady, - const Tensor& gradh, - const Tensor& input, - const Tensor& weight_input, - const Tensor& weight_hidden, - const Tensor& bias_input, - const Tensor& bias_hidden, - const Tensor& seq_length, - const Tensor& init_h, - const Tensor& output_y, - const Tensor& output_h, - const Tensor& output_updata, - const Tensor& output_reset, - const Tensor& output_new, - const Tensor& hidden_new) { - - Tensor inh = at::squeeze(init_h, 0); - auto grad_y = - grady.defined() ? grady : OpPreparation::ApplyTensorWithFormat(output_y.sizes(), output_y.options(), ACL_FORMAT_FRACTAL_NZ).mul(0); - auto grad_h = - gradh.defined() ? gradh[input.size(0)-1] : OpPreparation::ApplyTensorWithFormat(inh.sizes(), output_h.options(), ACL_FORMAT_FRACTAL_NZ).mul(0); - - Tensor mask = at::zeros({}, input.options().dtype(kByte)); // uint8 - Tensor seq_lengths = at::zeros({}, input.options()); - - int64_t npu_format = ACL_FORMAT_ND; - - Tensor grad_w_input = OpPreparation::ApplyTensorWithFormat(weight_input.sizes(), input.options(), npu_format); - Tensor grad_w_hidden = OpPreparation::ApplyTensorWithFormat(weight_hidden.sizes(), input.options(), npu_format); - Tensor grad_x = OpPreparation::ApplyTensorWithFormat(input.sizes(), input.options(), npu_format); - Tensor grad_b_input = OpPreparation::ApplyTensorWithFormat(bias_input.sizes(), input.options(), npu_format); - Tensor grad_b_hidden = OpPreparation::ApplyTensorWithFormat(bias_hidden.sizes(), input.options(), npu_format); - Tensor grad_h_prev = OpPreparation::ApplyTensorWithFormat(init_h.sizes(), input.options(), npu_format); - - OpCommand cmd; - cmd.Name("DynamicGRUV2Grad") - .Input(input) - .Input(weight_input) - .Input(weight_hidden) - .Input(output_y) - .Input(inh) - .Input(output_h) - .Input(grad_y) - .Input(grad_h) - .Input(output_updata) - .Input(output_reset) - .Input(output_new) - .Input(hidden_new) - .Input(seq_lengths) - .Input(mask) - .Output(grad_w_input) - .Output(grad_w_hidden) - .Output(grad_b_input) - .Output(grad_b_hidden) - .Output(grad_x) - .Output(grad_h_prev) - .Attr("direction", (string) "UNIDIRECTIONAL") - .Attr("cell_depth", (int64_t)1) - .Attr("keep_prob", (float)1.0) - .Attr("cell_clip", (float)-1.0) - .Attr("num_proj", (int64_t)0) - .Attr("time_major", (bool)true) - .Attr("bias_type", (string) "no_bias") - .Attr("gate_order", (string) "rzh") - .Attr("reset_after", (bool)true) - .Run(); - - return std::tuple { - grad_w_input, grad_w_hidden, grad_x, grad_b_input, grad_b_hidden, grad_h_prev - }; -} - -} // namespace native +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +std::tuple gru_backward_npu( + const Tensor& grady, + const Tensor& gradh, + const Tensor& input, + const Tensor& weight_input, + const Tensor& weight_hidden, + const Tensor& bias_input, + const Tensor& bias_hidden, + const Tensor& seq_length, + const Tensor& init_h, + const Tensor& output_y, + const Tensor& output_h, + const Tensor& output_updata, + const Tensor& output_reset, + const Tensor& output_new, + const Tensor& hidden_new) { + + Tensor inh = at::squeeze(init_h, 0); + auto grad_y = + grady.defined() ? grady : OpPreparation::ApplyTensorWithFormat(output_y.sizes(), output_y.options(), ACL_FORMAT_FRACTAL_NZ).mul(0); + auto grad_h = + gradh.defined() ? gradh[input.size(0)-1] : OpPreparation::ApplyTensorWithFormat(inh.sizes(), output_h.options(), ACL_FORMAT_FRACTAL_NZ).mul(0); + + Tensor mask = at::zeros({}, input.options().dtype(kByte)); // uint8 + Tensor seq_lengths = at::zeros({}, input.options()); + + int64_t npu_format = ACL_FORMAT_ND; + + Tensor grad_w_input = OpPreparation::ApplyTensorWithFormat(weight_input.sizes(), input.options(), npu_format); + Tensor grad_w_hidden = OpPreparation::ApplyTensorWithFormat(weight_hidden.sizes(), input.options(), npu_format); + Tensor grad_x = OpPreparation::ApplyTensorWithFormat(input.sizes(), input.options(), npu_format); + Tensor grad_b_input = OpPreparation::ApplyTensorWithFormat(bias_input.sizes(), input.options(), npu_format); + Tensor grad_b_hidden = OpPreparation::ApplyTensorWithFormat(bias_hidden.sizes(), input.options(), npu_format); + Tensor grad_h_prev = OpPreparation::ApplyTensorWithFormat(init_h.sizes(), input.options(), npu_format); + + OpCommand cmd; + cmd.Name("DynamicGRUV2Grad") + .Input(input) + .Input(weight_input) + .Input(weight_hidden) + .Input(output_y) + .Input(inh) + .Input(output_h) + .Input(grad_y) + .Input(grad_h) + .Input(output_updata) + .Input(output_reset) + .Input(output_new) + .Input(hidden_new) + .Input(seq_lengths) + .Input(mask) + .Output(grad_w_input) + .Output(grad_w_hidden) + .Output(grad_b_input) + .Output(grad_b_hidden) + .Output(grad_x) + .Output(grad_h_prev) + .Attr("direction", (string) "UNIDIRECTIONAL") + .Attr("cell_depth", (int64_t)1) + .Attr("keep_prob", (float)1.0) + .Attr("cell_clip", (float)-1.0) + .Attr("num_proj", (int64_t)0) + .Attr("time_major", (bool)true) + .Attr("bias_type", (string) "no_bias") + .Attr("gate_order", (string) "rzh") + .Attr("reset_after", (bool)true) + .Run(); + + return std::tuple { + grad_w_input, grad_w_hidden, grad_x, grad_b_input, grad_b_hidden, grad_h_prev + }; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/GruKernelNpu.cpp b/src/aten/src/ATen/native/npu/GruKernelNpu.cpp index 7a67a4aaec1558bc80d13790f61ac4f32f031ae7..ebbdd5474f6a44e80195b241e9ee61c5484a2c73 100644 --- a/src/aten/src/ATen/native/npu/GruKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/GruKernelNpu.cpp @@ -1,147 +1,147 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -tuple gru_npu( - const Tensor& input, - const Tensor& hx, - const Tensor& weight_input, - const Tensor& weight_hidden, - const Tensor& bias_input, - const Tensor& bias_hidden, - const Tensor& seq_length, - bool has_biases, - int64_t num_layers, - double dropout, - bool train, - bool bidirectional, - bool batch_first) { - int64_t numStep = input.size(0); - int64_t batchSize = input.size(1); - int64_t hiddenSize = bias_input.size(0) / 3; - SmallVector outputSize = {numStep, batchSize, hiddenSize}; - int64_t npu_format = ACL_FORMAT_FRACTAL_NZ; - - Tensor output_y = OpPreparation::ApplyTensorWithFormat( - outputSize, - bias_input.options(), - npu_format); - Tensor output_h = OpPreparation::ApplyTensorWithFormat( - outputSize, - bias_input.options(), - ACL_FORMAT_ND); // 后续需要做slice和unsqueeze,BaseFormat方便 - Tensor output_updata = OpPreparation::ApplyTensorWithFormat( - outputSize, - bias_input.options(), - npu_format); - Tensor output_reset = OpPreparation::ApplyTensorWithFormat( - outputSize, - bias_input.options(), - npu_format); - Tensor output_new = OpPreparation::ApplyTensorWithFormat( - outputSize, - bias_input.options(), - npu_format); - Tensor hidden_new = OpPreparation::ApplyTensorWithFormat( - outputSize, - bias_input.options(), - npu_format); - - - OpCommand cmd; - cmd.Name("DynamicGRUV2") - .Input(input) - .Input(weight_input) - .Input(weight_hidden) - .Input(bias_input) - .Input(bias_hidden) - .Input() - .Input(hx) - .Output(output_y) - .Output(output_h) - .Output(output_updata) - .Output(output_reset) - .Output(output_new) - .Output(hidden_new) - .Attr("direction", (string)"UNIDIRECTIONAL") - .Attr("cell_depth", (int64_t)1) - .Attr("keep_prob", (float)1.0) - .Attr("cell_clip", (float)-1.0) - .Attr("num_proj", (int64_t)0) - .Attr("time_major", true) - .Attr("activation", (string)"tanh") - .Attr("gate_order", (string)"rzh") - .Attr("reset_after", true) - .Attr("is_training", true) - .Run(); - - return std::tuple( - output_y, output_h, output_updata, output_reset, output_new, hidden_new); -} - -tuple gru_npu_( - const Tensor& input, - const Tensor& hx, - TensorList params, - bool has_biases, - int64_t num_layers, - double dropout, - bool train, - bool bidirectional, - bool batch_first) { - // get weight fp16 - Tensor weight_input = params[0].t(); - Tensor weight_hidden = params[1].t(); - - // get bias fp16 / fp32 - Tensor bias_input; - Tensor bias_hidden; - if (has_biases) { - bias_input = params[2].to(input.dtype()); - bias_hidden = params[3].to(input.dtype()); - } else { - bias_input = OpPreparation::ApplyTensorWithFormat(weight_input.size(0), input.options(), ACL_FORMAT_FRACTAL_NZ).mul(0); - bias_hidden = OpPreparation::ApplyTensorWithFormat(weight_hidden.size(0), input.options(), ACL_FORMAT_FRACTAL_NZ).mul(0); - } - - Tensor seq_length = OpPreparation::ApplyTensorWithFormat({}, input.options(), ACL_FORMAT_ND); - - auto results = at::npu_gru( - input, - hx, - weight_input, - weight_hidden, - bias_input, - bias_hidden, - seq_length, - has_biases, - num_layers, - dropout, - train, - bidirectional, - batch_first); - int64_t numStep = input.size(0); - Tensor output_hy = at::unsqueeze(std::get<1>(results)[numStep - 1], 0); - - return std::tuple(std::get<0>(results), output_hy); -} - -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +tuple gru_npu( + const Tensor& input, + const Tensor& hx, + const Tensor& weight_input, + const Tensor& weight_hidden, + const Tensor& bias_input, + const Tensor& bias_hidden, + const Tensor& seq_length, + bool has_biases, + int64_t num_layers, + double dropout, + bool train, + bool bidirectional, + bool batch_first) { + int64_t numStep = input.size(0); + int64_t batchSize = input.size(1); + int64_t hiddenSize = bias_input.size(0) / 3; + SmallVector outputSize = {numStep, batchSize, hiddenSize}; + int64_t npu_format = ACL_FORMAT_FRACTAL_NZ; + + Tensor output_y = OpPreparation::ApplyTensorWithFormat( + outputSize, + bias_input.options(), + npu_format); + Tensor output_h = OpPreparation::ApplyTensorWithFormat( + outputSize, + bias_input.options(), + ACL_FORMAT_ND); // 后续需要做slice和unsqueeze,BaseFormat方便 + Tensor output_updata = OpPreparation::ApplyTensorWithFormat( + outputSize, + bias_input.options(), + npu_format); + Tensor output_reset = OpPreparation::ApplyTensorWithFormat( + outputSize, + bias_input.options(), + npu_format); + Tensor output_new = OpPreparation::ApplyTensorWithFormat( + outputSize, + bias_input.options(), + npu_format); + Tensor hidden_new = OpPreparation::ApplyTensorWithFormat( + outputSize, + bias_input.options(), + npu_format); + + + OpCommand cmd; + cmd.Name("DynamicGRUV2") + .Input(input) + .Input(weight_input) + .Input(weight_hidden) + .Input(bias_input) + .Input(bias_hidden) + .Input() + .Input(hx) + .Output(output_y) + .Output(output_h) + .Output(output_updata) + .Output(output_reset) + .Output(output_new) + .Output(hidden_new) + .Attr("direction", (string)"UNIDIRECTIONAL") + .Attr("cell_depth", (int64_t)1) + .Attr("keep_prob", (float)1.0) + .Attr("cell_clip", (float)-1.0) + .Attr("num_proj", (int64_t)0) + .Attr("time_major", true) + .Attr("activation", (string)"tanh") + .Attr("gate_order", (string)"rzh") + .Attr("reset_after", true) + .Attr("is_training", true) + .Run(); + + return std::tuple( + output_y, output_h, output_updata, output_reset, output_new, hidden_new); +} + +tuple gru_npu_( + const Tensor& input, + const Tensor& hx, + TensorList params, + bool has_biases, + int64_t num_layers, + double dropout, + bool train, + bool bidirectional, + bool batch_first) { + // get weight fp16 + Tensor weight_input = params[0].t(); + Tensor weight_hidden = params[1].t(); + + // get bias fp16 / fp32 + Tensor bias_input; + Tensor bias_hidden; + if (has_biases) { + bias_input = params[2].to(input.dtype()); + bias_hidden = params[3].to(input.dtype()); + } else { + bias_input = OpPreparation::ApplyTensorWithFormat(weight_input.size(0), input.options(), ACL_FORMAT_FRACTAL_NZ).mul(0); + bias_hidden = OpPreparation::ApplyTensorWithFormat(weight_hidden.size(0), input.options(), ACL_FORMAT_FRACTAL_NZ).mul(0); + } + + Tensor seq_length = OpPreparation::ApplyTensorWithFormat({}, input.options(), ACL_FORMAT_ND); + + auto results = at::npu_gru( + input, + hx, + weight_input, + weight_hidden, + bias_input, + bias_hidden, + seq_length, + has_biases, + num_layers, + dropout, + train, + bidirectional, + batch_first); + int64_t numStep = input.size(0); + Tensor output_hy = at::unsqueeze(std::get<1>(results)[numStep - 1], 0); + + return std::tuple(std::get<0>(results), output_hy); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/HardShrinkBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/HardShrinkBackwardKernelNpu.cpp index 5a2c8646e3a46e71c464b3b44826d03a9a66b998..74ea38b0f3584f17eb9b66c336273a4945589d27 100644 --- a/src/aten/src/ATen/native/npu/HardShrinkBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/HardShrinkBackwardKernelNpu.cpp @@ -1,52 +1,52 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -namespace { - -Tensor& hardshrink_backward_nocheck( - Tensor& grad_input, - const Tensor& grad_output, - const Tensor& self, - Scalar lambd) { - OpCommand cmd; - cmd.Name("HardShrinkGrad") - .Input(grad_output) - .Input(self) - .Attr("lambd", lambd) - .Output(grad_input) - .Run(); - - return grad_input; -} -} // namespace - -Tensor hardshrink_backward_npu( - const Tensor& grad_output, - const Tensor& self, - Scalar lambd) { - Tensor grad_input = OpPreparation::ApplyTensor(self); - // calculate the output result of the NPU - hardshrink_backward_nocheck(grad_input, grad_output, self, lambd); - - return grad_input; -} - -} +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +namespace { + +Tensor& hardshrink_backward_nocheck( + Tensor& grad_input, + const Tensor& grad_output, + const Tensor& self, + Scalar lambd) { + OpCommand cmd; + cmd.Name("HardShrinkGrad") + .Input(grad_output) + .Input(self) + .Attr("lambd", lambd) + .Output(grad_input) + .Run(); + + return grad_input; +} +} // namespace + +Tensor hardshrink_backward_npu( + const Tensor& grad_output, + const Tensor& self, + Scalar lambd) { + Tensor grad_input = OpPreparation::ApplyTensor(self); + // calculate the output result of the NPU + hardshrink_backward_nocheck(grad_input, grad_output, self, lambd); + + return grad_input; +} + +} } \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/HardShrinkKernelNpu.cpp b/src/aten/src/ATen/native/npu/HardShrinkKernelNpu.cpp index cfd43a8c8618d087d57c7c748797d780a7e58b45..11204519d0819ddbf1b8e6771d32d7293d6b75a5 100644 --- a/src/aten/src/ATen/native/npu/HardShrinkKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/HardShrinkKernelNpu.cpp @@ -1,43 +1,43 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -namespace { - -Tensor& hardshrink_nocheck(Tensor& result, const Tensor& self, Scalar lambd) { - OpCommand cmd; - cmd.Name("HardShrink") - .Input(self) - .Attr("lambd", lambd) - .Output(result).Run(); - - return result; -} -} // namespace - -Tensor hardshrink_npu(const Tensor& self, Scalar lambd) { - // Tensor outputTensor = logical_or_dest_output(self, other); - Tensor result = OpPreparation::ApplyTensor(self); - hardshrink_nocheck(result, self, lambd); - - return result; -} - -} // namespace native +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +namespace { + +Tensor& hardshrink_nocheck(Tensor& result, const Tensor& self, Scalar lambd) { + OpCommand cmd; + cmd.Name("HardShrink") + .Input(self) + .Attr("lambd", lambd) + .Output(result).Run(); + + return result; +} +} // namespace + +Tensor hardshrink_npu(const Tensor& self, Scalar lambd) { + // Tensor outputTensor = logical_or_dest_output(self, other); + Tensor result = OpPreparation::ApplyTensor(self); + hardshrink_nocheck(result, self, lambd); + + return result; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/Im2colBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/Im2colBackwardKernelNpu.cpp index c0a96311251e930e14be19ea74610e6a8f22671a..036d3c3e748ae38000eae195605b9df4b973d3b8 100644 --- a/src/aten/src/ATen/native/npu/Im2colBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/Im2colBackwardKernelNpu.cpp @@ -1,121 +1,121 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& im2col_backward_out_npu_nocheck( - Tensor& grad_input, - const Tensor& grad_output, - IntArrayRef input_size, - IntArrayRef kernel_size, - IntArrayRef dilation, - IntArrayRef padding, - IntArrayRef stride) { - Tensor gradOutput = grad_output; - gradOutput = gradOutput.view({ - grad_output.size(0), - grad_output.size(1) / (kernel_size[0] * kernel_size[1]), - kernel_size[0] * kernel_size[1], - grad_output.size(2)}); - - SmallVector inputSize = {input_size[0], input_size[1]}; - - SmallVector kernelSize = {kernel_size[0], kernel_size[1]}; - SmallVector dilations = {dilation[0], dilation[1]}; - SmallVector paddings = {padding[0], padding[1]}; - SmallVector stridesSize = {stride[0], stride[1]}; - - OpCommand cmd; - cmd.Name("Col2im") - .Input(gradOutput) - .Input(inputSize, at::kInt) - .Output(grad_input) - .Attr("kernel_size", kernelSize) - .Attr("dilation", dilations) - .Attr("padding", paddings) - .Attr("stride", stridesSize) - .Run(); - - return grad_input; -} - -Tensor& im2col_backward_out_npu( - Tensor& grad_input, - const Tensor& grad_output, - IntArrayRef input_size, - IntArrayRef kernel_size, - IntArrayRef dilation, - IntArrayRef padding, - IntArrayRef stride) { - SmallVector outputSize = { - grad_output.size(0), - grad_output.size(1) / (kernel_size[0] * kernel_size[1]), - input_size[0], - input_size[1]}; - - OpPreparation::CheckOut( - {grad_output}, - grad_input, - grad_output, - outputSize); - - OpPipeWithDefinedOut pipe; - return pipe.CheckMemory({grad_output}, {grad_input}) - .Func([&grad_output, &input_size, &kernel_size, &dilation, &padding, &stride] - (Tensor& grad_input) - {im2col_backward_out_npu_nocheck( - grad_input, - grad_output, - input_size, - kernel_size, - dilation, - padding, - stride);}) - .Call(grad_input); -} - -Tensor im2col_backward_npu( - const Tensor& grad_output, - IntArrayRef input_size, - IntArrayRef kernel_size, - IntArrayRef dilation, - IntArrayRef padding, - IntArrayRef stride) { - // calculate the output size - SmallVector outputSize = { - grad_output.size(0), - grad_output.size(1) / (kernel_size[0] * kernel_size[1]), - input_size[0], - input_size[1]}; - - // construct the input tensor of the NPU - Tensor grad_input = OpPreparation::ApplyTensor(grad_output, outputSize); - - im2col_backward_out_npu_nocheck( - grad_input, - grad_output, - input_size, - kernel_size, - dilation, - padding, - stride); - - return grad_input; -} -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& im2col_backward_out_npu_nocheck( + Tensor& grad_input, + const Tensor& grad_output, + IntArrayRef input_size, + IntArrayRef kernel_size, + IntArrayRef dilation, + IntArrayRef padding, + IntArrayRef stride) { + Tensor gradOutput = grad_output; + gradOutput = gradOutput.view({ + grad_output.size(0), + grad_output.size(1) / (kernel_size[0] * kernel_size[1]), + kernel_size[0] * kernel_size[1], + grad_output.size(2)}); + + SmallVector inputSize = {input_size[0], input_size[1]}; + + SmallVector kernelSize = {kernel_size[0], kernel_size[1]}; + SmallVector dilations = {dilation[0], dilation[1]}; + SmallVector paddings = {padding[0], padding[1]}; + SmallVector stridesSize = {stride[0], stride[1]}; + + OpCommand cmd; + cmd.Name("Col2im") + .Input(gradOutput) + .Input(inputSize, at::kInt) + .Output(grad_input) + .Attr("kernel_size", kernelSize) + .Attr("dilation", dilations) + .Attr("padding", paddings) + .Attr("stride", stridesSize) + .Run(); + + return grad_input; +} + +Tensor& im2col_backward_out_npu( + Tensor& grad_input, + const Tensor& grad_output, + IntArrayRef input_size, + IntArrayRef kernel_size, + IntArrayRef dilation, + IntArrayRef padding, + IntArrayRef stride) { + SmallVector outputSize = { + grad_output.size(0), + grad_output.size(1) / (kernel_size[0] * kernel_size[1]), + input_size[0], + input_size[1]}; + + OpPreparation::CheckOut( + {grad_output}, + grad_input, + grad_output, + outputSize); + + OpPipeWithDefinedOut pipe; + return pipe.CheckMemory({grad_output}, {grad_input}) + .Func([&grad_output, &input_size, &kernel_size, &dilation, &padding, &stride] + (Tensor& grad_input) + {im2col_backward_out_npu_nocheck( + grad_input, + grad_output, + input_size, + kernel_size, + dilation, + padding, + stride);}) + .Call(grad_input); +} + +Tensor im2col_backward_npu( + const Tensor& grad_output, + IntArrayRef input_size, + IntArrayRef kernel_size, + IntArrayRef dilation, + IntArrayRef padding, + IntArrayRef stride) { + // calculate the output size + SmallVector outputSize = { + grad_output.size(0), + grad_output.size(1) / (kernel_size[0] * kernel_size[1]), + input_size[0], + input_size[1]}; + + // construct the input tensor of the NPU + Tensor grad_input = OpPreparation::ApplyTensor(grad_output, outputSize); + + im2col_backward_out_npu_nocheck( + grad_input, + grad_output, + input_size, + kernel_size, + dilation, + padding, + stride); + + return grad_input; +} +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp b/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp index 9cbbf8f8416d6117e5b701cab3bb973e1c8b3fc8..6814d60261599c033c0b6f16b62965e660799d96 100644 --- a/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp @@ -26,6 +26,9 @@ Tensor& index_put_nocheck( const TensorList& indices, const Tensor& value, bool accumulate) { + if (value.numel() == 0) { + return result; + } // masks corresponds to indices. 0 indicates undefined tensor. SmallVector masks; std::vector allDefinedIndices; diff --git a/src/aten/src/ATen/native/npu/InverseKernelNpu.cpp b/src/aten/src/ATen/native/npu/InverseKernelNpu.cpp index c25c31599b68bcf1ea88fc5227561448cf10f19f..61e7cada9aa0f6b2d207ec621193f6472530a97c 100644 --- a/src/aten/src/ATen/native/npu/InverseKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/InverseKernelNpu.cpp @@ -1,48 +1,48 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& inverse_out_npu( - Tensor& result, - const Tensor& self) { - - OpCommand cmd; - cmd.Name("MatrixInverse") - .Input(self) - .Output(result) - .Attr("adjoint", false) - .Run(); - - return result; -} - -Tensor inverse_npu(const Tensor& self) { - Tensor result = OpPreparation::ApplyTensor(self); - - inverse_out_npu(result, self); - - return result; -} - - -} // namespace native -} // namespace at - +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& inverse_out_npu( + Tensor& result, + const Tensor& self) { + + OpCommand cmd; + cmd.Name("MatrixInverse") + .Input(self) + .Output(result) + .Attr("adjoint", false) + .Run(); + + return result; +} + +Tensor inverse_npu(const Tensor& self) { + Tensor result = OpPreparation::ApplyTensor(self); + + inverse_out_npu(result, self); + + return result; +} + + +} // namespace native +} // namespace at + diff --git a/src/aten/src/ATen/native/npu/IouKernelNpu.cpp b/src/aten/src/ATen/native/npu/IouKernelNpu.cpp index 306022e5193be6acc203e0ce1e4c82c3846f2379..dd2ff4ac09ab63621b16d1b0b7a59ed87317a60a 100644 --- a/src/aten/src/ATen/native/npu/IouKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/IouKernelNpu.cpp @@ -1,93 +1,93 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector iou_npu_input( - const SmallVector& inputTensor) { - return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); -} - -SmallVector iou_npu_output( - const SmallVector& outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector iou_npu_attr(int64_t mode) { - string modeStr = "iou"; - if (mode == 1) { - modeStr = "iof"; - } - NPUAttrDesc npuAttrIou = NPUAttrDesc("mode", modeStr); - SmallVector attrs = {npuAttrIou}; - return attrs; -} - -Tensor& iou_out_npu( - Tensor& overlap, - const Tensor& bboxes, - const Tensor& gtboxes, - int64_t mode) { - // constructs the input and output NPUTensorDesc - auto inputs = iou_npu_input({bboxes, gtboxes}); - auto outputs = iou_npu_output({overlap}); - - // constructs the attr of the NPUAttrDesc - auto attrs = iou_npu_attr(mode); - - // executing the NPU operator - CalcuOpUtil::execute_npu_operate("Iou", inputs, outputs, attrs); - - // return std::make_tuple(boxes, idx, mask); - return overlap; -} - -Tensor iou_npu( - const Tensor& bboxes, - const Tensor& gtboxes, - int64_t mode) { - // calculate the output size - auto outputSize = iou_npu_output_size(bboxes, gtboxes); - - Tensor bboxesFP16 = bboxes; - if (bboxes.scalar_type() != at::ScalarType::Half) { - bboxesFP16 = bboxes.to(at::kHalf); - } - Tensor gtboxesFP16 = gtboxes; - if (gtboxes.scalar_type() != at::ScalarType::Half) { - gtboxesFP16 = gtboxes.to(at::kHalf); - } - - // construct the output tensor of the NPU - Tensor overlap = at::empty_with_format(outputSize, bboxesFP16.options(), CalcuOpUtil::get_tensor_npu_format(bboxes)); - - iou_out_npu(overlap, bboxesFP16, gtboxesFP16, mode); - - if (overlap.scalar_type() != bboxes.scalar_type()) { - overlap = overlap.to(bboxes.scalar_type()); - } - - return overlap; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/NpuUtils.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector iou_npu_input( + const SmallVector& inputTensor) { + return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); +} + +SmallVector iou_npu_output( + const SmallVector& outputTensor) { + return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); +} + +SmallVector iou_npu_attr(int64_t mode) { + string modeStr = "iou"; + if (mode == 1) { + modeStr = "iof"; + } + NPUAttrDesc npuAttrIou = NPUAttrDesc("mode", modeStr); + SmallVector attrs = {npuAttrIou}; + return attrs; +} + +Tensor& iou_out_npu( + Tensor& overlap, + const Tensor& bboxes, + const Tensor& gtboxes, + int64_t mode) { + // constructs the input and output NPUTensorDesc + auto inputs = iou_npu_input({bboxes, gtboxes}); + auto outputs = iou_npu_output({overlap}); + + // constructs the attr of the NPUAttrDesc + auto attrs = iou_npu_attr(mode); + + // executing the NPU operator + CalcuOpUtil::execute_npu_operate("Iou", inputs, outputs, attrs); + + // return std::make_tuple(boxes, idx, mask); + return overlap; +} + +Tensor iou_npu( + const Tensor& bboxes, + const Tensor& gtboxes, + int64_t mode) { + // calculate the output size + auto outputSize = iou_npu_output_size(bboxes, gtboxes); + + Tensor bboxesFP16 = bboxes; + if (bboxes.scalar_type() != at::ScalarType::Half) { + bboxesFP16 = bboxes.to(at::kHalf); + } + Tensor gtboxesFP16 = gtboxes; + if (gtboxes.scalar_type() != at::ScalarType::Half) { + gtboxesFP16 = gtboxes.to(at::kHalf); + } + + // construct the output tensor of the NPU + Tensor overlap = at::empty_with_format(outputSize, bboxesFP16.options(), CalcuOpUtil::get_tensor_npu_format(bboxes)); + + iou_out_npu(overlap, bboxesFP16, gtboxesFP16, mode); + + if (overlap.scalar_type() != bboxes.scalar_type()) { + overlap = overlap.to(bboxes.scalar_type()); + } + + return overlap; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/IscloseKernelNpu.cpp b/src/aten/src/ATen/native/npu/IscloseKernelNpu.cpp index 5a5ebb2d7ba71a504041c40cc78be875b3bab23a..6c783adfa79b8df294edbcaaac98137b0725bb6f 100644 --- a/src/aten/src/ATen/native/npu/IscloseKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/IscloseKernelNpu.cpp @@ -1,69 +1,69 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -namespace { - -Tensor& isclose_nocheck( - Tensor& result, - const Tensor& self, - const Tensor& other, - double rtol, - double atol, - bool equal_nan) { - auto rtol1 = static_cast(rtol); - auto atol1 = static_cast(atol); - - OpCommand cmd; - cmd.Name("IsClose") - .Input(self) - .Input(other) - .Attr("rtol", rtol1) - .Attr("atol", atol1) - .Attr("equal_nan", equal_nan) - .Output(result) - .Run(); - - return result; -} -} // namespace - -Tensor isclose_npu( - const Tensor& self, - const Tensor& other, - double rtol, - double atol, - bool equal_nan) { - - TORCH_CHECK(self.scalar_type() == other.scalar_type(), self.scalar_type(), " did not match ", other.scalar_type()); - - //calculate the output size - auto outputSize = input_same_output_size(self); - - //construct the output tensor of the NPU - Tensor result = OpPreparation::ApplyTensor(outputSize, self.options().dtype(kBool), self); - // constructs the attr of the NPUAttrDesc - result = isclose_nocheck(result, self, other, rtol, atol, equal_nan); - - return result; -} - -}} // namespace at::native - - +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +namespace { + +Tensor& isclose_nocheck( + Tensor& result, + const Tensor& self, + const Tensor& other, + double rtol, + double atol, + bool equal_nan) { + auto rtol1 = static_cast(rtol); + auto atol1 = static_cast(atol); + + OpCommand cmd; + cmd.Name("IsClose") + .Input(self) + .Input(other) + .Attr("rtol", rtol1) + .Attr("atol", atol1) + .Attr("equal_nan", equal_nan) + .Output(result) + .Run(); + + return result; +} +} // namespace + +Tensor isclose_npu( + const Tensor& self, + const Tensor& other, + double rtol, + double atol, + bool equal_nan) { + + TORCH_CHECK(self.scalar_type() == other.scalar_type(), self.scalar_type(), " did not match ", other.scalar_type()); + + //calculate the output size + auto outputSize = input_same_output_size(self); + + //construct the output tensor of the NPU + Tensor result = OpPreparation::ApplyTensor(outputSize, self.options().dtype(kBool), self); + // constructs the attr of the NPUAttrDesc + result = isclose_nocheck(result, self, other, rtol, atol, equal_nan); + + return result; +} + +}} // namespace at::native + + diff --git a/src/aten/src/ATen/native/npu/KlDivKernelNpu.cpp b/src/aten/src/ATen/native/npu/KlDivKernelNpu.cpp index 6daf5011c95e661a56110eb93665c38d4269ba76..1799ba71e83fc841d4a0ed62eadd0e001e966487 100644 --- a/src/aten/src/ATen/native/npu/KlDivKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/KlDivKernelNpu.cpp @@ -1,57 +1,57 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor kl_div_npu( - const Tensor& self, - const Tensor& target, - int64_t reduction) { - TORCH_CHECK(reduction != Reduction::None, - "Reduction of None has not been supported at present."); - - Tensor result = at::empty_with_format( - {}, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - - string reductionStr; - if (reduction == Reduction::Mean) { - reductionStr = "batchmean"; - } else if (reduction == Reduction::Sum) { - reductionStr = "sum"; - } - - OpCommand cmd; - cmd.Name("KLDiv") - .Input(self) - .Input(target) - .Output(result) - .Attr("reduction", reductionStr) - .Run(); - - if (reduction == Reduction::Mean) { - auto inputShape = self.sizes(); - int batchSquareSize = prod_intlist(inputShape) / inputShape[0]; - result.div_(batchSquareSize); - } - - return result; -} - -} // namespace native +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor kl_div_npu( + const Tensor& self, + const Tensor& target, + int64_t reduction) { + TORCH_CHECK(reduction != Reduction::None, + "Reduction of None has not been supported at present."); + + Tensor result = at::empty_with_format( + {}, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); + + string reductionStr; + if (reduction == Reduction::Mean) { + reductionStr = "batchmean"; + } else if (reduction == Reduction::Sum) { + reductionStr = "sum"; + } + + OpCommand cmd; + cmd.Name("KLDiv") + .Input(self) + .Input(target) + .Output(result) + .Attr("reduction", reductionStr) + .Run(); + + if (reduction == Reduction::Mean) { + auto inputShape = self.sizes(); + int batchSquareSize = prod_intlist(inputShape) / inputShape[0]; + result.div_(batchSquareSize); + } + + return result; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/LinearBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/LinearBackwardKernelNpu.cpp index 492007773216401faeafdadc57ea23b482ea4e59..6e5779920c17c23913f25a2f79389b30c3dc5ba5 100644 --- a/src/aten/src/ATen/native/npu/LinearBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/LinearBackwardKernelNpu.cpp @@ -1,62 +1,62 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor linear_backward_out_npu( - Tensor& result, - const Tensor& input, - const Tensor& weight, - bool transpose_x1, - bool transpose_x2) { - int64_t offset_x = 0; - OpCommand cmd; - cmd.Name("MatMulV2") - .Input(input) - .Input(weight) - .Output(result) - .Attr("transpose_x1", transpose_x1) - .Attr("transpose_x2", transpose_x2) - .Attr("offset_x", offset_x) - .Run(); - return result; -} - -tuple linear_backward_npu( - const Tensor& grad, - const Tensor& input, - const Tensor& weight) { - SmallVector inputGradOutputSize = { - grad.size(0), - weight.size(1)}; - SmallVector weightGradOutputSize = { - grad.size(1), - input.size(1)}; - Tensor inputGrad = OpPreparation::ApplyTensor(input, inputGradOutputSize); - Tensor weightGrad = OpPreparation::ApplyTensor(weight, weightGradOutputSize); - - linear_backward_out_npu(inputGrad, grad, weight, false, false); - linear_backward_out_npu(weightGrad, grad, input, true, false); - - return std::tie(inputGrad, weightGrad); -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor linear_backward_out_npu( + Tensor& result, + const Tensor& input, + const Tensor& weight, + bool transpose_x1, + bool transpose_x2) { + int64_t offset_x = 0; + OpCommand cmd; + cmd.Name("MatMulV2") + .Input(input) + .Input(weight) + .Output(result) + .Attr("transpose_x1", transpose_x1) + .Attr("transpose_x2", transpose_x2) + .Attr("offset_x", offset_x) + .Run(); + return result; +} + +tuple linear_backward_npu( + const Tensor& grad, + const Tensor& input, + const Tensor& weight) { + SmallVector inputGradOutputSize = { + grad.size(0), + weight.size(1)}; + SmallVector weightGradOutputSize = { + grad.size(1), + input.size(1)}; + Tensor inputGrad = OpPreparation::ApplyTensor(input, inputGradOutputSize); + Tensor weightGrad = OpPreparation::ApplyTensor(weight, weightGradOutputSize); + + linear_backward_out_npu(inputGrad, grad, weight, false, false); + linear_backward_out_npu(weightGrad, grad, input, true, false); + + return std::tie(inputGrad, weightGrad); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/LinearKernelNpu.cpp b/src/aten/src/ATen/native/npu/LinearKernelNpu.cpp index f35c5de9fa2450586a10e89843f7d653b99207ba..fb64c66874e83cacf8bd5230b7327c55dc8de376 100644 --- a/src/aten/src/ATen/native/npu/LinearKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/LinearKernelNpu.cpp @@ -1,48 +1,48 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor linear_npu( - const Tensor& input, - const Tensor& weight, - const Tensor& bias) { - SmallVector outputSize = {input.size(0), weight.size(0)}; - Tensor output = OpPreparation::ApplyTensor(input, outputSize); - - int64_t offset_x = 0; - OpCommand cmd; - cmd.Name("MatMulV2") - .Input(input) - .Input(weight); - if (bias.defined()) { - cmd.Input(bias); - } - cmd.Output(output) - .Attr("transpose_x1", false) - .Attr("transpose_x2", true) - .Attr("offset_x", offset_x) - .Run(); - - return output; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor linear_npu( + const Tensor& input, + const Tensor& weight, + const Tensor& bias) { + SmallVector outputSize = {input.size(0), weight.size(0)}; + Tensor output = OpPreparation::ApplyTensor(input, outputSize); + + int64_t offset_x = 0; + OpCommand cmd; + cmd.Name("MatMulV2") + .Input(input) + .Input(weight); + if (bias.defined()) { + cmd.Input(bias); + } + cmd.Output(output) + .Attr("transpose_x1", false) + .Attr("transpose_x2", true) + .Attr("offset_x", offset_x) + .Run(); + + return output; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/LogSigmoidBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/LogSigmoidBackwardKernelNpu.cpp index cafa78fdf39463a96ad2ec355e28ca3f1df2d1db..a31b44bbb00d9081b31002cb86997eef8b3f62fc 100644 --- a/src/aten/src/ATen/native/npu/LogSigmoidBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/LogSigmoidBackwardKernelNpu.cpp @@ -1,80 +1,80 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -IntArrayRef log_sigmoid_backward_npu_output_size(const Tensor& grad_output) { - return input_same_output_size(grad_output); -} - -SmallVector log_sigmoid_backward_npu_input( - const SmallVector& inputTensor) { - return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); -} - -SmallVector log_sigmoid_backward_npu_output( - const SmallVector& outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector log_sigmoid_backward_npu_attr(const Tensor& self) { - SmallVector attrs = {}; - return attrs; -} - -Tensor& log_sigmoid_backward_out_npu( - Tensor& grad_input, - const Tensor& grad_output, - const Tensor& self, - const Tensor& buffer) { - // constructs the input and output NPUTensorDesc - auto inputs = log_sigmoid_backward_npu_input({grad_output, self}); - auto outputs = log_sigmoid_backward_npu_output({grad_input}); - - // constructs the attr of the NPUAttrDesc - auto attrs = log_sigmoid_backward_npu_attr(self); - - // executing the NPU operator - CalcuOpUtil::execute_npu_operate("LogSigmoidGrad", inputs, outputs, attrs); - - return grad_input; -} - -Tensor log_sigmoid_backward_npu( - const Tensor& grad_output, - const Tensor& self, - const Tensor& buffer) { - // calculate the output size - auto outputSize = log_sigmoid_backward_npu_output_size(grad_output); - - // construct the output tensor of the NPU - Tensor grad_input = at::empty_with_format( - outputSize, - grad_output.options(), - CalcuOpUtil::get_tensor_npu_format(grad_output)); - - // calculate the output result of the NPU - log_sigmoid_backward_out_npu(grad_input, grad_output, self, buffer); - - return grad_input; -} - -} // namespace native +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/NpuUtils.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +IntArrayRef log_sigmoid_backward_npu_output_size(const Tensor& grad_output) { + return input_same_output_size(grad_output); +} + +SmallVector log_sigmoid_backward_npu_input( + const SmallVector& inputTensor) { + return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); +} + +SmallVector log_sigmoid_backward_npu_output( + const SmallVector& outputTensor) { + return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); +} + +SmallVector log_sigmoid_backward_npu_attr(const Tensor& self) { + SmallVector attrs = {}; + return attrs; +} + +Tensor& log_sigmoid_backward_out_npu( + Tensor& grad_input, + const Tensor& grad_output, + const Tensor& self, + const Tensor& buffer) { + // constructs the input and output NPUTensorDesc + auto inputs = log_sigmoid_backward_npu_input({grad_output, self}); + auto outputs = log_sigmoid_backward_npu_output({grad_input}); + + // constructs the attr of the NPUAttrDesc + auto attrs = log_sigmoid_backward_npu_attr(self); + + // executing the NPU operator + CalcuOpUtil::execute_npu_operate("LogSigmoidGrad", inputs, outputs, attrs); + + return grad_input; +} + +Tensor log_sigmoid_backward_npu( + const Tensor& grad_output, + const Tensor& self, + const Tensor& buffer) { + // calculate the output size + auto outputSize = log_sigmoid_backward_npu_output_size(grad_output); + + // construct the output tensor of the NPU + Tensor grad_input = at::empty_with_format( + outputSize, + grad_output.options(), + CalcuOpUtil::get_tensor_npu_format(grad_output)); + + // calculate the output result of the NPU + log_sigmoid_backward_out_npu(grad_input, grad_output, self, buffer); + + return grad_input; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/LogicalAndKernelNpu.cpp b/src/aten/src/ATen/native/npu/LogicalAndKernelNpu.cpp index a967b73e29a350be8588f4df3a8acf982666d72c..3c4d9209ad404fc83c20117b2580e5735cc5985f 100644 --- a/src/aten/src/ATen/native/npu/LogicalAndKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/LogicalAndKernelNpu.cpp @@ -1,113 +1,113 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector logical_and_npu_input( - const Tensor& self, - const Tensor& other) { - bool isSelfWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(self); - bool isOtherWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(other); - auto inputs = CalcuOpUtil::create_npu_input_tensor_desc({self, other}); - - // 't + 2' to work with any type of tensor, not just LongTensor (which is what - // integersin Python represent). - if (isSelfWrapped && (!isOtherWrapped)) { - inputs[0].scalarType = other.scalar_type(); - } else if (isOtherWrapped && (!isSelfWrapped)) { - inputs[1].scalarType = self.scalar_type(); - } - - return inputs; -} - -SmallVector logical_and_npu_output(const SmallVector& outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector logical_and_npu_attr(const Tensor& self) { - SmallVector attrs = {}; - return attrs; -} - -Tensor& logical_and_out_npu_nocheck( - Tensor& result, - const Tensor& self, - const Tensor& other) { - - //constructs the input and output NPUTensorDesc - auto inputs = logical_and_npu_input(self, other); - auto outputs = logical_and_npu_output({result}); - //constructs the attr of the NPUAttrDesc - auto attrs = logical_and_npu_attr(self); - - //executing the NPU operator - CalcuOpUtil::execute_npu_operate("LogicalAnd", inputs, outputs, attrs); - - return result; -} - -Tensor& logical_and_out_npu(Tensor& result, const Tensor& self, const Tensor& other) { - auto outputSize = broadcast_ops_npu_output_size(self, other); - OpPreparation::CheckOut( - {self}, - result, - CalcuOpUtil::get_tensor_npu_format(self), - result.scalar_type(), - outputSize); - - logical_and_out_npu_nocheck(result, self, other); - - return result; -} - -Tensor logical_and_npu(const Tensor& self, const Tensor& other) { - auto outputSize = broadcast_ops_npu_output_size(self, other); - - Tensor result = at::empty_with_format( - outputSize, - self.options(), - CalcuOpUtil::get_tensor_npu_format(self)); - - logical_and_out_npu_nocheck(result, self, other); - - return result.toType(kBool); - //return result; -} - -Tensor& logical_and_npu_(Tensor& self, const Tensor& other) { - SmallVector inputs = {self, other}; - SmallVector outputs = {self}; - CalcuOpUtil::check_memory_over_laps(inputs, outputs); - - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(self); - Tensor result = logical_and_out_npu_nocheck(contiguousSelf, contiguousSelf, other); - NpuUtils::format_fresh_view(self, result); - } else { - logical_and_out_npu_nocheck(self, self, other); - } - - return self; -} - -} // namespace native +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/NpuUtils.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector logical_and_npu_input( + const Tensor& self, + const Tensor& other) { + bool isSelfWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(self); + bool isOtherWrapped = CalcuOpUtil::is_scalar_wrapped_to_tensor(other); + auto inputs = CalcuOpUtil::create_npu_input_tensor_desc({self, other}); + + // 't + 2' to work with any type of tensor, not just LongTensor (which is what + // integersin Python represent). + if (isSelfWrapped && (!isOtherWrapped)) { + inputs[0].scalarType = other.scalar_type(); + } else if (isOtherWrapped && (!isSelfWrapped)) { + inputs[1].scalarType = self.scalar_type(); + } + + return inputs; +} + +SmallVector logical_and_npu_output(const SmallVector& outputTensor) { + return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); +} + +SmallVector logical_and_npu_attr(const Tensor& self) { + SmallVector attrs = {}; + return attrs; +} + +Tensor& logical_and_out_npu_nocheck( + Tensor& result, + const Tensor& self, + const Tensor& other) { + + //constructs the input and output NPUTensorDesc + auto inputs = logical_and_npu_input(self, other); + auto outputs = logical_and_npu_output({result}); + //constructs the attr of the NPUAttrDesc + auto attrs = logical_and_npu_attr(self); + + //executing the NPU operator + CalcuOpUtil::execute_npu_operate("LogicalAnd", inputs, outputs, attrs); + + return result; +} + +Tensor& logical_and_out_npu(Tensor& result, const Tensor& self, const Tensor& other) { + auto outputSize = broadcast_ops_npu_output_size(self, other); + OpPreparation::CheckOut( + {self}, + result, + CalcuOpUtil::get_tensor_npu_format(self), + result.scalar_type(), + outputSize); + + logical_and_out_npu_nocheck(result, self, other); + + return result; +} + +Tensor logical_and_npu(const Tensor& self, const Tensor& other) { + auto outputSize = broadcast_ops_npu_output_size(self, other); + + Tensor result = at::empty_with_format( + outputSize, + self.options(), + CalcuOpUtil::get_tensor_npu_format(self)); + + logical_and_out_npu_nocheck(result, self, other); + + return result.toType(kBool); + //return result; +} + +Tensor& logical_and_npu_(Tensor& self, const Tensor& other) { + SmallVector inputs = {self, other}; + SmallVector outputs = {self}; + CalcuOpUtil::check_memory_over_laps(inputs, outputs); + + if (!NpuUtils::check_match(&self)) { + Tensor contiguousSelf = NpuUtils::format_contiguous(self); + Tensor result = logical_and_out_npu_nocheck(contiguousSelf, contiguousSelf, other); + NpuUtils::format_fresh_view(self, result); + } else { + logical_and_out_npu_nocheck(self, self, other); + } + + return self; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/MaxV1BackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/MaxV1BackwardKernelNpu.cpp index 5cc9e6445f343b50cadd26cbb620f1b4752f6d67..b73257b1a34f289a7352d07c2657e8c877ac8907 100644 --- a/src/aten/src/ATen/native/npu/MaxV1BackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MaxV1BackwardKernelNpu.cpp @@ -1,36 +1,36 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor max_backward_npu(const Tensor& grad, int64_t dim, const Tensor& indices, IntArrayRef sizes, bool keepdim) { - Tensor new_grad = grad; - Tensor new_indices = indices; - if (keepdim && sizes.size() > 0) { - new_grad = grad.squeeze(dim); - new_indices = indices.squeeze(dim); - } - auto grad_input = at::zeros(sizes, new_grad.options()).npu_scatter(new_indices, new_grad, dim); - return grad_input; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor max_backward_npu(const Tensor& grad, int64_t dim, const Tensor& indices, IntArrayRef sizes, bool keepdim) { + Tensor new_grad = grad; + Tensor new_indices = indices; + if (keepdim && sizes.size() > 0) { + new_grad = grad.squeeze(dim); + new_indices = indices.squeeze(dim); + } + auto grad_input = at::zeros(sizes, new_grad.options()).npu_scatter(new_indices, new_grad, dim); + return grad_input; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/MaxV1KernelNpu.cpp b/src/aten/src/ATen/native/npu/MaxV1KernelNpu.cpp index d05fc30726260ed549e35d9274ff5913593be062..7c54e4a2787ca7cdbf880db4280f60f9b8f7ef47 100644 --- a/src/aten/src/ATen/native/npu/MaxV1KernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MaxV1KernelNpu.cpp @@ -1,69 +1,69 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -tuple max_v1_out_npu( - Tensor& output, - Tensor& indices, - const Tensor& self, - int64_t dim, - bool keepdim) { - OpCommand cmd; - cmd.Name("ArgMaxWithValue") - .Input(self) - .Output(indices) - .Output(output) - .Attr("dimension", dim) - .Attr("keep_dims", keepdim) - .Run(); - - return std::tie(output, indices); -} - -tuple max_v1_npu(const Tensor& self, int64_t dim, bool keepdim) { - SmallVector dims = {dim}; - SmallVector outputSize = - reduce_ops_npu_output_size(self, dims, keepdim); - SmallVector indicesSize = - reduce_ops_npu_output_size(self, dims, keepdim); - - int64_t npu_format = CalcuOpUtil::get_tensor_npu_format(self); - if (outputSize.empty()) { - npu_format = ACL_FORMAT_NCHW; - } - - Tensor outputs = at::empty_with_format( - outputSize, self.options(), npu_format); - Tensor indices = at::empty_with_format( - indicesSize, self.options().dtype(kInt), ACL_FORMAT_NCHW); - max_v1_out_npu(outputs, indices, self, dim, keepdim); - - return std::tie(outputs, indices); -} - -tuple max_v1_npu(const Tensor& self, Dimname dim, bool keepdim) { - return max_v1_npu(self, dimname_to_position(self, dim), keepdim); -} - - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +tuple max_v1_out_npu( + Tensor& output, + Tensor& indices, + const Tensor& self, + int64_t dim, + bool keepdim) { + OpCommand cmd; + cmd.Name("ArgMaxWithValue") + .Input(self) + .Output(indices) + .Output(output) + .Attr("dimension", dim) + .Attr("keep_dims", keepdim) + .Run(); + + return std::tie(output, indices); +} + +tuple max_v1_npu(const Tensor& self, int64_t dim, bool keepdim) { + SmallVector dims = {dim}; + SmallVector outputSize = + reduce_ops_npu_output_size(self, dims, keepdim); + SmallVector indicesSize = + reduce_ops_npu_output_size(self, dims, keepdim); + + int64_t npu_format = CalcuOpUtil::get_tensor_npu_format(self); + if (outputSize.empty()) { + npu_format = ACL_FORMAT_NCHW; + } + + Tensor outputs = at::empty_with_format( + outputSize, self.options(), npu_format); + Tensor indices = at::empty_with_format( + indicesSize, self.options().dtype(kInt), ACL_FORMAT_NCHW); + max_v1_out_npu(outputs, indices, self, dim, keepdim); + + return std::tie(outputs, indices); +} + +tuple max_v1_npu(const Tensor& self, Dimname dim, bool keepdim) { + return max_v1_npu(self, dimname_to_position(self, dim), keepdim); +} + + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/MinKernelNpu.cpp b/src/aten/src/ATen/native/npu/MinKernelNpu.cpp index f45ae27e9b5f9eedb40a54b483f692d9cd6f6129..680ec911791c8185a888ba54864166592954fb9d 100644 --- a/src/aten/src/ATen/native/npu/MinKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MinKernelNpu.cpp @@ -67,21 +67,32 @@ tuple min_out_npu( } tuple min_npu(const Tensor& self, int64_t dim, bool keepdim) { + Tensor selfCast = self; + if(self.dtype() == ScalarType::Bool){ + selfCast = self.to(ScalarType::Float); + } + SmallVector dims = {dim}; - auto outputSize = reduce_ops_npu_output_size(self, dims, keepdim); + auto outputSize = reduce_ops_npu_output_size(selfCast, dims, keepdim); SmallVector indicesSize = outputSize; - auto func = [&self, dim, keepdim](Tensor outputs, Tensor indices) { - min_out_npu_nocheck(outputs, indices, self, dim, keepdim); + auto func = [&selfCast, dim, keepdim](Tensor outputs, Tensor indices) { + min_out_npu_nocheck(outputs, indices, selfCast, dim, keepdim); }; Tensor outputs, indices; OpPipeWithDefinedMultiOut pipe(outputs, indices); - return pipe.ApplyOutputWithSpecailParams<0>(outputSize, self.options(), ACL_FORMAT_ND) - .ApplyOutputWithSpecailParams<1>(indicesSize, self.options().dtype(ScalarType::Int), ACL_FORMAT_NCHW) - .Call(func) - .ReflushOutputDtype<1>(ScalarType::Long) - .Return(); + std::tie(outputs, indices) = pipe.ApplyOutputWithSpecailParams<0>(outputSize, selfCast.options(), ACL_FORMAT_ND) + .ApplyOutputWithSpecailParams<1>(indicesSize, selfCast.options().dtype(ScalarType::Int), ACL_FORMAT_NCHW) + .Call(func) + .ReflushOutputDtype<1>(ScalarType::Long) + .Return(); + + if(self.dtype() == ScalarType::Bool){ + outputs = outputs.to(ScalarType::Bool); + } + + return std::tie(outputs, indices); } tuple min_out_npu( diff --git a/src/aten/src/ATen/native/npu/MishBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/MishBackwardKernelNpu.cpp index ff5f154f36a364fa0e1bdb1879bbf7f598fde99a..39373555d1e568daa0f4d4ebd5f5ec354c4ee355 100644 --- a/src/aten/src/ATen/native/npu/MishBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MishBackwardKernelNpu.cpp @@ -1,37 +1,37 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor mish_backward_npu(const Tensor& grad, const Tensor& input) { - Tensor result = OpPreparation::ApplyTensor(input); - - OpCommand cmd; - cmd.Name("MishGrad") - .Input(grad) - .Input(input) - .Output(result) - .Run(); - - return result; -} - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor mish_backward_npu(const Tensor& grad, const Tensor& input) { + Tensor result = OpPreparation::ApplyTensor(input); + + OpCommand cmd; + cmd.Name("MishGrad") + .Input(grad) + .Input(input) + .Output(result) + .Run(); + + return result; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/MishKernelNpu.cpp b/src/aten/src/ATen/native/npu/MishKernelNpu.cpp index 41cce69c017a0dc5196c7db38f99bc04f14ad42a..e623547ec941ebadabaf283e9b17b35061d0544b 100644 --- a/src/aten/src/ATen/native/npu/MishKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MishKernelNpu.cpp @@ -1,36 +1,36 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor mish_npu(const Tensor& self) { - Tensor result = OpPreparation::ApplyTensor(self); - - OpCommand cmd; - cmd.Name("Mish") - .Input(self) - .Output(result) - .Run(); - - return result; -} - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor mish_npu(const Tensor& self) { + Tensor result = OpPreparation::ApplyTensor(self); + + OpCommand cmd; + cmd.Name("Mish") + .Input(self) + .Output(result) + .Run(); + + return result; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/MmKernelNpu.cpp b/src/aten/src/ATen/native/npu/MmKernelNpu.cpp index 91af42d2af1248cc22cdd88ce4f58dc578fe4e6a..28ab0aa98118511eec34cdd63b981685a87124e2 100644 --- a/src/aten/src/ATen/native/npu/MmKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MmKernelNpu.cpp @@ -18,8 +18,6 @@ #include "ATen/native/npu/utils/KernelNpuOutputSize.h" #include "ATen/native/npu/utils/NpuUtils.h" #include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/common/InnerNpuNativeFunction.h" -#include "ATen/native/npu/frame/StorageDescHelper.h" namespace at { namespace native { @@ -28,7 +26,7 @@ using namespace at::native::npu; // Flexible transpose judgement for view+transpose+Matmul, // i.e., tensors with dim=2 and base_size_.size=3 can also be Matmul directly! bool is_transpose_last_two_dims_flex(const Tensor& tensor) { - if (tensor.dim() != 2) { + if (tensor.dim() < 2 || tensor.dim() > 3) { return false; } int64_t numel = 1; @@ -115,17 +113,10 @@ Tensor mm_npu(const Tensor& self, const Tensor& mat2) { // Matmul cannot directly deal with view+transposed tensor with NZ format, so Transdata is necessary if (self.sizes().size() != self_desc.base_sizes_.size()) { selfFormatCast = OpPreparation::CastBackToOriFormat(self); - // refresh storage desc info [origin shape and storage shape] of reshaped Tensor - if (is_transpose_last_two_dims_flex(selfFormatCast)) { - StorageDescHelper::ReflushDescBySelf(selfFormatCast.transpose(-2, -1)); - } } if (mat2.sizes().size() != mat2_desc.base_sizes_.size()) { mat2FormatCast = OpPreparation::CastBackToOriFormat(mat2); - if (is_transpose_last_two_dims_flex(mat2FormatCast)) { - StorageDescHelper::ReflushDescBySelf(mat2FormatCast.transpose(-2, -1)); - } } // construct the output tensor of the NPU diff --git a/src/aten/src/ATen/native/npu/MvKernelNpu.cpp b/src/aten/src/ATen/native/npu/MvKernelNpu.cpp index 3a09b43910915830761955d4c3074d15b1965120..a2ded7dbdf7d2a427f9754768a35e51be7347005 100644 --- a/src/aten/src/ATen/native/npu/MvKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MvKernelNpu.cpp @@ -1,72 +1,72 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/common/InnerNpuNativeFunction.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& mv_out_npu_nocheck(Tensor& result, const Tensor& self, const Tensor& vec) { - bool isSelfT = CalcuOpUtil::is_transpose_last_two_dims(self); - Tensor contiguousSelf; - contiguousSelf = isSelfT ? self : NpuUtils::format_contiguous(self); - Tensor vecT = at::unsqueeze(vec, 1); - - OpCommand cmd; - cmd.Name("MatMul") - .Input(contiguousSelf) - .Input(vecT) - .Attr("transpose_x1", isSelfT) - .Attr("transpose_x2", false) - .Output(result) - .Run(); - - result = at::squeeze(result, 1); - npu_fast_reshape_(result); - return result; -} - -Tensor& mv_out_npu(Tensor& result, const Tensor& self, const Tensor& vec) { - - OpPreparation::CheckOut( - {self}, - result, - CalcuOpUtil::get_tensor_npu_format(self), - self.scalar_type(), - {self.size(0)}); - - result = at::unsqueeze(result, 1); - OpPipeWithDefinedOut pipe; - return pipe.CheckMemory({self, vec}, {result}) - .Func([&self, &vec](Tensor& result){mv_out_npu_nocheck(result, self, vec);}) - .Call(result); -} - -Tensor mv_npu(const Tensor& self, const Tensor& vec) { - - Tensor result = OpPreparation::ApplyTensor(self, {self.size(0), 1}); - - // calculate the output result of the NPU - mv_out_npu_nocheck(result, self, vec); - - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/common/InnerNpuNativeFunction.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& mv_out_npu_nocheck(Tensor& result, const Tensor& self, const Tensor& vec) { + bool isSelfT = CalcuOpUtil::is_transpose_last_two_dims(self); + Tensor contiguousSelf; + contiguousSelf = isSelfT ? self : NpuUtils::format_contiguous(self); + Tensor vecT = at::unsqueeze(vec, 1); + + OpCommand cmd; + cmd.Name("MatMul") + .Input(contiguousSelf) + .Input(vecT) + .Attr("transpose_x1", isSelfT) + .Attr("transpose_x2", false) + .Output(result) + .Run(); + + result = at::squeeze(result, 1); + npu_fast_reshape_(result); + return result; +} + +Tensor& mv_out_npu(Tensor& result, const Tensor& self, const Tensor& vec) { + + OpPreparation::CheckOut( + {self}, + result, + CalcuOpUtil::get_tensor_npu_format(self), + self.scalar_type(), + {self.size(0)}); + + result = at::unsqueeze(result, 1); + OpPipeWithDefinedOut pipe; + return pipe.CheckMemory({self, vec}, {result}) + .Func([&self, &vec](Tensor& result){mv_out_npu_nocheck(result, self, vec);}) + .Call(result); +} + +Tensor mv_npu(const Tensor& self, const Tensor& vec) { + + Tensor result = OpPreparation::ApplyTensor(self, {self.size(0), 1}); + + // calculate the output result of the NPU + mv_out_npu_nocheck(result, self, vec); + + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/NmsV4KernelNpu.cpp b/src/aten/src/ATen/native/npu/NmsV4KernelNpu.cpp index d3dea1461ba00e05b227f4a4ca071eb7743a5203..0ef5e8fd0d4bf9084c0bce904b41c47019fd6a74 100644 --- a/src/aten/src/ATen/native/npu/NmsV4KernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/NmsV4KernelNpu.cpp @@ -1,108 +1,108 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector nms_v4_npu_input( - const Tensor& self, - const Tensor& scores, - Scalar max_output_size, - const Tensor& iou_threshold, - const Tensor& scores_threshold) { - SmallVector inputs; - - Tensor max_output_size_tensor = at::empty_with_format( - {}, self.options().dtype(at::kInt), CalcuOpUtil::get_tensor_npu_format(self)) - .fill_(max_output_size); - return CalcuOpUtil::create_npu_input_tensor_desc({self, scores, max_output_size_tensor, iou_threshold, scores_threshold}); -} - -SmallVector nms_v4_npu_output( - const SmallVector& outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector nms_v4_npu_attr(bool pad_to_max_output_size) { - NPUAttrDesc npuAttrPadToMaxOutputSize = - NPUAttrDesc("pad_to_max_output_size", pad_to_max_output_size); - - SmallVector attrs = {npuAttrPadToMaxOutputSize}; - return attrs; -} - -tuple nms_v4_out_npu( - Tensor& selected_indices, - Tensor& valid_outputs, - const Tensor& self, - const Tensor& scores, - Scalar max_output_size, - const Tensor& iou_threshold, - const Tensor& scores_threshold, - bool pad_to_max_output_size) { - // constructs the input and output NPUTensorDesc - auto inputs = nms_v4_npu_input(self, scores, max_output_size, iou_threshold, scores_threshold); - auto outputs = nms_v4_npu_output({selected_indices, valid_outputs}); - - // constructs the attr of the NPUAttrDesc - auto attrs = nms_v4_npu_attr(pad_to_max_output_size); - - // executing the NPU operator - CalcuOpUtil::execute_npu_operate("NonMaxSuppressionV4", inputs, outputs, attrs); - - // return std::make_tuple(selected_indices, valid_outputs) - return std::tuple(selected_indices, valid_outputs); -} - -tuple nms_v4_npu( - const Tensor& self, - const Tensor& scores, - Scalar max_output_size, - const Tensor& iou_threshold, - const Tensor& scores_threshold, - bool pad_to_max_output_size) { - // calculate the output size - auto outputSizes = nms_v4_npu_output_size(max_output_size); - - // construct the output tensor of the NPU - Tensor selected_indices = at::empty_with_format( - std::get<0>(outputSizes), - self.options().dtype(at::kInt), - CalcuOpUtil::get_tensor_npu_format(self)); - - Tensor valid_outputs = at::empty_with_format( - std::get<1>(outputSizes), - self.options().dtype(at::kInt), - CalcuOpUtil::get_tensor_npu_format(self)); - - nms_v4_out_npu( - selected_indices, - valid_outputs, - self, - scores, - max_output_size, - iou_threshold, - scores_threshold, - pad_to_max_output_size); - - return std::tuple(selected_indices, valid_outputs); -} - -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/NpuUtils.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector nms_v4_npu_input( + const Tensor& self, + const Tensor& scores, + Scalar max_output_size, + const Tensor& iou_threshold, + const Tensor& scores_threshold) { + SmallVector inputs; + + Tensor max_output_size_tensor = at::empty_with_format( + {}, self.options().dtype(at::kInt), CalcuOpUtil::get_tensor_npu_format(self)) + .fill_(max_output_size); + return CalcuOpUtil::create_npu_input_tensor_desc({self, scores, max_output_size_tensor, iou_threshold, scores_threshold}); +} + +SmallVector nms_v4_npu_output( + const SmallVector& outputTensor) { + return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); +} + +SmallVector nms_v4_npu_attr(bool pad_to_max_output_size) { + NPUAttrDesc npuAttrPadToMaxOutputSize = + NPUAttrDesc("pad_to_max_output_size", pad_to_max_output_size); + + SmallVector attrs = {npuAttrPadToMaxOutputSize}; + return attrs; +} + +tuple nms_v4_out_npu( + Tensor& selected_indices, + Tensor& valid_outputs, + const Tensor& self, + const Tensor& scores, + Scalar max_output_size, + const Tensor& iou_threshold, + const Tensor& scores_threshold, + bool pad_to_max_output_size) { + // constructs the input and output NPUTensorDesc + auto inputs = nms_v4_npu_input(self, scores, max_output_size, iou_threshold, scores_threshold); + auto outputs = nms_v4_npu_output({selected_indices, valid_outputs}); + + // constructs the attr of the NPUAttrDesc + auto attrs = nms_v4_npu_attr(pad_to_max_output_size); + + // executing the NPU operator + CalcuOpUtil::execute_npu_operate("NonMaxSuppressionV4", inputs, outputs, attrs); + + // return std::make_tuple(selected_indices, valid_outputs) + return std::tuple(selected_indices, valid_outputs); +} + +tuple nms_v4_npu( + const Tensor& self, + const Tensor& scores, + Scalar max_output_size, + const Tensor& iou_threshold, + const Tensor& scores_threshold, + bool pad_to_max_output_size) { + // calculate the output size + auto outputSizes = nms_v4_npu_output_size(max_output_size); + + // construct the output tensor of the NPU + Tensor selected_indices = at::empty_with_format( + std::get<0>(outputSizes), + self.options().dtype(at::kInt), + CalcuOpUtil::get_tensor_npu_format(self)); + + Tensor valid_outputs = at::empty_with_format( + std::get<1>(outputSizes), + self.options().dtype(at::kInt), + CalcuOpUtil::get_tensor_npu_format(self)); + + nms_v4_out_npu( + selected_indices, + valid_outputs, + self, + scores, + max_output_size, + iou_threshold, + scores_threshold, + pad_to_max_output_size); + + return std::tuple(selected_indices, valid_outputs); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/NormKernelNpu.cpp b/src/aten/src/ATen/native/npu/NormKernelNpu.cpp index 8308e4763aa4923b358d9ac4f23594b85cc3a159..2f25260240785bb679773e9bfd9cb76b04d38d4f 100644 --- a/src/aten/src/ATen/native/npu/NormKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/NormKernelNpu.cpp @@ -38,7 +38,7 @@ int64_t calculate_p(optional p) { // norm.dtype_out -Tensor& norm_out_npu( +Tensor& norm_out_npu_nocheck( Tensor& out, const Tensor& self, optional p, @@ -80,11 +80,36 @@ Tensor& norm_out_npu( optional p, IntArrayRef dim, bool keepdim) { - norm_out_npu(out, self, p, dim, keepdim, self.scalar_type()); + auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); + OpPreparation::CheckOut( + {self}, + out, + ACL_FORMAT_ND, + self.scalar_type(), + outputSize); + norm_out_npu_nocheck(out, self, p, dim, keepdim, self.scalar_type()); return out; } +Tensor& norm_out_npu( + Tensor& out, + const Tensor& self, + optional p, + IntArrayRef dim, + bool keepdim, + ScalarType dtype) { + auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); + OpPreparation::CheckOut( + {self}, + out, + ACL_FORMAT_ND, + self.scalar_type(), + outputSize); + norm_out_npu_nocheck(out, self, p, dim, keepdim, dtype); + + return out; +} // norm.ScalarOpt_dim_dtype Tensor norm_npu( const Tensor& self, @@ -99,7 +124,7 @@ Tensor norm_npu( Tensor out = OpPreparation::ApplyTensorWithSizes(outputSize, self.options().dtype(dtype)); // calculate the output result of the NPU - norm_out_npu(out, self, p, dim, keepdim, dtype); + norm_out_npu_nocheck(out, self, p, dim, keepdim, dtype); return out; } diff --git a/src/aten/src/ATen/native/npu/OneHotKernelNpu.cpp b/src/aten/src/ATen/native/npu/OneHotKernelNpu.cpp index d92440dec002580786506999774f36fdfb2464aa..8030418ef0d429581493ba48d7ffa27043c00b39 100644 --- a/src/aten/src/ATen/native/npu/OneHotKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/OneHotKernelNpu.cpp @@ -1,80 +1,80 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor one_hot_npu1(const Tensor& self, int64_t num_classes) { - Scalar on_value = 1; - Scalar off_value = 0; - int64_t axis = -1; - int64_t depth; - - auto self_temp = self.to(at::kFloat); - - //When run in NPU,the input tensor's dim must be smaller than 8. - TORCH_CHECK( - self_temp.dim() < 8, "NPU error,can not support the input tensor's dim bigger than 7."); - - // empty tensor could be converted to one hot representation, - // but shape inference is not possible. - if (self.numel() == 0) { - if (num_classes <= 0) { - AT_ERROR("Can not infer total number of classes from empty tensor."); - } else { - depth = num_classes; - } - } - - // non-empty tensor - TORCH_CHECK( - self_temp.min().item().toLong() >= 0, "Class values must be non-negative."); - if (num_classes == -1) { - depth = self_temp.max().item().toLong() + 1; - } else { - TORCH_CHECK( - num_classes > self_temp.max().item().toLong(), - "Class values must be smaller than num_classes."); - depth = num_classes; - } - - // calculate output size - auto outputSize = array_to_small_vector(self.sizes()); - outputSize.emplace_back(depth); - - Tensor result = OpPreparation::ApplyTensor( - outputSize, - self.options().dtype(ScalarType::Int), - self); - - SmallVector depthList = {depth}; - - OpCommand cmd; - cmd.Name("OneHot") - .Input(self) - .Input(depthList, at::kInt) - .Input(on_value, ScalarType::Int) - .Input(off_value, ScalarType::Int) - .Output(result) - .Attr("axis", axis) - .Run(); - - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor one_hot_npu1(const Tensor& self, int64_t num_classes) { + Scalar on_value = 1; + Scalar off_value = 0; + int64_t axis = -1; + int64_t depth; + + auto self_temp = self.to(at::kFloat); + + //When run in NPU,the input tensor's dim must be smaller than 8. + TORCH_CHECK( + self_temp.dim() < 8, "NPU error,can not support the input tensor's dim bigger than 7."); + + // empty tensor could be converted to one hot representation, + // but shape inference is not possible. + if (self.numel() == 0) { + if (num_classes <= 0) { + AT_ERROR("Can not infer total number of classes from empty tensor."); + } else { + depth = num_classes; + } + } + + // non-empty tensor + TORCH_CHECK( + self_temp.min().item().toLong() >= 0, "Class values must be non-negative."); + if (num_classes == -1) { + depth = self_temp.max().item().toLong() + 1; + } else { + TORCH_CHECK( + num_classes > self_temp.max().item().toLong(), + "Class values must be smaller than num_classes."); + depth = num_classes; + } + + // calculate output size + auto outputSize = array_to_small_vector(self.sizes()); + outputSize.emplace_back(depth); + + Tensor result = OpPreparation::ApplyTensor( + outputSize, + self.options().dtype(ScalarType::Int), + self); + + SmallVector depthList = {depth}; + + OpCommand cmd; + cmd.Name("OneHot") + .Input(self) + .Input(depthList, at::kInt) + .Input(on_value, ScalarType::Int) + .Input(off_value, ScalarType::Int) + .Output(result) + .Attr("axis", axis) + .Run(); + + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/PadKernelNpu.cpp b/src/aten/src/ATen/native/npu/PadKernelNpu.cpp index d2d338f12527d66c3a3dd2d6eb413621981dbee3..0aeda8ced95bb8778854d2989848502e8de534e7 100644 --- a/src/aten/src/ATen/native/npu/PadKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/PadKernelNpu.cpp @@ -1,47 +1,47 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& pad_out_npu( - Tensor& output, - const Tensor& input, - IntArrayRef paddings) { - SmallVector paddingsVector = array_to_small_vector(paddings); - paddingsVector.resize(2 * input.dim(), 0); - - OpCommand cmd; - cmd.Name("Pad") - .Input(input) - .Input(paddingsVector) - .Output(output) - .Run(); - return output; -} - -Tensor pad_npu(const Tensor& input, IntArrayRef paddings) { - auto outputSize = pad_npu_output_size(input, paddings); - Tensor output = OpPreparation::ApplyTensor(input, outputSize); - pad_out_npu(output, input, paddings); - return output; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& pad_out_npu( + Tensor& output, + const Tensor& input, + IntArrayRef paddings) { + SmallVector paddingsVector = array_to_small_vector(paddings); + paddingsVector.resize(2 * input.dim(), 0); + + OpCommand cmd; + cmd.Name("Pad") + .Input(input) + .Input(paddingsVector) + .Output(output) + .Run(); + return output; +} + +Tensor pad_npu(const Tensor& input, IntArrayRef paddings) { + auto outputSize = pad_npu_output_size(input, paddings); + Tensor output = OpPreparation::ApplyTensor(input, outputSize); + pad_out_npu(output, input, paddings); + return output; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/PdistKernelNpu.cpp b/src/aten/src/ATen/native/npu/PdistKernelNpu.cpp index 5063ed5b35fe75cd2ede349f7d2b19ba36fe3ddd..c6dd231d07c2483f2f5585870f161acb3a8893bf 100644 --- a/src/aten/src/ATen/native/npu/PdistKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/PdistKernelNpu.cpp @@ -1,95 +1,95 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector pdist_npu_input( - const SmallVector& inputTensor){ - return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); -} - -SmallVector pdist_npu_output( - const SmallVector& outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector pdist_npu_attr(float p_value) { - NPUAttrDesc P = NPUAttrDesc("p", p_value); - SmallVector attrs = {P}; - return attrs; -} - -Tensor& pdist_out_npu( - Tensor& result, - const Tensor& self, - float p) { - // constructs the input and output NPUTensorDesc - auto inputs = pdist_npu_input({self}); - auto outputs = pdist_npu_output({result}); - - // constructs the attr of the NPUAttrDesc - auto attrs = pdist_npu_attr(p); - - // executing the NPU operator - CalcuOpUtil::execute_npu_operate("Pdist", inputs, outputs, attrs); - - return result; -} - -Tensor pdist_npu(const Tensor& self, double p) { - TORCH_CHECK(self.dim() == 2, - "pdist only supports 2D tensors, got: ", self.dim(), "D"); - TORCH_CHECK(at::isFloatingType(self.scalar_type()), "pdist only supports floating-point dtypes"); - TORCH_CHECK(p >= 0, "pdist only supports non-negative p values"); - return at::_pdist_forward(self, p); -} - -Tensor _pdist_forward_npu(const Tensor& self, double p) { - Tensor result; - if (self.size(0) <= 1) { - result = at::empty_with_format( - {0}, - self.options(), - CalcuOpUtil::get_tensor_npu_format(self)); - } else { - // double is not supported in NPU, type of P needs to be converted from double to float. - float p_float; - if (std::isinf(p)) { - p_float = std::numeric_limits::infinity(); - } else { - TORCH_CHECK(p <= std::numeric_limits::max(), "npu dose not support float64" ); - p_float = (float) p; - } - auto outputSize = pdist_npu_output_size(self, p_float); - result = at::empty_with_format( - outputSize, - self.options(), - CalcuOpUtil::get_tensor_npu_format(self)); - if(self.size(1) == 0){ - result.fill_(0); - } else { - pdist_out_npu(result, self, p_float); - } - } - return result; -} - -} // namespace native +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/NpuUtils.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector pdist_npu_input( + const SmallVector& inputTensor){ + return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); +} + +SmallVector pdist_npu_output( + const SmallVector& outputTensor) { + return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); +} + +SmallVector pdist_npu_attr(float p_value) { + NPUAttrDesc P = NPUAttrDesc("p", p_value); + SmallVector attrs = {P}; + return attrs; +} + +Tensor& pdist_out_npu( + Tensor& result, + const Tensor& self, + float p) { + // constructs the input and output NPUTensorDesc + auto inputs = pdist_npu_input({self}); + auto outputs = pdist_npu_output({result}); + + // constructs the attr of the NPUAttrDesc + auto attrs = pdist_npu_attr(p); + + // executing the NPU operator + CalcuOpUtil::execute_npu_operate("Pdist", inputs, outputs, attrs); + + return result; +} + +Tensor pdist_npu(const Tensor& self, double p) { + TORCH_CHECK(self.dim() == 2, + "pdist only supports 2D tensors, got: ", self.dim(), "D"); + TORCH_CHECK(at::isFloatingType(self.scalar_type()), "pdist only supports floating-point dtypes"); + TORCH_CHECK(p >= 0, "pdist only supports non-negative p values"); + return at::_pdist_forward(self, p); +} + +Tensor _pdist_forward_npu(const Tensor& self, double p) { + Tensor result; + if (self.size(0) <= 1) { + result = at::empty_with_format( + {0}, + self.options(), + CalcuOpUtil::get_tensor_npu_format(self)); + } else { + // double is not supported in NPU, type of P needs to be converted from double to float. + float p_float; + if (std::isinf(p)) { + p_float = std::numeric_limits::infinity(); + } else { + TORCH_CHECK(p <= std::numeric_limits::max(), "npu dose not support float64" ); + p_float = (float) p; + } + auto outputSize = pdist_npu_output_size(self, p_float); + result = at::empty_with_format( + outputSize, + self.options(), + CalcuOpUtil::get_tensor_npu_format(self)); + if(self.size(1) == 0){ + result.fill_(0); + } else { + pdist_out_npu(result, self, p_float); + } + } + return result; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/PreluKernelNpu.cpp b/src/aten/src/ATen/native/npu/PreluKernelNpu.cpp index 52b248ae36364df35ba901f8a936b037b7d6376f..41e3974f122a3c68ba861351b6b402204ecf59da 100644 --- a/src/aten/src/ATen/native/npu/PreluKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/PreluKernelNpu.cpp @@ -1,39 +1,39 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor prelu_npu(const Tensor& self, const Tensor& weight_) { - auto input = self.contiguous(); - auto weight = weight_.contiguous(); - - // calculate the output size - auto outputSize = input_same_output_size(self); - Tensor result = OpPreparation::ApplyTensor(input, outputSize); - - OpCommand cmd; - cmd.Name("PRelu") - .Input(self) - .Input(weight) - .Output(result) - .Run(); - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor prelu_npu(const Tensor& self, const Tensor& weight_) { + auto input = self.contiguous(); + auto weight = weight_.contiguous(); + + // calculate the output size + auto outputSize = input_same_output_size(self); + Tensor result = OpPreparation::ApplyTensor(input, outputSize); + + OpCommand cmd; + cmd.Name("PRelu") + .Input(self) + .Input(weight) + .Output(result) + .Run(); + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/PtIouKernelNpu.cpp b/src/aten/src/ATen/native/npu/PtIouKernelNpu.cpp index 2875adc7aff695009560413b3f9e5a38e7cfb3ea..f186d78b1e929f33f43a93aedf7838c0af756d71 100644 --- a/src/aten/src/ATen/native/npu/PtIouKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/PtIouKernelNpu.cpp @@ -1,45 +1,45 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor ptiou_npu( - const Tensor& bboxes, - const Tensor& gtboxes, - int64_t mode) { - auto outputSize = {gtboxes.size(0), bboxes.size(0)}; - Tensor overlap = OpPreparation::ApplyTensor(bboxes, outputSize); - string modeStr = "iou"; - if (mode == 1) { - modeStr = "iof"; - } - OpCommand cmd; - cmd.Name("PtIou") - .Input(bboxes) - .Input(gtboxes) - .Output(overlap) - .Attr("mode", modeStr) - .Run(); - - return overlap; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor ptiou_npu( + const Tensor& bboxes, + const Tensor& gtboxes, + int64_t mode) { + auto outputSize = {gtboxes.size(0), bboxes.size(0)}; + Tensor overlap = OpPreparation::ApplyTensor(bboxes, outputSize); + string modeStr = "iou"; + if (mode == 1) { + modeStr = "iof"; + } + OpCommand cmd; + cmd.Name("PtIou") + .Input(bboxes) + .Input(gtboxes) + .Output(overlap) + .Attr("mode", modeStr) + .Run(); + + return overlap; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/QrKernelNpu.cpp b/src/aten/src/ATen/native/npu/QrKernelNpu.cpp index a1212802cff3b79915fd17c36ea4e7eac94ebfca..b94c6824e69f4989a7d1933ad95b2ce789c14d74 100644 --- a/src/aten/src/ATen/native/npu/QrKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/QrKernelNpu.cpp @@ -1,102 +1,102 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -std::tuple, SmallVector> -qr_npu_output_size( - const Tensor& self, - bool some) -{ - int m = self.size(-2); - int n = self.size(-1); - auto k = std::min(m, n); - auto shape = array_to_small_vector(self.sizes()); - SmallVector Qsize(shape.begin(), shape.end()-2); - SmallVector Rsize(shape.begin(), shape.end()-2); - // allocate size - if(some){ - Qsize.insert(Qsize.end(), {m, k}); - Rsize.insert(Rsize.end(), {k, n}); - } else { - Qsize.insert(Qsize.end(), {m, m}); - Rsize.insert(Rsize.end(), {m, n}); - } - return std::tie(Qsize, Rsize); -} - -static inline void qr_check( - const Tensor& self){ - TORCH_CHECK( - self.ndimension() >= 2, - "Expected nonempty least 2D tensor, but got a tensor with sizes ", - self.dim()); -} - -std::tuple qr_out_npu_nocheck( - Tensor& Q, - Tensor& R, - const Tensor& self, - bool some){ - bool full_matrices = !some; - OpCommand cmd; - cmd.Name("Qr") - .Input(self) - .Output(Q) - .Output(R) - .Attr("full_matrices", full_matrices) - .Run(); - return std::tie(Q, R); -} - -std::tuple qr_out_npu( - Tensor& Q, - Tensor& R, - const Tensor& self, - bool some){ - qr_check(self); - auto sizes = qr_npu_output_size(self, some); - OpPreparation::CheckOut( - {self}, - Q, - self, - std::get<0>(sizes)); - OpPreparation::CheckOut( - {self}, - R, - self, - std::get<1>(sizes)); - return qr_out_npu_nocheck(Q, R, self, some); -} - -std::tuple qr_npu( - const Tensor& self, - bool some){ - qr_check(self); - auto sizes = qr_npu_output_size(self, some); - Tensor Q = OpPreparation::ApplyTensor(self, std::get<0>(sizes)); - Tensor R = OpPreparation::ApplyTensor(self, std::get<1>(sizes)); - - qr_out_npu(Q, R, self, some); - return std::tie(Q, R); -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +std::tuple, SmallVector> +qr_npu_output_size( + const Tensor& self, + bool some) +{ + int m = self.size(-2); + int n = self.size(-1); + auto k = std::min(m, n); + auto shape = array_to_small_vector(self.sizes()); + SmallVector Qsize(shape.begin(), shape.end()-2); + SmallVector Rsize(shape.begin(), shape.end()-2); + // allocate size + if(some){ + Qsize.insert(Qsize.end(), {m, k}); + Rsize.insert(Rsize.end(), {k, n}); + } else { + Qsize.insert(Qsize.end(), {m, m}); + Rsize.insert(Rsize.end(), {m, n}); + } + return std::tie(Qsize, Rsize); +} + +static inline void qr_check( + const Tensor& self){ + TORCH_CHECK( + self.ndimension() >= 2, + "Expected nonempty least 2D tensor, but got a tensor with sizes ", + self.dim()); +} + +std::tuple qr_out_npu_nocheck( + Tensor& Q, + Tensor& R, + const Tensor& self, + bool some){ + bool full_matrices = !some; + OpCommand cmd; + cmd.Name("Qr") + .Input(self) + .Output(Q) + .Output(R) + .Attr("full_matrices", full_matrices) + .Run(); + return std::tie(Q, R); +} + +std::tuple qr_out_npu( + Tensor& Q, + Tensor& R, + const Tensor& self, + bool some){ + qr_check(self); + auto sizes = qr_npu_output_size(self, some); + OpPreparation::CheckOut( + {self}, + Q, + self, + std::get<0>(sizes)); + OpPreparation::CheckOut( + {self}, + R, + self, + std::get<1>(sizes)); + return qr_out_npu_nocheck(Q, R, self, some); +} + +std::tuple qr_npu( + const Tensor& self, + bool some){ + qr_check(self); + auto sizes = qr_npu_output_size(self, some); + Tensor Q = OpPreparation::ApplyTensor(self, std::get<0>(sizes)); + Tensor R = OpPreparation::ApplyTensor(self, std::get<1>(sizes)); + + qr_out_npu(Q, R, self, some); + return std::tie(Q, R); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/ReflectionPad2dKernelNpu.cpp b/src/aten/src/ATen/native/npu/ReflectionPad2dKernelNpu.cpp deleted file mode 100644 index c3daebe725a216729aa2f7ed9f0114e0c13f74bd..0000000000000000000000000000000000000000 --- a/src/aten/src/ATen/native/npu/ReflectionPad2dKernelNpu.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/NpuUtils.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; -SmallVector reflection_pad2d_npu_input(SmallVector inputs) { - return CalcuOpUtil::create_npu_input_tensor_desc(inputs); -} - -SmallVector reflection_pad2d_npu_output(const SmallVector &outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector reflection_pad2d_npu_attr(const Tensor& input, IntArrayRef paddingSize) { - int64_t pad_l = 0; - int64_t pad_r = 0; - int64_t pad_t = 0; - int64_t pad_b = 0; - int64_t pad_zeros = 0; - - TORCH_CHECK(paddingSize.size() == 4, "padding size is expected to be 4"); - - pad_l = paddingSize[0]; - pad_r = paddingSize[1]; - pad_t = paddingSize[2]; - pad_b = paddingSize[3]; - - SmallVector vectorInt = {}; - SmallVector, SIZE> vectorVectorInt = {}; - SmallVector vectorListInt = {}; - SmallVector paddingsVector = array_to_small_vector(paddingSize); - paddingsVector.resize(input.dim(), 0); - - for (int i = 0; i < paddingsVector.size(); i ++) { - if (i<2) { - vectorInt.emplace_back(pad_zeros); - vectorInt.emplace_back(pad_zeros); - } - else if (i == 2) { - vectorInt.emplace_back(pad_t); - vectorInt.emplace_back(pad_b); - } - else { - vectorInt.emplace_back(pad_l); - vectorInt.emplace_back(pad_r); - } - vectorVectorInt.emplace_back(vectorInt); - vectorInt.clear(); - vectorListInt.emplace_back(IntArrayRef(vectorVectorInt.back())); - } - int64_t constant_values = 0; - // string mode = "constant"; - string mode = "reflect"; - bool padding_contiguous = true; - NPUAttrDesc npuAttrConstantValues = NPUAttrDesc("constant_values", constant_values); - NPUAttrDesc npuAttrMode = NPUAttrDesc("mode", mode); - NPUAttrDesc npuAttrPaddingContiguous = NPUAttrDesc("padding_contiguous", padding_contiguous); - NPUAttrDesc npuAttrPadding = NPUAttrDesc("paddings", vectorListInt); - SmallVector attrs = { - npuAttrPadding, - npuAttrConstantValues, - npuAttrMode, - npuAttrPaddingContiguous - }; - return attrs; -} - -Tensor& reflection_pad2d_out_npu_nocheck(Tensor& out, const Tensor& self, IntArrayRef padding) { - //constructs the input and output NPUTensorDesc - auto inputs = reflection_pad2d_npu_input({self}); - auto outputs = reflection_pad2d_npu_output({out}); - - //constructs the attr of the NPUAttrDesc - auto attrs = reflection_pad2d_npu_attr(self, padding); - - //executing the NPU operator - CalcuOpUtil::execute_npu_operate("PadV3D", inputs, outputs, attrs); - - return out; -} - -Tensor& reflection_pad2d_out_npu(Tensor& result, const Tensor& self, IntArrayRef padding){ - //calculate the output size - auto outputSize = reflection_pad2d_npu_output_size(self, padding); - //construct the output tensor of the NPU - result = at::empty_with_format(outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - OpPreparation::CheckOut( - {self}, - result, - CalcuOpUtil::get_tensor_npu_format(self), - self.scalar_type(), - outputSize); - reflection_pad2d_out_npu_nocheck(result, self, padding); - - return result; -} - -Tensor reflection_pad2d_npu(const Tensor& self, IntArrayRef padding) { - //calculate the output size - auto outputSize = reflection_pad2d_npu_output_size(self, padding); - //construct the output tensor of the NPU - Tensor out = at::empty_with_format(outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - - //calculate the output result of the NPU - reflection_pad2d_out_npu_nocheck(out, self, padding); - - return out; -} -} -} // namespace at::native diff --git a/src/aten/src/ATen/native/npu/ReplicationPad2dKernelNpu.cpp b/src/aten/src/ATen/native/npu/ReplicationPad2dKernelNpu.cpp old mode 100644 new mode 100755 diff --git a/src/aten/src/ATen/native/npu/ScatterV1KernelNpu.cpp b/src/aten/src/ATen/native/npu/ScatterV1KernelNpu.cpp index 3485b0608a1a9d1b591dcba3fc1466e5ed388410..9c653b13bd6f1e144e45e70508ea8ac069462cd9 100644 --- a/src/aten/src/ATen/native/npu/ScatterV1KernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/ScatterV1KernelNpu.cpp @@ -1,50 +1,50 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& scatter_out_npu( - Tensor& output, - const Tensor& self, - const Tensor& indices, - const Tensor& updates, - int64_t dim) { - OpCommand cmd; - cmd.Name("ArgMaxGrad") - .Input(self) - .Input(indices) - .Input(updates) - .Output(output) - .Attr("dimension", dim) - .Run(); - - return output; -} - -Tensor scatter_npu(const Tensor& self, const Tensor& indices, const Tensor& updates, int64_t dim) { - Tensor outputs = OpPreparation::ApplyTensor(self); - scatter_out_npu(outputs, self, indices, updates, dim); - - return outputs; -} - - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& scatter_out_npu( + Tensor& output, + const Tensor& self, + const Tensor& indices, + const Tensor& updates, + int64_t dim) { + OpCommand cmd; + cmd.Name("ArgMaxGrad") + .Input(self) + .Input(indices) + .Input(updates) + .Output(output) + .Attr("dimension", dim) + .Run(); + + return output; +} + +Tensor scatter_npu(const Tensor& self, const Tensor& indices, const Tensor& updates, int64_t dim) { + Tensor outputs = OpPreparation::ApplyTensor(self); + scatter_out_npu(outputs, self, indices, updates, dim); + + return outputs; +} + + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/SliceKernelNpu.cpp b/src/aten/src/ATen/native/npu/SliceKernelNpu.cpp index 4779f9418561b7339feeba7e8100bb9d41652e4e..276ef231dcbf61885e816c1e61250b7701f574d9 100644 --- a/src/aten/src/ATen/native/npu/SliceKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/SliceKernelNpu.cpp @@ -1,78 +1,78 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/utils/CalcuOpUtil.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& slice_out_npu( - Tensor& result, - const Tensor& self, - IntArrayRef offsets, - IntArrayRef size) { - - SmallVector offsetVec = array_to_small_vector(offsets); - SmallVector sizeVec = array_to_small_vector(size); - - if (!c10::npu::OptionsManager::CheckDynamicEnable()) { - OpCommand cmd; - cmd.Name("Slice") - .Input(self) - .Input(offsetVec) - .Input(sizeVec) - .Output(result) - .Run(); - } else { - SmallVector offsetsList = array_to_small_vector(offsets); - SmallVector sizeList = array_to_small_vector(size); - OpDynamicCommand cmd; - cmd.Name("SliceD") - .Input(self) - .Output(result) - .Attr("offsets", offsets) - .Attr("size", size); - Tensor offsetCpuTensor = from_blob((void*)offsetVec.data(), {offsetVec.size()}, at::kLong).to(at::kInt); - Tensor offsetNpuTensor = CalcuOpUtil::copy_tensor_host_to_device(offsetCpuTensor); - Tensor sizeCpuTensor = from_blob((void*)sizeVec.data(), {sizeVec.size()}, at::kLong); - Tensor sizeNpuTensor = CalcuOpUtil::copy_tensor_host_to_device(sizeCpuTensor); - cmd.DynamicName("Slice") - .DynamicInput(self) - .DynamicInput(offsetsList, at::kLong, at::kInt, "offsets", true, FIXED_CONST_VALUE) - .DynamicInput(sizeList, at::kLong, at::kInt, "size", true, FIXED_CONST_VALUE) - .DynamicOutput(result) - .DynamicOpRun(); - } - return result; -} - -Tensor slice_npu(const Tensor& self, IntArrayRef offsets, IntArrayRef size) { - // calculate the output size - SmallVector outputSize = - CalcuOpUtil::ConvertIntArrayRefToSmallVector(size); - // construct the output tensor of the NPU - Tensor result = OpPreparation::ApplyTensor(self, outputSize); - - // calculate the output result of the NPU - slice_out_npu(result, self, offsets, size); - - return result; -} - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" +#include "ATen/native/npu/utils/CalcuOpUtil.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& slice_out_npu( + Tensor& result, + const Tensor& self, + IntArrayRef offsets, + IntArrayRef size) { + + SmallVector offsetVec = array_to_small_vector(offsets); + SmallVector sizeVec = array_to_small_vector(size); + + if (!c10::npu::OptionsManager::CheckDynamicEnable()) { + OpCommand cmd; + cmd.Name("Slice") + .Input(self) + .Input(offsetVec) + .Input(sizeVec) + .Output(result) + .Run(); + } else { + SmallVector offsetsList = array_to_small_vector(offsets); + SmallVector sizeList = array_to_small_vector(size); + OpDynamicCommand cmd; + cmd.Name("SliceD") + .Input(self) + .Output(result) + .Attr("offsets", offsets) + .Attr("size", size); + Tensor offsetCpuTensor = from_blob((void*)offsetVec.data(), {offsetVec.size()}, at::kLong).to(at::kInt); + Tensor offsetNpuTensor = CalcuOpUtil::copy_tensor_host_to_device(offsetCpuTensor); + Tensor sizeCpuTensor = from_blob((void*)sizeVec.data(), {sizeVec.size()}, at::kLong); + Tensor sizeNpuTensor = CalcuOpUtil::copy_tensor_host_to_device(sizeCpuTensor); + cmd.DynamicName("Slice") + .DynamicInput(self) + .DynamicInput(offsetsList, at::kLong, at::kInt, "offsets", true, FIXED_CONST_VALUE) + .DynamicInput(sizeList, at::kLong, at::kInt, "size", true, FIXED_CONST_VALUE) + .DynamicOutput(result) + .DynamicOpRun(); + } + return result; +} + +Tensor slice_npu(const Tensor& self, IntArrayRef offsets, IntArrayRef size) { + // calculate the output size + SmallVector outputSize = + CalcuOpUtil::ConvertIntArrayRefToSmallVector(size); + // construct the output tensor of the NPU + Tensor result = OpPreparation::ApplyTensor(self, outputSize); + + // calculate the output result of the NPU + slice_out_npu(result, self, offsets, size); + + return result; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/SlogdetKernelNpu.cpp b/src/aten/src/ATen/native/npu/SlogdetKernelNpu.cpp index 8283cf0d52e7279b30e7c3b37e036985686efc0f..223bb3de9f9b8c69734413bb5b4a6b7b2b92cebd 100644 --- a/src/aten/src/ATen/native/npu/SlogdetKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/SlogdetKernelNpu.cpp @@ -1,56 +1,56 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -tuple slogdet_out_npu( - Tensor& sign, - Tensor& y, - const Tensor& self) { - OpCommand cmd; - cmd.Name("LogMatrixDeterminant") - .Input(self) - .Output(sign) - .Output(y) - .Run(); - - return std::tie(sign, y); -} - -tuple slogdet_npu(const Tensor& self) { - - TORCH_CHECK(self.dim() >= 2, "input must be at least 2 dimensions"); - - // calculate the output size - auto outputSize = array_to_small_vector(self.sizes()); - outputSize.erase(outputSize.end() - 2, outputSize.end()); - - // construct the output tensor of the NPU - Tensor sign = OpPreparation::ApplyTensor(self, outputSize); - Tensor y = OpPreparation::ApplyTensor(self, outputSize); - - // calculate the output result of the NPU - slogdet_out_npu(sign, y, self); - - return std::tie(sign, y); -} - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +tuple slogdet_out_npu( + Tensor& sign, + Tensor& y, + const Tensor& self) { + OpCommand cmd; + cmd.Name("LogMatrixDeterminant") + .Input(self) + .Output(sign) + .Output(y) + .Run(); + + return std::tie(sign, y); +} + +tuple slogdet_npu(const Tensor& self) { + + TORCH_CHECK(self.dim() >= 2, "input must be at least 2 dimensions"); + + // calculate the output size + auto outputSize = array_to_small_vector(self.sizes()); + outputSize.erase(outputSize.end() - 2, outputSize.end()); + + // construct the output tensor of the NPU + Tensor sign = OpPreparation::ApplyTensor(self, outputSize); + Tensor y = OpPreparation::ApplyTensor(self, outputSize); + + // calculate the output result of the NPU + slogdet_out_npu(sign, y, self); + + return std::tie(sign, y); +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/SlowConvDilated2DKernelNpu.cpp b/src/aten/src/ATen/native/npu/SlowConvDilated2DKernelNpu.cpp index 8c10534e9b561409ab43a4d0d43690c75b4b0b96..ca32f0180a2ebf1046985b966245d34035c09796 100644 --- a/src/aten/src/ATen/native/npu/SlowConvDilated2DKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/SlowConvDilated2DKernelNpu.cpp @@ -1,69 +1,69 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor slow_conv_dilated2d_npu( - const Tensor& self, - const Tensor& weight, - IntArrayRef kernel_size, - const Tensor& bias, - IntArrayRef stride, - IntArrayRef padding, - IntArrayRef dilation) { - - if (stride[0] == 0) { - AT_ERROR("slow_conv_dilated2d_npu_output_size: stride[0] can not be zero"); - } - if (padding[0] < 0 || padding[1] < 0){ - AT_ERROR("slow_conv_dilated2d_npu_output_size: padding can not be less than zero"); - } - auto outputSize = slow_conv_dilated2d_npu_output_size( - self, weight, stride, padding, dilation); - // construct the output tensor of the NPU - Tensor result = - at::empty_with_format(outputSize, self.options(), ACL_FORMAT_NC1HWC0); - - int64_t groups = 1; - string dataFormat = "NCHW"; - SmallVector stridesSize = {1,1,stride[0],stride[1]}; - SmallVector paddings = { - padding[0], padding[0], padding[1], padding[1]}; - SmallVector dilations = {1, 1, dilation[0], dilation[1]}; - - // calculate the output result of the NPU - OpCommand cmd; - cmd.Name("Conv2D") - .Input(self) - .Input(weight); - if (bias.defined()){ - cmd.Input(bias); - } - cmd.Output(result) - .Attr("strides", stridesSize) - .Attr("pads", paddings) - .Attr("dilations", dilations) - .Attr("groups",groups) - .Attr("data_format",dataFormat) - .Run(); - - return result; -} -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor slow_conv_dilated2d_npu( + const Tensor& self, + const Tensor& weight, + IntArrayRef kernel_size, + const Tensor& bias, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef dilation) { + + if (stride[0] == 0) { + AT_ERROR("slow_conv_dilated2d_npu_output_size: stride[0] can not be zero"); + } + if (padding[0] < 0 || padding[1] < 0){ + AT_ERROR("slow_conv_dilated2d_npu_output_size: padding can not be less than zero"); + } + auto outputSize = slow_conv_dilated2d_npu_output_size( + self, weight, stride, padding, dilation); + // construct the output tensor of the NPU + Tensor result = + at::empty_with_format(outputSize, self.options(), ACL_FORMAT_NC1HWC0); + + int64_t groups = 1; + string dataFormat = "NCHW"; + SmallVector stridesSize = {1,1,stride[0],stride[1]}; + SmallVector paddings = { + padding[0], padding[0], padding[1], padding[1]}; + SmallVector dilations = {1, 1, dilation[0], dilation[1]}; + + // calculate the output result of the NPU + OpCommand cmd; + cmd.Name("Conv2D") + .Input(self) + .Input(weight); + if (bias.defined()){ + cmd.Input(bias); + } + cmd.Output(result) + .Attr("strides", stridesSize) + .Attr("pads", paddings) + .Attr("dilations", dilations) + .Attr("groups",groups) + .Attr("data_format",dataFormat) + .Run(); + + return result; +} +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/SlowConvTranspose2dKernelNpu.cpp b/src/aten/src/ATen/native/npu/SlowConvTranspose2dKernelNpu.cpp index 5e2d8af1e230cb1e82d6c0972fb6912d63e890d2..92a0f12a19a729cdd167dbe7d1d24ef36a9844f6 100644 --- a/src/aten/src/ATen/native/npu/SlowConvTranspose2dKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/SlowConvTranspose2dKernelNpu.cpp @@ -1,230 +1,230 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector slow_conv_transpose2d_npu_output_size( - const Tensor & self, - const Tensor & weight, - IntArrayRef kernel_size, - const Tensor & bias, - IntArrayRef stride, - IntArrayRef padding, - IntArrayRef output_padding, - IntArrayRef dilation) { - int ndim = self.dim(); - int dimh = 1; - int dimw = 2; - - if (ndim == 4) { - dimh++; - dimw++; - } - - TORCH_CHECK( - self.numel() != 0 && (ndim == 3 || ndim == 4), - "non-empty 3D or 4D input tensor expected but got a tensor with size ", - self.sizes()); - int64_t N = self.size(0); - int64_t Co = weight.size(1); - int64_t H = self.size(dimh); - int64_t W = self.size(dimw); - - - int64_t Ho = (H - 1) * stride[0] - 2 * padding[0] + - dilation[0] * (kernel_size[0] - 1) + output_padding[0] + 1; - int64_t Wo = (W - 1) * stride[1] - 2 * padding[1] + - dilation[1] * (kernel_size[1] - 1) + output_padding[1] + 1; - - SmallVector outputSize = {N, Co, Ho, Wo}; - - return outputSize; -} - -static inline void slow_conv_transpose2d_shape_check_npu( - const Tensor & self, - const Tensor & weight, - IntArrayRef kernel_size, - const Tensor & bias, - IntArrayRef stride, - IntArrayRef padding, - IntArrayRef output_padding, - IntArrayRef dilation) { - TORCH_CHECK( - kernel_size[0] > 0 && kernel_size[1] > 0, - "kernel size should be greater than zero, but got kernel_height: ", - kernel_size[0], - " kernel_width: ", - kernel_size[1]); - TORCH_CHECK( - stride[0] > 0 && stride[1] > 0, - "stride should be greater than zero, but got stride_height: ", - stride[0], - " stride_width: ", - stride[1]); - TORCH_CHECK( - dilation[0] > 0 && dilation[1] > 0, - "dilation should be greater than zero, but got dilation_height: ", - dilation[0], - ", dilation_width: ", - dilation[1]); - TORCH_CHECK( - (output_padding[1] < stride[1] || - output_padding[1] < dilation[1]) && - (output_padding[0] < stride[0] || - output_padding[0] < dilation[0]), - "output padding must be smaller than either stride or dilation, but got output_padding_height: ", - output_padding[0], - " output_padding_width: ", - output_padding[1], - " stride_height: ", - stride[0], - " stride_width: ", - stride[1], - " dilation_height: ", - dilation[0], - " dilation_width: ", - dilation[1]); - - TORCH_CHECK( - weight.numel() != 0 && (weight.dim() == 2 || weight.dim() == 4), - "non-empty 2D or 4D weight tensor expected, but got: ", - weight.sizes()); - if (bias.defined()) { - check_dim_size(bias, 1, 0, weight.size(1)); - } - - TORCH_CHECK( - kernel_size.size() == 2, - "It is expected kernel_size equals to 2, but got size ", - kernel_size.size()); - - TORCH_CHECK( - dilation.size() == 2, - "It is expected dilation equals to 2, but got size ", - dilation.size()); - - TORCH_CHECK( - padding.size() == 2, - "It is expected padding equals to 2, but got size ", - padding.size()); - - TORCH_CHECK( - stride.size() == 2, - "It is expected stride equals to 2, but got size ", - stride.size()); - - TORCH_CHECK( - output_padding.size() == 2, - "It is expected stride equals to 2, but got size ", - output_padding.size()); -} - -Tensor& slow_conv_transpose2d_out_npu( - Tensor& out, - const Tensor & self, - const Tensor & weight, - IntArrayRef kernel_size, - const Tensor & bias, - IntArrayRef stride, - IntArrayRef padding, - IntArrayRef output_padding, - IntArrayRef dilation) { - slow_conv_transpose2d_shape_check_npu( - self, weight, kernel_size, bias, stride, padding, output_padding, dilation); - - auto outputSize = slow_conv_transpose2d_npu_output_size( - self, weight, kernel_size, bias, stride, padding, output_padding, dilation); - if (!out.sizes().equals(outputSize)) { - out.resize_(outputSize); - } - - SmallVector paddings = { - padding[0], padding[0], padding[1], padding[1]}; - SmallVector stridesSize = {1, 1, stride[0], stride[1]}; - SmallVector dilations = {1, 1, dilation[0], dilation[1]}; - SmallVector outputpadding = { - output_padding[0], output_padding[0], output_padding[1], output_padding[1]}; - string dataFormat = "NCHW"; - int64_t groups = 1; - SmallVector sizeVec = array_to_small_vector(out.sizes()); - if (!c10::npu::OptionsManager::CheckDynamicEnable()) { - OpCommand cmd; - cmd.Name("Conv2DTranspose") - .Input(sizeVec, at::kInt) - .Input(self) - .Input(weight); - if (bias.defined()){ - cmd.Input(bias); - } - cmd.Output(out) - .Attr("pads", paddings) - .Attr("output_padding", outputpadding) - .Attr("strides", stridesSize) - .Attr("dilations", dilations) - .Attr("groups", groups) - .Attr("data_format", dataFormat) - .Run(); - } else { - OpCommand cmd; - cmd.Name("Conv2DTransposeD") - .Input(self) - .Input(weight); - if (bias.defined()){ - cmd.Input(bias); - } - cmd.Output(out) - .Attr("input_size", sizeVec) - .Attr("pads", paddings) - .Attr("output_padding", outputpadding) - .Attr("strides", stridesSize) - .Attr("dilations", dilations) - .Attr("groups", groups) - .Attr("data_format", dataFormat) - .Run(); - } - - return out; -} - -Tensor slow_conv_transpose2d_npu( - const Tensor & self, - const Tensor & weight, - IntArrayRef kernel_size, - const Tensor & bias, - IntArrayRef stride, - IntArrayRef padding, - IntArrayRef output_padding, - IntArrayRef dilation) { - // calculate the output size - auto outputSize = slow_conv_transpose2d_npu_output_size( - self, weight, kernel_size, bias, stride, padding, output_padding, dilation); - - // construct the output tensor of the NPU - Tensor result = - at::empty_with_format(outputSize, self.options(), ACL_FORMAT_NC1HWC0); - - // calculate the output result of the NPU - slow_conv_transpose2d_out_npu( - result, self, weight, kernel_size, bias, stride, padding, output_padding, dilation); - - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector slow_conv_transpose2d_npu_output_size( + const Tensor & self, + const Tensor & weight, + IntArrayRef kernel_size, + const Tensor & bias, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef output_padding, + IntArrayRef dilation) { + int ndim = self.dim(); + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimh++; + dimw++; + } + + TORCH_CHECK( + self.numel() != 0 && (ndim == 3 || ndim == 4), + "non-empty 3D or 4D input tensor expected but got a tensor with size ", + self.sizes()); + int64_t N = self.size(0); + int64_t Co = weight.size(1); + int64_t H = self.size(dimh); + int64_t W = self.size(dimw); + + + int64_t Ho = (H - 1) * stride[0] - 2 * padding[0] + + dilation[0] * (kernel_size[0] - 1) + output_padding[0] + 1; + int64_t Wo = (W - 1) * stride[1] - 2 * padding[1] + + dilation[1] * (kernel_size[1] - 1) + output_padding[1] + 1; + + SmallVector outputSize = {N, Co, Ho, Wo}; + + return outputSize; +} + +static inline void slow_conv_transpose2d_shape_check_npu( + const Tensor & self, + const Tensor & weight, + IntArrayRef kernel_size, + const Tensor & bias, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef output_padding, + IntArrayRef dilation) { + TORCH_CHECK( + kernel_size[0] > 0 && kernel_size[1] > 0, + "kernel size should be greater than zero, but got kernel_height: ", + kernel_size[0], + " kernel_width: ", + kernel_size[1]); + TORCH_CHECK( + stride[0] > 0 && stride[1] > 0, + "stride should be greater than zero, but got stride_height: ", + stride[0], + " stride_width: ", + stride[1]); + TORCH_CHECK( + dilation[0] > 0 && dilation[1] > 0, + "dilation should be greater than zero, but got dilation_height: ", + dilation[0], + ", dilation_width: ", + dilation[1]); + TORCH_CHECK( + (output_padding[1] < stride[1] || + output_padding[1] < dilation[1]) && + (output_padding[0] < stride[0] || + output_padding[0] < dilation[0]), + "output padding must be smaller than either stride or dilation, but got output_padding_height: ", + output_padding[0], + " output_padding_width: ", + output_padding[1], + " stride_height: ", + stride[0], + " stride_width: ", + stride[1], + " dilation_height: ", + dilation[0], + " dilation_width: ", + dilation[1]); + + TORCH_CHECK( + weight.numel() != 0 && (weight.dim() == 2 || weight.dim() == 4), + "non-empty 2D or 4D weight tensor expected, but got: ", + weight.sizes()); + if (bias.defined()) { + check_dim_size(bias, 1, 0, weight.size(1)); + } + + TORCH_CHECK( + kernel_size.size() == 2, + "It is expected kernel_size equals to 2, but got size ", + kernel_size.size()); + + TORCH_CHECK( + dilation.size() == 2, + "It is expected dilation equals to 2, but got size ", + dilation.size()); + + TORCH_CHECK( + padding.size() == 2, + "It is expected padding equals to 2, but got size ", + padding.size()); + + TORCH_CHECK( + stride.size() == 2, + "It is expected stride equals to 2, but got size ", + stride.size()); + + TORCH_CHECK( + output_padding.size() == 2, + "It is expected stride equals to 2, but got size ", + output_padding.size()); +} + +Tensor& slow_conv_transpose2d_out_npu( + Tensor& out, + const Tensor & self, + const Tensor & weight, + IntArrayRef kernel_size, + const Tensor & bias, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef output_padding, + IntArrayRef dilation) { + slow_conv_transpose2d_shape_check_npu( + self, weight, kernel_size, bias, stride, padding, output_padding, dilation); + + auto outputSize = slow_conv_transpose2d_npu_output_size( + self, weight, kernel_size, bias, stride, padding, output_padding, dilation); + if (!out.sizes().equals(outputSize)) { + out.resize_(outputSize); + } + + SmallVector paddings = { + padding[0], padding[0], padding[1], padding[1]}; + SmallVector stridesSize = {1, 1, stride[0], stride[1]}; + SmallVector dilations = {1, 1, dilation[0], dilation[1]}; + SmallVector outputpadding = { + output_padding[0], output_padding[0], output_padding[1], output_padding[1]}; + string dataFormat = "NCHW"; + int64_t groups = 1; + SmallVector sizeVec = array_to_small_vector(out.sizes()); + if (!c10::npu::OptionsManager::CheckDynamicEnable()) { + OpCommand cmd; + cmd.Name("Conv2DTranspose") + .Input(sizeVec, at::kInt) + .Input(self) + .Input(weight); + if (bias.defined()){ + cmd.Input(bias); + } + cmd.Output(out) + .Attr("pads", paddings) + .Attr("output_padding", outputpadding) + .Attr("strides", stridesSize) + .Attr("dilations", dilations) + .Attr("groups", groups) + .Attr("data_format", dataFormat) + .Run(); + } else { + OpCommand cmd; + cmd.Name("Conv2DTransposeD") + .Input(self) + .Input(weight); + if (bias.defined()){ + cmd.Input(bias); + } + cmd.Output(out) + .Attr("input_size", sizeVec) + .Attr("pads", paddings) + .Attr("output_padding", outputpadding) + .Attr("strides", stridesSize) + .Attr("dilations", dilations) + .Attr("groups", groups) + .Attr("data_format", dataFormat) + .Run(); + } + + return out; +} + +Tensor slow_conv_transpose2d_npu( + const Tensor & self, + const Tensor & weight, + IntArrayRef kernel_size, + const Tensor & bias, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef output_padding, + IntArrayRef dilation) { + // calculate the output size + auto outputSize = slow_conv_transpose2d_npu_output_size( + self, weight, kernel_size, bias, stride, padding, output_padding, dilation); + + // construct the output tensor of the NPU + Tensor result = + at::empty_with_format(outputSize, self.options(), ACL_FORMAT_NC1HWC0); + + // calculate the output result of the NPU + slow_conv_transpose2d_out_npu( + result, self, weight, kernel_size, bias, stride, padding, output_padding, dilation); + + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/SoftMarginLossBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/SoftMarginLossBackwardKernelNpu.cpp index 60a6b4cbf43406d9562de8f3059ff6e1e1859308..bbf589375b03b94d72463ea5f4159956ef155b1f 100644 --- a/src/aten/src/ATen/native/npu/SoftMarginLossBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/SoftMarginLossBackwardKernelNpu.cpp @@ -1,61 +1,61 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& soft_margin_loss_backward_out_npu( - Tensor& grad_input, - const Tensor& grad_output, - const Tensor& input, - const Tensor& target, - int64_t reduction) { - string reductionStr; - if (reduction == Reduction::None) { - reductionStr = "none"; - } else if (reduction == Reduction::Mean) { - reductionStr = "mean"; - } else if (reduction == Reduction::Sum) { - reductionStr = "sum"; - } - - // calculate the output result of the NPU - OpCommand cmd; - cmd.Name("SoftMarginLossGrad") - .Input(input) - .Input(target) - .Input(grad_output) - .Output(grad_input) - .Attr("reduction", reductionStr) - .Run(); - - return grad_input; -} - -Tensor soft_margin_loss_backward_npu( - const Tensor& grad_output, - const Tensor& input, - const Tensor& target, - int64_t reduction) { - Tensor grad_input = OpPreparation::ApplyTensor(input); - soft_margin_loss_backward_out_npu( - grad_input, grad_output, input, target, reduction); - return grad_input; -} - -} // namespace native +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& soft_margin_loss_backward_out_npu( + Tensor& grad_input, + const Tensor& grad_output, + const Tensor& input, + const Tensor& target, + int64_t reduction) { + string reductionStr; + if (reduction == Reduction::None) { + reductionStr = "none"; + } else if (reduction == Reduction::Mean) { + reductionStr = "mean"; + } else if (reduction == Reduction::Sum) { + reductionStr = "sum"; + } + + // calculate the output result of the NPU + OpCommand cmd; + cmd.Name("SoftMarginLossGrad") + .Input(input) + .Input(target) + .Input(grad_output) + .Output(grad_input) + .Attr("reduction", reductionStr) + .Run(); + + return grad_input; +} + +Tensor soft_margin_loss_backward_npu( + const Tensor& grad_output, + const Tensor& input, + const Tensor& target, + int64_t reduction) { + Tensor grad_input = OpPreparation::ApplyTensor(input); + soft_margin_loss_backward_out_npu( + grad_input, grad_output, input, target, reduction); + return grad_input; +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/SoftMarginLossKernelNpu.cpp b/src/aten/src/ATen/native/npu/SoftMarginLossKernelNpu.cpp index 4247b5c3d1bfcb829857be7f5e5d451b9e3fb98f..28896488ad0669d900ad082756682717620ecf14 100644 --- a/src/aten/src/ATen/native/npu/SoftMarginLossKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/SoftMarginLossKernelNpu.cpp @@ -1,87 +1,87 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector soft_margin_loss_npu_input( - const SmallVector& inputTensor) { - return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); -} - -SmallVector soft_margin_loss_npu_output( - const SmallVector& outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector soft_margin_loss_npu_attr( - int64_t reduction) { - string reductionStr; - if (reduction == Reduction::None) { - reductionStr = "none"; - } else if (reduction == Reduction::Mean) { - reductionStr = "mean"; - } else if (reduction == Reduction::Sum) { - reductionStr = "sum"; - } - - NPUAttrDesc npuAttrReduction = NPUAttrDesc("reduction", reductionStr); - SmallVector attrs = {npuAttrReduction}; - return attrs; -} - -Tensor& soft_margin_loss_out_npu(Tensor& result, const Tensor& self, const Tensor& target, int64_t reduction) { -// constructs the input and output NPUTensorDesc - Tensor target_broadcast = target; - if(target.sizes() != self.sizes()) { - target_broadcast = broadcast_npu(target, self.sizes()); - } - auto inputs = soft_margin_loss_npu_input({self, target_broadcast}); - auto outputs = soft_margin_loss_npu_output({result}); - -// constructs the attr of the NPUAttrDesc - auto attrs = soft_margin_loss_npu_attr(reduction); - -// executing the NPU operator - CalcuOpUtil::execute_npu_operate("SoftMarginLoss", inputs, outputs, attrs); - return result; -} - -Tensor soft_margin_loss_npu(const Tensor& self, const Tensor& target, int64_t reduction) { -// calculate the output size - auto outputSize = soft_margin_loss_npu_output_size( - self, - target, - reduction); - -// construct the output tensor of the NPU - Tensor result = at::empty_with_format( - outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - -// calculate the output result of the NPU - soft_margin_loss_out_npu(result, self, target, reduction); - if (reduction == Reduction::None) { - return result; - } else { - return result.reshape({}); - } -} - -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/CalcuOpUtil.h" +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/NpuUtils.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector soft_margin_loss_npu_input( + const SmallVector& inputTensor) { + return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); +} + +SmallVector soft_margin_loss_npu_output( + const SmallVector& outputTensor) { + return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); +} + +SmallVector soft_margin_loss_npu_attr( + int64_t reduction) { + string reductionStr; + if (reduction == Reduction::None) { + reductionStr = "none"; + } else if (reduction == Reduction::Mean) { + reductionStr = "mean"; + } else if (reduction == Reduction::Sum) { + reductionStr = "sum"; + } + + NPUAttrDesc npuAttrReduction = NPUAttrDesc("reduction", reductionStr); + SmallVector attrs = {npuAttrReduction}; + return attrs; +} + +Tensor& soft_margin_loss_out_npu(Tensor& result, const Tensor& self, const Tensor& target, int64_t reduction) { +// constructs the input and output NPUTensorDesc + Tensor target_broadcast = target; + if(target.sizes() != self.sizes()) { + target_broadcast = broadcast_npu(target, self.sizes()); + } + auto inputs = soft_margin_loss_npu_input({self, target_broadcast}); + auto outputs = soft_margin_loss_npu_output({result}); + +// constructs the attr of the NPUAttrDesc + auto attrs = soft_margin_loss_npu_attr(reduction); + +// executing the NPU operator + CalcuOpUtil::execute_npu_operate("SoftMarginLoss", inputs, outputs, attrs); + return result; +} + +Tensor soft_margin_loss_npu(const Tensor& self, const Tensor& target, int64_t reduction) { +// calculate the output size + auto outputSize = soft_margin_loss_npu_output_size( + self, + target, + reduction); + +// construct the output tensor of the NPU + Tensor result = at::empty_with_format( + outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); + +// calculate the output result of the NPU + soft_margin_loss_out_npu(result, self, target, reduction); + if (reduction == Reduction::None) { + return result; + } else { + return result.reshape({}); + } +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/StdKernelNpu.cpp b/src/aten/src/ATen/native/npu/StdKernelNpu.cpp index 91b36d5d73f1b92e9d497f0ae1731b2426d3bfe1..b71d4a1c309aeef4aac5c04ad9aaf2b71b18e0df 100644 --- a/src/aten/src/ATen/native/npu/StdKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/StdKernelNpu.cpp @@ -1,185 +1,185 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/utils/CalcuOpUtil.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -tuple std_mean_out_npu_nocheck( - Tensor& resultStd, - Tensor& resultMean, - const Tensor& self, - IntArrayRef dim, - bool unbiased, - bool keepdim) { - // executing the NPU operator - OpCommand cmd1; - cmd1.Name("ReduceMeanD") - .Input(self) - .Output(resultMean) - .Attr("axes", dim) - .Attr("keep_dims", keepdim) - .Run(); - Tensor resultMeanCopy = resultMean; - if (resultMean.dim() != 0 && keepdim == false) { - auto dimVector = array_to_small_vector(dim); - std::sort(dimVector.begin(), dimVector.end()); - for (int64_t i = 0; i < dimVector.size(); i++) { - resultMeanCopy = resultMeanCopy.unsqueeze(dimVector[i]); - } - } - resultMeanCopy = resultMeanCopy.expand(self.sizes()); - OpCommand cmd2; - cmd2.Name("ReduceStdWithMean") - .Input(self) - .Input(resultMeanCopy) - .Output(resultStd) - .Attr("dim", dim) - .Attr("unbiased", unbiased) - .Attr("keepdim", keepdim) - .Run(); - - return std::tie(resultStd, resultMean); -} - -Tensor& std_out_npu( - Tensor& result, - const Tensor& self, - DimnameList dim, - bool unbiased, - bool keepdim) { - return std_out_npu(result, self, dimnames_to_positions(self, dim), unbiased, keepdim); -} - -Tensor& std_out_npu( - Tensor& result, - const Tensor& self, - IntArrayRef dim, - bool unbiased, - bool keepdim) { - auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); - Tensor meanResult = OpPreparation::ApplyTensor(self, outputSize); - - OpPreparation::CheckOut( - {self}, - result, - ACL_FORMAT_ND, - self.scalar_type(), - outputSize); - - // executing the NPU operator - std_mean_out_npu_nocheck(result, meanResult, self, dim, unbiased, keepdim); - - return result; -} - -tuple std_mean_out_npu( - Tensor& result1, - Tensor& result2, - const Tensor& self, - IntArrayRef dim, - bool unbiased, - bool keepdim) { - auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); - - OpPreparation::CheckOut( - {self}, - result1, - ACL_FORMAT_ND, - self.scalar_type(), - outputSize); - OpPreparation::CheckOut( - {self}, - result2, - ACL_FORMAT_ND, - self.scalar_type(), - outputSize); - - // executing the NPU operator - std_mean_out_npu_nocheck(result1, result2, self, dim, unbiased, keepdim); - - return std::tie(result1, result2); -} - -Tensor std_dim_npu( - const Tensor & self, - IntArrayRef dim, - bool unbiased, - bool keepdim) { - // calculate the output size - auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); - - // construct the output tensor of the NPU - Tensor result1 = OpPreparation::ApplyTensor(self, outputSize); - Tensor result2 = OpPreparation::ApplyTensor(self, outputSize); - - // calculate the output result of the NPU - std_mean_out_npu(result1, result2, self, dim, unbiased, keepdim); - return result1; -} - -Tensor std_npu( - const Tensor & self, - bool unbiased) { - SmallVector dims = CalcuOpUtil::get_dimlist_for_tensor(self); - return std_dim_npu(self, dims, unbiased, false); -} - -tuple std_mean_npu( - const Tensor & self, - bool unbiased) { - SmallVector dims = CalcuOpUtil::get_dimlist_for_tensor(self); - return std_mean_dim_npu(self, dims, unbiased, false); -} - -tuple std_mean_dim_npu( - const Tensor & self, - IntArrayRef dim, - bool unbiased, - bool keepdim) { - // calculate the output size - auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); - - // construct the output tensor of the NPU - Tensor result1 = OpPreparation::ApplyTensor(self, outputSize); - Tensor result2 = OpPreparation::ApplyTensor(self, outputSize); - - // calculate the output result of the NPU - std_mean_out_npu(result1, result2, self, dim, unbiased, keepdim); - return std::tie(result1, result2); -} - -tuple std_mean_names_npu( - const Tensor & self, - DimnameList dim, - bool unbiased, - bool keepdim) { - return std_mean_dim_npu(self, dimnames_to_positions(self, dim), unbiased, keepdim); -} - -Tensor std_names_npu( - const Tensor & self, - DimnameList dim, - bool unbiased, - bool keepdim) { - return std_dim_npu(self, dimnames_to_positions(self, dim), unbiased, keepdim); -} - -} // namespace native -} // namespace at::native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" +#include "ATen/native/npu/utils/CalcuOpUtil.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +tuple std_mean_out_npu_nocheck( + Tensor& resultStd, + Tensor& resultMean, + const Tensor& self, + IntArrayRef dim, + bool unbiased, + bool keepdim) { + // executing the NPU operator + OpCommand cmd1; + cmd1.Name("ReduceMeanD") + .Input(self) + .Output(resultMean) + .Attr("axes", dim) + .Attr("keep_dims", keepdim) + .Run(); + Tensor resultMeanCopy = resultMean; + if (resultMean.dim() != 0 && keepdim == false) { + auto dimVector = array_to_small_vector(dim); + std::sort(dimVector.begin(), dimVector.end()); + for (int64_t i = 0; i < dimVector.size(); i++) { + resultMeanCopy = resultMeanCopy.unsqueeze(dimVector[i]); + } + } + resultMeanCopy = resultMeanCopy.expand(self.sizes()); + OpCommand cmd2; + cmd2.Name("ReduceStdWithMean") + .Input(self) + .Input(resultMeanCopy) + .Output(resultStd) + .Attr("dim", dim) + .Attr("unbiased", unbiased) + .Attr("keepdim", keepdim) + .Run(); + + return std::tie(resultStd, resultMean); +} + +Tensor& std_out_npu( + Tensor& result, + const Tensor& self, + DimnameList dim, + bool unbiased, + bool keepdim) { + return std_out_npu(result, self, dimnames_to_positions(self, dim), unbiased, keepdim); +} + +Tensor& std_out_npu( + Tensor& result, + const Tensor& self, + IntArrayRef dim, + bool unbiased, + bool keepdim) { + auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); + Tensor meanResult = OpPreparation::ApplyTensor(self, outputSize); + + OpPreparation::CheckOut( + {self}, + result, + ACL_FORMAT_ND, + self.scalar_type(), + outputSize); + + // executing the NPU operator + std_mean_out_npu_nocheck(result, meanResult, self, dim, unbiased, keepdim); + + return result; +} + +tuple std_mean_out_npu( + Tensor& result1, + Tensor& result2, + const Tensor& self, + IntArrayRef dim, + bool unbiased, + bool keepdim) { + auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); + + OpPreparation::CheckOut( + {self}, + result1, + ACL_FORMAT_ND, + self.scalar_type(), + outputSize); + OpPreparation::CheckOut( + {self}, + result2, + ACL_FORMAT_ND, + self.scalar_type(), + outputSize); + + // executing the NPU operator + std_mean_out_npu_nocheck(result1, result2, self, dim, unbiased, keepdim); + + return std::tie(result1, result2); +} + +Tensor std_dim_npu( + const Tensor & self, + IntArrayRef dim, + bool unbiased, + bool keepdim) { + // calculate the output size + auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); + + // construct the output tensor of the NPU + Tensor result1 = OpPreparation::ApplyTensor(self, outputSize); + Tensor result2 = OpPreparation::ApplyTensor(self, outputSize); + + // calculate the output result of the NPU + std_mean_out_npu(result1, result2, self, dim, unbiased, keepdim); + return result1; +} + +Tensor std_npu( + const Tensor & self, + bool unbiased) { + SmallVector dims = CalcuOpUtil::get_dimlist_for_tensor(self); + return std_dim_npu(self, dims, unbiased, false); +} + +tuple std_mean_npu( + const Tensor & self, + bool unbiased) { + SmallVector dims = CalcuOpUtil::get_dimlist_for_tensor(self); + return std_mean_dim_npu(self, dims, unbiased, false); +} + +tuple std_mean_dim_npu( + const Tensor & self, + IntArrayRef dim, + bool unbiased, + bool keepdim) { + // calculate the output size + auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); + + // construct the output tensor of the NPU + Tensor result1 = OpPreparation::ApplyTensor(self, outputSize); + Tensor result2 = OpPreparation::ApplyTensor(self, outputSize); + + // calculate the output result of the NPU + std_mean_out_npu(result1, result2, self, dim, unbiased, keepdim); + return std::tie(result1, result2); +} + +tuple std_mean_names_npu( + const Tensor & self, + DimnameList dim, + bool unbiased, + bool keepdim) { + return std_mean_dim_npu(self, dimnames_to_positions(self, dim), unbiased, keepdim); +} + +Tensor std_names_npu( + const Tensor & self, + DimnameList dim, + bool unbiased, + bool keepdim) { + return std_dim_npu(self, dimnames_to_positions(self, dim), unbiased, keepdim); +} + +} // namespace native +} // namespace at::native diff --git a/src/aten/src/ATen/native/npu/TanKernelNpu.cpp b/src/aten/src/ATen/native/npu/TanKernelNpu.cpp index 316263404daf412bb2abb82afc7ea7b66763ad30..a87735f0ef94f91259043aaadbf6abc647e5b038 100644 --- a/src/aten/src/ATen/native/npu/TanKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/TanKernelNpu.cpp @@ -1,54 +1,54 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& tan_out_npu(Tensor& result, const Tensor& self) { - OpCommand cmd; - cmd.Name("Tan") - .Input(self) - .Output(result) - .Run(); - - return result; -} - -Tensor tan_npu(const Tensor& self) { - Tensor result = OpPreparation::ApplyTensor(self); - tan_out_npu(result, self); - return result; -} - -Tensor& tan_npu_(Tensor& self) { - OpPreparation::CheckMemory({self}, {self}); - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(self); - Tensor result = tan_out_npu(contiguousSelf, contiguousSelf); - NpuUtils::format_fresh_view(self, result); - } else { - tan_out_npu(self, self); - } - - return self; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& tan_out_npu(Tensor& result, const Tensor& self) { + OpCommand cmd; + cmd.Name("Tan") + .Input(self) + .Output(result) + .Run(); + + return result; +} + +Tensor tan_npu(const Tensor& self) { + Tensor result = OpPreparation::ApplyTensor(self); + tan_out_npu(result, self); + return result; +} + +Tensor& tan_npu_(Tensor& self) { + OpPreparation::CheckMemory({self}, {self}); + if (!NpuUtils::check_match(&self)) { + Tensor contiguousSelf = NpuUtils::format_contiguous(self); + Tensor result = tan_out_npu(contiguousSelf, contiguousSelf); + NpuUtils::format_fresh_view(self, result); + } else { + tan_out_npu(self, self); + } + + return self; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/TrilKernelNpu.cpp b/src/aten/src/ATen/native/npu/TrilKernelNpu.cpp index 678854c4a391045571ecfa69175acda9d8ecdde1..35fda36df01bb4bb50978e7ab29e924ed0af9cbf 100644 --- a/src/aten/src/ATen/native/npu/TrilKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/TrilKernelNpu.cpp @@ -1,63 +1,63 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& tril_out_npu(Tensor& result, const Tensor& self, int64_t diagonal){ - OpCommand cmd; - cmd.Name("Tril") - .Input(self) - .Output(result) - .Attr("diagonal", diagonal) - .Run(); - return result; -} - -Tensor tril_npu(const Tensor& self, int64_t diagonal){ - auto selfCopy = self.npu_format_cast(ACL_FORMAT_NCHW); - auto is_last_two_dims = [&selfCopy](){ - auto selfStorage = selfCopy.storage().get_npu_desc().storage_sizes_; - if (selfStorage.size() <= 1){ - return false; - } - return true; - }; - - TORCH_CHECK(is_last_two_dims(), "tril require tensor should be last two dims"); - Tensor result = OpPreparation::ApplyTensor(selfCopy); - tril_out_npu(result, selfCopy, diagonal); - return result; -} - -Tensor& tril_npu_(Tensor& self, int64_t diagonal){ - OpPreparation::CheckMemory({self}, {self}); - self.npu_format_cast_(ACL_FORMAT_NCHW); - if(!NpuUtils::check_match(&self)){ - Tensor contiguousSelf = NpuUtils::format_contiguous(self); - tril_out_npu(contiguousSelf, contiguousSelf, diagonal); - NpuUtils::format_fresh_view(self, contiguousSelf); - } else { - tril_out_npu(self, self, diagonal); - } - return self; -} - -} // native -} // at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& tril_out_npu(Tensor& result, const Tensor& self, int64_t diagonal){ + OpCommand cmd; + cmd.Name("Tril") + .Input(self) + .Output(result) + .Attr("diagonal", diagonal) + .Run(); + return result; +} + +Tensor tril_npu(const Tensor& self, int64_t diagonal){ + auto selfCopy = self.npu_format_cast(ACL_FORMAT_NCHW); + auto is_last_two_dims = [&selfCopy](){ + auto selfStorage = selfCopy.storage().get_npu_desc().storage_sizes_; + if (selfStorage.size() <= 1){ + return false; + } + return true; + }; + + TORCH_CHECK(is_last_two_dims(), "tril require tensor should be last two dims"); + Tensor result = OpPreparation::ApplyTensor(selfCopy); + tril_out_npu(result, selfCopy, diagonal); + return result; +} + +Tensor& tril_npu_(Tensor& self, int64_t diagonal){ + OpPreparation::CheckMemory({self}, {self}); + self.npu_format_cast_(ACL_FORMAT_NCHW); + if(!NpuUtils::check_match(&self)){ + Tensor contiguousSelf = NpuUtils::format_contiguous(self); + tril_out_npu(contiguousSelf, contiguousSelf, diagonal); + NpuUtils::format_fresh_view(self, contiguousSelf); + } else { + tril_out_npu(self, self, diagonal); + } + return self; +} + +} // native +} // at diff --git a/src/aten/src/ATen/native/npu/UpsampleNearest1dKernelNpu.cpp b/src/aten/src/ATen/native/npu/UpsampleNearest1dKernelNpu.cpp index 944f92fd8295f24439392940b7e7bd8e8fcbaa7f..2ca0f1130263bf3ba3ec54aec87feeed4459c623 100644 --- a/src/aten/src/ATen/native/npu/UpsampleNearest1dKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/UpsampleNearest1dKernelNpu.cpp @@ -1,78 +1,78 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector upsample_nearest1d_npu_output_size( - const Tensor& input, - IntArrayRef output_size, - c10::optional scales){ - SmallVector outputSize; - int64_t N = input.size(0); - int64_t C = input.size(1); - int64_t W; - if(output_size.size() != 0) { - W = output_size[0]; - } else { - float temp_scales = (float)scales.value(); - W = temp_scales * input.size(2); - } - outputSize = {N, C, W}; - return outputSize; -} - -Tensor& upsample_nearest1d_out_npu( - Tensor& result, - const Tensor& self, - IntArrayRef output_size, - c10::optional scales) { - - OpCommand cmd; - cmd.Name("UpsampleNearest1d") - - .Input(self) - .Output(result) - .Attr("output_size", output_size); - if (scales.has_value()) { - cmd.Attr("scales", static_cast(scales.value())); - } - cmd.Run(); - - return result; -} - -Tensor upsample_nearest1d_npu( - const Tensor& self, - IntArrayRef output_size, - c10::optional scales) { - // calculate the output size - SmallVector outputSize = upsample_nearest1d_npu_output_size(self, output_size, scales); - - // construct the output tensor of the NPU - Tensor result = OpPreparation::ApplyTensor(self, outputSize); - - // calculate the output result of the NPU - upsample_nearest1d_out_npu(result, self, output_size, scales); - - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector upsample_nearest1d_npu_output_size( + const Tensor& input, + IntArrayRef output_size, + c10::optional scales){ + SmallVector outputSize; + int64_t N = input.size(0); + int64_t C = input.size(1); + int64_t W; + if(output_size.size() != 0) { + W = output_size[0]; + } else { + float temp_scales = (float)scales.value(); + W = temp_scales * input.size(2); + } + outputSize = {N, C, W}; + return outputSize; +} + +Tensor& upsample_nearest1d_out_npu( + Tensor& result, + const Tensor& self, + IntArrayRef output_size, + c10::optional scales) { + + OpCommand cmd; + cmd.Name("UpsampleNearest1d") + + .Input(self) + .Output(result) + .Attr("output_size", output_size); + if (scales.has_value()) { + cmd.Attr("scales", static_cast(scales.value())); + } + cmd.Run(); + + return result; +} + +Tensor upsample_nearest1d_npu( + const Tensor& self, + IntArrayRef output_size, + c10::optional scales) { + // calculate the output size + SmallVector outputSize = upsample_nearest1d_npu_output_size(self, output_size, scales); + + // construct the output tensor of the NPU + Tensor result = OpPreparation::ApplyTensor(self, outputSize); + + // calculate the output result of the NPU + upsample_nearest1d_out_npu(result, self, output_size, scales); + + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/_Unique2KernelNpu.cpp b/src/aten/src/ATen/native/npu/_Unique2KernelNpu.cpp index 757bc2e258b143db8fa7b2928c10c4a8357c2feb..33d9e0cd815918a05d215c457ae6ca935498e08f 100644 --- a/src/aten/src/ATen/native/npu/_Unique2KernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/_Unique2KernelNpu.cpp @@ -1,96 +1,96 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -std::tuple _unique2_out_npu( - Tensor& y, - Tensor& yOutputSize, - Tensor& yInverse, - Tensor& yCounts, - const Tensor& self, - bool sorted, - bool return_inverse, - bool return_counts) { - OpCommand cmd; - cmd.Name("UniqueWithCountsAndSorting") - .Input(self) - .Output(y) - .Output(yOutputSize) - .Output(yInverse) - .Output(yCounts) - .Attr("sorted", sorted) - .Attr("return_inverse", true) - .Attr("return_counts", true) - .Run(); - - return std::tuple(y, yOutputSize, yInverse, yCounts); -} - -tuple _unique2_npu( - const Tensor& self, - bool sorted, - bool return_inverse, - bool return_counts) { - if(self.numel() == 0){ - Tensor result= OpPreparation::ApplyTensor(self, {0}); - Tensor yInverse = OpPreparation::ApplyTensor({0}, self.options().dtype(kLong), self); - Tensor yCounts = OpPreparation::ApplyTensor({0}, self.options().dtype(kLong), self); - return std::tie(result, yInverse, yCounts); - } - - auto yInverseSize = input_same_output_size(self); - auto outputSizes = tuple, SmallVector, IntArrayRef>( - {self.numel()}, {1}, yInverseSize); - - Tensor selfCopy = self; - if (self.scalar_type() == ScalarType::Half) { - selfCopy = self.to(ScalarType::Float); - } - - Tensor y = OpPreparation::ApplyTensor(selfCopy, std::get<0>(outputSizes)); - Tensor yOutputSize = at::empty_with_format(std::get<1>(outputSizes), self.options().dtype(kLong), ACL_FORMAT_ND); - Tensor yInverse = at::empty_with_format(std::get<2>(outputSizes), self.options().dtype(kLong), ACL_FORMAT_ND); - Tensor yCounts = at::empty_with_format(std::get<0>(outputSizes), self.options().dtype(kLong), ACL_FORMAT_ND); - - _unique2_out_npu(y, yOutputSize, yInverse, yCounts, selfCopy, sorted, return_inverse, return_counts); - - int64_t count = yOutputSize[0].item().toLong(); - Tensor result = y.slice(0, 0, count, 1); - result = NpuUtils::format_contiguous(result); - - if (self.scalar_type() == ScalarType::Half) { - result = result.to(ScalarType::Half); - } - - if (return_counts) { - yCounts = yCounts.slice(0, 0, count, 1); - yCounts = NpuUtils::format_contiguous(yCounts); - } else { - yCounts = at::empty({0}, self.options().dtype(kLong)); - } - - if (!(return_counts || return_inverse)) { - yInverse = at::empty({0}, self.options().dtype(kLong)); - } - - return std::tuple(result, yInverse, yCounts); -} - -} // namespace native -} // namespace at +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +std::tuple _unique2_out_npu( + Tensor& y, + Tensor& yOutputSize, + Tensor& yInverse, + Tensor& yCounts, + const Tensor& self, + bool sorted, + bool return_inverse, + bool return_counts) { + OpCommand cmd; + cmd.Name("UniqueWithCountsAndSorting") + .Input(self) + .Output(y) + .Output(yOutputSize) + .Output(yInverse) + .Output(yCounts) + .Attr("sorted", sorted) + .Attr("return_inverse", true) + .Attr("return_counts", true) + .Run(); + + return std::tuple(y, yOutputSize, yInverse, yCounts); +} + +tuple _unique2_npu( + const Tensor& self, + bool sorted, + bool return_inverse, + bool return_counts) { + if(self.numel() == 0){ + Tensor result= OpPreparation::ApplyTensor(self, {0}); + Tensor yInverse = OpPreparation::ApplyTensor({0}, self.options().dtype(kLong), self); + Tensor yCounts = OpPreparation::ApplyTensor({0}, self.options().dtype(kLong), self); + return std::tie(result, yInverse, yCounts); + } + + auto yInverseSize = input_same_output_size(self); + auto outputSizes = tuple, SmallVector, IntArrayRef>( + {self.numel()}, {1}, yInverseSize); + + Tensor selfCopy = self; + if (self.scalar_type() == ScalarType::Half) { + selfCopy = self.to(ScalarType::Float); + } + + Tensor y = OpPreparation::ApplyTensor(selfCopy, std::get<0>(outputSizes)); + Tensor yOutputSize = at::empty_with_format(std::get<1>(outputSizes), self.options().dtype(kLong), ACL_FORMAT_ND); + Tensor yInverse = at::empty_with_format(std::get<2>(outputSizes), self.options().dtype(kLong), ACL_FORMAT_ND); + Tensor yCounts = at::empty_with_format(std::get<0>(outputSizes), self.options().dtype(kLong), ACL_FORMAT_ND); + + _unique2_out_npu(y, yOutputSize, yInverse, yCounts, selfCopy, sorted, return_inverse, return_counts); + + int64_t count = yOutputSize[0].item().toLong(); + Tensor result = y.slice(0, 0, count, 1); + result = NpuUtils::format_contiguous(result); + + if (self.scalar_type() == ScalarType::Half) { + result = result.to(ScalarType::Half); + } + + if (return_counts) { + yCounts = yCounts.slice(0, 0, count, 1); + yCounts = NpuUtils::format_contiguous(yCounts); + } else { + yCounts = at::empty({0}, self.options().dtype(kLong)); + } + + if (!(return_counts || return_inverse)) { + yInverse = at::empty({0}, self.options().dtype(kLong)); + } + + return std::tuple(result, yInverse, yCounts); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/common/FormatCastHelper.cpp b/src/aten/src/ATen/native/npu/common/FormatCastHelper.cpp index 28909262503eb84f9cd6357f353f64b89df92e5c..bf72d425e5a4d981a20e0ea2eace74b3bb3d568b 100644 --- a/src/aten/src/ATen/native/npu/common/FormatCastHelper.cpp +++ b/src/aten/src/ATen/native/npu/common/FormatCastHelper.cpp @@ -36,7 +36,9 @@ void FormatCastHelper::format_cast_as_base_format(const Tensor& src, aclFormat f AT_ASSERT(FormatHelper::IsBaseFormatType(src), "src format must be base format"); auto& src_desc = src.storage().unsafeGetStorageImpl()->npu_desc_; - src_desc.storage_sizes_ = FormatHelper::GetSizeOfBaseFormat(src, format); + // due to CANN principle : if the ori format of a tensor is the + // same as the npu format, then its base shape must be same as storage shape + // so we should not change the storage shape when format cast between base format src_desc.origin_format_ = format; src_desc.npu_format_ = format; return; diff --git a/src/aten/src/ATen/native/npu/convolution/Conv3dBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/convolution/Conv3dBackwardKernelNpu.cpp index 9d2970bd36c55913115a97d187f5153c8e5c71c5..0d2c525df5ccdeaed479ce874f62e777ab63a009 100644 --- a/src/aten/src/ATen/native/npu/convolution/Conv3dBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/convolution/Conv3dBackwardKernelNpu.cpp @@ -1,163 +1,163 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor conv3d_backward_inputmask(Tensor &gradInput, const Tensor &input, - const Tensor &grad, const Tensor &weight, - IntArrayRef stride, IntArrayRef padding, - IntArrayRef dilation, int64_t groups) { - SmallVector stridesSize = {1, 1, stride[0], stride[1], stride[2]}; - SmallVector paddings = {padding[0], padding[0], padding[1], - padding[1], padding[2], padding[2]}; - SmallVector dilations = {1, 1, dilation[0], dilation[1], dilation[2]}; - IntArrayRef inputSize = input.sizes(); - Tensor weightCast = weight.to(grad.dtype()); - - OpCommand cmd; - if (!c10::npu::OptionsManager::CheckDynamicEnable()) { - cmd.Name("Conv3DBackpropInput") - .Input(inputSize, at::kInt) - .Input(weightCast) - .Input(grad) - .Output(gradInput) - .Attr("strides", stridesSize) - .Attr("pads", paddings) - .Attr("dilations", dilations) - .Attr("groups", groups) - .Attr("data_format", (string) "NCDHW") - .Run(); - } else { - cmd.Name("Conv3DBackpropInputD") - .Input(weightCast) - .Input(grad) - .Output(gradInput) - .Attr("input_size", inputSize) - .Attr("strides", stridesSize) - .Attr("pads", paddings) - .Attr("dilations", dilations) - .Attr("groups", groups) - .Attr("data_format", (string) "NCDHW") - .Run(); - } - return gradInput; -} - -Tensor conv3d_backward_weightmask(Tensor &gradWeight, const Tensor &input, - const Tensor &grad, const Tensor &weight, - IntArrayRef stride, IntArrayRef padding, - IntArrayRef dilation, int64_t groups) { - SmallVector stridesSize = {1, 1, stride[0], stride[1], stride[2]}; - SmallVector paddings = {padding[0], padding[0], padding[1], - padding[1], padding[2], padding[2]}; - SmallVector dilations = {1, 1, dilation[0], dilation[1], dilation[2]}; - IntArrayRef inputSize = weight.sizes(); - - OpCommand cmd; - if (!c10::npu::OptionsManager::CheckDynamicEnable()) { - cmd.Name("Conv3DBackpropFilter") - .Input(input) - .Input(inputSize, at::kInt) - .Input(grad) - .Output(gradWeight) - .Attr("strides", stridesSize) - .Attr("pads", paddings) - .Attr("dilations", dilations) - .Attr("groups", groups) - .Attr("data_format", (string) "NCDHW") - .Run(); - } else { - cmd.Name("Conv3DBackpropFilterD") - .Input(input) - .Input(grad) - .Output(gradWeight) - .Attr("filter_size", inputSize) - .Attr("strides", stridesSize) - .Attr("pads", paddings) - .Attr("dilations", dilations) - .Attr("groups", groups) - .Attr("data_format", (string) "NCDHW") - .Run(); - } - - return gradWeight; -} - -Tensor conv3d_backward_biasmask(Tensor &gradBias, const Tensor &input, - const Tensor &grad, const Tensor &weight, - IntArrayRef stride, IntArrayRef padding, - IntArrayRef dilation, int64_t groups) { - // constructs the input and output NPUTensorDesc - if (input.numel() == input.size(0) * input.size(1) * input.size(2)) { - Tensor gradView = - grad.contiguous().view({grad.size(0), grad.size(1), grad.size(2)}); - at::sum_out(gradBias, gradView, SmallVector{0}); - } else { - Tensor gradView = - grad.contiguous().view({grad.size(0), grad.size(1), grad.size(2), -1}); - at::sum_out(gradBias, gradView, SmallVector{0, 2, 3}); - } - - return gradBias; -} - -//interface -tuple -conv3d_backward_npu(const Tensor &input, const Tensor &grad, - const Tensor &weight, IntArrayRef stride, - IntArrayRef padding, IntArrayRef dilation, int64_t groups, - std::array grad_input_mask) { - - Tensor gradInput; - Tensor gradWeight; - Tensor gradBias; - - if (grad_input_mask[0]) { - //format should be NDC1HWC0 - gradInput = at::empty_with_format( - input.sizes(), input.options(), ACL_FORMAT_NDC1HWC0); - - conv3d_backward_inputmask( - gradInput, input, grad, weight, stride, padding, dilation, groups); - } - - if (grad_input_mask[1]) { - //format should be FRACTAL_Z_3D - gradWeight = at::empty_with_format( - weight.sizes(), weight.options().dtype(kFloat), ACL_FRACTAL_Z_3D); - - conv3d_backward_weightmask( - gradWeight, input, grad, weight, stride, padding, dilation, groups); - } - - if (grad_input_mask[2]) { - //format should be NCHW, gradias.size = grad.size(1) - gradBias = at::empty_with_format( - {grad.size(1)}, grad.options(), ACL_FORMAT_NCHW); - - conv3d_backward_biasmask( - gradBias, input, grad, weight, stride, padding, dilation, groups); - } - - return std::make_tuple(gradInput, gradWeight, gradBias); -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor conv3d_backward_inputmask(Tensor &gradInput, const Tensor &input, + const Tensor &grad, const Tensor &weight, + IntArrayRef stride, IntArrayRef padding, + IntArrayRef dilation, int64_t groups) { + SmallVector stridesSize = {1, 1, stride[0], stride[1], stride[2]}; + SmallVector paddings = {padding[0], padding[0], padding[1], + padding[1], padding[2], padding[2]}; + SmallVector dilations = {1, 1, dilation[0], dilation[1], dilation[2]}; + IntArrayRef inputSize = input.sizes(); + Tensor weightCast = weight.to(grad.dtype()); + + OpCommand cmd; + if (!c10::npu::OptionsManager::CheckDynamicEnable()) { + cmd.Name("Conv3DBackpropInput") + .Input(inputSize, at::kInt) + .Input(weightCast) + .Input(grad) + .Output(gradInput) + .Attr("strides", stridesSize) + .Attr("pads", paddings) + .Attr("dilations", dilations) + .Attr("groups", groups) + .Attr("data_format", (string) "NCDHW") + .Run(); + } else { + cmd.Name("Conv3DBackpropInputD") + .Input(weightCast) + .Input(grad) + .Output(gradInput) + .Attr("input_size", inputSize) + .Attr("strides", stridesSize) + .Attr("pads", paddings) + .Attr("dilations", dilations) + .Attr("groups", groups) + .Attr("data_format", (string) "NCDHW") + .Run(); + } + return gradInput; +} + +Tensor conv3d_backward_weightmask(Tensor &gradWeight, const Tensor &input, + const Tensor &grad, const Tensor &weight, + IntArrayRef stride, IntArrayRef padding, + IntArrayRef dilation, int64_t groups) { + SmallVector stridesSize = {1, 1, stride[0], stride[1], stride[2]}; + SmallVector paddings = {padding[0], padding[0], padding[1], + padding[1], padding[2], padding[2]}; + SmallVector dilations = {1, 1, dilation[0], dilation[1], dilation[2]}; + IntArrayRef inputSize = weight.sizes(); + + OpCommand cmd; + if (!c10::npu::OptionsManager::CheckDynamicEnable()) { + cmd.Name("Conv3DBackpropFilter") + .Input(input) + .Input(inputSize, at::kInt) + .Input(grad) + .Output(gradWeight) + .Attr("strides", stridesSize) + .Attr("pads", paddings) + .Attr("dilations", dilations) + .Attr("groups", groups) + .Attr("data_format", (string) "NCDHW") + .Run(); + } else { + cmd.Name("Conv3DBackpropFilterD") + .Input(input) + .Input(grad) + .Output(gradWeight) + .Attr("filter_size", inputSize) + .Attr("strides", stridesSize) + .Attr("pads", paddings) + .Attr("dilations", dilations) + .Attr("groups", groups) + .Attr("data_format", (string) "NCDHW") + .Run(); + } + + return gradWeight; +} + +Tensor conv3d_backward_biasmask(Tensor &gradBias, const Tensor &input, + const Tensor &grad, const Tensor &weight, + IntArrayRef stride, IntArrayRef padding, + IntArrayRef dilation, int64_t groups) { + // constructs the input and output NPUTensorDesc + if (input.numel() == input.size(0) * input.size(1) * input.size(2)) { + Tensor gradView = + grad.contiguous().view({grad.size(0), grad.size(1), grad.size(2)}); + at::sum_out(gradBias, gradView, SmallVector{0}); + } else { + Tensor gradView = + grad.contiguous().view({grad.size(0), grad.size(1), grad.size(2), -1}); + at::sum_out(gradBias, gradView, SmallVector{0, 2, 3}); + } + + return gradBias; +} + +//interface +tuple +conv3d_backward_npu(const Tensor &input, const Tensor &grad, + const Tensor &weight, IntArrayRef stride, + IntArrayRef padding, IntArrayRef dilation, int64_t groups, + std::array grad_input_mask) { + + Tensor gradInput; + Tensor gradWeight; + Tensor gradBias; + + if (grad_input_mask[0]) { + //format should be NDC1HWC0 + gradInput = at::empty_with_format( + input.sizes(), input.options(), ACL_FORMAT_NDC1HWC0); + + conv3d_backward_inputmask( + gradInput, input, grad, weight, stride, padding, dilation, groups); + } + + if (grad_input_mask[1]) { + //format should be FRACTAL_Z_3D + gradWeight = at::empty_with_format( + weight.sizes(), weight.options().dtype(kFloat), ACL_FRACTAL_Z_3D); + + conv3d_backward_weightmask( + gradWeight, input, grad, weight, stride, padding, dilation, groups); + } + + if (grad_input_mask[2]) { + //format should be NCHW, gradias.size = grad.size(1) + gradBias = at::empty_with_format( + {grad.size(1)}, grad.options(), ACL_FORMAT_NCHW); + + conv3d_backward_biasmask( + gradBias, input, grad, weight, stride, padding, dilation, groups); + } + + return std::make_tuple(gradInput, gradWeight, gradBias); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/convolution/Conv3dKernelNpu.cpp b/src/aten/src/ATen/native/npu/convolution/Conv3dKernelNpu.cpp index f1cd79906cbef0e93bd36a73524de2bba9a988a6..8e51890c37b8f5880240cf5f57cb0344dfa647f1 100644 --- a/src/aten/src/ATen/native/npu/convolution/Conv3dKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/convolution/Conv3dKernelNpu.cpp @@ -1,102 +1,102 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -SmallVector -conv3d_npu_output_size(const Tensor &input, const Tensor &weight, - const Tensor &bias, IntArrayRef stride, - IntArrayRef padding, IntArrayRef dilation, - int64_t groups) { - int64_t N = input.size(0); - int64_t D = input.size(2); - int64_t H = input.size(3); - int64_t W = input.size(4); - int64_t Co = weight.size(0); - auto kernel_size = weight.sizes().slice(2); - int64_t Do = - (D + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1; - int64_t Ho = - (H + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1; - int64_t Wo = - (W + 2 * padding[2] - dilation[2] * (kernel_size[2] - 1) - 1) / stride[2] + 1; - - SmallVector outputSize = {N, Co, Do, Ho, Wo}; - - return outputSize; -} - -Tensor &conv3d_out_npu_nocheck(Tensor &result, const Tensor &input, - const Tensor &weight, const Tensor &bias, - IntArrayRef stride, IntArrayRef padding, - IntArrayRef dilation, int64_t groups) { - Tensor filter = weight.to(input.dtype()); - SmallVector inputTensor = {input, filter, bias}; - SmallVector stridesSize = {1, 1, stride[0], stride[1], stride[2]}; - SmallVector paddings = {padding[0], padding[0], padding[1], - padding[1], padding[2], padding[2]}; - SmallVector dilations = {1, 1, dilation[0], dilation[1], dilation[2]}; - - OpCommand cmd; - cmd.Name("Conv3D"); - cmd.Input(input); - cmd.Input(filter); - if (bias.defined()) { - cmd.Input(bias); - } - cmd.Output(result); - cmd.Attr("strides", stridesSize); - cmd.Attr("pads", paddings); - cmd.Attr("dilations", dilations); - cmd.Attr("groups", groups); - cmd.Attr("data_format", (string) "NCDHW"); - cmd.Run(); - - return result; -} - -Tensor &conv3d_out_npu(Tensor &result, const Tensor &input, - const Tensor &weight, const Tensor &bias, - IntArrayRef stride, IntArrayRef padding, - IntArrayRef dilation, int64_t groups) { - OpPipeWithDefinedOut pipe; - return pipe.CheckMemory({input, weight, bias}, {result}) - .Func([&input, &weight, &bias, stride, padding, dilation, groups](Tensor &result) { - conv3d_out_npu_nocheck( - result, input, weight, bias, stride, padding, dilation, groups); - }) - .Call(result); -} - -Tensor conv3d_npu(const Tensor &input, const Tensor &weight, const Tensor &bias, - IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, - int64_t groups) { - auto outputSize = conv3d_npu_output_size( - input, weight, bias, stride, padding, dilation, groups); - Tensor result = OpPreparation::ApplyTensor(input, outputSize); - conv3d_out_npu(result, input, weight, bias, stride, padding, dilation, groups); - - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/KernelNpuOutputSize.h" +#include "ATen/native/npu/utils/OpAdapter.h" +#include "ATen/native/npu/utils/OpTemplate.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector +conv3d_npu_output_size(const Tensor &input, const Tensor &weight, + const Tensor &bias, IntArrayRef stride, + IntArrayRef padding, IntArrayRef dilation, + int64_t groups) { + int64_t N = input.size(0); + int64_t D = input.size(2); + int64_t H = input.size(3); + int64_t W = input.size(4); + int64_t Co = weight.size(0); + auto kernel_size = weight.sizes().slice(2); + int64_t Do = + (D + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1; + int64_t Ho = + (H + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1; + int64_t Wo = + (W + 2 * padding[2] - dilation[2] * (kernel_size[2] - 1) - 1) / stride[2] + 1; + + SmallVector outputSize = {N, Co, Do, Ho, Wo}; + + return outputSize; +} + +Tensor &conv3d_out_npu_nocheck(Tensor &result, const Tensor &input, + const Tensor &weight, const Tensor &bias, + IntArrayRef stride, IntArrayRef padding, + IntArrayRef dilation, int64_t groups) { + Tensor filter = weight.to(input.dtype()); + SmallVector inputTensor = {input, filter, bias}; + SmallVector stridesSize = {1, 1, stride[0], stride[1], stride[2]}; + SmallVector paddings = {padding[0], padding[0], padding[1], + padding[1], padding[2], padding[2]}; + SmallVector dilations = {1, 1, dilation[0], dilation[1], dilation[2]}; + + OpCommand cmd; + cmd.Name("Conv3D"); + cmd.Input(input); + cmd.Input(filter); + if (bias.defined()) { + cmd.Input(bias); + } + cmd.Output(result); + cmd.Attr("strides", stridesSize); + cmd.Attr("pads", paddings); + cmd.Attr("dilations", dilations); + cmd.Attr("groups", groups); + cmd.Attr("data_format", (string) "NCDHW"); + cmd.Run(); + + return result; +} + +Tensor &conv3d_out_npu(Tensor &result, const Tensor &input, + const Tensor &weight, const Tensor &bias, + IntArrayRef stride, IntArrayRef padding, + IntArrayRef dilation, int64_t groups) { + OpPipeWithDefinedOut pipe; + return pipe.CheckMemory({input, weight, bias}, {result}) + .Func([&input, &weight, &bias, stride, padding, dilation, groups](Tensor &result) { + conv3d_out_npu_nocheck( + result, input, weight, bias, stride, padding, dilation, groups); + }) + .Call(result); +} + +Tensor conv3d_npu(const Tensor &input, const Tensor &weight, const Tensor &bias, + IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, + int64_t groups) { + auto outputSize = conv3d_npu_output_size( + input, weight, bias, stride, padding, dilation, groups); + Tensor result = OpPreparation::ApplyTensor(input, outputSize); + conv3d_out_npu(result, input, weight, bias, stride, padding, dilation, groups); + + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/convolution/DeformableConv2dBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/convolution/DeformableConv2dBackwardKernelNpu.cpp index a5f8a2cad0b7f418dd78eea651705dbc7fe78be1..3b0f3d2ba2bb97343ccc9e18a0ed742684303561 100644 --- a/src/aten/src/ATen/native/npu/convolution/DeformableConv2dBackwardKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/convolution/DeformableConv2dBackwardKernelNpu.cpp @@ -1,74 +1,74 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -tuple deformable_conv2d_backward_npu( - const Tensor& input, - const Tensor& grad_output, - const Tensor& offset_out, - const Tensor& weight, - const Tensor& offset, - IntArrayRef kernel_size, - IntArrayRef stride, - IntArrayRef padding, - IntArrayRef dilation, - int64_t groups, - int64_t deformable_groups, - bool modulated) { - // construct the output tensor of the NPU - Tensor grad_input = OpPreparation::ApplyTensorWithFormat(input, ACL_FORMAT_NCHW); - Tensor grad_offset = OpPreparation::ApplyTensorWithFormat(offset, ACL_FORMAT_NCHW); - - // deformable_conv2d_backward includes conv2d_backward and DeformableOffsetsGrad - SmallVector conv2dStride = array_to_small_vector(kernel_size); - SmallVector conv2dPadding = {0, 0, 0, 0}; - SmallVector conv2dDilation = {1, 1}; - auto conv2dBackwardOutput = at::npu_conv2d_backward( - offset_out, grad_output, weight, conv2dStride, conv2dPadding, conv2dDilation, groups, {true, true, true}); - - // DeformableOffsetsGrad's input 'grad' is the output[0] of conv2d_backward - Tensor deformableOffsetsBackwardInput = get<0>(conv2dBackwardOutput); - Tensor grad_weight = get<1>(conv2dBackwardOutput); - Tensor grad_bias = get<2>(conv2dBackwardOutput); - - string dataFormat = "NCHW"; - // calculate the output result of the NPU - OpCommand cmd; - cmd.Name("DeformableOffsetsGrad") - .Input(deformableOffsetsBackwardInput) - .Input(input) - .Input(offset) - .Output(grad_input) - .Output(grad_offset) - .Attr("strides", stride) - .Attr("pads", padding) - .Attr("ksize", kernel_size) - .Attr("dilations", dilation) - .Attr("data_format",dataFormat) - .Attr("deformable_groups", deformable_groups) - .Attr("modulated",modulated) - .Run(); - - return std::tie(grad_input, grad_weight, grad_offset, grad_bias); -} - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +tuple deformable_conv2d_backward_npu( + const Tensor& input, + const Tensor& grad_output, + const Tensor& offset_out, + const Tensor& weight, + const Tensor& offset, + IntArrayRef kernel_size, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef dilation, + int64_t groups, + int64_t deformable_groups, + bool modulated) { + // construct the output tensor of the NPU + Tensor grad_input = OpPreparation::ApplyTensorWithFormat(input, ACL_FORMAT_NCHW); + Tensor grad_offset = OpPreparation::ApplyTensorWithFormat(offset, ACL_FORMAT_NCHW); + + // deformable_conv2d_backward includes conv2d_backward and DeformableOffsetsGrad + SmallVector conv2dStride = array_to_small_vector(kernel_size); + SmallVector conv2dPadding = {0, 0, 0, 0}; + SmallVector conv2dDilation = {1, 1}; + auto conv2dBackwardOutput = at::npu_conv2d_backward( + offset_out, grad_output, weight, conv2dStride, conv2dPadding, conv2dDilation, groups, {true, true, true}); + + // DeformableOffsetsGrad's input 'grad' is the output[0] of conv2d_backward + Tensor deformableOffsetsBackwardInput = get<0>(conv2dBackwardOutput); + Tensor grad_weight = get<1>(conv2dBackwardOutput); + Tensor grad_bias = get<2>(conv2dBackwardOutput); + + string dataFormat = "NCHW"; + // calculate the output result of the NPU + OpCommand cmd; + cmd.Name("DeformableOffsetsGrad") + .Input(deformableOffsetsBackwardInput) + .Input(input) + .Input(offset) + .Output(grad_input) + .Output(grad_offset) + .Attr("strides", stride) + .Attr("pads", padding) + .Attr("ksize", kernel_size) + .Attr("dilations", dilation) + .Attr("data_format",dataFormat) + .Attr("deformable_groups", deformable_groups) + .Attr("modulated",modulated) + .Run(); + + return std::tie(grad_input, grad_weight, grad_offset, grad_bias); +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/convolution/DeformableConv2dKernelNpu.cpp b/src/aten/src/ATen/native/npu/convolution/DeformableConv2dKernelNpu.cpp index 106c81a2b14592516309c9c744ce366c924f483a..5648d4fc2e17b24937fed6166386c95e9bf5fca7 100644 --- a/src/aten/src/ATen/native/npu/convolution/DeformableConv2dKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/convolution/DeformableConv2dKernelNpu.cpp @@ -1,68 +1,68 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -tuple deformable_conv2d_npu( - const Tensor& input, - const Tensor& weight, - const Tensor& offset, - const Tensor& bias, - IntArrayRef kernel_size, - IntArrayRef stride, - IntArrayRef padding, - IntArrayRef dilation, - int64_t groups, - int64_t deformable_groups, - bool modulated) { - // calculate the output size - auto outputSize = deformable_conv2d_npu_output_size( - input, weight, offset, bias, kernel_size, stride, padding, dilation, groups, deformable_groups, modulated); - - // construct the output tensor of the NPU - Tensor deformableOffsetsOutput = OpPreparation::ApplyTensorWithFormat(outputSize, input.options(), ACL_FORMAT_NCHW); - - string dataFormat = "NCHW"; - // calculate the output result of the NPU - OpCommand cmd; - cmd.Name("DeformableOffsets") - .Input(input) - .Input(offset) - .Output(deformableOffsetsOutput) - .Attr("ksize", kernel_size) - .Attr("strides", stride) - .Attr("pads", padding) - .Attr("dilations", dilation) - .Attr("deformable_groups", deformable_groups) - .Attr("data_format",dataFormat) - .Attr("modulated",modulated) - .Run(); - - SmallVector conv2dStride = array_to_small_vector(kernel_size); - SmallVector conv2dPadding = {0, 0, 0, 0}; - SmallVector conv2dDilation = {1, 1}; - Tensor conv2dOutput = at::npu_conv2d( - deformableOffsetsOutput, weight, bias, conv2dStride, conv2dPadding, conv2dDilation, groups); - - return std::tie(conv2dOutput, deformableOffsetsOutput); -} - -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +tuple deformable_conv2d_npu( + const Tensor& input, + const Tensor& weight, + const Tensor& offset, + const Tensor& bias, + IntArrayRef kernel_size, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef dilation, + int64_t groups, + int64_t deformable_groups, + bool modulated) { + // calculate the output size + auto outputSize = deformable_conv2d_npu_output_size( + input, weight, offset, bias, kernel_size, stride, padding, dilation, groups, deformable_groups, modulated); + + // construct the output tensor of the NPU + Tensor deformableOffsetsOutput = OpPreparation::ApplyTensorWithFormat(outputSize, input.options(), ACL_FORMAT_NCHW); + + string dataFormat = "NCHW"; + // calculate the output result of the NPU + OpCommand cmd; + cmd.Name("DeformableOffsets") + .Input(input) + .Input(offset) + .Output(deformableOffsetsOutput) + .Attr("ksize", kernel_size) + .Attr("strides", stride) + .Attr("pads", padding) + .Attr("dilations", dilation) + .Attr("deformable_groups", deformable_groups) + .Attr("data_format",dataFormat) + .Attr("modulated",modulated) + .Run(); + + SmallVector conv2dStride = array_to_small_vector(kernel_size); + SmallVector conv2dPadding = {0, 0, 0, 0}; + SmallVector conv2dDilation = {1, 1}; + Tensor conv2dOutput = at::npu_conv2d( + deformableOffsetsOutput, weight, bias, conv2dStride, conv2dPadding, conv2dDilation, groups); + + return std::tie(conv2dOutput, deformableOffsetsOutput); +} + +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/dynamicstrategy/LayerNormGardStrategy.cpp b/src/aten/src/ATen/native/npu/dynamicstrategy/LayerNormGardStrategy.cpp index f845336972f25c1dc619358c5db55db474afa8bf..e3213ce7e68192154d890131d3c5cdd4012aa8bf 100644 --- a/src/aten/src/ATen/native/npu/dynamicstrategy/LayerNormGardStrategy.cpp +++ b/src/aten/src/ATen/native/npu/dynamicstrategy/LayerNormGardStrategy.cpp @@ -1,87 +1,87 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include -#include -#include "ATen/native/npu/utils/NpuUtils.h" -#include -#include - -namespace at { -namespace native { -namespace npu { - -class LayerNormGardStrategy : public DescStrategyBase -{ -public: - virtual void CreateInputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) override; - - virtual void CreateOutputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) override; -}; - -// create input shape -void LayerNormGardStrategy::CreateInputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) { - for (int64_t i = 0; i < params.input_num; ++i) { - aclTensorDesc* desc = const_cast(params.input_desc[i]); - aclFormat storageFormat = params.inputFormats[i]; - if (i < 2) { - FormatShape shape = {-1, -1, -1}; - aclGetTensorDescDimV2(desc, 2, &shape[2]); - FormatShape storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); - compileShape.inputShape.emplace_back(shape); - compileShape.inputStorageShape.emplace_back(storageShape); - } else if (i == 2 || i == 3) { - FormatShape shape = {-1, -1, 1}; - FormatShape storageShape = {-1, -1, 1}; - compileShape.inputShape.emplace_back(shape); - compileShape.inputStorageShape.emplace_back(storageShape); - } else { - FormatShape shape = {-1}; - aclGetTensorDescDimV2(desc, 0, &shape[0]); - FormatShape storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); - compileShape.inputShape.emplace_back(shape); - compileShape.inputStorageShape.emplace_back(storageShape); - } - } -} - -void LayerNormGardStrategy::CreateOutputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) { - // create output shape - for (int64_t i = 0; i < params.output_num; ++i) { - aclTensorDesc* desc = const_cast(params.output_desc[i]); - aclFormat storageFormat = params.inputFormats[i]; - if (i == 0) { - FormatShape shape = {-1, -1, -1}; - aclGetTensorDescDimV2(desc, 2, &shape[2]); - FormatShape storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); - compileShape.outputShape.emplace_back(shape); - compileShape.outputStorageShape.emplace_back(storageShape); - } else { - FormatShape shape = {-1}; - aclGetTensorDescDimV2(desc, 0, &shape[0]); - FormatShape storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); - compileShape.outputShape.emplace_back(shape); - compileShape.outputStorageShape.emplace_back(storageShape); - } - } -} -REGISTER_DYNAMIC_SHAPE_OPT(LayerNormGrad, LayerNormGardStrategy) - -} // namespace npu -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include "ATen/native/npu/utils/NpuUtils.h" +#include +#include + +namespace at { +namespace native { +namespace npu { + +class LayerNormGardStrategy : public DescStrategyBase +{ +public: + virtual void CreateInputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) override; + + virtual void CreateOutputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) override; +}; + +// create input shape +void LayerNormGardStrategy::CreateInputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) { + for (int64_t i = 0; i < params.input_num; ++i) { + aclTensorDesc* desc = const_cast(params.input_desc[i]); + aclFormat storageFormat = params.inputFormats[i]; + if (i < 2) { + FormatShape shape = {-1, -1, -1}; + aclGetTensorDescDimV2(desc, 2, &shape[2]); + FormatShape storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); + compileShape.inputShape.emplace_back(shape); + compileShape.inputStorageShape.emplace_back(storageShape); + } else if (i == 2 || i == 3) { + FormatShape shape = {-1, -1, 1}; + FormatShape storageShape = {-1, -1, 1}; + compileShape.inputShape.emplace_back(shape); + compileShape.inputStorageShape.emplace_back(storageShape); + } else { + FormatShape shape = {-1}; + aclGetTensorDescDimV2(desc, 0, &shape[0]); + FormatShape storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); + compileShape.inputShape.emplace_back(shape); + compileShape.inputStorageShape.emplace_back(storageShape); + } + } +} + +void LayerNormGardStrategy::CreateOutputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) { + // create output shape + for (int64_t i = 0; i < params.output_num; ++i) { + aclTensorDesc* desc = const_cast(params.output_desc[i]); + aclFormat storageFormat = params.inputFormats[i]; + if (i == 0) { + FormatShape shape = {-1, -1, -1}; + aclGetTensorDescDimV2(desc, 2, &shape[2]); + FormatShape storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); + compileShape.outputShape.emplace_back(shape); + compileShape.outputStorageShape.emplace_back(storageShape); + } else { + FormatShape shape = {-1}; + aclGetTensorDescDimV2(desc, 0, &shape[0]); + FormatShape storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); + compileShape.outputShape.emplace_back(shape); + compileShape.outputStorageShape.emplace_back(storageShape); + } + } +} +REGISTER_DYNAMIC_SHAPE_OPT(LayerNormGrad, LayerNormGardStrategy) + +} // namespace npu +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/dynamicstrategy/RoiAlignBackwardStrategy.cpp b/src/aten/src/ATen/native/npu/dynamicstrategy/RoiAlignBackwardStrategy.cpp index cc7cb6d77405ded9b50af4a06b79a12854a9e780..f69e3601f3c0a810edda5a86c59dd163d4d55c05 100644 --- a/src/aten/src/ATen/native/npu/dynamicstrategy/RoiAlignBackwardStrategy.cpp +++ b/src/aten/src/ATen/native/npu/dynamicstrategy/RoiAlignBackwardStrategy.cpp @@ -1,76 +1,76 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include -#include -#include "ATen/native/npu/utils/NpuUtils.h" -#include -#include - -namespace at { -namespace native { -namespace npu { - -class ROIAlignBackwardStrategy : public DescStrategyBase -{ -public: - virtual void CreateInputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) override; - - virtual void CreateOutputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) override; -}; - -void ROIAlignBackwardStrategy::CreateInputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) { - CreateDefaultDescInfo(params.input_desc, - params.input_num, - params.inputDims, - params.inputFormats, - compileShape.inputShape, - compileShape.inputStorageShape); -} - -void ROIAlignBackwardStrategy::CreateOutputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) { - for (int64_t i = 0; i < params.output_num; ++i) { - aclTensorDesc* desc = const_cast(params.output_desc[i]); - - int64_t dim = (int64_t)aclGetTensorDescNumDims(desc); - dim = (dim == 0) ? 1 : dim; - - int64_t storageDim = (params.outputDims[i] == 0) ? 1 : params.outputDims[i]; - aclFormat storageFormat = params.outputFormats[i]; - - FormatShape shape(dim, -1); - FormatShape storageShape(storageDim, -1); - - // fix all dims - aclGetTensorDescDimV2(desc, 0, &shape[0]); - aclGetTensorDescDimV2(desc, 1, &shape[1]); - aclGetTensorDescDimV2(desc, 2, &shape[2]); - aclGetTensorDescDimV2(desc, 3, &shape[3]); - - storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); - - compileShape.outputShape.emplace_back(shape); - compileShape.outputStorageShape.emplace_back(storageShape); - } -} - -REGISTER_DYNAMIC_SHAPE_OPT(ROIAlignGrad, ROIAlignBackwardStrategy) - -} // namespace npu -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include "ATen/native/npu/utils/NpuUtils.h" +#include +#include + +namespace at { +namespace native { +namespace npu { + +class ROIAlignBackwardStrategy : public DescStrategyBase +{ +public: + virtual void CreateInputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) override; + + virtual void CreateOutputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) override; +}; + +void ROIAlignBackwardStrategy::CreateInputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) { + CreateDefaultDescInfo(params.input_desc, + params.input_num, + params.inputDims, + params.inputFormats, + compileShape.inputShape, + compileShape.inputStorageShape); +} + +void ROIAlignBackwardStrategy::CreateOutputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) { + for (int64_t i = 0; i < params.output_num; ++i) { + aclTensorDesc* desc = const_cast(params.output_desc[i]); + + int64_t dim = (int64_t)aclGetTensorDescNumDims(desc); + dim = (dim == 0) ? 1 : dim; + + int64_t storageDim = (params.outputDims[i] == 0) ? 1 : params.outputDims[i]; + aclFormat storageFormat = params.outputFormats[i]; + + FormatShape shape(dim, -1); + FormatShape storageShape(storageDim, -1); + + // fix all dims + aclGetTensorDescDimV2(desc, 0, &shape[0]); + aclGetTensorDescDimV2(desc, 1, &shape[1]); + aclGetTensorDescDimV2(desc, 2, &shape[2]); + aclGetTensorDescDimV2(desc, 3, &shape[3]); + + storageShape = FormatHelper::GetStorageSizes(storageFormat, shape); + + compileShape.outputShape.emplace_back(shape); + compileShape.outputStorageShape.emplace_back(storageShape); + } +} + +REGISTER_DYNAMIC_SHAPE_OPT(ROIAlignGrad, ROIAlignBackwardStrategy) + +} // namespace npu +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/dynamicstrategy/RoiAlignStrategy.cpp b/src/aten/src/ATen/native/npu/dynamicstrategy/RoiAlignStrategy.cpp index 716a38ae19e343567db771f3f443323a4b2bbda1..513ac6f52d73bf7e696379898da148a1aabe4993 100644 --- a/src/aten/src/ATen/native/npu/dynamicstrategy/RoiAlignStrategy.cpp +++ b/src/aten/src/ATen/native/npu/dynamicstrategy/RoiAlignStrategy.cpp @@ -1,86 +1,86 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include -#include -#include "ATen/native/npu/utils/NpuUtils.h" -#include -#include - - -namespace at { -namespace native { -namespace npu { - -class ROIAlignStrategy : public DescStrategyBase -{ -public: - virtual void CreateInputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) override; - - virtual void CreateOutputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) override; -}; - -void ROIAlignStrategy::CreateInputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) { - CreateDefaultDescInfo(params.input_desc, - params.input_num, - params.inputDims, - params.inputFormats, - compileShape.inputShape, - compileShape.inputStorageShape); -} - -void ROIAlignStrategy::CreateOutputDescInfo(ACL_PARAMS& params, - DynamicCompileShape& compileShape) { - for (int64_t i = 0; i < params.output_num; ++i) { - aclTensorDesc* desc = const_cast(params.output_desc[i]); - - int64_t dim = (int64_t)aclGetTensorDescNumDims(desc); - dim = (dim == 0) ? 1 : dim; - - int64_t storageDim = (params.outputDims[i] == 0) ? 1 : params.outputDims[i]; - aclFormat storageFormat = params.outputFormats[i]; - - FormatShape shape(dim, -1); - FormatShape storageShape(storageDim, -1); - - // fix height dim value - int64_t index_h = dim - 2; - aclGetTensorDescDimV2(desc, index_h, &shape[index_h]); - - // fix width dim value - int64_t index_w = dim - 1; - aclGetTensorDescDimV2(desc, index_w, &shape[index_w]); - - if (storageFormat == ACL_FORMAT_NC1HWC0) { - storageShape[storageDim - 3] = shape[dim - 2]; - storageShape[storageDim - 2] = shape[dim - 1]; - storageShape[storageDim - 1] = 16; - } else { - storageShape[storageDim - 2] = shape[dim - 2]; - storageShape[storageDim - 1] = shape[dim - 1]; - } - - compileShape.outputShape.emplace_back(shape); - compileShape.outputStorageShape.emplace_back(storageShape); - } -} - -REGISTER_DYNAMIC_SHAPE_OPT(ROIAlign, ROIAlignStrategy) - -} // namespace npu -} // namespace native +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include "ATen/native/npu/utils/NpuUtils.h" +#include +#include + + +namespace at { +namespace native { +namespace npu { + +class ROIAlignStrategy : public DescStrategyBase +{ +public: + virtual void CreateInputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) override; + + virtual void CreateOutputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) override; +}; + +void ROIAlignStrategy::CreateInputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) { + CreateDefaultDescInfo(params.input_desc, + params.input_num, + params.inputDims, + params.inputFormats, + compileShape.inputShape, + compileShape.inputStorageShape); +} + +void ROIAlignStrategy::CreateOutputDescInfo(ACL_PARAMS& params, + DynamicCompileShape& compileShape) { + for (int64_t i = 0; i < params.output_num; ++i) { + aclTensorDesc* desc = const_cast(params.output_desc[i]); + + int64_t dim = (int64_t)aclGetTensorDescNumDims(desc); + dim = (dim == 0) ? 1 : dim; + + int64_t storageDim = (params.outputDims[i] == 0) ? 1 : params.outputDims[i]; + aclFormat storageFormat = params.outputFormats[i]; + + FormatShape shape(dim, -1); + FormatShape storageShape(storageDim, -1); + + // fix height dim value + int64_t index_h = dim - 2; + aclGetTensorDescDimV2(desc, index_h, &shape[index_h]); + + // fix width dim value + int64_t index_w = dim - 1; + aclGetTensorDescDimV2(desc, index_w, &shape[index_w]); + + if (storageFormat == ACL_FORMAT_NC1HWC0) { + storageShape[storageDim - 3] = shape[dim - 2]; + storageShape[storageDim - 2] = shape[dim - 1]; + storageShape[storageDim - 1] = 16; + } else { + storageShape[storageDim - 2] = shape[dim - 2]; + storageShape[storageDim - 1] = shape[dim - 1]; + } + + compileShape.outputShape.emplace_back(shape); + compileShape.outputStorageShape.emplace_back(storageShape); + } +} + +REGISTER_DYNAMIC_SHAPE_OPT(ROIAlign, ROIAlignStrategy) + +} // namespace npu +} // namespace native } // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/frame/FormatHelper.cpp b/src/aten/src/ATen/native/npu/frame/FormatHelper.cpp index 018f6e2707fd4960e8afde21a86203b7e7ff29ee..d13bccb4c09be79ef1197124baf5395a6e64231d 100644 --- a/src/aten/src/ATen/native/npu/frame/FormatHelper.cpp +++ b/src/aten/src/ATen/native/npu/frame/FormatHelper.cpp @@ -59,22 +59,6 @@ std::unordered_map FormatHelper::info = { {ACL_FRACTAL_Z_3D, (FormatInfo){ACL_FRACTAL_Z_3D, ACL_FORMAT_NCDHW, InferShapeOfFZ3D, "FRACTAL_Z_3D", true}}, }; -std::unordered_map> FormatHelper::base_format_convert_info = { - {ACL_FORMAT_ND, { - {ACL_FORMAT_NCHW, InferShapeNDToNCHW}, - {ACL_FORMAT_NCDHW, InferShapeNDToNCDHW}, - } - }, - {ACL_FORMAT_NCHW, { - {ACL_FORMAT_ND, InferShapeNCHWToND}, - } - }, - {ACL_FORMAT_NCDHW, { - {ACL_FORMAT_ND, InferShapeNCDHWToND}, - } - }, -}; - bool FormatHelper::IsPadded(const Tensor* tensor) { auto format = tensor->storage().unsafeGetStorageImpl()->npu_desc_.npu_format_; return IsPadded(format); @@ -136,20 +120,6 @@ FormatShape FormatHelper::GetStorageSizes(NPUStorageDesc desc) { return GetStorageSizes(format, ori_size); } -FormatShape FormatHelper::GetSizeOfBaseFormat(const Tensor& src, aclFormat dst_format) { - auto src_format = GetBaseFormat(src); - auto itr = base_format_convert_info.find(src_format); - if (itr != base_format_convert_info.end()) { - auto next_itr = itr->second.find(dst_format); - if (next_itr != itr->second.end()) { - auto src_desc = src.storage().unsafeGetStorageImpl()->npu_desc_; - return next_itr->second(src_desc.storage_sizes_, src_desc.base_sizes_); - } - } - AT_ERROR("unsupport InferShape from ", GetFormatName(src_format), " to ", GetFormatName(dst_format)); - return {}; -} - // namespace { FormatShape InferShapeLessTo4(IntArrayRef dims) { diff --git a/src/aten/src/ATen/native/npu/frame/FormatHelper.h b/src/aten/src/ATen/native/npu/frame/FormatHelper.h index 862ff1b7d33d6d034dfda7b8923ed6ef993e2872..9f0d1f024239fcce5cc7dc5136f3634d2334e8a0 100644 --- a/src/aten/src/ATen/native/npu/frame/FormatHelper.h +++ b/src/aten/src/ATen/native/npu/frame/FormatHelper.h @@ -48,7 +48,6 @@ public: static FormatShape GetStorageSizes(aclFormat format, sizeType ori_size); // GetStorageSizes used to calculate the storage sizes of op at npu device at different format. static FormatShape GetStorageSizes(NPUStorageDesc desc); - static FormatShape GetSizeOfBaseFormat(const Tensor& src, aclFormat dst_format); private: static bool IsPadded(aclFormat format); @@ -64,7 +63,6 @@ private: bool isPadded = false; } FormatInfo; static std::unordered_map info; - static std::unordered_map> base_format_convert_info; }; // class FormatHelper // template impl diff --git a/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp b/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp index 1985cbffbbc309661fae435fda629967cf31f0ca..46abd15c008bb4f760ea7aea96c530489dea9c2e 100644 --- a/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp +++ b/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp @@ -42,9 +42,7 @@ REGISTER_OPTION_HOOK(ACL_OP_COMPILER_CACHE_DIR, [](const std::string& val) { aclSetCompileopt(aclCompileOpt::ACL_OP_COMPILER_CACHE_DIR, val.c_str()); }) REGISTER_OPTION_HOOK(NPU_FUZZY_COMPILE_BLACKLIST, [](const std::string& val) { - if (CheckFuzzyEnable()) { FuzzyCompileBlacklist::GetInstance().RegisterBlacklist(val); - } }) REGISTER_OPTION_INIT_BY_ENV(PROFILING_MODE) diff --git a/src/aten/src/ATen/native/npu/pooling/AvgPool3dKernelNpu.cpp b/src/aten/src/ATen/native/npu/pooling/AvgPool3dKernelNpu.cpp index 5f5aa51aa94d838603630bbe3b2d52689366f024..f93424e278279034ea0d1b8fab7be3cbc3eb88c4 100644 --- a/src/aten/src/ATen/native/npu/pooling/AvgPool3dKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/pooling/AvgPool3dKernelNpu.cpp @@ -1,177 +1,177 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/OpAdapter.h" -#include - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& avg_pool3d_out_npu( - Tensor& out, - const Tensor& self, - IntArrayRef kernel_size, - IntArrayRef stride, - IntArrayRef padding, - bool ceil_mode, - bool count_include_pad, - c10::optional divisor_override) { - SmallVector pads = {0, 0, 0, padding[0], padding[1], padding[2]}; - - Tensor input = self; - if (self.ndimension() == 4) { - input = input.unsqueeze(0); - out = out.unsqueeze(0); - } - - // calculate the output size - int D = self.size(-3); - int H = self.size(-2); - int W = self.size(-1); - - int64_t D_size = ceil_mode - ? (CeilDiv(D + 2 * padding[0] - kernel_size[0], stride[0]) + 1) - : ((D + 2 * padding[0] - kernel_size[0]) / stride[0] + 1); - int64_t H_size = ceil_mode - ? (CeilDiv(H + 2 * padding[1] - kernel_size[1], stride[1]) + 1) - : ((H + 2 * padding[1] - kernel_size[1]) / stride[1] + 1); - int64_t W_size = ceil_mode - ? (CeilDiv(W + 2 * padding[2] - kernel_size[2], stride[2]) + 1) - : ((W + 2 * padding[2] - kernel_size[2]) / stride[2] + 1); - - SmallVector outputSize = {input.size(0), input.size(1), D_size, H_size, W_size}; - OpPreparation::CheckOut( - {self}, - out, - ACL_FORMAT_NCDHW, - out.scalar_type(), - outputSize); - - OpCommand cmd; - cmd.Name("AvgPool3D") - .Input(input) - .Output(out) - .Attr("ksize", kernel_size) - .Attr("strides", stride) - .Attr("pads", pads) - .Attr("ceil_mode", ceil_mode) - .Attr("count_include_pad", count_include_pad); - - if (divisor_override.has_value()) { - cmd.Attr("divisor_override", divisor_override.value()); - } - - cmd.Attr("data_format", (string)"NCDHW") - .Run(); - - if (self.ndimension() == 4) { - out = out.squeeze(0); - } - return out; -} - -Tensor avg_pool3d_npu( - const Tensor& self, - IntArrayRef kernel_size, - IntArrayRef stride, - IntArrayRef padding, - bool ceil_mode, - bool count_include_pad, - c10::optional divisor_override) { - - // #20866, #22032: Guarantee this for the official C++ API? - TORCH_CHECK(kernel_size.size() == 1 || kernel_size.size() == 3, - "avg_pool3d: kernel_size must be a single int, or a tuple of three ints"); - const int kT = safe_downcast(kernel_size[0]); - const int kH = kernel_size.size() == 1 ? kT : safe_downcast(kernel_size[1]); - const int kW = kernel_size.size() == 1 ? kT : safe_downcast(kernel_size[2]); - SmallVector kernel_sizes = {kT, kH, kW}; - IntArrayRef kernel_sizess = IntArrayRef(kernel_sizes); - - TORCH_CHECK(stride.empty() || stride.size() == 1 || stride.size() == 3, - "avg_pool3d: stride must be omitted, a single int, or a tuple of three ints"); - const int dT = stride.empty() ? kT : safe_downcast(stride[0]); - const int dH = stride.empty() ? kH : - stride.size() == 1 ? dT : safe_downcast(stride[1]); - const int dW = stride.empty() ? kW : - stride.size() == 1 ? dT : safe_downcast(stride[2]); - - SmallVector strides = {dT, dH, dW}; - IntArrayRef stridess = IntArrayRef(strides); - - TORCH_CHECK(padding.size() == 1 || padding.size() == 3, - "avg_pool3d: padding must be a single int, or a tuple of three ints"); - const int padT = safe_downcast(padding[0]); - const int padH = padding.size() == 1 ? padT : safe_downcast(padding[1]); - const int padW = padding.size() == 1 ? padT : safe_downcast(padding[2]); - SmallVector paddings = {padT, padH, padW}; - IntArrayRef paddingss = IntArrayRef(paddings); - - TORCH_CHECK((self.ndimension() == 4 || self.ndimension() == 5), - "non-empty 4D or 5D (batch mode) tensor expected for input"); - - TORCH_CHECK(!divisor_override.has_value() || divisor_override.value() != 0, - "divisor must be not zero"); - - const int64_t nslices = self.size(-4); - const int64_t itime = self.size(-3); - const int64_t iheight = self.size(-2); - const int64_t iwidth = self.size(-1); - - const int64_t otime = pooling_output_shape(itime, kT, padT, dT, 1, ceil_mode); - const int64_t oheight = pooling_output_shape(iheight, kH, padH, dH, 1, ceil_mode); - const int64_t owidth = pooling_output_shape(iwidth, kW, padW, dW, 1, ceil_mode); - - pool3d_shape_check( - self, - nslices, - kT, kH, kW, - dT, dH, dW, - padT, padH, padW, - 1, 1, 1, - itime, iheight, iwidth, - otime, oheight, owidth, - /*check_input_size=*/ true); - - Tensor input = self; - if (self.ndimension() == 4) { - input = self.unsqueeze(0); - } - - SmallVector outputSize = {input.size(0), input.size(1), otime, oheight, owidth}; - - Tensor result = OpPreparation::ApplyTensorWithFormat(input, outputSize, ACL_FORMAT_NCDHW); - - // calculate the output result of the NPU - avg_pool3d_out_npu( - result, - input, - kernel_sizess, - stridess, - paddingss, - ceil_mode, - count_include_pad, - divisor_override); - - if (self.ndimension() == 4) { - result = result.squeeze(0); - } - return result; -} - -} // namespace native -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" +#include + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& avg_pool3d_out_npu( + Tensor& out, + const Tensor& self, + IntArrayRef kernel_size, + IntArrayRef stride, + IntArrayRef padding, + bool ceil_mode, + bool count_include_pad, + c10::optional divisor_override) { + SmallVector pads = {0, 0, 0, padding[0], padding[1], padding[2]}; + + Tensor input = self; + if (self.ndimension() == 4) { + input = input.unsqueeze(0); + out = out.unsqueeze(0); + } + + // calculate the output size + int D = self.size(-3); + int H = self.size(-2); + int W = self.size(-1); + + int64_t D_size = ceil_mode + ? (CeilDiv(D + 2 * padding[0] - kernel_size[0], stride[0]) + 1) + : ((D + 2 * padding[0] - kernel_size[0]) / stride[0] + 1); + int64_t H_size = ceil_mode + ? (CeilDiv(H + 2 * padding[1] - kernel_size[1], stride[1]) + 1) + : ((H + 2 * padding[1] - kernel_size[1]) / stride[1] + 1); + int64_t W_size = ceil_mode + ? (CeilDiv(W + 2 * padding[2] - kernel_size[2], stride[2]) + 1) + : ((W + 2 * padding[2] - kernel_size[2]) / stride[2] + 1); + + SmallVector outputSize = {input.size(0), input.size(1), D_size, H_size, W_size}; + OpPreparation::CheckOut( + {self}, + out, + ACL_FORMAT_NCDHW, + out.scalar_type(), + outputSize); + + OpCommand cmd; + cmd.Name("AvgPool3D") + .Input(input) + .Output(out) + .Attr("ksize", kernel_size) + .Attr("strides", stride) + .Attr("pads", pads) + .Attr("ceil_mode", ceil_mode) + .Attr("count_include_pad", count_include_pad); + + if (divisor_override.has_value()) { + cmd.Attr("divisor_override", divisor_override.value()); + } + + cmd.Attr("data_format", (string)"NCDHW") + .Run(); + + if (self.ndimension() == 4) { + out = out.squeeze(0); + } + return out; +} + +Tensor avg_pool3d_npu( + const Tensor& self, + IntArrayRef kernel_size, + IntArrayRef stride, + IntArrayRef padding, + bool ceil_mode, + bool count_include_pad, + c10::optional divisor_override) { + + // #20866, #22032: Guarantee this for the official C++ API? + TORCH_CHECK(kernel_size.size() == 1 || kernel_size.size() == 3, + "avg_pool3d: kernel_size must be a single int, or a tuple of three ints"); + const int kT = safe_downcast(kernel_size[0]); + const int kH = kernel_size.size() == 1 ? kT : safe_downcast(kernel_size[1]); + const int kW = kernel_size.size() == 1 ? kT : safe_downcast(kernel_size[2]); + SmallVector kernel_sizes = {kT, kH, kW}; + IntArrayRef kernel_sizess = IntArrayRef(kernel_sizes); + + TORCH_CHECK(stride.empty() || stride.size() == 1 || stride.size() == 3, + "avg_pool3d: stride must be omitted, a single int, or a tuple of three ints"); + const int dT = stride.empty() ? kT : safe_downcast(stride[0]); + const int dH = stride.empty() ? kH : + stride.size() == 1 ? dT : safe_downcast(stride[1]); + const int dW = stride.empty() ? kW : + stride.size() == 1 ? dT : safe_downcast(stride[2]); + + SmallVector strides = {dT, dH, dW}; + IntArrayRef stridess = IntArrayRef(strides); + + TORCH_CHECK(padding.size() == 1 || padding.size() == 3, + "avg_pool3d: padding must be a single int, or a tuple of three ints"); + const int padT = safe_downcast(padding[0]); + const int padH = padding.size() == 1 ? padT : safe_downcast(padding[1]); + const int padW = padding.size() == 1 ? padT : safe_downcast(padding[2]); + SmallVector paddings = {padT, padH, padW}; + IntArrayRef paddingss = IntArrayRef(paddings); + + TORCH_CHECK((self.ndimension() == 4 || self.ndimension() == 5), + "non-empty 4D or 5D (batch mode) tensor expected for input"); + + TORCH_CHECK(!divisor_override.has_value() || divisor_override.value() != 0, + "divisor must be not zero"); + + const int64_t nslices = self.size(-4); + const int64_t itime = self.size(-3); + const int64_t iheight = self.size(-2); + const int64_t iwidth = self.size(-1); + + const int64_t otime = pooling_output_shape(itime, kT, padT, dT, 1, ceil_mode); + const int64_t oheight = pooling_output_shape(iheight, kH, padH, dH, 1, ceil_mode); + const int64_t owidth = pooling_output_shape(iwidth, kW, padW, dW, 1, ceil_mode); + + pool3d_shape_check( + self, + nslices, + kT, kH, kW, + dT, dH, dW, + padT, padH, padW, + 1, 1, 1, + itime, iheight, iwidth, + otime, oheight, owidth, + /*check_input_size=*/ true); + + Tensor input = self; + if (self.ndimension() == 4) { + input = self.unsqueeze(0); + } + + SmallVector outputSize = {input.size(0), input.size(1), otime, oheight, owidth}; + + Tensor result = OpPreparation::ApplyTensorWithFormat(input, outputSize, ACL_FORMAT_NCDHW); + + // calculate the output result of the NPU + avg_pool3d_out_npu( + result, + input, + kernel_sizess, + stridess, + paddingss, + ceil_mode, + count_include_pad, + divisor_override); + + if (self.ndimension() == 4) { + result = result.squeeze(0); + } + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp b/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp index a49aa9b9945afa46ac43295733adce86ded1ece0..412d1fc32b7bca4bb8f5d7bcac31eee8458a5bc8 100644 --- a/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp +++ b/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp @@ -347,7 +347,18 @@ NPUStatus CalcuOpUtil::CreateAclTensorDescInfo( input[i].tensorDescType == NPUTensorDesc::TensorDescType::TENSOR) { Tensor* aclInput = &input[i].tensor; SmallVector dims; - dims = aclInput->storage().get_npu_desc().base_sizes_; + if (opName == "MatMul") { + auto dims_pre = aclInput->sizes(); + if (attrs[i].boolAttrValue == 1) { + dims.push_back(dims_pre[1]); + dims.push_back(dims_pre[0]); + } else if (attrs[i].boolAttrValue == 0) { + dims.push_back(dims_pre[0]); + dims.push_back(dims_pre[1]); + } + } else { + dims = aclInput->storage().get_npu_desc().base_sizes_; + } auto storageDims = aclInput->storage().get_npu_desc().storage_sizes_; int64_t numel = 1; for (int j = 0; j < storageDims.size(); j++) { diff --git a/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.cpp b/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.cpp index 773f25ab306403e43169c5a489625a18d9aea2a5..10672bf1137986a0becedc8714eae163b64b7190 100644 --- a/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.cpp +++ b/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.cpp @@ -333,12 +333,6 @@ SmallVector embedding_dense_backward_npu_output_size( return {num_weights, grad_output.size(-1)}; } -SmallVector embedding_renorm_mid_npu_output_size( - const Tensor& self, - const Tensor& indices){ - return {indices.size(0), self.size(1)}; -} - SmallVector equal_npu_output_size(void) { int64_t outputshape = 1; SmallVector outputSize = {outputshape}; diff --git a/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.h b/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.h index 9290da7ddd91ee55d3e88cf46fc065973ab0a4be..b676141652f53263bae5911302824bbe69d66c8b 100644 --- a/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.h +++ b/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.h @@ -182,10 +182,6 @@ SmallVector embedding_dense_backward_npu_output_size( int64_t padding_idx, bool scale_grad_by_freq); -SmallVector embedding_renorm_mid_npu_output_size( - const Tensor& self, - const Tensor& indices); - SmallVector index_npu_output_size( const Tensor& self, TensorList indices); diff --git a/src/aten/src/ATen/npu/NPUGenerator.cpp b/src/aten/src/ATen/npu/NPUGenerator.cpp index 93f75473c1eb61cc513b88ef86c09ead04a89690..2609b15f68d3eb4fa31342029c9c549726f967ad 100644 --- a/src/aten/src/ATen/npu/NPUGenerator.cpp +++ b/src/aten/src/ATen/npu/NPUGenerator.cpp @@ -1,195 +1,195 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -namespace at { - -namespace npu { namespace detail { - -// Ensures we only call npuGetDeviceCount only once. -static std::once_flag num_npu_init_flag; - -// Total number of npus in the system. -static int64_t num_npus; - -// Ensures default_gens_npu is initialized once. -static std::deque npu_gens_init_flag; - -// Default, global NPU generators, one per NPU. -static std::vector> default_gens_npu; - -/* -* Populates the global variables related to NPU generators -* Warning: this function must only be called once! -*/ -static void initNPUGenVector(){ - num_npus = c10::npu::device_count(); - npu_gens_init_flag.resize(num_npus); - default_gens_npu.resize(num_npus); -} - -/** - * PyTorch maintains a collection of default generators that get - * initialized once. The purpose of these default generators is to - * maintain a global running state of the pseudo random number generation, - * when a user does not explicitly mention any generator. - * getDefaultNPUGenerator gets the default generator for a particular - * npu device. - */ -NPUGenerator* getDefaultNPUGenerator(DeviceIndex device_index) { - std::call_once(num_npu_init_flag, initNPUGenVector); - DeviceIndex idx = device_index; - if (idx == -1) { - idx = c10::npu::current_device(); - } else { - TORCH_CHECK(idx >= 0 && idx < num_npus); - } - std::call_once(npu_gens_init_flag[idx], [&] { - default_gens_npu[idx] = std::make_shared(idx); - default_gens_npu[idx]->seed(); - }); - return default_gens_npu[idx].get(); -} - -/** - * Utility to create a NPUGenerator. Returns a shared_ptr - */ -std::shared_ptr createNPUGenerator(DeviceIndex device_index) { - std::call_once(num_npu_init_flag, initNPUGenVector); - DeviceIndex idx = device_index; - if (idx == -1) { - idx = c10::npu::current_device(); - } - TORCH_CHECK(idx >= 0 && idx < num_npus, "The device_index is invalid."); - auto gen = std::make_shared(idx); - gen->set_current_seed(default_rng_seed_val); - gen->set_philox_offset_per_thread(0); - return gen; -} - -} // namespace detail -} // namespace npu - - -/** - * NPUGenerator class implementation - */ -NPUGenerator::NPUGenerator(DeviceIndex device_index) - : Generator{Device(DeviceType::NPU, device_index), - DispatchKeySet(c10::DispatchKey::NPUTensorId)} { } - -/** - * Sets the seed to be used by curandStatePhilox4_32_10 - * Resets the philox_offset_per_thread_ to 0 - * - * See Note [Acquire lock when using random generators] - */ -void NPUGenerator::set_current_seed(uint64_t seed) { - seed_ = seed; - philox_offset_per_thread_ = 0; -} - -/** - * Gets the current seed of NPUGenerator. - */ -uint64_t NPUGenerator::current_seed() const { - return seed_; -} - -/** - * Gets a nondeterministic random number from /dev/urandom or time, - * seeds the CPUGenerator with it and then returns that number. - * - * FIXME: You can move this function to Generator.cpp if the algorithm - * in getNonDeterministicRandom is unified for both CPU and NPU - */ -uint64_t NPUGenerator::seed() { - auto random = at::detail::getNonDeterministicRandom(true); - this->set_current_seed(random); - return random; -} - -/** - * Sets the philox_offset_per_thread_ to be used by curandStatePhilox4_32_10 - * - * See Note [Acquire lock when using random generators] - */ -void NPUGenerator::set_philox_offset_per_thread(uint64_t offset) { - philox_offset_per_thread_ = offset; -} - -/** - * Gets the current philox_offset_per_thread_ of NPUGenerator. - */ -uint64_t NPUGenerator::philox_offset_per_thread() { - return philox_offset_per_thread_; -} - -/** - * Gets the seed and philox offset value to be used in - * curandStatePhilox4_32_10 - * - * Each kernel using philox has to sensibly increment offset - * for future users of philox. So it gets the "old" value for - * itself (before add), and tells subsequent users which offset - * they should use, since only the kernel knows how many randoms - * it intends to generate. - * - * Increment should be at least the number of curand() random numbers used in - * each thread. It is the user's responsibility to make sure that the increment - * for philox is never smaller than the number of curand() calls. Increment - * value > the number of curand() calls won't harm but anything less would mean - * that you would be reusing random values from previous calls. - * - * See Note [Acquire lock when using random generators] - */ -std::pair NPUGenerator::philox_engine_inputs(uint64_t increment) { - uint64_t offset = this->philox_offset_per_thread_; - this->philox_offset_per_thread_ += increment; - return std::make_pair(this->seed_, offset); -} - -/* - * Gets the DeviceType of NPUGenerator. - * Used for type checking during run time. - */ -DeviceType NPUGenerator::device_type() { - return DeviceType::NPU; -} - -/** - * Public clone method implementation - * - * See Note [Acquire lock when using random generators] - */ -std::shared_ptr NPUGenerator::clone() const { - return std::shared_ptr(this->clone_impl()); -} - -/** - * Private clone method implementation - * - * See Note [Acquire lock when using random generators] - */ -NPUGenerator* NPUGenerator::clone_impl() const { - auto gen = new NPUGenerator(this->device().index()); - gen->set_current_seed(this->seed_); - gen->set_philox_offset_per_thread(this->philox_offset_per_thread_); - return gen; -} -} // namespace at +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace at { + +namespace npu { namespace detail { + +// Ensures we only call npuGetDeviceCount only once. +static std::once_flag num_npu_init_flag; + +// Total number of npus in the system. +static int64_t num_npus; + +// Ensures default_gens_npu is initialized once. +static std::deque npu_gens_init_flag; + +// Default, global NPU generators, one per NPU. +static std::vector> default_gens_npu; + +/* +* Populates the global variables related to NPU generators +* Warning: this function must only be called once! +*/ +static void initNPUGenVector(){ + num_npus = c10::npu::device_count(); + npu_gens_init_flag.resize(num_npus); + default_gens_npu.resize(num_npus); +} + +/** + * PyTorch maintains a collection of default generators that get + * initialized once. The purpose of these default generators is to + * maintain a global running state of the pseudo random number generation, + * when a user does not explicitly mention any generator. + * getDefaultNPUGenerator gets the default generator for a particular + * npu device. + */ +NPUGenerator* getDefaultNPUGenerator(DeviceIndex device_index) { + std::call_once(num_npu_init_flag, initNPUGenVector); + DeviceIndex idx = device_index; + if (idx == -1) { + idx = c10::npu::current_device(); + } else { + TORCH_CHECK(idx >= 0 && idx < num_npus); + } + std::call_once(npu_gens_init_flag[idx], [&] { + default_gens_npu[idx] = std::make_shared(idx); + default_gens_npu[idx]->seed(); + }); + return default_gens_npu[idx].get(); +} + +/** + * Utility to create a NPUGenerator. Returns a shared_ptr + */ +std::shared_ptr createNPUGenerator(DeviceIndex device_index) { + std::call_once(num_npu_init_flag, initNPUGenVector); + DeviceIndex idx = device_index; + if (idx == -1) { + idx = c10::npu::current_device(); + } + TORCH_CHECK(idx >= 0 && idx < num_npus, "The device_index is invalid."); + auto gen = std::make_shared(idx); + gen->set_current_seed(default_rng_seed_val); + gen->set_philox_offset_per_thread(0); + return gen; +} + +} // namespace detail +} // namespace npu + + +/** + * NPUGenerator class implementation + */ +NPUGenerator::NPUGenerator(DeviceIndex device_index) + : Generator{Device(DeviceType::NPU, device_index), + DispatchKeySet(c10::DispatchKey::NPUTensorId)} { } + +/** + * Sets the seed to be used by curandStatePhilox4_32_10 + * Resets the philox_offset_per_thread_ to 0 + * + * See Note [Acquire lock when using random generators] + */ +void NPUGenerator::set_current_seed(uint64_t seed) { + seed_ = seed; + philox_offset_per_thread_ = 0; +} + +/** + * Gets the current seed of NPUGenerator. + */ +uint64_t NPUGenerator::current_seed() const { + return seed_; +} + +/** + * Gets a nondeterministic random number from /dev/urandom or time, + * seeds the CPUGenerator with it and then returns that number. + * + * FIXME: You can move this function to Generator.cpp if the algorithm + * in getNonDeterministicRandom is unified for both CPU and NPU + */ +uint64_t NPUGenerator::seed() { + auto random = at::detail::getNonDeterministicRandom(true); + this->set_current_seed(random); + return random; +} + +/** + * Sets the philox_offset_per_thread_ to be used by curandStatePhilox4_32_10 + * + * See Note [Acquire lock when using random generators] + */ +void NPUGenerator::set_philox_offset_per_thread(uint64_t offset) { + philox_offset_per_thread_ = offset; +} + +/** + * Gets the current philox_offset_per_thread_ of NPUGenerator. + */ +uint64_t NPUGenerator::philox_offset_per_thread() { + return philox_offset_per_thread_; +} + +/** + * Gets the seed and philox offset value to be used in + * curandStatePhilox4_32_10 + * + * Each kernel using philox has to sensibly increment offset + * for future users of philox. So it gets the "old" value for + * itself (before add), and tells subsequent users which offset + * they should use, since only the kernel knows how many randoms + * it intends to generate. + * + * Increment should be at least the number of curand() random numbers used in + * each thread. It is the user's responsibility to make sure that the increment + * for philox is never smaller than the number of curand() calls. Increment + * value > the number of curand() calls won't harm but anything less would mean + * that you would be reusing random values from previous calls. + * + * See Note [Acquire lock when using random generators] + */ +std::pair NPUGenerator::philox_engine_inputs(uint64_t increment) { + uint64_t offset = this->philox_offset_per_thread_; + this->philox_offset_per_thread_ += increment; + return std::make_pair(this->seed_, offset); +} + +/* + * Gets the DeviceType of NPUGenerator. + * Used for type checking during run time. + */ +DeviceType NPUGenerator::device_type() { + return DeviceType::NPU; +} + +/** + * Public clone method implementation + * + * See Note [Acquire lock when using random generators] + */ +std::shared_ptr NPUGenerator::clone() const { + return std::shared_ptr(this->clone_impl()); +} + +/** + * Private clone method implementation + * + * See Note [Acquire lock when using random generators] + */ +NPUGenerator* NPUGenerator::clone_impl() const { + auto gen = new NPUGenerator(this->device().index()); + gen->set_current_seed(this->seed_); + gen->set_philox_offset_per_thread(this->philox_offset_per_thread_); + return gen; +} +} // namespace at diff --git a/src/aten/src/ATen/npu/NPUGenerator.h b/src/aten/src/ATen/npu/NPUGenerator.h index 5ae9ba3da6fc343474a8193bae865e2109e155bf..896a96c5a6b9e4005a5d6879b301606b04118b35 100644 --- a/src/aten/src/ATen/npu/NPUGenerator.h +++ b/src/aten/src/ATen/npu/NPUGenerator.h @@ -1,53 +1,53 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -namespace at { - -struct TORCH_NPU_API NPUGenerator : public Generator { - // Constructors - NPUGenerator(DeviceIndex device_index = -1); - ~NPUGenerator() = default; - - // NPUGenerator methods - std::shared_ptr clone() const; - void set_current_seed(uint64_t seed) override; - uint64_t current_seed() const override; - uint64_t seed() override; - void set_philox_offset_per_thread(uint64_t offset); - uint64_t philox_offset_per_thread(); - std::pair philox_engine_inputs(uint64_t increment); - static DeviceType device_type(); - -private: - NPUGenerator* clone_impl() const override; - uint64_t seed_ = default_rng_seed_val; - uint64_t philox_offset_per_thread_ = 0; -}; - -namespace npu { -namespace detail { - - TORCH_NPU_API NPUGenerator* getDefaultNPUGenerator(DeviceIndex device_index = -1); - TORCH_NPU_API std::shared_ptr createNPUGenerator(DeviceIndex device_index = -1); - -} // namespace detail -} // namespace npu -} // namespace at - +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +namespace at { + +struct TORCH_NPU_API NPUGenerator : public Generator { + // Constructors + NPUGenerator(DeviceIndex device_index = -1); + ~NPUGenerator() = default; + + // NPUGenerator methods + std::shared_ptr clone() const; + void set_current_seed(uint64_t seed) override; + uint64_t current_seed() const override; + uint64_t seed() override; + void set_philox_offset_per_thread(uint64_t offset); + uint64_t philox_offset_per_thread(); + std::pair philox_engine_inputs(uint64_t increment); + static DeviceType device_type(); + +private: + NPUGenerator* clone_impl() const override; + uint64_t seed_ = default_rng_seed_val; + uint64_t philox_offset_per_thread_ = 0; +}; + +namespace npu { +namespace detail { + + TORCH_NPU_API NPUGenerator* getDefaultNPUGenerator(DeviceIndex device_index = -1); + TORCH_NPU_API std::shared_ptr createNPUGenerator(DeviceIndex device_index = -1); + +} // namespace detail +} // namespace npu +} // namespace at + diff --git a/src/aten/src/ATen/utils/LoadUtils.cpp b/src/aten/src/ATen/utils/LoadUtils.cpp index 10ed418f73741cbe46f7c78e5193da07fd1013a1..52ecc66d213e539a47b7ee51ebf839287f3fa050 100644 --- a/src/aten/src/ATen/utils/LoadUtils.cpp +++ b/src/aten/src/ATen/utils/LoadUtils.cpp @@ -689,17 +689,23 @@ namespace at { } + void ZeroStrideClear(Tensor& dst, Tensor& src) { + auto strides = dst.strides().vec(); + auto position = std::find(strides.begin(), strides.end(), 0); + if (position != strides.end()) { + dst = dst.select(position - strides.begin(), 0); + src = src.select(position - strides.begin(), 0); + } else { + return; + } + ZeroStrideClear(dst, src); + } + // when the stride of some dim is zero, the tensor may has been "expand", copy should only // process on any axis of that dim // To do: is this kind of copy matches other zero stride cases? void CopyMaybeWithZeroStride(Tensor dst, Tensor src) { - auto strides = dst.strides().vec(); - for (int i = 0; i < strides.size(); i++) { - if (strides[i] == 0) { - dst = dst.select(i, 0); - src = src.select(i, 0); - } - } + ZeroStrideClear(dst, src); dst.copy_(src); } diff --git a/src/third_party/hccl/inc/hccl/hccl.h b/src/third_party/hccl/inc/hccl/hccl.h index 311e78f2cbe8b97e0545b075a5e2ebef15ec855c..9606e89443003765b4d5506b93aeadd8dd29bb0c 100644 --- a/src/third_party/hccl/inc/hccl/hccl.h +++ b/src/third_party/hccl/inc/hccl/hccl.h @@ -1,133 +1,133 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hccl.h - * @brief HCCL API - */ - -#ifndef HCCL_H_ -#define HCCL_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/** - * @brief Initialize HCCL. - * - * @param clusterInfo A string identifying the cluster info file path, include file name. - * @param rank A integer identifying the identify for the rank. - * @param comm A pointer identifying the initialized communication resource. - * @return HcclResult - * @see HcclCommDestroy() - */ -extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); - -/** - * @brief Get hccl root info. - * - * @param rootInfo A pointer identifying the hccl root info. - * @return HcclResult - */ -extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); - -/** - * @brief Initialize HCCL with root info. - * - * @param nRanks A integer identifying the rank size of the cluster. - * @param rootInfo A struct identifying the hccl root info. - * @param rank A integer identifying the identify for the rank. - * @param comm A pointer identifying the initialized communication resource. - * @return HcclResult - * @see HcclCommDestroy() - */ -extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); - -/** - * @brief AllReduce operator. - * - * @param sendBuf A pointer identifying the input data address of the operator. - * @param recvBuf A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the output data. - * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param comm A pointer identifying the communication resource based on. - * @param stream A pointer identifying the stream information. - * @return HcclResult - */ -extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, -HcclReduceOp op, HcclComm comm, aclrtStream stream); - -/** - * @brief Broadcast operator. - * - * @param buf A pointer identifying the data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param root An integer(u32) identifying the the root rank in the operator. - * @param comm A pointer identifying the communication resource based on - * @param stream A pointer identifying the stream information. - * @return HcclResult - */ -extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, -aclrtStream stream); - -/** - * @brief ReduceScatter operator. - * - * @param sendBuf A pointer identifying the input data address of the operator. - * @param recvBuf A pointer identifying the output data address of the operator. - * @param recvCount An integer(u64) identifying the number of the output data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param comm A pointer identifying the communication resource based on. - * @param stream A pointer identifying the stream information. - * @return HcclResult - */ -extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, -HcclReduceOp op, HcclComm comm, aclrtStream stream); - -/** - * @brief AllGather operator. - * - * @param sendBuf A pointer identifying the input data address of the operator. - * @param recvBuf A pointer identifying the output data address of the operator. - * @param sendCount An integer(u64) identifying the number of the input data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param comm A pointer identifying the communication resource based on. - * @param stream A pointer identifying the stream information. - * @return HcclResult - */ -extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, -HcclComm comm, aclrtStream stream); - -/** - * @brief Destroy HCCL comm - * - * @param comm A pointer identifying the communication resource targetting - * @return HcclResult - * @see HcclCommInitClusterInfo() - */ -extern HcclResult HcclCommDestroy(HcclComm comm); - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // HCCL_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl.h + * @brief HCCL API + */ + +#ifndef HCCL_H_ +#define HCCL_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief Initialize HCCL. + * + * @param clusterInfo A string identifying the cluster info file path, include file name. + * @param rank A integer identifying the identify for the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief Get hccl root info. + * + * @param rootInfo A pointer identifying the hccl root info. + * @return HcclResult + */ +extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); + +/** + * @brief Initialize HCCL with root info. + * + * @param nRanks A integer identifying the rank size of the cluster. + * @param rootInfo A struct identifying the hccl root info. + * @param rank A integer identifying the identify for the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief AllReduce operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, +HcclReduceOp op, HcclComm comm, aclrtStream stream); + +/** + * @brief Broadcast operator. + * + * @param buf A pointer identifying the data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param root An integer(u32) identifying the the root rank in the operator. + * @param comm A pointer identifying the communication resource based on + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, +aclrtStream stream); + +/** + * @brief ReduceScatter operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param recvCount An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, +HcclReduceOp op, HcclComm comm, aclrtStream stream); + +/** + * @brief AllGather operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param sendCount An integer(u64) identifying the number of the input data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, +HcclComm comm, aclrtStream stream); + +/** + * @brief Destroy HCCL comm + * + * @param comm A pointer identifying the communication resource targetting + * @return HcclResult + * @see HcclCommInitClusterInfo() + */ +extern HcclResult HcclCommDestroy(HcclComm comm); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_H_ diff --git a/src/third_party/hccl/inc/hccl/hccl_types.h b/src/third_party/hccl/inc/hccl/hccl_types.h index 3fe701c044d354f9a128b5229f39fd7610a9b9b9..29ab1a95cbaebad063021900247c94bb63ed377c 100644 --- a/src/third_party/hccl/inc/hccl/hccl_types.h +++ b/src/third_party/hccl/inc/hccl/hccl_types.h @@ -1,100 +1,100 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hccl_types.h - * @brief HCCL data type definition - * - */ - -#ifndef HCCL_TYPES_H_ -#define HCCL_TYPES_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/** - * @brief HCCL functions return value definition - */ -typedef enum { - HCCL_SUCCESS = 0, /**< success */ - HCCL_E_PARA = 1, /**< parameter error */ - HCCL_E_PTR = 2, /**< empty pointer */ - HCCL_E_MEMORY = 3, /**< memory error */ - HCCL_E_INTERNAL = 4, /**< internal error */ - HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ - HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ - HCCL_E_UNAVAIL = 7, /**< resource unavailable */ - HCCL_E_SYSCALL = 8, /**< call system interface error */ - HCCL_E_TIMEOUT = 9, /**< timeout */ - HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ - HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ - HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ - HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ - HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ - HCCL_E_RUNTIME = 15, /**< call runtime api fail */ - HCCL_E_DRV = 16, /**< call driver api fail */ - HCCL_E_PROFILING = 17, /**< call profiling api fail */ - HCCL_E_CCE = 18, /**< call cce api fail */ - HCCL_E_NETWORK = 19, /**< call network api fail */ - HCCL_E_RESERVED /**< reserved */ -} HcclResult; - -/** - * @brief handle to HCCL communicator - */ -typedef void *HcclComm; - -/** - * @brief HCCL Reduction opperation - */ -typedef enum { - HCCL_REDUCE_SUM = 0, /**< sum */ - HCCL_REDUCE_PROD = 1, /**< prod */ - HCCL_REDUCE_MAX = 2, /**< max */ - HCCL_REDUCE_MIN = 3, /**< min */ - HCCL_REDUCE_RESERVED /**< reserved */ -} HcclReduceOp; - -/** - * @brief HCCL data type - */ -typedef enum { - HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ - HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ - HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ - HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ - HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ - HCCL_DATA_TYPE_INT64 = 5, /**< int 64 */ - HCCL_DATA_TYPE_RESERVED /**< reserved */ -} HcclDataType; - -const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length - -/** - * @brief HCCL root info - */ -typedef struct HcclRootInfoDef { - char internal[HCCL_ROOT_INFO_BYTES]; -} HcclRootInfo; - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // HCCL_TYPES_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl_types.h + * @brief HCCL data type definition + * + */ + +#ifndef HCCL_TYPES_H_ +#define HCCL_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief HCCL functions return value definition + */ +typedef enum { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ +} HcclResult; + +/** + * @brief handle to HCCL communicator + */ +typedef void *HcclComm; + +/** + * @brief HCCL Reduction opperation + */ +typedef enum { + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ +} HcclReduceOp; + +/** + * @brief HCCL data type + */ +typedef enum { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int 64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ +} HcclDataType; + +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length + +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/src/tools/autograd/derivatives.yaml b/src/tools/autograd/derivatives.yaml index 1db83b1c5a6a2870f5721b3d2483ec24b45e2ab3..ee68e09e8dccd12c5bd3023a5cc16d06814822e5 100644 --- a/src/tools/autograd/derivatives.yaml +++ b/src/tools/autograd/derivatives.yaml @@ -1691,4 +1691,7 @@ - name: npu_linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor input, weight: npu_linear_backward(grad, input, weight) - bias: maybe_multiply(grad, 1) \ No newline at end of file + bias: maybe_multiply(grad, 1) + +- name: npu_giou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> Tensor + self, gtboxes: npu_giou_backward(grad, self, gtboxes, trans, is_cross, mode) \ No newline at end of file diff --git a/src/torch/lib/c10d/HCCLUtils.hpp b/src/torch/lib/c10d/HCCLUtils.hpp index 6f19a66b4b5d1f03dd3936e09ed3e5ccc313e1d9..46e98d8b3e49cdb83b22db388f0f64ee15e3f02f 100644 --- a/src/torch/lib/c10d/HCCLUtils.hpp +++ b/src/torch/lib/c10d/HCCLUtils.hpp @@ -1,79 +1,79 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -#define C10D_HCCL_CHECK(cmd) \ - do { \ - HcclResult error = cmd; \ - if (error != HCCL_SUCCESS) { \ - std::string err = "HCCL error in: " + std::string(__FILE__) + \ - std::to_string(__LINE__) + ", " + std::to_string(error); \ - throw std::runtime_error(err); \ - } \ - } while (0) - -namespace c10d { - -// RAII wrapper for HCCL communicator -class HCCLComm { - public: - explicit HCCLComm(HcclComm hcclComm) : hcclComm_(hcclComm) {} - - HCCLComm() : HCCLComm(nullptr) {} - - ~HCCLComm() noexcept { - if (hcclComm_) { - HcclCommDestroy(hcclComm_); - } - } - - static std::shared_ptr create( - int numRanks, - int rank, - HcclRootInfo& rootInfo) { - auto comm = std::make_shared(); - C10D_HCCL_CHECK( - HcclCommInitRootInfo(numRanks, &rootInfo, rank, &(comm->hcclComm_))); - return comm; - } - - // Must not be copyable - HCCLComm(const HCCLComm&) = delete; - HCCLComm& operator=(const HCCLComm&) = delete; - - // Move constructable - HCCLComm(HCCLComm&& other) { - std::swap(hcclComm_, other.hcclComm_); - } - - // Move assignable - HCCLComm& operator=(HCCLComm&& other) { - std::swap(hcclComm_, other.hcclComm_); - return *this; - } - - HcclComm getHcclComm() const{ - return hcclComm_; - } - - protected: - HcclComm hcclComm_; -}; -} // namespace c10d +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#define C10D_HCCL_CHECK(cmd) \ + do { \ + HcclResult error = cmd; \ + if (error != HCCL_SUCCESS) { \ + std::string err = "HCCL error in: " + std::string(__FILE__) + \ + std::to_string(__LINE__) + ", " + std::to_string(error); \ + throw std::runtime_error(err); \ + } \ + } while (0) + +namespace c10d { + +// RAII wrapper for HCCL communicator +class HCCLComm { + public: + explicit HCCLComm(HcclComm hcclComm) : hcclComm_(hcclComm) {} + + HCCLComm() : HCCLComm(nullptr) {} + + ~HCCLComm() noexcept { + if (hcclComm_) { + HcclCommDestroy(hcclComm_); + } + } + + static std::shared_ptr create( + int numRanks, + int rank, + HcclRootInfo& rootInfo) { + auto comm = std::make_shared(); + C10D_HCCL_CHECK( + HcclCommInitRootInfo(numRanks, &rootInfo, rank, &(comm->hcclComm_))); + return comm; + } + + // Must not be copyable + HCCLComm(const HCCLComm&) = delete; + HCCLComm& operator=(const HCCLComm&) = delete; + + // Move constructable + HCCLComm(HCCLComm&& other) { + std::swap(hcclComm_, other.hcclComm_); + } + + // Move assignable + HCCLComm& operator=(HCCLComm&& other) { + std::swap(hcclComm_, other.hcclComm_); + return *this; + } + + HcclComm getHcclComm() const{ + return hcclComm_; + } + + protected: + HcclComm hcclComm_; +}; +} // namespace c10d diff --git a/src/torch/lib/c10d/ProcessGroupHCCL.cpp b/src/torch/lib/c10d/ProcessGroupHCCL.cpp index d821404dacef4bb6c5fdb1ae28d70012b8d9ea74..a94f0e593103c28df39596a1b0285aedb8c056a6 100644 --- a/src/torch/lib/c10d/ProcessGroupHCCL.cpp +++ b/src/torch/lib/c10d/ProcessGroupHCCL.cpp @@ -1,774 +1,774 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace c10d { -namespace { -using hcclUs = std::chrono::steady_clock::time_point; -#define DURATION_US(x) \ - (std::chrono::duration_cast(x)) -#define TIME_NOW() ({ std::chrono::steady_clock::now(); }) - -// HCCL ReduceOp mapping -std::map hcclOp = { - {ReduceOp::MIN, HCCL_REDUCE_MIN}, - {ReduceOp::MAX, HCCL_REDUCE_MAX}, - {ReduceOp::SUM, HCCL_REDUCE_SUM}, - {ReduceOp::PRODUCT, HCCL_REDUCE_PROD}, -}; - -// HCCL DataType mapping -std::map hcclDataType = { - {at::kChar, HCCL_DATA_TYPE_INT8}, - {at::kFloat, HCCL_DATA_TYPE_FP32}, - {at::kInt, HCCL_DATA_TYPE_INT32}, - {at::kHalf, HCCL_DATA_TYPE_FP16}, - {at::kShort, HCCL_DATA_TYPE_INT16}, - {at::kLong, HCCL_DATA_TYPE_INT64}, -}; - -// Helper function that gets the data type and issues error if not supported -HcclDataType getHcclDataType(at::ScalarType type) { - try { - return hcclDataType.at(type); - } catch (std::out_of_range& e) { - throw std::runtime_error("Unsupported data type for HCCL process group"); - } -} - -// Get the deviceList String from the list of devices -std::string getKeyFromDevices(const std::vector& devices) { - std::string deviceList; - for (auto& device : devices) { - if (deviceList.empty()) { - deviceList = std::to_string(device.index()); - } else { - deviceList += "," + std::to_string(device.index()); - } - } - return deviceList; -} - -// Get the list of devices from list of tensors -std::vector getDeviceList(const std::vector& tensors) { - std::vector res; - res.reserve(tensors.size()); - for (auto& tensor : tensors) { - res.push_back(tensor.device()); - } - return res; -} - -// [Sync Streams] Helper that lets the input hcclStreams to wait for the current -// stream. HCCL communications run on hcclStreams, but input tensors are -// allocated on different streams (i.e., current streams). Communications on -// hcclStreams cannot start before pending input tensor ops on current streams -// finish. Otherwise, ops on two streams might read/write same tensors -// concurrently. - -// The synchronization above alone is not enough. We also need to make sure -// input tensors are not freed before their usages on hcclStreams finish. This -// can be achieved by calling ::recordStream, -// which remembers the usage stream (hcclStream), creates an event on the usage -// stream when GC attempts to free the input tensor, and delays GC until that -// event is done. -void syncStreams( - const std::vector& devices, - std::vector& hcclEvents, - std::vector& hcclStreams) { - for (size_t i = 0; i < devices.size(); ++i) { - c10::npu::NPUStream& hcclStream = hcclStreams[i]; - at::npu::NPUEvent& hcclEvent = hcclEvents[i]; - hcclEvent.record(c10::npu::getCurrentNPUStream(devices[i].index())); - hcclEvent.block(hcclStream); - } -} - -// exit call back for allreduce error -void exceptionCallback(aclrtExceptionInfo* exceptionInfo) { - std::string err = "AllReduce error in:" + std::string(__FILE__) + ": " + - std::to_string(__LINE__); - throw std::runtime_error(err); -} -} // namespace - -constexpr int64_t kSynchronizeBusyWaitMillis = 10; -constexpr int64_t maxOpNumPerSyncPoint = 2; -const int64_t ProcessGroupHCCL::kProcessGroupHCCLOpTimeoutMillis = 10 * 1000; -ProcessGroupHCCL::WorkHCCL::WorkHCCL(const std::vector& devices) - : devices_(devices), workStartTime_(std::chrono::steady_clock::now()) { - // Creates the npu event wrappers - // Note: The actual events are lazily created when first recorded to with - // DEFAULT_FLAGS = npuEventDisableTiming. - npuEvents_.resize(devices.size()); - hcclComms_.resize(devices.size()); -} - -ProcessGroupHCCL::WorkHCCL::~WorkHCCL() {} - -bool ProcessGroupHCCL::WorkHCCL::isCompleted() { - checkAndSetException(); - return exception() || finishedNPUExecutionInternal(); -} - -bool ProcessGroupHCCL::WorkHCCL::isSuccess() const { - if (exception()) { - // Already detected an exception. - return false; - } - // TODO support checkForHCCLErrors - return finishedNPUExecutionInternal(); -} - -void ProcessGroupHCCL::WorkHCCL::checkAndSetException() { - if (exception()) { - // We already have an exception. - return; - } - // TODO support checkForHCCLErrors -} - -// Helper that checks if the HCCL kernels are completed on the NPU -bool ProcessGroupHCCL::WorkHCCL::finishedNPUExecution() { - checkAndSetException(); - return finishedNPUExecutionInternal(); -} - -// check if HCCL task is finished -bool ProcessGroupHCCL::WorkHCCL::finishedNPUExecutionInternal() const { - for (size_t i = 0; i < devices_.size(); ++i) { - // Checking Event completed by Eventquery - aclrtEventStatus status; - auto ret = aclrtQueryEvent(npuEvents_[i], &status); - if (ret != ACL_ERROR_NONE || status == ACL_EVENT_STATUS_NOT_READY) { - return false; - } - } - return true; -} - -void ProcessGroupHCCL::WorkHCCL::checkAndThrowException() { - // Set the appropriate exception if found. - checkAndSetException(); - - // Throw an exception, only if we have a valid exception. - if (exception()) { - std::rethrow_exception(exception()); - } -} - -// Waiting on the work's corresponding NPU events -void ProcessGroupHCCL::WorkHCCL::synchronize() { - for (size_t i = 0; i < devices_.size(); ++i) { - auto currentStream = at::npu::getCurrentNPUStream(devices_[i].index()); - // Block the current stream on the HCCL stream - npuEvents_[i].block(currentStream); - // If we use the work to do barrier, we should block here - if (!barrierTensors_.empty()) { - c10::npu::NPUGuard npuGuard(devices_[i]); - c10::npu::npuSynchronizeDevice(); - } - } - - // In case of blocking, wait for the operation to complete. - if (blockingWait_) { - // Wait for the operation to complete. - while (!isCompleted()) { - auto currentTimepoint = std::chrono::steady_clock::now(); - if (std::chrono::duration_cast( - currentTimepoint - workStartTime_) > opTimeout_) { - throw std::runtime_error("Operation timed out!"); - } - // Check for errors and throw appropriate exception. - checkAndThrowException(); // TODO support checkAndThrowException - std::this_thread::sleep_for( - std::chrono::milliseconds(kSynchronizeBusyWaitMillis)); - } - checkAndThrowException(); // TODO support checkAndThrowException - } -} - -// Same as calling synchronize(). -bool ProcessGroupHCCL::WorkHCCL::wait() { - synchronize(); - // Always return true, because abort API is not implemented. - return true; -} - -ProcessGroupHCCL::ProcessGroupHCCL( - const std::shared_ptr& store, - int rank, - int size, - const std::chrono::milliseconds& opTimeout) - : ProcessGroup(rank, size), - store_(store), - hcclCommCounter_(0), - terminateWatchdog_(false), - opTimeout_(opTimeout) { - char* blockingWait = getenv(HCCL_BLOCKING_WAIT); - try { - if (blockingWait != nullptr) { - auto val = std::stoi(blockingWait); - if (val == 1) { - // Make wait() and synchronize() a blocking call. - blockingWait_ = true; - } else if (val != 0) { - throw std::runtime_error( - "Invalid value for environment variable: " + - std::string(HCCL_BLOCKING_WAIT)); - } - } - } catch (std::exception& e) { - throw std::runtime_error( - "Invalid value for environment variable: " + - std::string(HCCL_BLOCKING_WAIT)); - } -} - -ProcessGroupHCCL::~ProcessGroupHCCL() {} - -void ProcessGroupHCCL::broadcastMasterID(HcclRootInfo* hcclID) { - // For every HCCL communicator that we create we need to broadcast - // a unique ID from rank 0 to all other ranks. This broadcast is - // done by rank 0 setting a key in the store and all other ranks - // retrieving the contents of that key. A single process group - // may create multiple HCCL communicators, so we use a sequence - // number to differentiate between them. - std::string storeKey = std::to_string(hcclCommCounter_++); - if (rank_ == 0) { - auto vec = std::vector( - reinterpret_cast(hcclID), - reinterpret_cast(hcclID) + HCCL_ROOT_INFO_BYTES); - store_->set(storeKey, vec); - } else { - auto vec = store_->get(storeKey); - TORCH_CHECK(vec.size() == HCCL_ROOT_INFO_BYTES); - std::memcpy(hcclID, vec.data(), vec.size()); - } -} - -/* -void ProcessGroupHCCL::fluxLimit ( - const std::string& devicesKey, - const int index) { - // event sync every two allreduce - if ((++collectiveCnts_[devicesKey][index]) < maxOpNumPerSyncPoint) { - return; - } - // sync with last sync point - at::npu::NPUEvent &fluxEvent = rateCtrlEvents_[devicesKey][index]; - if (fluxEvent.isCreated()) { - // printf("synchronize point reached. begin event sync\r\n"); - while(!fluxEvent.query()) { - std::this_thread::sleep_for( - std::chrono::milliseconds(1)); - } - fluxEvent.synchronize(); - } else { - // printf("fluxEvent[%s][%d] is not created\r\n", devicesKey.c_str(), -index); - } - // record new sync point - c10::npu::NPUStream& hcclStream = hcclStreams_[devicesKey][index]; - fluxEvent.record(hcclStream); - - // clear collective count - collectiveCnts_[devicesKey][index] = 0; -} -*/ - -std::vector>& ProcessGroupHCCL::getHCCLComm( - const std::string& devicesKey, - const std::vector& devices) { - // Sanity check - if (devicesKey.empty()) { - throw std::runtime_error( - "Not able to create/get the HCCL Communicator since " - "the NPU devices are not known"); - } - - for (auto& device : devices) { - usedDeviceIdxs_.insert(device.index()); - } - - { - std::lock_guard lock(devHCCLCommMapLock_); - if (devHCCLCommMap_.find(devicesKey) != devHCCLCommMap_.end()) { - // Reuse the cached communicator if there is one. - return devHCCLCommMap_[devicesKey]; - } - } - - // HCCL communicator not cached, create a new entry - std::vector> hcclComms; - hcclComms.resize(devices.size()); - - HcclRootInfo hcclID; - if (rank_ == 0) { - C10D_HCCL_CHECK(HcclGetRootInfo(&hcclID)); - } - broadcastMasterID(&hcclID); - - c10::npu::OptionalNPUGuard npuGuard; - std::vector streamVal; - streamVal.reserve(devices.size()); - - for (size_t i = 0; i < devices.size(); ++i) { - int numRanks = getSize(); - int rank = getRank() * devices.size() + i; - - npuGuard.set_index(devices[i].index()); - hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID); - - // Creates the HCCL streams - streamVal.push_back(c10::npu::getNPUStreamFromPool(devices[i].index())); - } - - hcclStreams_.emplace(devicesKey, std::move(streamVal)); - - // Note: these events are created with the (default) cudaEventDisableTiming - // flag This flag provides the best performance when used with - // StreamWaitEvent() and EventQuery(). Since we here don't measure the - // performance using npuEvent, this should be set. - hcclEvents_.emplace( - std::piecewise_construct, - std::make_tuple(devicesKey), - std::make_tuple(devices.size())); - - // stream length is 1024, - rateCtrlEvents_.emplace( - std::piecewise_construct, - std::make_tuple(devicesKey), - std::make_tuple(devices.size())); - - // record collectiveCnts. - collectiveCnts_.emplace( - std::piecewise_construct, - std::make_tuple(devicesKey), - std::make_tuple(devices.size())); - - // Hold the lock before modifying the cache. - std::lock_guard lock(devHCCLCommMapLock_); - - // Move the NCCL resource to cache - devHCCLCommMap_.emplace(devicesKey, std::move(hcclComms)); - return devHCCLCommMap_[devicesKey]; -} - -namespace { - -// Check that all `tensors' have the same type and shape and are distributed -// across distinct NPUs. -void check_npu_tensors(const std::vector& tensors) { - // HCCL support one NPU per process only - if (tensors.size() != 1) { - throw std::runtime_error( - "Tensor list mustn't be larger than the number of available NPUs"); - } - // HCCL support contiguous tensor only - if (!tensors[0].is_contiguous()) { - throw std::runtime_error("Tensors must be contiguous"); - } -} - -// Flatten each list in `tensor_lists' for a gather or scatter operation, and -// ensure compatibility with the corresponding tensor in `other'. -std::vector flatten_for_scatter_gather( - std::vector>& tensor_lists, - std::vector& other, - size_t world_size) { - if (tensor_lists.size() != other.size()) { - throw std::runtime_error( - "Tensor list operands to scatter/gather must have the same length"); - } - const auto num_devices = tensor_lists.size(); - - std::vector flattened; - flattened.resize(num_devices); - - for (auto i = size_t{}; i < num_devices; ++i) { - if (tensor_lists[i].size() != world_size * num_devices) { - throw std::runtime_error( - "Tensor list input to scatter/gather must match number of collective" - " participants"); - } - - // Only check device match for the first tensor in the list; the call to - // newLikeFlat() below will check the rest. - if (tensor_lists[i].front().get_device() != other[i].get_device()) { - throw std::runtime_error( - "Corresponding input/output tensors to scatter/gather must all reside" - " on the same device"); - } - - for (const auto& t : tensor_lists[i]) { - if (t.numel() != other[i].numel()) { - throw std::runtime_error( - "All tensor operands to scatter/gather must have the same size"); - } - } - // Flatten the tensors (from all ranks) into a single big tensor. - flattened[i] = newLikeFlat(tensor_lists, i); - } - return flattened; -} - -} // namespace - -std::shared_ptr ProcessGroupHCCL::initWork( - std::vector devices) { - if (devices.size() != 1) { - throw std::runtime_error( - "ProcessGroupHCCL support one device per process only"); - } - return std::make_shared(devices); -} - -template -std::shared_ptr ProcessGroupHCCL::collective( - std::vector& inputs, - std::vector& outputs, - Fn fn, - PreProcess pre, - PostProcess post) { - const auto devices = getDeviceList(inputs); - const auto key = getKeyFromDevices(devices); - auto& hcclComms = getHCCLComm(key, devices); - // First let HCCL streams wait for input tensors allocation streams - syncStreams(devices, hcclEvents_[key], hcclStreams_[key]); - // Work itself will create the events on all NPUs of tensors - auto work = initWork(devices); - - c10::npu::OptionalNPUGuard npuGuard; - pre(hcclStreams_[key]); - - for (size_t i = 0; i < inputs.size(); ++i) { - npuGuard.set_index(devices[i].index()); - c10::npu::NPUStream& hcclStream = hcclStreams_[key][i]; - - // Both `inputs' and `outputs' are created on a worker stream and used in - // different hcclStreams. Hence, both must record the hcclStream to - // prevent being freed before the collective finishes. - // - // We only record `inputs' here, and leave recording `outputs' to `fn' for - // operations where `inputs' and `outputs' are not the same. - // - // See [Sync Streams]. - c10::npu::NPUCachingAllocator::recordStream( - inputs[i].storage().data_ptr(), hcclStream); - } - { - for (size_t i = 0; i < inputs.size(); ++i) { - npuGuard.set_index(devices[i].index()); - // to avoid to much task pushed to the stream, leading to stream overflow - // insert sync point - // fluxLimit(key, i); - c10::npu::NPUStream& hcclStream = hcclStreams_[key][i]; - hcclUs startut = TIME_NOW(); - C10D_HCCL_CHECK( - fn(inputs[i], outputs[i], hcclComms[i]->getHcclComm(), hcclStream)); - } - } - post(hcclStreams_[key]); - - for (size_t i = 0; i < inputs.size(); ++i) { - c10::npu::NPUStream& hcclStream = hcclStreams_[key][i]; - work->npuEvents_[i].record(hcclStream); - work->hcclComms_[i] = hcclComms[i]; - work->blockingWait_ = blockingWait_; - work->opTimeout_ = opTimeout_; - } - - return work; -} - -template -std::shared_ptr ProcessGroupHCCL::collective( - std::vector& inputs, - std::vector& outputs, - Fn fn) { - return collective( - inputs, - outputs, - fn, - [](std::vector&) {}, - [](std::vector&) {}); -} - -int g_allreduceID = 0; -std::shared_ptr ProcessGroupHCCL::allreduce( - std::vector& tensors, - const AllreduceOptions& opts) { - check_npu_tensors(tensors); - return collective( - tensors, - tensors, - [&](at::Tensor& input, - at::Tensor& output, - HcclComm comm, - c10::npu::NPUStream& stream) { - aclrtSetExceptionInfoCallback(exceptionCallback); - RECORD_FUNCTION("HcclAllreduce", std::vector({input})); - return HcclAllReduce( - input.data_ptr(), - output.data_ptr(), - input.storage().unsafeGetStorageImpl()->numel(), - getHcclDataType(input.scalar_type()), - hcclOp[opts.reduceOp], - comm, - stream.stream()); - }); -} -int g_broadcastID = 100000; -std::shared_ptr ProcessGroupHCCL::broadcast( - std::vector& tensors, - const BroadcastOptions& opts) { - check_npu_tensors(tensors); - return collective( - tensors, - tensors, - [&](at::Tensor& input, - at::Tensor& output, - HcclComm comm, - c10::npu::NPUStream& stream) { - RECORD_FUNCTION("HcclBroadcast", std::vector({input})); - const auto root = opts.rootRank * tensors.size() + opts.rootTensor; - return HcclBroadcast( - input.data_ptr(), - input.storage().unsafeGetStorageImpl()->numel(), - getHcclDataType(input.scalar_type()), - root, - comm, - stream.stream()); - }); -} - -std::shared_ptr ProcessGroupHCCL::allreduce_coalesced( - std::vector& /* unused */, - const AllreduceCoalescedOptions& /* unused */) { - throw std::runtime_error( - "ProcessGroupHCCL does not support allreduce_coalesced"); -} - -std::shared_ptr ProcessGroupHCCL::reduce( - std::vector& /* unused */, - const ReduceOptions& /* unused */) { - throw std::runtime_error("ProcessGroupHCCL does not support reduce"); -} - -std::shared_ptr ProcessGroupHCCL::allgather( - std::vector>& outputTensors, - std::vector& inputTensors, - const AllgatherOptions& opts) { - check_npu_tensors(inputTensors); - auto outputFlattened = - flatten_for_scatter_gather(outputTensors, inputTensors, size_); - check_npu_tensors(outputFlattened); - - return collective( - inputTensors, - outputFlattened, - [&](at::Tensor& input, - at::Tensor& output, - HcclComm comm, - c10::npu::NPUStream& stream) { - RECORD_FUNCTION("HcclAllgather", std::vector({input})); - c10::npu::NPUCachingAllocator::recordStream( - output.storage().data_ptr(), stream); - return HcclAllGather( - input.data_ptr(), - output.data_ptr(), - input.storage().unsafeGetStorageImpl()->numel(), - getHcclDataType(input.scalar_type()), - comm, - stream.stream()); - }, - [&](std::vector& hcclStreams) {}, - [&](std::vector& hcclStreams) { - // Copy the flattened output tensors to the outputs. - for (size_t i = 0; i < outputTensors.size(); ++i) { - c10::npu::NPUStreamGuard guard(hcclStreams[i]); - for (size_t j = 0; j < outputTensors[0].size(); ++j) { - // See [Sync Streams]. - c10::npu::NPUCachingAllocator::recordStream( - outputTensors[i][j].storage().data_ptr(), hcclStreams[i]); - - outputTensors[i][j].copy_(outputFlattened[i][j], true); - } - } - }); -} - -std::shared_ptr ProcessGroupHCCL::allgather_base( - at::Tensor& /*unused */, - at::Tensor& /*unused */, - const AllgatherOptions& /*unused */) { - throw std::runtime_error("ProcessGroupHCCL does not support allgather_base"); -} - -std::shared_ptr ProcessGroupHCCL::reduce_scatter( - std::vector& outputTensors, - std::vector>& inputTensors, - const ReduceScatterOptions& opts) { - check_npu_tensors(outputTensors); - - auto inputFlattened = - flatten_for_scatter_gather(inputTensors, outputTensors, size_); - check_npu_tensors(inputFlattened); - - return collective( - inputFlattened, - outputTensors, - [&](at::Tensor& input, - at::Tensor& output, - HcclComm comm, - c10::npu::NPUStream& stream) { - RECORD_FUNCTION("HcclReduceScatter", std::vector({input})); - c10::npu::NPUCachingAllocator::recordStream( - output.storage().data_ptr(), stream); - return HcclReduceScatter( - input.data_ptr(), - output.data_ptr(), - output.numel(), - getHcclDataType(input.scalar_type()), - hcclOp[opts.reduceOp], - comm, - stream.stream()); - }, - [&](std::vector& hcclStreams) { - // Copy the input tensors to the flattened inputs. - for (size_t i = 0; i < inputTensors.size(); ++i) { - c10::npu::NPUStreamGuard guard(hcclStreams[i]); - for (size_t j = 0; j < inputTensors[0].size(); ++j) { - // See [Sync Streams]. - c10::npu::NPUCachingAllocator::recordStream( - inputTensors[i][j].storage().data_ptr(), hcclStreams[i]); - - inputFlattened[i][j].copy_(inputTensors[i][j], true); - } - } - }, - [&](std::vector& hcclStreams) {}); -} - -std::shared_ptr ProcessGroupHCCL::barrier( - const BarrierOptions& opts) { - std::vector devices; - if (usedDeviceIdxs_.empty()) { - auto numNPUs = c10::npu::device_count(); - int16_t deviceIdx = static_cast(rank_ % numNPUs); - devices.push_back(at::Device(at::DeviceType::NPU, deviceIdx)); - } else { - for (auto usedDeviceIdx : usedDeviceIdxs_) { - devices.push_back(at::Device(at::DeviceType::NPU, usedDeviceIdx)); - } - } - - std::vector barrierTensors; - barrierTensors.reserve(devices.size()); - - at::npu::OptionalNPUGuard npuGuard; - for (auto& device : devices) { - npuGuard.set_index(device.index()); - barrierTensors.push_back(at::empty( - {1}, - at::TensorOptions().device(at::DeviceType::NPU).dtype(at::kFloat))); - } - - auto work = BarrierInside(barrierTensors); - - // Work will take over barrierTensors - auto hcclWork = dynamic_cast(work.get()); - TORCH_CHECK(hcclWork); - hcclWork->barrierTensors_ = std::move(barrierTensors); - - return work; -} - -std::shared_ptr ProcessGroupHCCL::BarrierInside( - std::vector& tensors) { - check_npu_tensors(tensors); - - return collective( - tensors, - tensors, - [&](at::Tensor& input, - at::Tensor& output, - HcclComm comm, - c10::npu::NPUStream& stream) { - aclrtSetExceptionInfoCallback(exceptionCallback); - auto ret = c10::npu::hccl::hccl_barrier(comm, stream.stream()); - if (ret == HcclResult::HCCL_E_NOT_SUPPORT) { - return HcclAllReduce( - input.data_ptr(), - output.data_ptr(), - input.storage().unsafeGetStorageImpl()->numel(), - getHcclDataType(input.scalar_type()), - hcclOp[ReduceOp::SUM], - comm, - stream.stream()); - } - else { - return ret; - } - }); -} - -std::shared_ptr ProcessGroupHCCL::gather( - std::vector>& /* unused */, - std::vector& /* unused */, - const GatherOptions& /* unused */) { - throw std::runtime_error("ProcessGroupHCCL does not support gather"); -} - -std::shared_ptr ProcessGroupHCCL::scatter( - std::vector& /* unused */, - std::vector>& /* unused */, - const ScatterOptions& /* unused */) { - throw std::runtime_error("ProcessGroupHCCL does not support scatter"); -} - -std::shared_ptr ProcessGroupHCCL::send( - std::vector& /* unused */, - int /* unused */, - int /* unused */) { - throw std::runtime_error("ProcessGroupHCCL does not support send"); -} - -std::shared_ptr ProcessGroupHCCL::recv( - std::vector& /* unused */, - int /* unused */, - int /* unused */) { - throw std::runtime_error("ProcessGroupHCCL does not support recv"); -} - -std::shared_ptr ProcessGroupHCCL::recvAnysource( - std::vector& /* unused */, - int /* unused */) { - throw std::runtime_error("ProcessGroupHCCL does not support recv"); -} -} // namespace c10d +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10d { +namespace { +using hcclUs = std::chrono::steady_clock::time_point; +#define DURATION_US(x) \ + (std::chrono::duration_cast(x)) +#define TIME_NOW() ({ std::chrono::steady_clock::now(); }) + +// HCCL ReduceOp mapping +std::map hcclOp = { + {ReduceOp::MIN, HCCL_REDUCE_MIN}, + {ReduceOp::MAX, HCCL_REDUCE_MAX}, + {ReduceOp::SUM, HCCL_REDUCE_SUM}, + {ReduceOp::PRODUCT, HCCL_REDUCE_PROD}, +}; + +// HCCL DataType mapping +std::map hcclDataType = { + {at::kChar, HCCL_DATA_TYPE_INT8}, + {at::kFloat, HCCL_DATA_TYPE_FP32}, + {at::kInt, HCCL_DATA_TYPE_INT32}, + {at::kHalf, HCCL_DATA_TYPE_FP16}, + {at::kShort, HCCL_DATA_TYPE_INT16}, + {at::kLong, HCCL_DATA_TYPE_INT64}, +}; + +// Helper function that gets the data type and issues error if not supported +HcclDataType getHcclDataType(at::ScalarType type) { + try { + return hcclDataType.at(type); + } catch (std::out_of_range& e) { + throw std::runtime_error("Unsupported data type for HCCL process group"); + } +} + +// Get the deviceList String from the list of devices +std::string getKeyFromDevices(const std::vector& devices) { + std::string deviceList; + for (auto& device : devices) { + if (deviceList.empty()) { + deviceList = std::to_string(device.index()); + } else { + deviceList += "," + std::to_string(device.index()); + } + } + return deviceList; +} + +// Get the list of devices from list of tensors +std::vector getDeviceList(const std::vector& tensors) { + std::vector res; + res.reserve(tensors.size()); + for (auto& tensor : tensors) { + res.push_back(tensor.device()); + } + return res; +} + +// [Sync Streams] Helper that lets the input hcclStreams to wait for the current +// stream. HCCL communications run on hcclStreams, but input tensors are +// allocated on different streams (i.e., current streams). Communications on +// hcclStreams cannot start before pending input tensor ops on current streams +// finish. Otherwise, ops on two streams might read/write same tensors +// concurrently. + +// The synchronization above alone is not enough. We also need to make sure +// input tensors are not freed before their usages on hcclStreams finish. This +// can be achieved by calling ::recordStream, +// which remembers the usage stream (hcclStream), creates an event on the usage +// stream when GC attempts to free the input tensor, and delays GC until that +// event is done. +void syncStreams( + const std::vector& devices, + std::vector& hcclEvents, + std::vector& hcclStreams) { + for (size_t i = 0; i < devices.size(); ++i) { + c10::npu::NPUStream& hcclStream = hcclStreams[i]; + at::npu::NPUEvent& hcclEvent = hcclEvents[i]; + hcclEvent.record(c10::npu::getCurrentNPUStream(devices[i].index())); + hcclEvent.block(hcclStream); + } +} + +// exit call back for allreduce error +void exceptionCallback(aclrtExceptionInfo* exceptionInfo) { + std::string err = "AllReduce error in:" + std::string(__FILE__) + ": " + + std::to_string(__LINE__); + throw std::runtime_error(err); +} +} // namespace + +constexpr int64_t kSynchronizeBusyWaitMillis = 10; +constexpr int64_t maxOpNumPerSyncPoint = 2; +const int64_t ProcessGroupHCCL::kProcessGroupHCCLOpTimeoutMillis = 10 * 1000; +ProcessGroupHCCL::WorkHCCL::WorkHCCL(const std::vector& devices) + : devices_(devices), workStartTime_(std::chrono::steady_clock::now()) { + // Creates the npu event wrappers + // Note: The actual events are lazily created when first recorded to with + // DEFAULT_FLAGS = npuEventDisableTiming. + npuEvents_.resize(devices.size()); + hcclComms_.resize(devices.size()); +} + +ProcessGroupHCCL::WorkHCCL::~WorkHCCL() {} + +bool ProcessGroupHCCL::WorkHCCL::isCompleted() { + checkAndSetException(); + return exception() || finishedNPUExecutionInternal(); +} + +bool ProcessGroupHCCL::WorkHCCL::isSuccess() const { + if (exception()) { + // Already detected an exception. + return false; + } + // TODO support checkForHCCLErrors + return finishedNPUExecutionInternal(); +} + +void ProcessGroupHCCL::WorkHCCL::checkAndSetException() { + if (exception()) { + // We already have an exception. + return; + } + // TODO support checkForHCCLErrors +} + +// Helper that checks if the HCCL kernels are completed on the NPU +bool ProcessGroupHCCL::WorkHCCL::finishedNPUExecution() { + checkAndSetException(); + return finishedNPUExecutionInternal(); +} + +// check if HCCL task is finished +bool ProcessGroupHCCL::WorkHCCL::finishedNPUExecutionInternal() const { + for (size_t i = 0; i < devices_.size(); ++i) { + // Checking Event completed by Eventquery + aclrtEventStatus status; + auto ret = aclrtQueryEvent(npuEvents_[i], &status); + if (ret != ACL_ERROR_NONE || status == ACL_EVENT_STATUS_NOT_READY) { + return false; + } + } + return true; +} + +void ProcessGroupHCCL::WorkHCCL::checkAndThrowException() { + // Set the appropriate exception if found. + checkAndSetException(); + + // Throw an exception, only if we have a valid exception. + if (exception()) { + std::rethrow_exception(exception()); + } +} + +// Waiting on the work's corresponding NPU events +void ProcessGroupHCCL::WorkHCCL::synchronize() { + for (size_t i = 0; i < devices_.size(); ++i) { + auto currentStream = at::npu::getCurrentNPUStream(devices_[i].index()); + // Block the current stream on the HCCL stream + npuEvents_[i].block(currentStream); + // If we use the work to do barrier, we should block here + if (!barrierTensors_.empty()) { + c10::npu::NPUGuard npuGuard(devices_[i]); + c10::npu::npuSynchronizeDevice(); + } + } + + // In case of blocking, wait for the operation to complete. + if (blockingWait_) { + // Wait for the operation to complete. + while (!isCompleted()) { + auto currentTimepoint = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast( + currentTimepoint - workStartTime_) > opTimeout_) { + throw std::runtime_error("Operation timed out!"); + } + // Check for errors and throw appropriate exception. + checkAndThrowException(); // TODO support checkAndThrowException + std::this_thread::sleep_for( + std::chrono::milliseconds(kSynchronizeBusyWaitMillis)); + } + checkAndThrowException(); // TODO support checkAndThrowException + } +} + +// Same as calling synchronize(). +bool ProcessGroupHCCL::WorkHCCL::wait() { + synchronize(); + // Always return true, because abort API is not implemented. + return true; +} + +ProcessGroupHCCL::ProcessGroupHCCL( + const std::shared_ptr& store, + int rank, + int size, + const std::chrono::milliseconds& opTimeout) + : ProcessGroup(rank, size), + store_(store), + hcclCommCounter_(0), + terminateWatchdog_(false), + opTimeout_(opTimeout) { + char* blockingWait = getenv(HCCL_BLOCKING_WAIT); + try { + if (blockingWait != nullptr) { + auto val = std::stoi(blockingWait); + if (val == 1) { + // Make wait() and synchronize() a blocking call. + blockingWait_ = true; + } else if (val != 0) { + throw std::runtime_error( + "Invalid value for environment variable: " + + std::string(HCCL_BLOCKING_WAIT)); + } + } + } catch (std::exception& e) { + throw std::runtime_error( + "Invalid value for environment variable: " + + std::string(HCCL_BLOCKING_WAIT)); + } +} + +ProcessGroupHCCL::~ProcessGroupHCCL() {} + +void ProcessGroupHCCL::broadcastMasterID(HcclRootInfo* hcclID) { + // For every HCCL communicator that we create we need to broadcast + // a unique ID from rank 0 to all other ranks. This broadcast is + // done by rank 0 setting a key in the store and all other ranks + // retrieving the contents of that key. A single process group + // may create multiple HCCL communicators, so we use a sequence + // number to differentiate between them. + std::string storeKey = std::to_string(hcclCommCounter_++); + if (rank_ == 0) { + auto vec = std::vector( + reinterpret_cast(hcclID), + reinterpret_cast(hcclID) + HCCL_ROOT_INFO_BYTES); + store_->set(storeKey, vec); + } else { + auto vec = store_->get(storeKey); + TORCH_CHECK(vec.size() == HCCL_ROOT_INFO_BYTES); + std::memcpy(hcclID, vec.data(), vec.size()); + } +} + +/* +void ProcessGroupHCCL::fluxLimit ( + const std::string& devicesKey, + const int index) { + // event sync every two allreduce + if ((++collectiveCnts_[devicesKey][index]) < maxOpNumPerSyncPoint) { + return; + } + // sync with last sync point + at::npu::NPUEvent &fluxEvent = rateCtrlEvents_[devicesKey][index]; + if (fluxEvent.isCreated()) { + // printf("synchronize point reached. begin event sync\r\n"); + while(!fluxEvent.query()) { + std::this_thread::sleep_for( + std::chrono::milliseconds(1)); + } + fluxEvent.synchronize(); + } else { + // printf("fluxEvent[%s][%d] is not created\r\n", devicesKey.c_str(), +index); + } + // record new sync point + c10::npu::NPUStream& hcclStream = hcclStreams_[devicesKey][index]; + fluxEvent.record(hcclStream); + + // clear collective count + collectiveCnts_[devicesKey][index] = 0; +} +*/ + +std::vector>& ProcessGroupHCCL::getHCCLComm( + const std::string& devicesKey, + const std::vector& devices) { + // Sanity check + if (devicesKey.empty()) { + throw std::runtime_error( + "Not able to create/get the HCCL Communicator since " + "the NPU devices are not known"); + } + + for (auto& device : devices) { + usedDeviceIdxs_.insert(device.index()); + } + + { + std::lock_guard lock(devHCCLCommMapLock_); + if (devHCCLCommMap_.find(devicesKey) != devHCCLCommMap_.end()) { + // Reuse the cached communicator if there is one. + return devHCCLCommMap_[devicesKey]; + } + } + + // HCCL communicator not cached, create a new entry + std::vector> hcclComms; + hcclComms.resize(devices.size()); + + HcclRootInfo hcclID; + if (rank_ == 0) { + C10D_HCCL_CHECK(HcclGetRootInfo(&hcclID)); + } + broadcastMasterID(&hcclID); + + c10::npu::OptionalNPUGuard npuGuard; + std::vector streamVal; + streamVal.reserve(devices.size()); + + for (size_t i = 0; i < devices.size(); ++i) { + int numRanks = getSize(); + int rank = getRank() * devices.size() + i; + + npuGuard.set_index(devices[i].index()); + hcclComms[i] = HCCLComm::create(numRanks, rank, hcclID); + + // Creates the HCCL streams + streamVal.push_back(c10::npu::getNPUStreamFromPool(devices[i].index())); + } + + hcclStreams_.emplace(devicesKey, std::move(streamVal)); + + // Note: these events are created with the (default) cudaEventDisableTiming + // flag This flag provides the best performance when used with + // StreamWaitEvent() and EventQuery(). Since we here don't measure the + // performance using npuEvent, this should be set. + hcclEvents_.emplace( + std::piecewise_construct, + std::make_tuple(devicesKey), + std::make_tuple(devices.size())); + + // stream length is 1024, + rateCtrlEvents_.emplace( + std::piecewise_construct, + std::make_tuple(devicesKey), + std::make_tuple(devices.size())); + + // record collectiveCnts. + collectiveCnts_.emplace( + std::piecewise_construct, + std::make_tuple(devicesKey), + std::make_tuple(devices.size())); + + // Hold the lock before modifying the cache. + std::lock_guard lock(devHCCLCommMapLock_); + + // Move the NCCL resource to cache + devHCCLCommMap_.emplace(devicesKey, std::move(hcclComms)); + return devHCCLCommMap_[devicesKey]; +} + +namespace { + +// Check that all `tensors' have the same type and shape and are distributed +// across distinct NPUs. +void check_npu_tensors(const std::vector& tensors) { + // HCCL support one NPU per process only + if (tensors.size() != 1) { + throw std::runtime_error( + "Tensor list mustn't be larger than the number of available NPUs"); + } + // HCCL support contiguous tensor only + if (!tensors[0].is_contiguous()) { + throw std::runtime_error("Tensors must be contiguous"); + } +} + +// Flatten each list in `tensor_lists' for a gather or scatter operation, and +// ensure compatibility with the corresponding tensor in `other'. +std::vector flatten_for_scatter_gather( + std::vector>& tensor_lists, + std::vector& other, + size_t world_size) { + if (tensor_lists.size() != other.size()) { + throw std::runtime_error( + "Tensor list operands to scatter/gather must have the same length"); + } + const auto num_devices = tensor_lists.size(); + + std::vector flattened; + flattened.resize(num_devices); + + for (auto i = size_t{}; i < num_devices; ++i) { + if (tensor_lists[i].size() != world_size * num_devices) { + throw std::runtime_error( + "Tensor list input to scatter/gather must match number of collective" + " participants"); + } + + // Only check device match for the first tensor in the list; the call to + // newLikeFlat() below will check the rest. + if (tensor_lists[i].front().get_device() != other[i].get_device()) { + throw std::runtime_error( + "Corresponding input/output tensors to scatter/gather must all reside" + " on the same device"); + } + + for (const auto& t : tensor_lists[i]) { + if (t.numel() != other[i].numel()) { + throw std::runtime_error( + "All tensor operands to scatter/gather must have the same size"); + } + } + // Flatten the tensors (from all ranks) into a single big tensor. + flattened[i] = newLikeFlat(tensor_lists, i); + } + return flattened; +} + +} // namespace + +std::shared_ptr ProcessGroupHCCL::initWork( + std::vector devices) { + if (devices.size() != 1) { + throw std::runtime_error( + "ProcessGroupHCCL support one device per process only"); + } + return std::make_shared(devices); +} + +template +std::shared_ptr ProcessGroupHCCL::collective( + std::vector& inputs, + std::vector& outputs, + Fn fn, + PreProcess pre, + PostProcess post) { + const auto devices = getDeviceList(inputs); + const auto key = getKeyFromDevices(devices); + auto& hcclComms = getHCCLComm(key, devices); + // First let HCCL streams wait for input tensors allocation streams + syncStreams(devices, hcclEvents_[key], hcclStreams_[key]); + // Work itself will create the events on all NPUs of tensors + auto work = initWork(devices); + + c10::npu::OptionalNPUGuard npuGuard; + pre(hcclStreams_[key]); + + for (size_t i = 0; i < inputs.size(); ++i) { + npuGuard.set_index(devices[i].index()); + c10::npu::NPUStream& hcclStream = hcclStreams_[key][i]; + + // Both `inputs' and `outputs' are created on a worker stream and used in + // different hcclStreams. Hence, both must record the hcclStream to + // prevent being freed before the collective finishes. + // + // We only record `inputs' here, and leave recording `outputs' to `fn' for + // operations where `inputs' and `outputs' are not the same. + // + // See [Sync Streams]. + c10::npu::NPUCachingAllocator::recordStream( + inputs[i].storage().data_ptr(), hcclStream); + } + { + for (size_t i = 0; i < inputs.size(); ++i) { + npuGuard.set_index(devices[i].index()); + // to avoid to much task pushed to the stream, leading to stream overflow + // insert sync point + // fluxLimit(key, i); + c10::npu::NPUStream& hcclStream = hcclStreams_[key][i]; + hcclUs startut = TIME_NOW(); + C10D_HCCL_CHECK( + fn(inputs[i], outputs[i], hcclComms[i]->getHcclComm(), hcclStream)); + } + } + post(hcclStreams_[key]); + + for (size_t i = 0; i < inputs.size(); ++i) { + c10::npu::NPUStream& hcclStream = hcclStreams_[key][i]; + work->npuEvents_[i].record(hcclStream); + work->hcclComms_[i] = hcclComms[i]; + work->blockingWait_ = blockingWait_; + work->opTimeout_ = opTimeout_; + } + + return work; +} + +template +std::shared_ptr ProcessGroupHCCL::collective( + std::vector& inputs, + std::vector& outputs, + Fn fn) { + return collective( + inputs, + outputs, + fn, + [](std::vector&) {}, + [](std::vector&) {}); +} + +int g_allreduceID = 0; +std::shared_ptr ProcessGroupHCCL::allreduce( + std::vector& tensors, + const AllreduceOptions& opts) { + check_npu_tensors(tensors); + return collective( + tensors, + tensors, + [&](at::Tensor& input, + at::Tensor& output, + HcclComm comm, + c10::npu::NPUStream& stream) { + aclrtSetExceptionInfoCallback(exceptionCallback); + RECORD_FUNCTION("HcclAllreduce", std::vector({input})); + return HcclAllReduce( + input.data_ptr(), + output.data_ptr(), + input.storage().unsafeGetStorageImpl()->numel(), + getHcclDataType(input.scalar_type()), + hcclOp[opts.reduceOp], + comm, + stream.stream()); + }); +} +int g_broadcastID = 100000; +std::shared_ptr ProcessGroupHCCL::broadcast( + std::vector& tensors, + const BroadcastOptions& opts) { + check_npu_tensors(tensors); + return collective( + tensors, + tensors, + [&](at::Tensor& input, + at::Tensor& output, + HcclComm comm, + c10::npu::NPUStream& stream) { + RECORD_FUNCTION("HcclBroadcast", std::vector({input})); + const auto root = opts.rootRank * tensors.size() + opts.rootTensor; + return HcclBroadcast( + input.data_ptr(), + input.storage().unsafeGetStorageImpl()->numel(), + getHcclDataType(input.scalar_type()), + root, + comm, + stream.stream()); + }); +} + +std::shared_ptr ProcessGroupHCCL::allreduce_coalesced( + std::vector& /* unused */, + const AllreduceCoalescedOptions& /* unused */) { + throw std::runtime_error( + "ProcessGroupHCCL does not support allreduce_coalesced"); +} + +std::shared_ptr ProcessGroupHCCL::reduce( + std::vector& /* unused */, + const ReduceOptions& /* unused */) { + throw std::runtime_error("ProcessGroupHCCL does not support reduce"); +} + +std::shared_ptr ProcessGroupHCCL::allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts) { + check_npu_tensors(inputTensors); + auto outputFlattened = + flatten_for_scatter_gather(outputTensors, inputTensors, size_); + check_npu_tensors(outputFlattened); + + return collective( + inputTensors, + outputFlattened, + [&](at::Tensor& input, + at::Tensor& output, + HcclComm comm, + c10::npu::NPUStream& stream) { + RECORD_FUNCTION("HcclAllgather", std::vector({input})); + c10::npu::NPUCachingAllocator::recordStream( + output.storage().data_ptr(), stream); + return HcclAllGather( + input.data_ptr(), + output.data_ptr(), + input.storage().unsafeGetStorageImpl()->numel(), + getHcclDataType(input.scalar_type()), + comm, + stream.stream()); + }, + [&](std::vector& hcclStreams) {}, + [&](std::vector& hcclStreams) { + // Copy the flattened output tensors to the outputs. + for (size_t i = 0; i < outputTensors.size(); ++i) { + c10::npu::NPUStreamGuard guard(hcclStreams[i]); + for (size_t j = 0; j < outputTensors[0].size(); ++j) { + // See [Sync Streams]. + c10::npu::NPUCachingAllocator::recordStream( + outputTensors[i][j].storage().data_ptr(), hcclStreams[i]); + + outputTensors[i][j].copy_(outputFlattened[i][j], true); + } + } + }); +} + +std::shared_ptr ProcessGroupHCCL::allgather_base( + at::Tensor& /*unused */, + at::Tensor& /*unused */, + const AllgatherOptions& /*unused */) { + throw std::runtime_error("ProcessGroupHCCL does not support allgather_base"); +} + +std::shared_ptr ProcessGroupHCCL::reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts) { + check_npu_tensors(outputTensors); + + auto inputFlattened = + flatten_for_scatter_gather(inputTensors, outputTensors, size_); + check_npu_tensors(inputFlattened); + + return collective( + inputFlattened, + outputTensors, + [&](at::Tensor& input, + at::Tensor& output, + HcclComm comm, + c10::npu::NPUStream& stream) { + RECORD_FUNCTION("HcclReduceScatter", std::vector({input})); + c10::npu::NPUCachingAllocator::recordStream( + output.storage().data_ptr(), stream); + return HcclReduceScatter( + input.data_ptr(), + output.data_ptr(), + output.numel(), + getHcclDataType(input.scalar_type()), + hcclOp[opts.reduceOp], + comm, + stream.stream()); + }, + [&](std::vector& hcclStreams) { + // Copy the input tensors to the flattened inputs. + for (size_t i = 0; i < inputTensors.size(); ++i) { + c10::npu::NPUStreamGuard guard(hcclStreams[i]); + for (size_t j = 0; j < inputTensors[0].size(); ++j) { + // See [Sync Streams]. + c10::npu::NPUCachingAllocator::recordStream( + inputTensors[i][j].storage().data_ptr(), hcclStreams[i]); + + inputFlattened[i][j].copy_(inputTensors[i][j], true); + } + } + }, + [&](std::vector& hcclStreams) {}); +} + +std::shared_ptr ProcessGroupHCCL::barrier( + const BarrierOptions& opts) { + std::vector devices; + if (usedDeviceIdxs_.empty()) { + auto numNPUs = c10::npu::device_count(); + int16_t deviceIdx = static_cast(rank_ % numNPUs); + devices.push_back(at::Device(at::DeviceType::NPU, deviceIdx)); + } else { + for (auto usedDeviceIdx : usedDeviceIdxs_) { + devices.push_back(at::Device(at::DeviceType::NPU, usedDeviceIdx)); + } + } + + std::vector barrierTensors; + barrierTensors.reserve(devices.size()); + + at::npu::OptionalNPUGuard npuGuard; + for (auto& device : devices) { + npuGuard.set_index(device.index()); + barrierTensors.push_back(at::empty( + {1}, + at::TensorOptions().device(at::DeviceType::NPU).dtype(at::kFloat))); + } + + auto work = BarrierInside(barrierTensors); + + // Work will take over barrierTensors + auto hcclWork = dynamic_cast(work.get()); + TORCH_CHECK(hcclWork); + hcclWork->barrierTensors_ = std::move(barrierTensors); + + return work; +} + +std::shared_ptr ProcessGroupHCCL::BarrierInside( + std::vector& tensors) { + check_npu_tensors(tensors); + + return collective( + tensors, + tensors, + [&](at::Tensor& input, + at::Tensor& output, + HcclComm comm, + c10::npu::NPUStream& stream) { + aclrtSetExceptionInfoCallback(exceptionCallback); + auto ret = c10::npu::hccl::hccl_barrier(comm, stream.stream()); + if (ret == HcclResult::HCCL_E_NOT_SUPPORT) { + return HcclAllReduce( + input.data_ptr(), + output.data_ptr(), + input.storage().unsafeGetStorageImpl()->numel(), + getHcclDataType(input.scalar_type()), + hcclOp[ReduceOp::SUM], + comm, + stream.stream()); + } + else { + return ret; + } + }); +} + +std::shared_ptr ProcessGroupHCCL::gather( + std::vector>& /* unused */, + std::vector& /* unused */, + const GatherOptions& /* unused */) { + throw std::runtime_error("ProcessGroupHCCL does not support gather"); +} + +std::shared_ptr ProcessGroupHCCL::scatter( + std::vector& /* unused */, + std::vector>& /* unused */, + const ScatterOptions& /* unused */) { + throw std::runtime_error("ProcessGroupHCCL does not support scatter"); +} + +std::shared_ptr ProcessGroupHCCL::send( + std::vector& /* unused */, + int /* unused */, + int /* unused */) { + throw std::runtime_error("ProcessGroupHCCL does not support send"); +} + +std::shared_ptr ProcessGroupHCCL::recv( + std::vector& /* unused */, + int /* unused */, + int /* unused */) { + throw std::runtime_error("ProcessGroupHCCL does not support recv"); +} + +std::shared_ptr ProcessGroupHCCL::recvAnysource( + std::vector& /* unused */, + int /* unused */) { + throw std::runtime_error("ProcessGroupHCCL does not support recv"); +} +} // namespace c10d diff --git a/src/torch/lib/c10d/ProcessGroupHCCL.hpp b/src/torch/lib/c10d/ProcessGroupHCCL.hpp index 76c0253e5c5843836a87f709db1d00350aa90c02..3c7e830e073810ae170b9019e6a5a18e2414eb6e 100644 --- a/src/torch/lib/c10d/ProcessGroupHCCL.hpp +++ b/src/torch/lib/c10d/ProcessGroupHCCL.hpp @@ -1,392 +1,392 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -namespace c10d { -// Environment variable which controls whether or not wait() is blocking or -// non-blocking. -constexpr const char* HCCL_BLOCKING_WAIT = "HCCL_BLOCKING_WAIT"; - -// ProcessGroupHCCL implements HCCL bindings for c10d. -// -// All functions of the class are expected to be called in the same order -// across all processes in the process group. This is the only way that we -// can guarantee to match up the same calls among all processes. -// -// All HCCL functions provided by this class are asynchronous functions. More -// specifically, each HCCL call is scheduled on a separate runtime stream that -// is different from the current runtime stream. This is for the purpose of -// achieving potentially concurrency and better performance. As a result, -// it is the callers' responsibilty to make sure that the runtime stream their -// code works on needs to wait for the HCCL operation from -// this class. -// -// This can be done by calling: -// -// either WorkHCCL::wait() or WorkHCCL::synchronize(), both achieves the same -// functionality and are synonyms. -// -// Also note that WorkHCCL::finishedGPUExecution() is a helper function only -// provided by ProcessGroupHCCL to check if the HCCL operation of WorkHCCL has -// finished execution on the NPU (not just scheduled). -// -// Example on using the HCCL process group -// -// ProcessGroupHCCL pg(store, rank, size); -// std::shared_ptr work = pg.allreduce(tensors); -// -// // At this point, HCCL kernel has already by queued successfully -// // Now, let current stream wait for the HCCL to finish, this function is -// // async operation as well -// -// work->wait() -// -// // Now continue on other work in the current stream. -class ProcessGroupHCCL : public ProcessGroup { - public: - class WorkHCCL : public ProcessGroup::Work { - public: - // Constructor takes a list of NPU devices to adapt framework - // But HCCL support one device only!!! - WorkHCCL(const std::vector& devices); - virtual ~WorkHCCL(); - - // Checks if request has completed. In this specific case of HCCL, it checks - // if the HCCL operation has completed on the NPU in its own HCCL stream. - // Non-blocking operation. - bool isCompleted() override; - - bool isSuccess() const override; - - // Same as calling synchronize() for HCCL work. - bool wait() override; - - // Temporarily not implemented - // void abort() override; - - // Let current stream wait on the completing of the HCCL work - // Throws on exceptions. Blocking operation, which will wait for work - // completion. - void synchronize() override; - - // Helper function that checks if the HCCL have finished - // execution on the NPUs - bool finishedNPUExecution(); - - protected: - // The cached list of NPU devices to operate on. - // HCCL support one device per rank only - std::vector devices_; - - // The NPU events tracking this work item on multiple NPU devices - std::vector npuEvents_; - - // The HCCL communicators used for this work item. - std::vector> hcclComms_; - - // Tensors used for barrier op - std::vector barrierTensors_; - - // Clone of blockingWait_ from ProcessGroupHCCL. - bool blockingWait_ = false; - - // Clone of opTimeout_ from ProcessGroupHCCL. - std::chrono::milliseconds opTimeout_; - - // Time point representing when the work started. - std::chrono::time_point workStartTime_; - - // Temporarily not implemented - // virtual std::exception_ptr checkForHCCLErrors(const - // std::vector>& hcclComms) const; - - private: - // Checks for HCCL errors and sets an appropriate exception_ptr. - void checkAndSetException(); - - // Checks for HCCL errors and throws an appropriate exception. - void checkAndThrowException(); - - // Just checks whether NPU execution has completed, without modifying - // exception_ptr. - bool finishedNPUExecutionInternal() const; - - // Temporarily not implemented - // std::shared_ptr store_; - - friend class ProcessGroupHCCL; - }; - - // If you wish to create multiple process groups, each with a potentially - // different rank and size, you can do so by passing a new store instance - // to each one. If you have only a single store object, you can - // use the `c10d::PrefixStore` to derive scoped instances. - // This is also what the Python API in torch.distributed does. - // - // The process group instance keeps a reference to the store because - // it may be used long after the constructor runs. In fact, the constructor - // doesn't create any HCCL communicators. A single HCCL communicator can - // only be used on a specific set of devices, and are therefore created - // on-demand when a collective runs. If another collective is executed later, - // against a different set of devices, the process group creates another NCCL - // communicator. These HCCL communicators are cached and reused if possible. - // - ProcessGroupHCCL( - const std::shared_ptr& store, - int rank, - int size, - const std::chrono::milliseconds& opTimeout = - std::chrono::milliseconds(kProcessGroupHCCLOpTimeoutMillis)); - - // This constructor includes the deprecated `groupName` argument. - // If you have existing code that uses the `groupName`, you can replace - // it by specifying a `c10d::PrefixStore(groupName, store)` for store. - C10_DEPRECATED ProcessGroupHCCL( - const std::shared_ptr& store, - int rank, - int size, - const std::string& groupName, - const std::chrono::milliseconds& opTimeout = - std::chrono::milliseconds(kProcessGroupHCCLOpTimeoutMillis)) - : ProcessGroupHCCL(store, rank, size, opTimeout) {} - - virtual ~ProcessGroupHCCL(); - - std::shared_ptr broadcast( - std::vector& tensors, - const BroadcastOptions& opts = BroadcastOptions()) override; - - std::shared_ptr allreduce( - std::vector& tensors, - const AllreduceOptions& opts = AllreduceOptions()) override; - - std::shared_ptr allreduce_coalesced( - std::vector& tensors, - const AllreduceCoalescedOptions& opts = - AllreduceCoalescedOptions()) override; - - std::shared_ptr reduce( - std::vector& tensors, - const ReduceOptions& opts = ReduceOptions()) override; - - std::shared_ptr allgather( - std::vector>& outputTensors, - std::vector& inputTensors, - const AllgatherOptions& opts = AllgatherOptions()) override; - - std::shared_ptr allgather_base( - at::Tensor& outputbuffer, - at::Tensor& inputbuffer, - const AllgatherOptions& opts = AllgatherOptions()) override; - - std::shared_ptr reduce_scatter( - std::vector& outputTensors, - std::vector>& inputTensors, - const ReduceScatterOptions& opts = ReduceScatterOptions()) override; - - std::shared_ptr barrier( - const BarrierOptions& opts = BarrierOptions()) override; - -/** - HCCL barrier API for ProcessGroupHCCL Class. - */ - std::shared_ptr BarrierInside( - std::vector& Tensors); - - // Unsupported Ops - std::shared_ptr gather( - std::vector>& outputTensors, - std::vector& inputTensors, - const GatherOptions& opts = GatherOptions()) override; - - std::shared_ptr scatter( - std::vector& outputTensors, - std::vector>& inputTensors, - const ScatterOptions& opts = ScatterOptions()) override; - - std::shared_ptr send( - std::vector& tensors, - int dstRank, - int tag) override; - - std::shared_ptr recv( - std::vector& tensors, - int srcRank, - int tag) override; - - std::shared_ptr recvAnysource( - std::vector& tensors, - int tag) override; - - static const int64_t kProcessGroupHCCLOpTimeoutMillis; - - protected: - // Helper that broadcasts HCCL Master ID to all ranks through the store - void broadcastMasterID(HcclRootInfo* hcclID); - - // Helper that either looks up the cached HCCL communicators or creates - // a new set of NCCL communicators as a cache entry - std::vector>& getHCCLComm( - const std::string& devicesKey, - const std::vector& devices); - - // Temporarily not implemented - // virtual std::exception_ptr checkForHCCLErrors(const - // std::vector>& hcclComms); - - virtual std::shared_ptr initWork( - std::vector devices); - - private: - // Helper that encapsulates work shared across all collective communication - // primitives. The callbacks have the following signatures: - // - // HcclResult fn(at::Tensor& input, at::Tensor& output, - // ncclComm_t, at::cuda::CUDAStream&); - // void {pre,post}(std::vector); - template - std::shared_ptr collective( - std::vector& input, - std::vector& output, - Fn fn); - template - std::shared_ptr collective( - std::vector& input, - std::vector& output, - Fn fn, - PreProcess pre, - PostProcess post); - - // Temporarily not implemented - // static std::exception_ptr checkForHCCLErrorsInternal(const - // std::vector>& hcclComms); void - // ncclCommWatchdog(); void ncclCommWatchdogInternal(); - - // Limit the number of tasks issued to the HCCL stream. - // This interface will introduce RTS bug, - // so we withdraw it temporarily. - // void fluxLimit ( const std::string& key, const int index); - - protected: - static const int64_t kWatchdogThreadSleepMillis; - - // The store is used to broadcast the HCCL Master ID of rank 0. - std::shared_ptr store_; - - // The number of HCCL communicators that have been created during - // the lifetime of this process group. This sequence number is - // used to scope keys used in the store. - uint64_t hcclCommCounter_{0}; - - // The HCCL communicator that the process group has cached. - // The key is a list of NPU devices that an operation is operating on - // The NPU devices are stored in a device sequence and the cache NCCL - // communicator is associated with this NPU device sequence - // - // e.g. If the process group op only uses device 0, then the value of - // the used device string stored (value of the hashmap) would be "0". - // - // If the process group op uses device 0 - 7 and the each tensor of the - // input tensor list is on device, 0, 1, 2, 3, 4, 5, 6, 7 separately, - // then the value of the used device string (key) stored would be - // "0,1,2,3,4,5,6,7" - // - // If the process group op uses device 0 - 7 and the each tensor of the - // input tensor list is on device, 0, 4, 5, 6, 7, 1, 2, 3 separately, - // then the value of the used device string stored would be - // "0,4,5,6,7,1,2,3" - // - // Note that the order of the device for the tensor list matters. - std::unordered_map>> - devHCCLCommMap_; - - // Temporarily not implemented - // std::unordered_map>> - // hcclIdToCommMap_; - - // Mutex to guard devNCCLCommMap_. - std::mutex devHCCLCommMapLock_; - - // Watchdog thread which looks for errors on the cached NCCL communicators. - std::thread hcclCommWatchdogThread_; - - // Whether or not we should terminate the watchdog thread. - std::atomic terminateWatchdog_; - - // Condition variable to control how long the watchdog thread waits. - std::condition_variable watchdogCV_; - - // Mutex for watchdog. - std::mutex watchdogCVMutex_; - - // The NPU steams used by NCCL kernels - std::unordered_map> - hcclStreams_; - - // The NPU events used to sync HCCL streams - std::unordered_map> hcclEvents_; - - // The NPU events used to control task rate to protect streams - std::unordered_map> - rateCtrlEvents_; - std::unordered_map> collectiveCnts_; - - // Device Indexes used for all collectives in this group - std::set usedDeviceIdxs_; - - // map from the key: "group name + pg counter (ID)" to the - // HCCL Master ID count. This needs to be group and pg specific - // - // For each process group, we need a uniform unique HCCL Master ID counter to - // ensure that HCCL operation in this process group can be completed - // successfully. Since each process group ID belongs to a group name, the key - // to this map is a combination of group name and ProcessGroupHCCL ID. - static std::unordered_map pgUniqueHCCLIDCnt_; - - // map from group name to the pg counter (ID) within that group - // - // For each group with the "group name" (which is the key), we need to - // keep track of a unique process group ID when creating a new - // ProcessGroupNCCL for this "group name". Therefore, the value of this - // map keeps the unique ProcessGroupHCCL's ID for a specific group with - // the "group name". The reason we need a per-group process group ID counter - // is that different group can have different ranks and we need ensure that - // each group has its own uniform process group ID for all its ranks. - static std::unordered_map processGroupCounterMap_; - - // Whether or not wait() and synchronize() are blocking operations that wait - // for the operation to complete. - bool blockingWait_ = false; - - // Timeout for operations. This is only used when blockingWait_ is enabled. - std::chrono::milliseconds opTimeout_; - - // Temporarily not implemented - // std::unordered_set abortedComms_; -}; +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +namespace c10d { +// Environment variable which controls whether or not wait() is blocking or +// non-blocking. +constexpr const char* HCCL_BLOCKING_WAIT = "HCCL_BLOCKING_WAIT"; + +// ProcessGroupHCCL implements HCCL bindings for c10d. +// +// All functions of the class are expected to be called in the same order +// across all processes in the process group. This is the only way that we +// can guarantee to match up the same calls among all processes. +// +// All HCCL functions provided by this class are asynchronous functions. More +// specifically, each HCCL call is scheduled on a separate runtime stream that +// is different from the current runtime stream. This is for the purpose of +// achieving potentially concurrency and better performance. As a result, +// it is the callers' responsibilty to make sure that the runtime stream their +// code works on needs to wait for the HCCL operation from +// this class. +// +// This can be done by calling: +// +// either WorkHCCL::wait() or WorkHCCL::synchronize(), both achieves the same +// functionality and are synonyms. +// +// Also note that WorkHCCL::finishedGPUExecution() is a helper function only +// provided by ProcessGroupHCCL to check if the HCCL operation of WorkHCCL has +// finished execution on the NPU (not just scheduled). +// +// Example on using the HCCL process group +// +// ProcessGroupHCCL pg(store, rank, size); +// std::shared_ptr work = pg.allreduce(tensors); +// +// // At this point, HCCL kernel has already by queued successfully +// // Now, let current stream wait for the HCCL to finish, this function is +// // async operation as well +// +// work->wait() +// +// // Now continue on other work in the current stream. +class ProcessGroupHCCL : public ProcessGroup { + public: + class WorkHCCL : public ProcessGroup::Work { + public: + // Constructor takes a list of NPU devices to adapt framework + // But HCCL support one device only!!! + WorkHCCL(const std::vector& devices); + virtual ~WorkHCCL(); + + // Checks if request has completed. In this specific case of HCCL, it checks + // if the HCCL operation has completed on the NPU in its own HCCL stream. + // Non-blocking operation. + bool isCompleted() override; + + bool isSuccess() const override; + + // Same as calling synchronize() for HCCL work. + bool wait() override; + + // Temporarily not implemented + // void abort() override; + + // Let current stream wait on the completing of the HCCL work + // Throws on exceptions. Blocking operation, which will wait for work + // completion. + void synchronize() override; + + // Helper function that checks if the HCCL have finished + // execution on the NPUs + bool finishedNPUExecution(); + + protected: + // The cached list of NPU devices to operate on. + // HCCL support one device per rank only + std::vector devices_; + + // The NPU events tracking this work item on multiple NPU devices + std::vector npuEvents_; + + // The HCCL communicators used for this work item. + std::vector> hcclComms_; + + // Tensors used for barrier op + std::vector barrierTensors_; + + // Clone of blockingWait_ from ProcessGroupHCCL. + bool blockingWait_ = false; + + // Clone of opTimeout_ from ProcessGroupHCCL. + std::chrono::milliseconds opTimeout_; + + // Time point representing when the work started. + std::chrono::time_point workStartTime_; + + // Temporarily not implemented + // virtual std::exception_ptr checkForHCCLErrors(const + // std::vector>& hcclComms) const; + + private: + // Checks for HCCL errors and sets an appropriate exception_ptr. + void checkAndSetException(); + + // Checks for HCCL errors and throws an appropriate exception. + void checkAndThrowException(); + + // Just checks whether NPU execution has completed, without modifying + // exception_ptr. + bool finishedNPUExecutionInternal() const; + + // Temporarily not implemented + // std::shared_ptr store_; + + friend class ProcessGroupHCCL; + }; + + // If you wish to create multiple process groups, each with a potentially + // different rank and size, you can do so by passing a new store instance + // to each one. If you have only a single store object, you can + // use the `c10d::PrefixStore` to derive scoped instances. + // This is also what the Python API in torch.distributed does. + // + // The process group instance keeps a reference to the store because + // it may be used long after the constructor runs. In fact, the constructor + // doesn't create any HCCL communicators. A single HCCL communicator can + // only be used on a specific set of devices, and are therefore created + // on-demand when a collective runs. If another collective is executed later, + // against a different set of devices, the process group creates another NCCL + // communicator. These HCCL communicators are cached and reused if possible. + // + ProcessGroupHCCL( + const std::shared_ptr& store, + int rank, + int size, + const std::chrono::milliseconds& opTimeout = + std::chrono::milliseconds(kProcessGroupHCCLOpTimeoutMillis)); + + // This constructor includes the deprecated `groupName` argument. + // If you have existing code that uses the `groupName`, you can replace + // it by specifying a `c10d::PrefixStore(groupName, store)` for store. + C10_DEPRECATED ProcessGroupHCCL( + const std::shared_ptr& store, + int rank, + int size, + const std::string& groupName, + const std::chrono::milliseconds& opTimeout = + std::chrono::milliseconds(kProcessGroupHCCLOpTimeoutMillis)) + : ProcessGroupHCCL(store, rank, size, opTimeout) {} + + virtual ~ProcessGroupHCCL(); + + std::shared_ptr broadcast( + std::vector& tensors, + const BroadcastOptions& opts = BroadcastOptions()) override; + + std::shared_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + std::shared_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + std::shared_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + std::shared_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + std::shared_ptr allgather_base( + at::Tensor& outputbuffer, + at::Tensor& inputbuffer, + const AllgatherOptions& opts = AllgatherOptions()) override; + + std::shared_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + std::shared_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + +/** + HCCL barrier API for ProcessGroupHCCL Class. + */ + std::shared_ptr BarrierInside( + std::vector& Tensors); + + // Unsupported Ops + std::shared_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) override; + + std::shared_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) override; + + std::shared_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + std::shared_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + std::shared_ptr recvAnysource( + std::vector& tensors, + int tag) override; + + static const int64_t kProcessGroupHCCLOpTimeoutMillis; + + protected: + // Helper that broadcasts HCCL Master ID to all ranks through the store + void broadcastMasterID(HcclRootInfo* hcclID); + + // Helper that either looks up the cached HCCL communicators or creates + // a new set of NCCL communicators as a cache entry + std::vector>& getHCCLComm( + const std::string& devicesKey, + const std::vector& devices); + + // Temporarily not implemented + // virtual std::exception_ptr checkForHCCLErrors(const + // std::vector>& hcclComms); + + virtual std::shared_ptr initWork( + std::vector devices); + + private: + // Helper that encapsulates work shared across all collective communication + // primitives. The callbacks have the following signatures: + // + // HcclResult fn(at::Tensor& input, at::Tensor& output, + // ncclComm_t, at::cuda::CUDAStream&); + // void {pre,post}(std::vector); + template + std::shared_ptr collective( + std::vector& input, + std::vector& output, + Fn fn); + template + std::shared_ptr collective( + std::vector& input, + std::vector& output, + Fn fn, + PreProcess pre, + PostProcess post); + + // Temporarily not implemented + // static std::exception_ptr checkForHCCLErrorsInternal(const + // std::vector>& hcclComms); void + // ncclCommWatchdog(); void ncclCommWatchdogInternal(); + + // Limit the number of tasks issued to the HCCL stream. + // This interface will introduce RTS bug, + // so we withdraw it temporarily. + // void fluxLimit ( const std::string& key, const int index); + + protected: + static const int64_t kWatchdogThreadSleepMillis; + + // The store is used to broadcast the HCCL Master ID of rank 0. + std::shared_ptr store_; + + // The number of HCCL communicators that have been created during + // the lifetime of this process group. This sequence number is + // used to scope keys used in the store. + uint64_t hcclCommCounter_{0}; + + // The HCCL communicator that the process group has cached. + // The key is a list of NPU devices that an operation is operating on + // The NPU devices are stored in a device sequence and the cache NCCL + // communicator is associated with this NPU device sequence + // + // e.g. If the process group op only uses device 0, then the value of + // the used device string stored (value of the hashmap) would be "0". + // + // If the process group op uses device 0 - 7 and the each tensor of the + // input tensor list is on device, 0, 1, 2, 3, 4, 5, 6, 7 separately, + // then the value of the used device string (key) stored would be + // "0,1,2,3,4,5,6,7" + // + // If the process group op uses device 0 - 7 and the each tensor of the + // input tensor list is on device, 0, 4, 5, 6, 7, 1, 2, 3 separately, + // then the value of the used device string stored would be + // "0,4,5,6,7,1,2,3" + // + // Note that the order of the device for the tensor list matters. + std::unordered_map>> + devHCCLCommMap_; + + // Temporarily not implemented + // std::unordered_map>> + // hcclIdToCommMap_; + + // Mutex to guard devNCCLCommMap_. + std::mutex devHCCLCommMapLock_; + + // Watchdog thread which looks for errors on the cached NCCL communicators. + std::thread hcclCommWatchdogThread_; + + // Whether or not we should terminate the watchdog thread. + std::atomic terminateWatchdog_; + + // Condition variable to control how long the watchdog thread waits. + std::condition_variable watchdogCV_; + + // Mutex for watchdog. + std::mutex watchdogCVMutex_; + + // The NPU steams used by NCCL kernels + std::unordered_map> + hcclStreams_; + + // The NPU events used to sync HCCL streams + std::unordered_map> hcclEvents_; + + // The NPU events used to control task rate to protect streams + std::unordered_map> + rateCtrlEvents_; + std::unordered_map> collectiveCnts_; + + // Device Indexes used for all collectives in this group + std::set usedDeviceIdxs_; + + // map from the key: "group name + pg counter (ID)" to the + // HCCL Master ID count. This needs to be group and pg specific + // + // For each process group, we need a uniform unique HCCL Master ID counter to + // ensure that HCCL operation in this process group can be completed + // successfully. Since each process group ID belongs to a group name, the key + // to this map is a combination of group name and ProcessGroupHCCL ID. + static std::unordered_map pgUniqueHCCLIDCnt_; + + // map from group name to the pg counter (ID) within that group + // + // For each group with the "group name" (which is the key), we need to + // keep track of a unique process group ID when creating a new + // ProcessGroupNCCL for this "group name". Therefore, the value of this + // map keeps the unique ProcessGroupHCCL's ID for a specific group with + // the "group name". The reason we need a per-group process group ID counter + // is that different group can have different ranks and we need ensure that + // each group has its own uniform process group ID for all its ranks. + static std::unordered_map processGroupCounterMap_; + + // Whether or not wait() and synchronize() are blocking operations that wait + // for the operation to complete. + bool blockingWait_ = false; + + // Timeout for operations. This is only used when blockingWait_ is enabled. + std::chrono::milliseconds opTimeout_; + + // Temporarily not implemented + // std::unordered_set abortedComms_; +}; } // namespace c10d \ No newline at end of file diff --git a/src/torch/npu/random.py b/src/torch/npu/random.py index d84fb925e5f198a242b49a3625906021e8cf6205..9cee1829563453c84b12ae485e01ce8f6c4895e6 100644 --- a/src/torch/npu/random.py +++ b/src/torch/npu/random.py @@ -1,110 +1,110 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from . import _lazy_init, _lazy_call, device_count, current_device - -__all__ = ['manual_seed', 'manual_seed_all', - 'seed', 'seed_all', 'initial_seed'] - - -def manual_seed(seed): - r"""Sets the seed for generating random numbers for the current NPU. - It's safe to call this function if NPU is not available; in that - case, it is silently ignored. - - Args: - seed (int): The desired seed. - - .. warning:: - If you are working with a multi-NPU model, this function is insufficient - to get determinism. To seed all NPUs, use :func:`manual_seed_all`. - """ - seed = int(seed) - - def cb(): - idx = current_device() - default_generator = torch.npu.default_generators[idx] - default_generator.manual_seed(seed) - - _lazy_call(cb) - - -def manual_seed_all(seed): - r"""Sets the seed for generating random numbers on all NPUs. - It's safe to call this function if NPU is not available; in that - case, it is silently ignored. - - Args: - seed (int): The desired seed. - """ - seed = int(seed) - - def cb(): - for i in range(device_count()): - default_generator = torch.npu.default_generators[i] - default_generator.manual_seed(seed) - - _lazy_call(cb) - - -def seed(): - r"""Sets the seed for generating random numbers to a random number for the current NPU. - It's safe to call this function if NPU is not available; in that - case, it is silently ignored. - - .. warning:: - If you are working with a multi-NPU model, this function will only initialize - the seed on one NPU. To initialize all NPUs, use :func:`seed_all`. - """ - def cb(): - idx = current_device() - default_generator = torch.npu.default_generators[idx] - default_generator.seed() - - _lazy_call(cb) - - -def seed_all(): - r"""Sets the seed for generating random numbers to a random number on all NPUs. - It's safe to call this function if NPU is not available; in that - case, it is silently ignored. - """ - def cb(): - random_seed = 0 - seeded = False - for i in range(device_count()): - default_generator = torch.npu.default_generators[i] - if not seeded: - default_generator.seed() - random_seed = default_generator.initial_seed() - seeded = True - else: - default_generator.manual_seed(random_seed) - - _lazy_call(cb) - - -def initial_seed(): - r"""Returns the current random seed of the current NPU. - - .. warning:: - This function eagerly initializes NPU. - """ - _lazy_init() - idx = current_device() - default_generator = torch.npu.default_generators[idx] - return default_generator.initial_seed() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from . import _lazy_init, _lazy_call, device_count, current_device + +__all__ = ['manual_seed', 'manual_seed_all', + 'seed', 'seed_all', 'initial_seed'] + + +def manual_seed(seed): + r"""Sets the seed for generating random numbers for the current NPU. + It's safe to call this function if NPU is not available; in that + case, it is silently ignored. + + Args: + seed (int): The desired seed. + + .. warning:: + If you are working with a multi-NPU model, this function is insufficient + to get determinism. To seed all NPUs, use :func:`manual_seed_all`. + """ + seed = int(seed) + + def cb(): + idx = current_device() + default_generator = torch.npu.default_generators[idx] + default_generator.manual_seed(seed) + + _lazy_call(cb) + + +def manual_seed_all(seed): + r"""Sets the seed for generating random numbers on all NPUs. + It's safe to call this function if NPU is not available; in that + case, it is silently ignored. + + Args: + seed (int): The desired seed. + """ + seed = int(seed) + + def cb(): + for i in range(device_count()): + default_generator = torch.npu.default_generators[i] + default_generator.manual_seed(seed) + + _lazy_call(cb) + + +def seed(): + r"""Sets the seed for generating random numbers to a random number for the current NPU. + It's safe to call this function if NPU is not available; in that + case, it is silently ignored. + + .. warning:: + If you are working with a multi-NPU model, this function will only initialize + the seed on one NPU. To initialize all NPUs, use :func:`seed_all`. + """ + def cb(): + idx = current_device() + default_generator = torch.npu.default_generators[idx] + default_generator.seed() + + _lazy_call(cb) + + +def seed_all(): + r"""Sets the seed for generating random numbers to a random number on all NPUs. + It's safe to call this function if NPU is not available; in that + case, it is silently ignored. + """ + def cb(): + random_seed = 0 + seeded = False + for i in range(device_count()): + default_generator = torch.npu.default_generators[i] + if not seeded: + default_generator.seed() + random_seed = default_generator.initial_seed() + seeded = True + else: + default_generator.manual_seed(random_seed) + + _lazy_call(cb) + + +def initial_seed(): + r"""Returns the current random seed of the current NPU. + + .. warning:: + This function eagerly initializes NPU. + """ + _lazy_init() + idx = current_device() + default_generator = torch.npu.default_generators[idx] + return default_generator.initial_seed() diff --git a/test/test_npu/test_adaptive_avg_pool2d.py b/test/test_npu/test_adaptive_avg_pool2d.py index 45aca180e5b868c5cb5ec7566f96d31b4b4043cf..c356fcc97a00eb968f883d66fdc8cca6f1c1c8b2 100644 --- a/test/test_npu/test_adaptive_avg_pool2d.py +++ b/test/test_npu/test_adaptive_avg_pool2d.py @@ -1,74 +1,74 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestAdaptiveAvgPool2d(TestCase): - def cpu_op_exec(self, input, output_size): - m = nn.AdaptiveAvgPool2d(output_size) - output= m(input) - return output.numpy() - - def npu_op_exec(self, input, output_size): - m = nn.AdaptiveAvgPool2d(output_size).npu() - output = m(input) - return output.cpu().numpy() - - def test_adaptiveAvgPool2d_shape_format_fp16(self, device): - format_list = [0, 3] - shape_list = [(32, 16, 16), - (16, 1024, 256), - (1024, 464, 11, 9), - (1, 2048, 15, 15)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - # TODO(Ascend): tbe operator has problem in precision and (x, 1) case and so on. - output_list = [(4, 4), (3, 5), (1), (1, None), (None, 2)] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - for output_size in output_list: - cpu_output = self.cpu_op_exec(cpu_input, output_size) - npu_output = self.npu_op_exec(npu_input, output_size) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_adaptiveAvgPool2d_shape_format_fp32(self, device): - format_list = [0, 3] - shape_list = [(32, 16, 16), - (16, 1024, 256), - (1024, 464, 11, 9), - (1, 2048, 15, 15)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - output_list = [(4, 4), (3, 5), (1), (1, None), (None, 2)] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - for output_size in output_list: - cpu_output = self.cpu_op_exec(cpu_input, output_size) - npu_output = self.npu_op_exec(npu_input, output_size) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestAdaptiveAvgPool2d, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestAdaptiveAvgPool2d(TestCase): + def cpu_op_exec(self, input, output_size): + m = nn.AdaptiveAvgPool2d(output_size) + output= m(input) + return output.numpy() + + def npu_op_exec(self, input, output_size): + m = nn.AdaptiveAvgPool2d(output_size).npu() + output = m(input) + return output.cpu().numpy() + + def test_adaptiveAvgPool2d_shape_format_fp16(self, device): + format_list = [0, 3] + shape_list = [(32, 16, 16), + (16, 1024, 256), + (1024, 464, 11, 9), + (1, 2048, 15, 15)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + # TODO(Ascend): tbe operator has problem in precision and (x, 1) case and so on. + output_list = [(4, 4), (3, 5), (1), (1, None), (None, 2)] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + for output_size in output_list: + cpu_output = self.cpu_op_exec(cpu_input, output_size) + npu_output = self.npu_op_exec(npu_input, output_size) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_adaptiveAvgPool2d_shape_format_fp32(self, device): + format_list = [0, 3] + shape_list = [(32, 16, 16), + (16, 1024, 256), + (1024, 464, 11, 9), + (1, 2048, 15, 15)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + output_list = [(4, 4), (3, 5), (1), (1, None), (None, 2)] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + for output_size in output_list: + cpu_output = self.cpu_op_exec(cpu_input, output_size) + npu_output = self.npu_op_exec(npu_input, output_size) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestAdaptiveAvgPool2d, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_all.py b/test/test_npu/test_all.py index b68f9abad8c450140f0ab1f54eee797085f6f051..e12d08e7f5e9fa4cfdbed6714cf9e1094f5810bd 100644 --- a/test/test_npu/test_all.py +++ b/test/test_npu/test_all.py @@ -1,85 +1,85 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestAll(TestCase): - def cpu_op_exec1(self, input): - output = torch.all(input) - output = output.numpy() - return output - - def npu_op_exec1(self, input): - input = input.to("npu") - output = torch.all(input) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec3(self, input, axis, keepdim): - output = torch.all(input, axis, keepdim) - output = output.numpy() - return output - - def npu_op_exec3(self, input, axis, keepdim): - input = input.to("npu") - output = torch.all(input, axis, keepdim) - output = output.to("cpu") - output = output.numpy() - return output - - def test_all_noaxis_notkeedim_bool(self, device): - shape_format = [ - [np.bool_, -1, (4, 2, 5)], - [np.bool_, -1, (7, 4, 5, 8)] - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 2) - cpu_output = self.cpu_op_exec1(cpu_input) - npu_output = self.npu_op_exec1(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - def test_all_axis_notkeedim_bool(self, device): - shape_format = [ - [[np.bool_, -1, (4, 2, 5)], 3], - [[np.bool_, -1, (4, 2, 5, 8)], 4] - ] - for item in shape_format: - for i in range(item[1]): - cpu_input, npu_input = create_common_tensor(item[0], 0, 2) - cpu_output = self.cpu_op_exec3(cpu_input, i, False) - npu_output = self.npu_op_exec3(npu_input, i, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_all_axis_keedim_bool(self, device): - shape_format = [ - [[np.bool_, -1, (4, 2, 5)], 3], - [[np.bool_, -1, (4, 2, 5, 8)], 4] - ] - for item in shape_format: - for i in range(item[1]): - cpu_input, npu_input = create_common_tensor(item[0], 0, 2) - cpu_output = self.cpu_op_exec3(cpu_input, i, True) - npu_output = self.npu_op_exec3(npu_input, i, True) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestAll, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestAll(TestCase): + def cpu_op_exec1(self, input): + output = torch.all(input) + output = output.numpy() + return output + + def npu_op_exec1(self, input): + input = input.to("npu") + output = torch.all(input) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec3(self, input, axis, keepdim): + output = torch.all(input, axis, keepdim) + output = output.numpy() + return output + + def npu_op_exec3(self, input, axis, keepdim): + input = input.to("npu") + output = torch.all(input, axis, keepdim) + output = output.to("cpu") + output = output.numpy() + return output + + def test_all_noaxis_notkeedim_bool(self, device): + shape_format = [ + [np.bool_, -1, (4, 2, 5)], + [np.bool_, -1, (7, 4, 5, 8)] + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 2) + cpu_output = self.cpu_op_exec1(cpu_input) + npu_output = self.npu_op_exec1(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + def test_all_axis_notkeedim_bool(self, device): + shape_format = [ + [[np.bool_, -1, (4, 2, 5)], 3], + [[np.bool_, -1, (4, 2, 5, 8)], 4] + ] + for item in shape_format: + for i in range(item[1]): + cpu_input, npu_input = create_common_tensor(item[0], 0, 2) + cpu_output = self.cpu_op_exec3(cpu_input, i, False) + npu_output = self.npu_op_exec3(npu_input, i, False) + self.assertRtolEqual(cpu_output, npu_output) + + def test_all_axis_keedim_bool(self, device): + shape_format = [ + [[np.bool_, -1, (4, 2, 5)], 3], + [[np.bool_, -1, (4, 2, 5, 8)], 4] + ] + for item in shape_format: + for i in range(item[1]): + cpu_input, npu_input = create_common_tensor(item[0], 0, 2) + cpu_output = self.cpu_op_exec3(cpu_input, i, True) + npu_output = self.npu_op_exec3(npu_input, i, True) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestAll, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_col2im.py b/test/test_npu/test_col2im.py index c045583737586837428566927057bbf9a9c1527d..719507f77c52b53fdb377c25d4ffba7b9daf8b80 100644 --- a/test/test_npu/test_col2im.py +++ b/test/test_npu/test_col2im.py @@ -1,56 +1,56 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestCol2ImBackward(TestCase): - - def cpu_op_exec(self,input1, output_size, ksizes, strides, dilates, padding): - output = torch._C._nn.col2im(input1, output_size, ksizes, dilates, padding, strides) - output = output.numpy() - return output - - def npu_op_exec(self, input1,output_size, ksizes, strides, dilates,padding): - output = torch._C._nn.col2im(input1, output_size, ksizes, dilates, padding, strides) - output = output.to("cpu") - output = output.numpy() - return output - - def test_col2im_shape_format(self, device): - shape_format = [ - [ [np.float32, 0, (4,4)], (4,5), (2,2), (2,2), (1,1), (0,0)], - [ [np.float32, 3, (2, 8,30 )], (4,5), (2,2), (1,1), (1,1), (1,1)], - [ [np.float32, 4, ( 12, 20)], (12,6), (2,3), (1,1), (2,2), (0,0)], - [ [np.float32, 29, ( 1,12, 12)], (4,5), (2,2), (1,1), (1,1), (0,0)], - [ [np.float16, 29, ( 1,12, 12)], (4,5), (2,2), (1,1), (1,1), (0,0)], - ] - - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 1, 20) - cpu_output = self.cpu_op_exec(cpu_input, item[1], item[2], item[3], item[4], item[5]) - npu_output = self.npu_op_exec(npu_input, item[1], item[2], item[3], item[4], item[5]) - self.assertRtolEqual(cpu_output, npu_output) - - - -instantiate_device_type_tests(TestCol2ImBackward, globals(), except_for="cpu") -if __name__ == "__main__": - torch.npu.set_device("npu:5") +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestCol2ImBackward(TestCase): + + def cpu_op_exec(self,input1, output_size, ksizes, strides, dilates, padding): + output = torch._C._nn.col2im(input1, output_size, ksizes, dilates, padding, strides) + output = output.numpy() + return output + + def npu_op_exec(self, input1,output_size, ksizes, strides, dilates,padding): + output = torch._C._nn.col2im(input1, output_size, ksizes, dilates, padding, strides) + output = output.to("cpu") + output = output.numpy() + return output + + def test_col2im_shape_format(self, device): + shape_format = [ + [ [np.float32, 0, (4,4)], (4,5), (2,2), (2,2), (1,1), (0,0)], + [ [np.float32, 3, (2, 8,30 )], (4,5), (2,2), (1,1), (1,1), (1,1)], + [ [np.float32, 4, ( 12, 20)], (12,6), (2,3), (1,1), (2,2), (0,0)], + [ [np.float32, 29, ( 1,12, 12)], (4,5), (2,2), (1,1), (1,1), (0,0)], + [ [np.float16, 29, ( 1,12, 12)], (4,5), (2,2), (1,1), (1,1), (0,0)], + ] + + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 1, 20) + cpu_output = self.cpu_op_exec(cpu_input, item[1], item[2], item[3], item[4], item[5]) + npu_output = self.npu_op_exec(npu_input, item[1], item[2], item[3], item[4], item[5]) + self.assertRtolEqual(cpu_output, npu_output) + + + +instantiate_device_type_tests(TestCol2ImBackward, globals(), except_for="cpu") +if __name__ == "__main__": + torch.npu.set_device("npu:5") run_tests() \ No newline at end of file diff --git a/test/test_npu/test_constant_pad_nd.py b/test/test_npu/test_constant_pad_nd.py index 59d0bbae99a66d27dda0293e97094747ba29010b..b84c2ccd542b405e6d7a7b1b2f032e755ce1dd6d 100644 --- a/test/test_npu/test_constant_pad_nd.py +++ b/test/test_npu/test_constant_pad_nd.py @@ -1,70 +1,70 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestConstantPadNd(TestCase): - - def op_exec_cpu(self, input1, pad_shape): - output = torch.constant_pad_nd(input1, pad_shape) - output = output.numpy() - - return output - - def op_exec_npu(self, input1, pad_shape): - input1 = input1.to("npu") - output = torch.constant_pad_nd(input1, pad_shape) - output = output.to("cpu") - output = output.numpy() - return output - - def test_constant_pad_nd_shape_format(self, device): - shape_format = [ - [[np.float32, 3, (25, 32, 1, 1)], (1,1)], - [[np.float32, 0, [25, 32, 11, 11]], (2,2,2,2)], - [[np.float32, 0, [25, 3, 22, 22]],(2,2,2,2,20,20)], - [[np.float16, 3, [25, 12, 7, 7]], (20,20,20,20)], - [[np.float16, 0, [25, 3, 22, 22]], (20,20,20,20,5,5,5,5)], - [[np.float16, 4, (2, 3, 3, 3)], (1,1,1,20,5,5,5,5)], - [[np.float16, 4, [100, 20, 7, 7]], (0,0,0,0,0,0,0,0)], - [[np.float16, 0, [2,3,4,5]], (1,0,1,0,1,0,1,0)], - [[np.float16, 4, [2]],(0,1)], - [[np.float16, 0, [20,20]],(0,1,0,2)], - [[np.float16, 0, [20,20,20]],(1,1,1,1) ], - [[np.float16, 3, [1,1,1,1]], (1,1)], - [[np.float16, 3, [1]], (1,1)], - [[np.float16, 0, [50, 24, 56, 56]], (100, 100, 100, 100, 100, 100, 100, 100)], - ] - - for item in shape_format: - input_cpu, input_npu = create_common_tensor(item[0], 1, 1) - pad_shape = item[1] - cpu_output = self.op_exec_cpu(input_cpu, pad_shape) - npu_output = self.op_exec_npu(input_npu, pad_shape) - - - self.assertRtolEqual(cpu_output, npu_output) - - - -instantiate_device_type_tests(TestConstantPadNd, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestConstantPadNd(TestCase): + + def op_exec_cpu(self, input1, pad_shape): + output = torch.constant_pad_nd(input1, pad_shape) + output = output.numpy() + + return output + + def op_exec_npu(self, input1, pad_shape): + input1 = input1.to("npu") + output = torch.constant_pad_nd(input1, pad_shape) + output = output.to("cpu") + output = output.numpy() + return output + + def test_constant_pad_nd_shape_format(self, device): + shape_format = [ + [[np.float32, 3, (25, 32, 1, 1)], (1,1)], + [[np.float32, 0, [25, 32, 11, 11]], (2,2,2,2)], + [[np.float32, 0, [25, 3, 22, 22]],(2,2,2,2,20,20)], + [[np.float16, 3, [25, 12, 7, 7]], (20,20,20,20)], + [[np.float16, 0, [25, 3, 22, 22]], (20,20,20,20,5,5,5,5)], + [[np.float16, 4, (2, 3, 3, 3)], (1,1,1,20,5,5,5,5)], + [[np.float16, 4, [100, 20, 7, 7]], (0,0,0,0,0,0,0,0)], + [[np.float16, 0, [2,3,4,5]], (1,0,1,0,1,0,1,0)], + [[np.float16, 4, [2]],(0,1)], + [[np.float16, 0, [20,20]],(0,1,0,2)], + [[np.float16, 0, [20,20,20]],(1,1,1,1) ], + [[np.float16, 3, [1,1,1,1]], (1,1)], + [[np.float16, 3, [1]], (1,1)], + [[np.float16, 0, [50, 24, 56, 56]], (100, 100, 100, 100, 100, 100, 100, 100)], + ] + + for item in shape_format: + input_cpu, input_npu = create_common_tensor(item[0], 1, 1) + pad_shape = item[1] + cpu_output = self.op_exec_cpu(input_cpu, pad_shape) + npu_output = self.op_exec_npu(input_npu, pad_shape) + + + self.assertRtolEqual(cpu_output, npu_output) + + + +instantiate_device_type_tests(TestConstantPadNd, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_convolution_backward.py b/test/test_npu/test_convolution_backward.py index ded1c9a232a8fc5d6797bf2fe65c248cf9c24bf9..0e4a0334071543e8339c9793b84059c91d666a1b 100644 --- a/test/test_npu/test_convolution_backward.py +++ b/test/test_npu/test_convolution_backward.py @@ -1,96 +1,96 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestConv2dBackward(TestCase): - weight_grad = [] - input_grad = [] - - def getWeightGrad(self, grad): - self.weight_grad.append(grad.to("cpu")) - - def getInputGrad(self, grad): - self.input_grad.append(grad.to("cpu")) - - def cpu_op_exec(self, input1, weight, padding = 0, stride = 1, bias1 = None): - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - weight.requires_grad = True - weight.register_hook(lambda grad: self.getWeightGrad(grad)) - bias1.requires_grad = True - - res_forward = nn.functional.conv2d(input1, weight, bias1, stride, padding) - grads = torch.ones_like(res_forward).float() - res_forward.backward(grads, retain_graph=True) - res_forward = res_forward.detach().numpy() - return res_forward - - def npu_op_exec(self, input1, weight, padding = 0, stride = 1, bias1 = None): - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - weight.requires_grad = True - weight.register_hook(lambda grad: self.getWeightGrad(grad)) - bias1 = bias1.to("npu") - bias1.requires_grad = True - - res_forward = nn.functional.conv2d(input1, weight, bias1, stride, padding) - grads = torch.ones_like(res_forward).float() - grads = grads.to("npu") - res_forward.backward(grads, retain_graph=True) - res_forward = res_forward.to("cpu") - res_forward = res_forward.detach().numpy() - return res_forward - - def test_conv2d_backward_shape_format(self, device): - shape_format = [ # input, weight, padding, stride - [[np.float32, 0, (1, 4, 5, 5)], [np.float32, 0, (4, 4, 3, 3)], 0, (1, 1)], - [[np.float32, 0, (1, 8, 3, 3)], [np.float32, 0, (8, 8, 1, 1)], 0, (2, 1)], - [[np.float32, 0, (1024, 2048, 6, 6)], [np.float32, 0, (2048, 2048, 3, 3)], 0, (1, 2)], - [[np.float32, 0, (512, 256, 4, 4)], [np.float32, 0, (256, 256, 2, 2)], 0, (2, 2)], - [[np.float32, 0, (128, 4, 3, 3)], [np.float32, 0, (4, 4, 2, 2)], 0, (3, 1)], - [[np.float32, 0, (2, 64, 3, 3)], [np.float32, 0, (64, 64, 3, 3)], 0, (1, 3)], - [[np.float32, 0, (64, 2, 8, 8)], [np.float32, 0, (2, 2, 1, 1)], 0, (3, 3)], - [[np.float32, 0, (32, 16, 4, 4)], [np.float32, 0, (16, 16, 3, 3)], 0, (2, 1)], - [[np.float32, 0, (1024, 8, 3, 3)], [np.float32, 0, (8, 8, 1, 1)], 0, (1, 2)], - [[np.float32, 0, (1, 8, 512, 512)], [np.float32, 0, (8, 8, 3, 3)], 0, (2, 2)], - [[np.float32, 0, (1, 2, 1, 1)], [np.float32, 0, (1, 1, 2, 2)], 0, (1, 1)], - ] - - for item in shape_format: - self.weight_grad.clear() - self.input_grad.clear() - cpu_input1, npu_input1 = create_common_tensor(item[0], -2, 2) - cpu_input2, npu_input2 = create_common_tensor(item[1], -2, 2) - cpu_bias = torch.randn(item[1][2][0]) - npu_bias = copy.deepcopy(cpu_bias) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, item[2], item[3], cpu_bias) - npu_output = self.npu_op_exec(npu_input1, npu_input2, item[2], item[3], npu_bias) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(self.input_grad[0], self.input_grad[1]) - self.assertRtolEqual(self.weight_grad[0], self.weight_grad[1]) - - -instantiate_device_type_tests(TestConv2dBackward, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestConv2dBackward(TestCase): + weight_grad = [] + input_grad = [] + + def getWeightGrad(self, grad): + self.weight_grad.append(grad.to("cpu")) + + def getInputGrad(self, grad): + self.input_grad.append(grad.to("cpu")) + + def cpu_op_exec(self, input1, weight, padding = 0, stride = 1, bias1 = None): + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + weight.requires_grad = True + weight.register_hook(lambda grad: self.getWeightGrad(grad)) + bias1.requires_grad = True + + res_forward = nn.functional.conv2d(input1, weight, bias1, stride, padding) + grads = torch.ones_like(res_forward).float() + res_forward.backward(grads, retain_graph=True) + res_forward = res_forward.detach().numpy() + return res_forward + + def npu_op_exec(self, input1, weight, padding = 0, stride = 1, bias1 = None): + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + weight.requires_grad = True + weight.register_hook(lambda grad: self.getWeightGrad(grad)) + bias1 = bias1.to("npu") + bias1.requires_grad = True + + res_forward = nn.functional.conv2d(input1, weight, bias1, stride, padding) + grads = torch.ones_like(res_forward).float() + grads = grads.to("npu") + res_forward.backward(grads, retain_graph=True) + res_forward = res_forward.to("cpu") + res_forward = res_forward.detach().numpy() + return res_forward + + def test_conv2d_backward_shape_format(self, device): + shape_format = [ # input, weight, padding, stride + [[np.float32, 0, (1, 4, 5, 5)], [np.float32, 0, (4, 4, 3, 3)], 0, (1, 1)], + [[np.float32, 0, (1, 8, 3, 3)], [np.float32, 0, (8, 8, 1, 1)], 0, (2, 1)], + [[np.float32, 0, (1024, 2048, 6, 6)], [np.float32, 0, (2048, 2048, 3, 3)], 0, (1, 2)], + [[np.float32, 0, (512, 256, 4, 4)], [np.float32, 0, (256, 256, 2, 2)], 0, (2, 2)], + [[np.float32, 0, (128, 4, 3, 3)], [np.float32, 0, (4, 4, 2, 2)], 0, (3, 1)], + [[np.float32, 0, (2, 64, 3, 3)], [np.float32, 0, (64, 64, 3, 3)], 0, (1, 3)], + [[np.float32, 0, (64, 2, 8, 8)], [np.float32, 0, (2, 2, 1, 1)], 0, (3, 3)], + [[np.float32, 0, (32, 16, 4, 4)], [np.float32, 0, (16, 16, 3, 3)], 0, (2, 1)], + [[np.float32, 0, (1024, 8, 3, 3)], [np.float32, 0, (8, 8, 1, 1)], 0, (1, 2)], + [[np.float32, 0, (1, 8, 512, 512)], [np.float32, 0, (8, 8, 3, 3)], 0, (2, 2)], + [[np.float32, 0, (1, 2, 1, 1)], [np.float32, 0, (1, 1, 2, 2)], 0, (1, 1)], + ] + + for item in shape_format: + self.weight_grad.clear() + self.input_grad.clear() + cpu_input1, npu_input1 = create_common_tensor(item[0], -2, 2) + cpu_input2, npu_input2 = create_common_tensor(item[1], -2, 2) + cpu_bias = torch.randn(item[1][2][0]) + npu_bias = copy.deepcopy(cpu_bias) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, item[2], item[3], cpu_bias) + npu_output = self.npu_op_exec(npu_input1, npu_input2, item[2], item[3], npu_bias) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(self.input_grad[0], self.input_grad[1]) + self.assertRtolEqual(self.weight_grad[0], self.weight_grad[1]) + + +instantiate_device_type_tests(TestConv2dBackward, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_cosine_embedding_loss.py b/test/test_npu/test_cosine_embedding_loss.py index fc44237c9900307d559d701afeee7f570dde5bf6..443c57ec8c397cf141c0ed86019dd47ff94f9d58 100644 --- a/test/test_npu/test_cosine_embedding_loss.py +++ b/test/test_npu/test_cosine_embedding_loss.py @@ -1,99 +1,99 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import random - - -class TestCosineEmbeddingLoss(TestCase): - def generate_target(self, shape, dtype): - target = np.random.randint(2, size=shape, dtype=dtype) - target = target*2-1 - target = torch.from_numpy(target) - return target - - def cpu_op_exec(self, input1, input2, target, margin, reduction): - output = torch.nn.functional.cosine_embedding_loss( - input1, input2, target, margin=margin, reduction=reduction) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2, target, margin, reduction): - input1 = input1.to("npu") - input2 = input2.to("npu") - target = target.to("npu") - output = torch.nn.functional.cosine_embedding_loss( - input1, input2, target, margin=margin, reduction=reduction) - output = output.to("cpu") - output = output.numpy() - return output - - def test_cosine_embedding_loss_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (5, 3)], [np.float32, -1, (5, 3)], - [np.int32, (5, )], 'sum'], - [[np.float32, 0, (16, 4, 3)], [np.float32, 0, - (16, 4, 3)], [np.int32, (16, 3)], 'mean'], - [[np.float32, 3, (64, 10, 10)], [np.float32, 3, - (64, 10, 10)], [np.int32, (64, 10)], 'none'], - ] - for item1, item2, target, reduction in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item1, 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item2, 1, 100) - target = self.generate_target(target[1], target[0]) - margin = np.random.uniform(0, 1) - cpu_output = self.cpu_op_exec( - cpu_input1, cpu_input2, target, margin, reduction) - npu_output = self.npu_op_exec( - npu_input1, npu_input2, target, margin, reduction) - self.assertRtolEqual(cpu_output, npu_output) - - def test_cosine_embedding_loss_float16_shape_format(self, device): - def cpu_op_exec_fp16(input1, input2, target, margin, reduction): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - output = torch.nn.functional.cosine_embedding_loss( - input1, input2, target, margin=margin, reduction=reduction) - output = output.numpy().astype(np.float16) - return output - - shape_format = [ - [[np.float16, 3, (4, 1, 3)], [np.float16, 3, - (4, 1, 3)], [np.int32, (4, 3)], 'sum'], - [[np.float16, -1, (16, 8)], [np.float16, -1, (16, 8)], - [np.int32, (16, )], 'mean'], - [[np.float16, 4, (64, 10, 10)], [np.float16, 3, - (64, 10, 10)], [np.int32, (64, 10)], 'none'] - ] - - for item1, item2, target, reduction in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item1, 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item2, 1, 100) - target = self.generate_target(target[1], target[0]) - margin = np.random.uniform(0, 1) - cpu_output = cpu_op_exec_fp16( - cpu_input1, cpu_input2, target, margin, reduction) - npu_output = self.npu_op_exec( - npu_input1, npu_input2, target, margin, reduction) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests( - TestCosineEmbeddingLoss, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:5") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +import random + + +class TestCosineEmbeddingLoss(TestCase): + def generate_target(self, shape, dtype): + target = np.random.randint(2, size=shape, dtype=dtype) + target = target*2-1 + target = torch.from_numpy(target) + return target + + def cpu_op_exec(self, input1, input2, target, margin, reduction): + output = torch.nn.functional.cosine_embedding_loss( + input1, input2, target, margin=margin, reduction=reduction) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2, target, margin, reduction): + input1 = input1.to("npu") + input2 = input2.to("npu") + target = target.to("npu") + output = torch.nn.functional.cosine_embedding_loss( + input1, input2, target, margin=margin, reduction=reduction) + output = output.to("cpu") + output = output.numpy() + return output + + def test_cosine_embedding_loss_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (5, 3)], [np.float32, -1, (5, 3)], + [np.int32, (5, )], 'sum'], + [[np.float32, 0, (16, 4, 3)], [np.float32, 0, + (16, 4, 3)], [np.int32, (16, 3)], 'mean'], + [[np.float32, 3, (64, 10, 10)], [np.float32, 3, + (64, 10, 10)], [np.int32, (64, 10)], 'none'], + ] + for item1, item2, target, reduction in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item1, 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item2, 1, 100) + target = self.generate_target(target[1], target[0]) + margin = np.random.uniform(0, 1) + cpu_output = self.cpu_op_exec( + cpu_input1, cpu_input2, target, margin, reduction) + npu_output = self.npu_op_exec( + npu_input1, npu_input2, target, margin, reduction) + self.assertRtolEqual(cpu_output, npu_output) + + def test_cosine_embedding_loss_float16_shape_format(self, device): + def cpu_op_exec_fp16(input1, input2, target, margin, reduction): + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + output = torch.nn.functional.cosine_embedding_loss( + input1, input2, target, margin=margin, reduction=reduction) + output = output.numpy().astype(np.float16) + return output + + shape_format = [ + [[np.float16, 3, (4, 1, 3)], [np.float16, 3, + (4, 1, 3)], [np.int32, (4, 3)], 'sum'], + [[np.float16, -1, (16, 8)], [np.float16, -1, (16, 8)], + [np.int32, (16, )], 'mean'], + [[np.float16, 4, (64, 10, 10)], [np.float16, 3, + (64, 10, 10)], [np.int32, (64, 10)], 'none'] + ] + + for item1, item2, target, reduction in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item1, 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item2, 1, 100) + target = self.generate_target(target[1], target[0]) + margin = np.random.uniform(0, 1) + cpu_output = cpu_op_exec_fp16( + cpu_input1, cpu_input2, target, margin, reduction) + npu_output = self.npu_op_exec( + npu_input1, npu_input2, target, margin, reduction) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests( + TestCosineEmbeddingLoss, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:5") + run_tests() diff --git a/test/test_npu/test_cudnn_rnn_backward.py b/test/test_npu/test_cudnn_rnn_backward.py index 68730689e79359904915b793d0d6e131508b80e1..1dd236c74e1727500bedd8371c91370a4ce2a815 100644 --- a/test/test_npu/test_cudnn_rnn_backward.py +++ b/test/test_npu/test_cudnn_rnn_backward.py @@ -1,95 +1,95 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import random -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestCudnnRnnBackward(TestCase): - def generate_bool(self): - scalar = random.randint(1, 2) - return scalar == 1 - - def generate_int(self, min_d, max_d): - scalar = random.randint(min_d, max_d) - return scalar - - def cpu_op_exec(self, input1, vocab_size, num_hiddens, num_steps, batch_size, first, drop, bid): - input1.requires_grad_(True) - m = torch.nn.RNN(input_size=vocab_size, hidden_size=num_hiddens, batch_first=first, dropout=drop, bidirectional=bid) - state = None - output, _ = m(input1, state) - w = torch.ones_like(output) - output = output.backward(w) - return input1.grad - - def npu_op_exec(self, input1, vocab_size, num_hiddens, num_steps, batch_size, first, drop, bid): - input1.requires_grad_(True) - m = torch.nn.RNN(input_size=vocab_size, hidden_size=num_hiddens, batch_first=first, dropout=drop, bidirectional=bid) - m = m.npu() - state = None - output, _ = m(input1, state) - w = torch.ones_like(output) - output = output.backward(w) - out = input1.grad - out = out.to("cpu") - return out - - def test_cudnn_rnn_backward_common_shape_format(self, device): - npu_vocab_size = self.generate_int(1, 10) - npu_num_hiddens = self.generate_int(1, 10) - npu_num_step = self.generate_int(1, 10) - npu_batch_size = self.generate_int(1, 10) - first = self.generate_bool() - drop = self.generate_int(0, 1) - bid = self.generate_bool() - shape_format = [ - [[np.float32, -1, (npu_num_step, npu_batch_size, npu_vocab_size)]], - [[np.float32, 0, (npu_num_step, npu_batch_size, npu_vocab_size)]], - [[np.float32, 3, (npu_num_step, npu_batch_size, npu_vocab_size)]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_result = self.cpu_op_exec(cpu_input1, npu_vocab_size, npu_num_hiddens, npu_num_step, npu_batch_size, first, drop, bid) - npu_result = self.npu_op_exec(npu_input1, npu_vocab_size, npu_num_hiddens, npu_num_step, npu_batch_size, first, drop, bid) - self.assertRtolEqual(cpu_result.numpy(), npu_result.numpy()); - - def test_cudnn_rnn_backward_float16_shape_format(self, device): - npu_vocab_size = self.generate_int(1, 10) - npu_num_hiddens = self.generate_int(1, 10) - npu_num_step = self.generate_int(1, 10) - npu_batch_size = self.generate_int(1, 10) - first = self.generate_bool() - drop = self.generate_int(0, 1) - bid = self.generate_bool() - shape_format = [ - [[np.float16, -1, (npu_num_step, npu_batch_size, npu_vocab_size)]], - [[np.float16, 0, (npu_num_step, npu_batch_size, npu_vocab_size)]], - [[np.float16, 3, (npu_num_step, npu_batch_size, npu_vocab_size)]], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_result = self.cpu_op_exec(cpu_input1, npu_vocab_size, npu_num_hiddens, npu_num_step, npu_batch_size, first, drop, bid) - npu_result = self.npu_op_exec(npu_input1, npu_vocab_size, npu_num_hiddens, npu_num_step, npu_batch_size, first, drop, bid) - self.assertRtolEqual(cpu_result.numpy().astype(np.float16), npu_result.numpy().astype(np.float16)); - -instantiate_device_type_tests(TestCudnnRnnBackward, globals(), except_for="cpu") -if __name__ == "__main__": - torch.npu.set_device("npu:5") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import random +import numpy as np +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestCudnnRnnBackward(TestCase): + def generate_bool(self): + scalar = random.randint(1, 2) + return scalar == 1 + + def generate_int(self, min_d, max_d): + scalar = random.randint(min_d, max_d) + return scalar + + def cpu_op_exec(self, input1, vocab_size, num_hiddens, num_steps, batch_size, first, drop, bid): + input1.requires_grad_(True) + m = torch.nn.RNN(input_size=vocab_size, hidden_size=num_hiddens, batch_first=first, dropout=drop, bidirectional=bid) + state = None + output, _ = m(input1, state) + w = torch.ones_like(output) + output = output.backward(w) + return input1.grad + + def npu_op_exec(self, input1, vocab_size, num_hiddens, num_steps, batch_size, first, drop, bid): + input1.requires_grad_(True) + m = torch.nn.RNN(input_size=vocab_size, hidden_size=num_hiddens, batch_first=first, dropout=drop, bidirectional=bid) + m = m.npu() + state = None + output, _ = m(input1, state) + w = torch.ones_like(output) + output = output.backward(w) + out = input1.grad + out = out.to("cpu") + return out + + def test_cudnn_rnn_backward_common_shape_format(self, device): + npu_vocab_size = self.generate_int(1, 10) + npu_num_hiddens = self.generate_int(1, 10) + npu_num_step = self.generate_int(1, 10) + npu_batch_size = self.generate_int(1, 10) + first = self.generate_bool() + drop = self.generate_int(0, 1) + bid = self.generate_bool() + shape_format = [ + [[np.float32, -1, (npu_num_step, npu_batch_size, npu_vocab_size)]], + [[np.float32, 0, (npu_num_step, npu_batch_size, npu_vocab_size)]], + [[np.float32, 3, (npu_num_step, npu_batch_size, npu_vocab_size)]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_result = self.cpu_op_exec(cpu_input1, npu_vocab_size, npu_num_hiddens, npu_num_step, npu_batch_size, first, drop, bid) + npu_result = self.npu_op_exec(npu_input1, npu_vocab_size, npu_num_hiddens, npu_num_step, npu_batch_size, first, drop, bid) + self.assertRtolEqual(cpu_result.numpy(), npu_result.numpy()); + + def test_cudnn_rnn_backward_float16_shape_format(self, device): + npu_vocab_size = self.generate_int(1, 10) + npu_num_hiddens = self.generate_int(1, 10) + npu_num_step = self.generate_int(1, 10) + npu_batch_size = self.generate_int(1, 10) + first = self.generate_bool() + drop = self.generate_int(0, 1) + bid = self.generate_bool() + shape_format = [ + [[np.float16, -1, (npu_num_step, npu_batch_size, npu_vocab_size)]], + [[np.float16, 0, (npu_num_step, npu_batch_size, npu_vocab_size)]], + [[np.float16, 3, (npu_num_step, npu_batch_size, npu_vocab_size)]], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_result = self.cpu_op_exec(cpu_input1, npu_vocab_size, npu_num_hiddens, npu_num_step, npu_batch_size, first, drop, bid) + npu_result = self.npu_op_exec(npu_input1, npu_vocab_size, npu_num_hiddens, npu_num_step, npu_batch_size, first, drop, bid) + self.assertRtolEqual(cpu_result.numpy().astype(np.float16), npu_result.numpy().astype(np.float16)); + +instantiate_device_type_tests(TestCudnnRnnBackward, globals(), except_for="cpu") +if __name__ == "__main__": + torch.npu.set_device("npu:5") + run_tests() diff --git a/test/test_npu/test_det.py b/test/test_npu/test_det.py index 4ab0228b148b4919cddc4050253dbb9dba8af948..657b82473b44bba2dd1671e935f3733f80dc2f49 100644 --- a/test/test_npu/test_det.py +++ b/test/test_npu/test_det.py @@ -1,67 +1,67 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -# pylint: disable=unused-variable, unused-argument - -class TestDet(TestCase): - def generate_data(self, min_val, max_val, shape, dtype): - input1 = np.random.uniform(min_val, max_val, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def cpu_op_exec(self, input1): - output = torch.det(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - input1 = input1.to("npu") - output = torch.det(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_det_float32(self, device): - npu_input1 = self.generate_data(-9.313225746154785e-10, 9.313225746154785e-10, (1, 1, 64, 64), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_det_float16(self, device): - npu_input1 = self.generate_data(0, 0, (2, 2, 32, 32), np.float16) - cpu_output = self.cpu_op_exec(npu_input1.float()).astype(np.float16) - npu_output = self.npu_op_exec(npu_input1.float()).astype(np.float16) - print(cpu_output,npu_output,'123') - self.assertRtolEqual(cpu_output, npu_output) - - def test_big_scale_float32(self, device): - npu_input1 = self.generate_data(0, 10, (32, 32), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestDet, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:7") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +# pylint: disable=unused-variable, unused-argument + +class TestDet(TestCase): + def generate_data(self, min_val, max_val, shape, dtype): + input1 = np.random.uniform(min_val, max_val, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def cpu_op_exec(self, input1): + output = torch.det(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + input1 = input1.to("npu") + output = torch.det(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def test_det_float32(self, device): + npu_input1 = self.generate_data(-9.313225746154785e-10, 9.313225746154785e-10, (1, 1, 64, 64), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_det_float16(self, device): + npu_input1 = self.generate_data(0, 0, (2, 2, 32, 32), np.float16) + cpu_output = self.cpu_op_exec(npu_input1.float()).astype(np.float16) + npu_output = self.npu_op_exec(npu_input1.float()).astype(np.float16) + print(cpu_output,npu_output,'123') + self.assertRtolEqual(cpu_output, npu_output) + + def test_big_scale_float32(self, device): + npu_input1 = self.generate_data(0, 10, (32, 32), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestDet, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:7") + run_tests() diff --git a/test/test_npu/test_dynamic_ops/test_dynamic_embedding.py b/test/test_npu/test_dynamic_ops/test_dynamic_embedding.py index c29994544378437ed39a02a631dcaa96ccbb0641..fda65867de3eb146b97af8a10077ae998fa6b859 100644 --- a/test/test_npu/test_dynamic_ops/test_dynamic_embedding.py +++ b/test/test_npu/test_dynamic_ops/test_dynamic_embedding.py @@ -1,80 +1,80 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests -import time -import os -import copy -# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 - -class EmbeddingFuncNet(torch.nn.Module): - def __init__(self): - super(EmbeddingFuncNet, self).__init__() - - def forward(self, indices, weight): - out = torch.nn.functional.embedding(indices, weight) - return out - -class EmbeddingNet(torch.nn.Module): - def __init__(self): - super(EmbeddingNet, self).__init__() - - def forward(self, indices, embed): - out =embed(indices) - return out - -class TestShape(TestCase): - def generate_weight(self, x, y): - rand_data = np.random.randn(x,y).astype(np.float32) - cpu_out = torch.from_numpy(rand_data) - npu_out = torch.from_numpy(rand_data).npu() - return cpu_out.to(torch.float), npu_out.to(torch.float) - - def generate_indices(self, shape, min, max): - rand_data = np.random.randint(min, max, shape) - cpu_out = torch.from_numpy(rand_data) - npu_out = torch.from_numpy(rand_data).npu() - return cpu_out.to(torch.long), npu_out.to(torch.long) - - def test_dynamic_threads_support_op(self, device): - shape_list1 = [[40, 32], [40, 1024], [40000, 1024], [33712, 1024]] - shape_list2 = [[40], [40,3125], [64, 7, 128]] - shape_format = [ - [i, j] for i in shape_list1 for j in shape_list2 - ] - net_func = EmbeddingFuncNet() - net = EmbeddingNet() - for item in shape_format: - weight_cpu, weight_npu = self.generate_weight(item[0][0], item[0][1]) - indices_cpu, indices_npu = self.generate_indices(item[1], 1, item[0][0]) - cpu_out = net_func(indices_cpu, weight_cpu) - npu_out = net_func(indices_npu, weight_npu) - npu_output = npu_out.to("cpu") - self.assertRtolEqual(cpu_out.numpy(), npu_output.numpy()) - - embed_cpu = torch.nn.Embedding(item[0][0], item[0][1]) - embed_npu = copy.deepcopy(embed_cpu).npu() - cpu_out = net(indices_cpu, embed_cpu) - npu_out = net(indices_npu, embed_npu) - npu_output = npu_out.to("cpu") - self.assertRtolEqual(cpu_out.detach().numpy(), npu_output.detach().numpy()) - - -instantiate_device_type_tests(TestShape, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests +import time +import os +import copy +# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 + +class EmbeddingFuncNet(torch.nn.Module): + def __init__(self): + super(EmbeddingFuncNet, self).__init__() + + def forward(self, indices, weight): + out = torch.nn.functional.embedding(indices, weight) + return out + +class EmbeddingNet(torch.nn.Module): + def __init__(self): + super(EmbeddingNet, self).__init__() + + def forward(self, indices, embed): + out =embed(indices) + return out + +class TestShape(TestCase): + def generate_weight(self, x, y): + rand_data = np.random.randn(x,y).astype(np.float32) + cpu_out = torch.from_numpy(rand_data) + npu_out = torch.from_numpy(rand_data).npu() + return cpu_out.to(torch.float), npu_out.to(torch.float) + + def generate_indices(self, shape, min, max): + rand_data = np.random.randint(min, max, shape) + cpu_out = torch.from_numpy(rand_data) + npu_out = torch.from_numpy(rand_data).npu() + return cpu_out.to(torch.long), npu_out.to(torch.long) + + def test_dynamic_threads_support_op(self, device): + shape_list1 = [[40, 32], [40, 1024], [40000, 1024], [33712, 1024]] + shape_list2 = [[40], [40,3125], [64, 7, 128]] + shape_format = [ + [i, j] for i in shape_list1 for j in shape_list2 + ] + net_func = EmbeddingFuncNet() + net = EmbeddingNet() + for item in shape_format: + weight_cpu, weight_npu = self.generate_weight(item[0][0], item[0][1]) + indices_cpu, indices_npu = self.generate_indices(item[1], 1, item[0][0]) + cpu_out = net_func(indices_cpu, weight_cpu) + npu_out = net_func(indices_npu, weight_npu) + npu_output = npu_out.to("cpu") + self.assertRtolEqual(cpu_out.numpy(), npu_output.numpy()) + + embed_cpu = torch.nn.Embedding(item[0][0], item[0][1]) + embed_npu = copy.deepcopy(embed_cpu).npu() + cpu_out = net(indices_cpu, embed_cpu) + npu_out = net(indices_npu, embed_npu) + npu_output = npu_out.to("cpu") + self.assertRtolEqual(cpu_out.detach().numpy(), npu_output.detach().numpy()) + + +instantiate_device_type_tests(TestShape, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_dynamic_ops/test_network_all.py b/test/test_npu/test_dynamic_ops/test_network_all.py index 152b297d18cebdb8e90483d8ca3a7ff0c3148f28..3cb4dc0a23dd89bc5af6c19a071e7c923794b394 100644 --- a/test/test_npu/test_dynamic_ops/test_network_all.py +++ b/test/test_npu/test_dynamic_ops/test_network_all.py @@ -1,81 +1,81 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests -import time -import os -import copy -# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 - - -class AllNet(torch.nn.Module): - def __init__(self): - super(AllNet, self).__init__() - - def forward(self, x, axis): - if x.device == torch.device("cpu") and x.dtype == torch.float16: - x = x.to(torch.float32) - out = torch.all(x, axis) - if x.device == torch.device("cpu") and x.dtype == torch.float16: - out = out.to(torch.float16) - return out - - -class TestShape(TestCase): - def create_random_shape_tensor(self, item, min_value, max_value): - npu_format = item[0] - dtype = item[1] - dim = item[2] - shape = np.random.randint(1, 10, dim) - input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) - cpu_input = torch.from_numpy(input_tensor) - npu_input = torch.from_numpy(input_tensor).npu() - """ - if npu_format not in (-1, 0): - npu_input = npu_input.npu_format_cast(npu_format) - """ - return cpu_input, npu_input - - def get_random_axis(self, cpu_tensor): - shape = list(cpu_tensor.shape) - axis = np.random.randint(0, len(shape)) - return axis - - def test_dynamic_threads_support_op(self, device): - format_list = [0, 3, 29] - dtype_list = [np.bool_] - dim_list = [1, 2, 3, 4] - net = AllNet() - net_npu = copy.deepcopy(net).to("npu") - items = [ - [i, j, k] for i in format_list for j in dtype_list for k in dim_list - ] - for item in items: - if item[0] == 29 and item[2] == 1: - continue - for _ in range(100): - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) - axis = self.get_random_axis(cpu_tensor) - cpu_output = net(cpu_tensor, axis) - npu_output = net_npu(npu_tensor, axis) - self.assertRtolEqual(cpu_output.to(npu_output.dtype).numpy(), npu_output.cpu().numpy()) - - -instantiate_device_type_tests(TestShape, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests +import time +import os +import copy +# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 + + +class AllNet(torch.nn.Module): + def __init__(self): + super(AllNet, self).__init__() + + def forward(self, x, axis): + if x.device == torch.device("cpu") and x.dtype == torch.float16: + x = x.to(torch.float32) + out = torch.all(x, axis) + if x.device == torch.device("cpu") and x.dtype == torch.float16: + out = out.to(torch.float16) + return out + + +class TestShape(TestCase): + def create_random_shape_tensor(self, item, min_value, max_value): + npu_format = item[0] + dtype = item[1] + dim = item[2] + shape = np.random.randint(1, 10, dim) + input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) + cpu_input = torch.from_numpy(input_tensor) + npu_input = torch.from_numpy(input_tensor).npu() + """ + if npu_format not in (-1, 0): + npu_input = npu_input.npu_format_cast(npu_format) + """ + return cpu_input, npu_input + + def get_random_axis(self, cpu_tensor): + shape = list(cpu_tensor.shape) + axis = np.random.randint(0, len(shape)) + return axis + + def test_dynamic_threads_support_op(self, device): + format_list = [0, 3, 29] + dtype_list = [np.bool_] + dim_list = [1, 2, 3, 4] + net = AllNet() + net_npu = copy.deepcopy(net).to("npu") + items = [ + [i, j, k] for i in format_list for j in dtype_list for k in dim_list + ] + for item in items: + if item[0] == 29 and item[2] == 1: + continue + for _ in range(100): + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) + axis = self.get_random_axis(cpu_tensor) + cpu_output = net(cpu_tensor, axis) + npu_output = net_npu(npu_tensor, axis) + self.assertRtolEqual(cpu_output.to(npu_output.dtype).numpy(), npu_output.cpu().numpy()) + + +instantiate_device_type_tests(TestShape, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_dynamic_ops/test_network_broadcast.py b/test/test_npu/test_dynamic_ops/test_network_broadcast.py index 6c19132920be6ab4a9f90744d7963bc89ef32148..2b1776b780779ddd5a2b78bbb5e13a7d2de6e100 100644 --- a/test/test_npu/test_dynamic_ops/test_network_broadcast.py +++ b/test/test_npu/test_dynamic_ops/test_network_broadcast.py @@ -1,77 +1,77 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests -import time -import os -import copy -# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 - - -class BroadcastToNet(torch.nn.Module): - def __init__(self): - super(BroadcastToNet, self).__init__() - - def forward(self, x, size): - out = x.npu_broadcast(size) - return out - - -class TestShape(TestCase): - def create_random_shape_tensor(self, item, min_value, max_value): - npu_format = item[0] - dtype = item[1] - dim = item[2] - shape = np.random.randint(1, 10, dim) - input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) - cpu_input = torch.from_numpy(input_tensor) - npu_input = torch.from_numpy(input_tensor).npu() - if npu_format not in (-1, 0): - npu_input = npu_input.npu_format_cast(npu_format) - return cpu_input, npu_input - - def get_broad_size(self, cpu_tensor): - shape = list(cpu_tensor.shape) - addim = np.random.randint(0, 10, 2) - sizes = list(addim) + shape - return list(sizes) - - def test_dynamic_threads_support_op(self, device): - format_list = [0] - dtype_list = [np.float32, np.float16] - dim_list = [4] - net = BroadcastToNet() - net_npu = copy.deepcopy(net).to("npu") - items = [ - [i, j, k] for i in format_list for j in dtype_list for k in dim_list - ] - for item in items: - if item[0] == 29 and item[2] == 1: - continue - for _ in range(100): - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) - sizes = self.get_broad_size(cpu_tensor) - output = net_npu(npu_tensor, sizes) - size1 = np.array(output.size(), dtype=np.int32) - size2 = np.array(sizes, dtype=np.int32) - self.assertRtolEqual(size1, size2) - - -instantiate_device_type_tests(TestShape, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests +import time +import os +import copy +# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 + + +class BroadcastToNet(torch.nn.Module): + def __init__(self): + super(BroadcastToNet, self).__init__() + + def forward(self, x, size): + out = x.npu_broadcast(size) + return out + + +class TestShape(TestCase): + def create_random_shape_tensor(self, item, min_value, max_value): + npu_format = item[0] + dtype = item[1] + dim = item[2] + shape = np.random.randint(1, 10, dim) + input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) + cpu_input = torch.from_numpy(input_tensor) + npu_input = torch.from_numpy(input_tensor).npu() + if npu_format not in (-1, 0): + npu_input = npu_input.npu_format_cast(npu_format) + return cpu_input, npu_input + + def get_broad_size(self, cpu_tensor): + shape = list(cpu_tensor.shape) + addim = np.random.randint(0, 10, 2) + sizes = list(addim) + shape + return list(sizes) + + def test_dynamic_threads_support_op(self, device): + format_list = [0] + dtype_list = [np.float32, np.float16] + dim_list = [4] + net = BroadcastToNet() + net_npu = copy.deepcopy(net).to("npu") + items = [ + [i, j, k] for i in format_list for j in dtype_list for k in dim_list + ] + for item in items: + if item[0] == 29 and item[2] == 1: + continue + for _ in range(100): + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) + sizes = self.get_broad_size(cpu_tensor) + output = net_npu(npu_tensor, sizes) + size1 = np.array(output.size(), dtype=np.int32) + size2 = np.array(sizes, dtype=np.int32) + self.assertRtolEqual(size1, size2) + + +instantiate_device_type_tests(TestShape, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_dynamic_ops/test_network_dynamic_shape.py b/test/test_npu/test_dynamic_ops/test_network_dynamic_shape.py index eb1be0b528c59ed9e9fcbca18a1e83c3963c80a5..45d915c590875dd997f7cb426d371bba6e486872 100644 --- a/test/test_npu/test_dynamic_ops/test_network_dynamic_shape.py +++ b/test/test_npu/test_dynamic_ops/test_network_dynamic_shape.py @@ -1,193 +1,193 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests -import time -import os - -class NetFirst(torch.nn.Module): - def __init__(self): - super(NetFirst, self).__init__() - self.cont = 0 - - def forward(self): - self.cont += 1 - return self.cont - -class NetStatic(torch.nn.Module): - def __init__(self): - super(NetStatic, self).__init__() - - def forward(self, x): - out = torch.mul(x, x) - return out - -class NetDynamic(torch.nn.Module): - def __init__(self): - super(NetDynamic, self).__init__() - - def forward(self, x): - out1 = torch.mul(x, x) - out2 = torch.relu(out1) - out3 = torch.neg(out2) - out4 = torch.floor_divide(x, out2) - out5 = torch.div(out4, out3) - out6 = torch.mul(out5, 0.8) - return out6 - -class NetUnsupport(torch.nn.Module): - def __init__(self): - super(NetUnsupport, self).__init__() - - def forward(self, x): - out1 = torch.sin(x) - out2 = torch.sinh(out1) - out3 = torch.selu(out2) - return out3 - -class NetMixOp(torch.nn.Module): - def __init__(self): - super(NetMixOp, self).__init__() - - def forward(self, x): - out0 = torch.floor_divide(x, x) - out1 = torch.selu(out0) - out2 = torch.sub(out1, x) - out3 = torch.div(out2, x) - out4 = torch.sin(out3) - out5 = torch.mul(out4, x) - out6 = torch.sinh(out5) - out7 = torch.neg(out6) - out8 = torch.relu(out7) - return out8 - - -class TestShape(TestCase): - def create_random_shape_tensor(self, item, minValue, maxValue): - format = item[0] - dtype = item[1] - dim = item[2] - shape = np.random.randint(1, 100, dim) - input = np.random.uniform(minValue, maxValue, shape).astype(dtype) - cpu_input = torch.from_numpy(input) - npu_input = torch.from_numpy(input).to("npu:0") - if format not in (-1, 0): - npu_input = npu_input.npu_format_cast(format) - return cpu_input, npu_input - - def test_dynamic_first_step(self, device): - net = NetFirst() - net = net.to("npu") - for step in range(100): - cpu_output = net() - npu_output = net() - assert cpu_output == npu_output - 1 - - def test_dynamic_static_shape(self, device): - net = NetStatic() - net = net.to("npu") - item = [np.float32, 3, (10, 255, 5, 5)] - for step in range(100): - cpu_tensor, npu_tensor = create_common_tensor(item, -100, 100) - cpu_output = net(cpu_tensor) - cpu_output = cpu_output.numpy() - npu_output = net(npu_tensor) - npu_output = npu_output.to("cpu") - npu_output = npu_output.numpy() - self.assertRtolEqual(cpu_output, npu_output) - - def test_dynamic_op(self, device): - net = NetDynamic() - net = net.to("npu") - shape_format = [ - [3, np.float32, 1], - [3, np.float32, 2], - [3, np.float32, 4] - ] - for step in range(100): - for item in shape_format: - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) - cpu_output = net(cpu_tensor) - cpu_output = cpu_output.numpy() - npu_output = net(npu_tensor) - npu_output = npu_output.to("cpu") - npu_output = npu_output.numpy() - self.assertRtolEqual(cpu_output, npu_output) - - def test_dyamic_unspport_op(self, device): - net = NetUnsupport() - net = net.to("npu") - item = [3, np.float32, 4] - for step in range(100): - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) - cpu_output = net(cpu_tensor) - cpu_output = cpu_output.numpy() - npu_output = net(npu_tensor) - npu_output = npu_output.to("cpu") - npu_output = npu_output.numpy() - self.assertRtolEqual(cpu_output, npu_output) - - def test_dynamic_mix_op(self, device): - net = NetMixOp() - net = net.to("npu") - item = [3, np.float32, 4] - for step in range(100): - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) - cpu_output = net(cpu_tensor) - cpu_output = cpu_output.numpy() - npu_output = net(npu_tensor) - npu_output = npu_output.to("cpu") - npu_output = npu_output.numpy() - self.assertRtolEqual(cpu_output, npu_output) - - def test_dynamic_all_random_mix_op(self, device): - net = NetMixOp() - net = net.to("npu") - format_list = [0, 3, 29] - dtype_list = [np.float32] - dim_list = [1, 2, 3, 4] - items = [ - [i, j, k, 10] for i in format_list for j in dtype_list for k in dim_list - ] - for step in range(100): - for item in items: - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) - cpu_output = net(cpu_tensor) - cpu_output = cpu_output.numpy() - npu_output = net(npu_tensor) - npu_output = npu_output.to("cpu") - npu_output = npu_output.numpy() - self.assertRtolEqual(cpu_output, npu_output) - - def test_dynamic_exit(self, device): - net = NetMixOp() - net = net.to("npu") - item = [3, np.float32, 4] - for step in range(2): - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) - cpu_output = net(cpu_tensor) - cpu_output = cpu_output.numpy() - npu_output = net(npu_tensor) - npu_output = npu_output.to("cpu") - npu_output = npu_output.numpy() - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestShape, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests +import time +import os + +class NetFirst(torch.nn.Module): + def __init__(self): + super(NetFirst, self).__init__() + self.cont = 0 + + def forward(self): + self.cont += 1 + return self.cont + +class NetStatic(torch.nn.Module): + def __init__(self): + super(NetStatic, self).__init__() + + def forward(self, x): + out = torch.mul(x, x) + return out + +class NetDynamic(torch.nn.Module): + def __init__(self): + super(NetDynamic, self).__init__() + + def forward(self, x): + out1 = torch.mul(x, x) + out2 = torch.relu(out1) + out3 = torch.neg(out2) + out4 = torch.floor_divide(x, out2) + out5 = torch.div(out4, out3) + out6 = torch.mul(out5, 0.8) + return out6 + +class NetUnsupport(torch.nn.Module): + def __init__(self): + super(NetUnsupport, self).__init__() + + def forward(self, x): + out1 = torch.sin(x) + out2 = torch.sinh(out1) + out3 = torch.selu(out2) + return out3 + +class NetMixOp(torch.nn.Module): + def __init__(self): + super(NetMixOp, self).__init__() + + def forward(self, x): + out0 = torch.floor_divide(x, x) + out1 = torch.selu(out0) + out2 = torch.sub(out1, x) + out3 = torch.div(out2, x) + out4 = torch.sin(out3) + out5 = torch.mul(out4, x) + out6 = torch.sinh(out5) + out7 = torch.neg(out6) + out8 = torch.relu(out7) + return out8 + + +class TestShape(TestCase): + def create_random_shape_tensor(self, item, minValue, maxValue): + format = item[0] + dtype = item[1] + dim = item[2] + shape = np.random.randint(1, 100, dim) + input = np.random.uniform(minValue, maxValue, shape).astype(dtype) + cpu_input = torch.from_numpy(input) + npu_input = torch.from_numpy(input).to("npu:0") + if format not in (-1, 0): + npu_input = npu_input.npu_format_cast(format) + return cpu_input, npu_input + + def test_dynamic_first_step(self, device): + net = NetFirst() + net = net.to("npu") + for step in range(100): + cpu_output = net() + npu_output = net() + assert cpu_output == npu_output - 1 + + def test_dynamic_static_shape(self, device): + net = NetStatic() + net = net.to("npu") + item = [np.float32, 3, (10, 255, 5, 5)] + for step in range(100): + cpu_tensor, npu_tensor = create_common_tensor(item, -100, 100) + cpu_output = net(cpu_tensor) + cpu_output = cpu_output.numpy() + npu_output = net(npu_tensor) + npu_output = npu_output.to("cpu") + npu_output = npu_output.numpy() + self.assertRtolEqual(cpu_output, npu_output) + + def test_dynamic_op(self, device): + net = NetDynamic() + net = net.to("npu") + shape_format = [ + [3, np.float32, 1], + [3, np.float32, 2], + [3, np.float32, 4] + ] + for step in range(100): + for item in shape_format: + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) + cpu_output = net(cpu_tensor) + cpu_output = cpu_output.numpy() + npu_output = net(npu_tensor) + npu_output = npu_output.to("cpu") + npu_output = npu_output.numpy() + self.assertRtolEqual(cpu_output, npu_output) + + def test_dyamic_unspport_op(self, device): + net = NetUnsupport() + net = net.to("npu") + item = [3, np.float32, 4] + for step in range(100): + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) + cpu_output = net(cpu_tensor) + cpu_output = cpu_output.numpy() + npu_output = net(npu_tensor) + npu_output = npu_output.to("cpu") + npu_output = npu_output.numpy() + self.assertRtolEqual(cpu_output, npu_output) + + def test_dynamic_mix_op(self, device): + net = NetMixOp() + net = net.to("npu") + item = [3, np.float32, 4] + for step in range(100): + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) + cpu_output = net(cpu_tensor) + cpu_output = cpu_output.numpy() + npu_output = net(npu_tensor) + npu_output = npu_output.to("cpu") + npu_output = npu_output.numpy() + self.assertRtolEqual(cpu_output, npu_output) + + def test_dynamic_all_random_mix_op(self, device): + net = NetMixOp() + net = net.to("npu") + format_list = [0, 3, 29] + dtype_list = [np.float32] + dim_list = [1, 2, 3, 4] + items = [ + [i, j, k, 10] for i in format_list for j in dtype_list for k in dim_list + ] + for step in range(100): + for item in items: + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) + cpu_output = net(cpu_tensor) + cpu_output = cpu_output.numpy() + npu_output = net(npu_tensor) + npu_output = npu_output.to("cpu") + npu_output = npu_output.numpy() + self.assertRtolEqual(cpu_output, npu_output) + + def test_dynamic_exit(self, device): + net = NetMixOp() + net = net.to("npu") + item = [3, np.float32, 4] + for step in range(2): + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -100, 100) + cpu_output = net(cpu_tensor) + cpu_output = cpu_output.numpy() + npu_output = net(npu_tensor) + npu_output = npu_output.to("cpu") + npu_output = npu_output.numpy() + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestShape, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_dynamic_ops/test_network_gatherv2.py b/test/test_npu/test_dynamic_ops/test_network_gatherv2.py index cbbbf407fcaa58c085109e84949a85049be3fe92..176fa41622a8df44954e6233e78a6765750b2ce5 100644 --- a/test/test_npu/test_dynamic_ops/test_network_gatherv2.py +++ b/test/test_npu/test_dynamic_ops/test_network_gatherv2.py @@ -1,84 +1,84 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests -import time -import os -import copy -# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 - - -class GatherV2Net(torch.nn.Module): - def __init__(self): - super(GatherV2Net, self).__init__() - - def forward(self, x, dim, idx): - if x.device == torch.device("cpu") and x.dtype == torch.float16: - x = x.to(torch.float32) - out = torch.index_select(x, dim, idx) - if x.device == torch.device("cpu") and x.dtype == torch.float16: - out = out.to(torch.float16) - return out - - -class TestShape(TestCase): - def create_random_shape_tensor(self, item, min_value, max_value): - npu_format = item[0] - dtype = item[1] - dim = item[2] - shape = np.random.randint(1, 10, dim) - input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) - cpu_input = torch.from_numpy(input_tensor) - npu_input = torch.from_numpy(input_tensor).npu() - if npu_format not in (-1, 0): - npu_input = npu_input.npu_format_cast(npu_format) - return cpu_input, npu_input - - def get_random_dim_index(self, cpu_tensor): - shape = list(cpu_tensor.shape) - dim = np.random.randint(0, len(shape)) - the_size_of_dim = shape[dim] - indices = np.random.randint(0, the_size_of_dim, np.random.randint(the_size_of_dim), dtype=np.int64) - indices = torch.from_numpy(indices) - return dim, indices - - def test_dynamic_threads_support_op(self, device): - format_list = [0] - dtype_list = [np.float32, np.float16] - dim_list = [3,4] - net = GatherV2Net() - net_npu = copy.deepcopy(net).to("npu") - items = [ - [i, j, k] for i in format_list for j in dtype_list for k in dim_list - ] - for item in items: - if item[0] == 29 and item[2] == 1: - continue - if item[0] in [0,3,29] and item[2] == 5: - continue - for _ in range(100): - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) - dim, indices = self.get_random_dim_index(cpu_tensor) - cpu_output = net(cpu_tensor, dim, indices) - npu_output = net_npu(npu_tensor, dim, indices.to("npu")) - self.assertRtolEqual(cpu_output.to(npu_output.dtype).numpy(), npu_output.cpu().numpy()) - - -instantiate_device_type_tests(TestShape, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests +import time +import os +import copy +# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 + + +class GatherV2Net(torch.nn.Module): + def __init__(self): + super(GatherV2Net, self).__init__() + + def forward(self, x, dim, idx): + if x.device == torch.device("cpu") and x.dtype == torch.float16: + x = x.to(torch.float32) + out = torch.index_select(x, dim, idx) + if x.device == torch.device("cpu") and x.dtype == torch.float16: + out = out.to(torch.float16) + return out + + +class TestShape(TestCase): + def create_random_shape_tensor(self, item, min_value, max_value): + npu_format = item[0] + dtype = item[1] + dim = item[2] + shape = np.random.randint(1, 10, dim) + input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) + cpu_input = torch.from_numpy(input_tensor) + npu_input = torch.from_numpy(input_tensor).npu() + if npu_format not in (-1, 0): + npu_input = npu_input.npu_format_cast(npu_format) + return cpu_input, npu_input + + def get_random_dim_index(self, cpu_tensor): + shape = list(cpu_tensor.shape) + dim = np.random.randint(0, len(shape)) + the_size_of_dim = shape[dim] + indices = np.random.randint(0, the_size_of_dim, np.random.randint(the_size_of_dim), dtype=np.int64) + indices = torch.from_numpy(indices) + return dim, indices + + def test_dynamic_threads_support_op(self, device): + format_list = [0] + dtype_list = [np.float32, np.float16] + dim_list = [3,4] + net = GatherV2Net() + net_npu = copy.deepcopy(net).to("npu") + items = [ + [i, j, k] for i in format_list for j in dtype_list for k in dim_list + ] + for item in items: + if item[0] == 29 and item[2] == 1: + continue + if item[0] in [0,3,29] and item[2] == 5: + continue + for _ in range(100): + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) + dim, indices = self.get_random_dim_index(cpu_tensor) + cpu_output = net(cpu_tensor, dim, indices) + npu_output = net_npu(npu_tensor, dim, indices.to("npu")) + self.assertRtolEqual(cpu_output.to(npu_output.dtype).numpy(), npu_output.cpu().numpy()) + + +instantiate_device_type_tests(TestShape, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_dynamic_ops/test_network_repeat.py b/test/test_npu/test_dynamic_ops/test_network_repeat.py index b135370e3c1425c121b2642c35ed0e4fcef6c07b..b60f9d8a5058b9a157a3b4329a9a6d2c2db72cad 100644 --- a/test/test_npu/test_dynamic_ops/test_network_repeat.py +++ b/test/test_npu/test_dynamic_ops/test_network_repeat.py @@ -1,83 +1,83 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests -import time -import os -import copy -# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 - - -class RepeatNet(torch.nn.Module): - def __init__(self): - super(RepeatNet, self).__init__() - - def forward(self, x, size): - if x.device == torch.device("cpu") and x.dtype == torch.float16: - x = x.to(torch.float32) - out = x.repeat(size) - if x.device == torch.device("cpu") and x.dtype == torch.float16: - out = out.to(torch.float16) - return out - - - -class TestShape(TestCase): - def create_random_shape_tensor(self, item, min_value, max_value): - npu_format = item[0] - dtype = item[1] - dim = item[2] - shape = np.random.randint(1, 10, dim) - input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) - cpu_input = torch.from_numpy(input_tensor) - npu_input = torch.from_numpy(input_tensor).npu() - if npu_format not in (-1, 0): - npu_input = npu_input.npu_format_cast(npu_format) - return cpu_input, npu_input - - def get_random_size(self, cpu_tensor): - shape = list(cpu_tensor.shape) - if np.random.rand()<0.4: - sizes = np.random.randint(2, 5,len(shape)+int(np.random.randint(1, 3))) - else: - sizes = np.random.randint(2, 5, len(shape)) - return list(sizes) - - def test_dynamic_threads_support_op(self, device): - format_list = [0] - dtype_list = [np.float32, np.float16] - dim_list = [3, 4] - net = RepeatNet() - net_npu = copy.deepcopy(net).to("npu") - items = [ - [i, j, k] for i in format_list for j in dtype_list for k in dim_list - ] - for item in items: - if item[0] == 29 and item[2] == 1: - continue - for _ in range(10): - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) - sizes = self.get_random_size(cpu_tensor) - cpu_output = net(cpu_tensor, sizes) - npu_output = net_npu(npu_tensor, sizes) - self.assertRtolEqual(cpu_output.to(npu_output.dtype).numpy(), npu_output.cpu().numpy()) - - -instantiate_device_type_tests(TestShape, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests +import time +import os +import copy +# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 + + +class RepeatNet(torch.nn.Module): + def __init__(self): + super(RepeatNet, self).__init__() + + def forward(self, x, size): + if x.device == torch.device("cpu") and x.dtype == torch.float16: + x = x.to(torch.float32) + out = x.repeat(size) + if x.device == torch.device("cpu") and x.dtype == torch.float16: + out = out.to(torch.float16) + return out + + + +class TestShape(TestCase): + def create_random_shape_tensor(self, item, min_value, max_value): + npu_format = item[0] + dtype = item[1] + dim = item[2] + shape = np.random.randint(1, 10, dim) + input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) + cpu_input = torch.from_numpy(input_tensor) + npu_input = torch.from_numpy(input_tensor).npu() + if npu_format not in (-1, 0): + npu_input = npu_input.npu_format_cast(npu_format) + return cpu_input, npu_input + + def get_random_size(self, cpu_tensor): + shape = list(cpu_tensor.shape) + if np.random.rand()<0.4: + sizes = np.random.randint(2, 5,len(shape)+int(np.random.randint(1, 3))) + else: + sizes = np.random.randint(2, 5, len(shape)) + return list(sizes) + + def test_dynamic_threads_support_op(self, device): + format_list = [0] + dtype_list = [np.float32, np.float16] + dim_list = [3, 4] + net = RepeatNet() + net_npu = copy.deepcopy(net).to("npu") + items = [ + [i, j, k] for i in format_list for j in dtype_list for k in dim_list + ] + for item in items: + if item[0] == 29 and item[2] == 1: + continue + for _ in range(10): + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) + sizes = self.get_random_size(cpu_tensor) + cpu_output = net(cpu_tensor, sizes) + npu_output = net_npu(npu_tensor, sizes) + self.assertRtolEqual(cpu_output.to(npu_output.dtype).numpy(), npu_output.cpu().numpy()) + + +instantiate_device_type_tests(TestShape, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_dynamic_ops/test_network_topK.py b/test/test_npu/test_dynamic_ops/test_network_topK.py index 79ad2f3b5f0c7941d48458e7c646afde9cfc5de6..614cd3592cab9988b1c55b8257050204a89a2d80 100644 --- a/test/test_npu/test_dynamic_ops/test_network_topK.py +++ b/test/test_npu/test_dynamic_ops/test_network_topK.py @@ -1,76 +1,76 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests -import time -import os -import copy -# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 - - -class TopkNet(torch.nn.Module): - def __init__(self): - super(TopkNet, self).__init__() - - def forward(self, x, k): - if x.device == torch.device("cpu") and x.dtype == torch.float16: - x = x.to(torch.float32) - out = torch.topk(x, k) - if x.device == torch.device("cpu") and x.dtype == torch.float16: - out = out.to(torch.float16) - return out - - -class TestShape(TestCase): - def create_random_shape_tensor(self, item, min_value, max_value): - npu_format = item[0] - dtype = item[1] - dim = item[2] - shape = np.random.randint(5, 10, dim) - input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) - cpu_input = torch.from_numpy(input_tensor) - npu_input = torch.from_numpy(input_tensor).npu() - if npu_format not in (-1, 0): - npu_input = npu_input.npu_format_cast(npu_format) - return cpu_input, npu_input - - def test_dynamic_threads_support_op(self, device): - format_list = [0] - dtype_list = [np.float16] - dim_list = [3, 4] - net = TopkNet() - net_npu = copy.deepcopy(net).to("npu") - items = [ - [i, j, k] for i in format_list for j in dtype_list for k in dim_list - ] - for item in items: - if item[0] == 29 and item[2] == 1: - continue - for _ in range(100): - cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) - k = 5 - cpu_output,cpu_indice = net(cpu_tensor, k) - npu_output,npu_indice = net_npu(npu_tensor, k) - self.assertRtolEqual(cpu_output.to(npu_output.dtype).numpy(), npu_output.cpu().numpy()) - self.assertRtolEqual(cpu_indice.to(torch.int32).numpy(), npu_indice.to(torch.int32).cpu().numpy()) - - - -instantiate_device_type_tests(TestShape, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests +import time +import os +import copy +# Need export DYNAMIC_COMPILE_ENABLE=1 and export EXPERIMENTAL_DYNAMIC_PARTITION=1 + + +class TopkNet(torch.nn.Module): + def __init__(self): + super(TopkNet, self).__init__() + + def forward(self, x, k): + if x.device == torch.device("cpu") and x.dtype == torch.float16: + x = x.to(torch.float32) + out = torch.topk(x, k) + if x.device == torch.device("cpu") and x.dtype == torch.float16: + out = out.to(torch.float16) + return out + + +class TestShape(TestCase): + def create_random_shape_tensor(self, item, min_value, max_value): + npu_format = item[0] + dtype = item[1] + dim = item[2] + shape = np.random.randint(5, 10, dim) + input_tensor = np.random.uniform(min_value, max_value, shape).astype(dtype) + cpu_input = torch.from_numpy(input_tensor) + npu_input = torch.from_numpy(input_tensor).npu() + if npu_format not in (-1, 0): + npu_input = npu_input.npu_format_cast(npu_format) + return cpu_input, npu_input + + def test_dynamic_threads_support_op(self, device): + format_list = [0] + dtype_list = [np.float16] + dim_list = [3, 4] + net = TopkNet() + net_npu = copy.deepcopy(net).to("npu") + items = [ + [i, j, k] for i in format_list for j in dtype_list for k in dim_list + ] + for item in items: + if item[0] == 29 and item[2] == 1: + continue + for _ in range(100): + cpu_tensor, npu_tensor = self.create_random_shape_tensor(item, -10, 10) + k = 5 + cpu_output,cpu_indice = net(cpu_tensor, k) + npu_output,npu_indice = net_npu(npu_tensor, k) + self.assertRtolEqual(cpu_output.to(npu_output.dtype).numpy(), npu_output.cpu().numpy()) + self.assertRtolEqual(cpu_indice.to(torch.int32).numpy(), npu_indice.to(torch.int32).cpu().numpy()) + + + +instantiate_device_type_tests(TestShape, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_fill_.py b/test/test_npu/test_fill_.py index 4c609713b99fcc8f51262618d0b3619fcc90ed53..9dbb022f092f21c2b41c1f5631eb59eeb82ddfa1 100644 --- a/test/test_npu/test_fill_.py +++ b/test/test_npu/test_fill_.py @@ -1,129 +1,129 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestFill(TestCase): - - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - - return npu_input1, npu_input2 - - def generate_single_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def cpu_op_exec(self, input1, input2): - output = torch.fill_(input1, input2).numpy() - return output - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.fill_(input1, input2) - output = output.to("cpu").numpy() - return output - - def npu_op_exec_scalar(self, input1, input2): - input1 = input1.to("npu") - output = torch.fill_(input1, input2) - output = output.to("cpu").numpy() - return output - - - def test_fill_scalar_int32(self, device): - npu_input1, _ = self.generate_data(0, 100, (2, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, 1) - npu_output = self.npu_op_exec_scalar(npu_input1, 1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_fill_scalar_float16(self, device): - npu_input1, _ = self.generate_data(0, 100, (2, 3), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, 1) - npu_output = self.npu_op_exec_scalar(npu_input1, 1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_fill_scalar_float32(self, device): - npu_input1, _ = self.generate_data(0, 100, (2, 3), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, 1) - npu_output = self.npu_op_exec_scalar(npu_input1, 1) - self.assertRtolEqual(cpu_output, npu_output) - - - def test_fill_common_shape_format(self, device): - shape_format = [ - [np.float32, -1, (4, 3)], - [np.int32, -1, (2, 3)], - [np.int32, -1, (4, 3, 1)], - [np.float16,-1,(65535, 1)], - [np.float16, -1, (1, 8192)], - [np.float16, -1, (1, 16384)], - [np.float16, -1, (1, 32768)], - [np.float16, -1, ( 1, 131072)], - [np.float16, -1, (1, 196608)], - [np.float16, -1, (1, 262144)], - [np.float16, -1, (1, 393216)], - [np.float16, -1, (1, 524288)], - [np.float16, -1, (1, 655360)], - [np.float16, -1, (1, 786432)], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1, 1) - npu_output = self.npu_op_exec_scalar(npu_input1, 1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_fill_float32_data_range(self, device): - data_range = [ - [-1.1754943508e-38, -1.1754943508e-38], - [-3402823500.0, 3402823500.0], - [-0.000030517578125, 0.000030517578125], - [3402823500, 3402800000], - [-9.313225746154785e-10, 9.313225746154785e-10], - [-3402823500.0, -3402823500.0], - [-3402823500.0, 3402823500.0], - [-9.313225746154785e-10, 9.313225746154785e-10], - [-3402823500.0,-3402823500.0], - [-0.000000000000000000000000000000000000011754943508, 0.000000000000000000000000000000000000011754943508], - [0.000000000000000000000000000000000000011754943508, 0.000000000000000000000000000000000000011754943508], - [-0.000000000000000000000000000000000000011754943508, -0.000000000000000000000000000000000000011754943508], - [-0.000000000000000000000000000000000000011754943508, 0.000000000000000000000000000000000000011754943508] - ] - for item in data_range: - cpu_input1, npu_input1 = create_common_tensor([np.float32, - 1, (1, 31, 149, 2)], item[0], item[1]) - cpu_output = self.cpu_op_exec(cpu_input1, 1) - npu_output = self.npu_op_exec_scalar(npu_input1, 1) - self.assertRtolEqual(cpu_output, npu_output) - print("float32 run") - -instantiate_device_type_tests(TestFill, globals(), except_for='cpu') -if __name__ == '__main__': - torch.npu.set_device("npu:7") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestFill(TestCase): + + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + + return npu_input1, npu_input2 + + def generate_single_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def cpu_op_exec(self, input1, input2): + output = torch.fill_(input1, input2).numpy() + return output + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.fill_(input1, input2) + output = output.to("cpu").numpy() + return output + + def npu_op_exec_scalar(self, input1, input2): + input1 = input1.to("npu") + output = torch.fill_(input1, input2) + output = output.to("cpu").numpy() + return output + + + def test_fill_scalar_int32(self, device): + npu_input1, _ = self.generate_data(0, 100, (2, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, 1) + npu_output = self.npu_op_exec_scalar(npu_input1, 1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_fill_scalar_float16(self, device): + npu_input1, _ = self.generate_data(0, 100, (2, 3), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, 1) + npu_output = self.npu_op_exec_scalar(npu_input1, 1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_fill_scalar_float32(self, device): + npu_input1, _ = self.generate_data(0, 100, (2, 3), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, 1) + npu_output = self.npu_op_exec_scalar(npu_input1, 1) + self.assertRtolEqual(cpu_output, npu_output) + + + def test_fill_common_shape_format(self, device): + shape_format = [ + [np.float32, -1, (4, 3)], + [np.int32, -1, (2, 3)], + [np.int32, -1, (4, 3, 1)], + [np.float16,-1,(65535, 1)], + [np.float16, -1, (1, 8192)], + [np.float16, -1, (1, 16384)], + [np.float16, -1, (1, 32768)], + [np.float16, -1, ( 1, 131072)], + [np.float16, -1, (1, 196608)], + [np.float16, -1, (1, 262144)], + [np.float16, -1, (1, 393216)], + [np.float16, -1, (1, 524288)], + [np.float16, -1, (1, 655360)], + [np.float16, -1, (1, 786432)], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) + cpu_output = self.cpu_op_exec(cpu_input1, 1) + npu_output = self.npu_op_exec_scalar(npu_input1, 1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_fill_float32_data_range(self, device): + data_range = [ + [-1.1754943508e-38, -1.1754943508e-38], + [-3402823500.0, 3402823500.0], + [-0.000030517578125, 0.000030517578125], + [3402823500, 3402800000], + [-9.313225746154785e-10, 9.313225746154785e-10], + [-3402823500.0, -3402823500.0], + [-3402823500.0, 3402823500.0], + [-9.313225746154785e-10, 9.313225746154785e-10], + [-3402823500.0,-3402823500.0], + [-0.000000000000000000000000000000000000011754943508, 0.000000000000000000000000000000000000011754943508], + [0.000000000000000000000000000000000000011754943508, 0.000000000000000000000000000000000000011754943508], + [-0.000000000000000000000000000000000000011754943508, -0.000000000000000000000000000000000000011754943508], + [-0.000000000000000000000000000000000000011754943508, 0.000000000000000000000000000000000000011754943508] + ] + for item in data_range: + cpu_input1, npu_input1 = create_common_tensor([np.float32, - 1, (1, 31, 149, 2)], item[0], item[1]) + cpu_output = self.cpu_op_exec(cpu_input1, 1) + npu_output = self.npu_op_exec_scalar(npu_input1, 1) + self.assertRtolEqual(cpu_output, npu_output) + print("float32 run") + +instantiate_device_type_tests(TestFill, globals(), except_for='cpu') +if __name__ == '__main__': + torch.npu.set_device("npu:7") + run_tests() diff --git a/test/test_npu/test_fmod.py b/test/test_npu/test_fmod.py index 5683a74794c8f1b4c17aeb4d38d1ed41dc95f827..9732571e0264e7d9c31a7860ae641ede7c7b656d 100644 --- a/test/test_npu/test_fmod.py +++ b/test/test_npu/test_fmod.py @@ -1,120 +1,120 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestFmod(TestCase): - - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - - return npu_input1, npu_input2 - - def generate_single_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def generate_scalar(self, min_d, max_d): - scalar = np.random.uniform(min_d, max_d) - return scalar - - def generate_int_scalar(self, min_d, max_d): - scalar = np.random.randint(min_d, max_d) - return scalar - - def cpu_op_exec(self, input1, input2): - output = torch.fmod(input1, input2) - # output = torch.fmod(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.fmod(input1, input2) - # output = torch.fmod(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_tensor_need_to_npu(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.fmod(input1, input2) - # output = torch.fmod(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar(self, input1, input2): - input1 = input1.to("npu") - # output = input1 + input2 - output = torch.fmod(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, input3): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = input3.to("npu") - torch.fmod(input1, input2, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - - def test_fmod_scalar_float32(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 3), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, 1) - npu_output = self.npu_op_exec_scalar(npu_input1, 1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_fmod_uncontiguous_float32_scalar(self, device): - def cpu_uncontiguous_op_exec_scalar(input1, input2): - input1 = input1.as_strided([2, 2], [1, 2], 1) - output = torch.fmod(input1, input2) - output = output.numpy() - return output - - def npu_uncontiguous_op_exec_scalar(input1, input2): - input1 = input1.to("cpu") - input1 = input1.as_strided([2, 2], [1, 2], 1) - output = torch.fmod(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - npu_input1, npu_input2 = self.generate_data(0, 100, (4, 3), np.float32) - cpu_input1 = copy.deepcopy(npu_input1) - cpu_output = cpu_uncontiguous_op_exec_scalar(cpu_input1, 2) - npu_output = npu_uncontiguous_op_exec_scalar(npu_input1, 2) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestFmod, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:7") +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestFmod(TestCase): + + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + + return npu_input1, npu_input2 + + def generate_single_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def generate_scalar(self, min_d, max_d): + scalar = np.random.uniform(min_d, max_d) + return scalar + + def generate_int_scalar(self, min_d, max_d): + scalar = np.random.randint(min_d, max_d) + return scalar + + def cpu_op_exec(self, input1, input2): + output = torch.fmod(input1, input2) + # output = torch.fmod(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.fmod(input1, input2) + # output = torch.fmod(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_tensor_need_to_npu(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.fmod(input1, input2) + # output = torch.fmod(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar(self, input1, input2): + input1 = input1.to("npu") + # output = input1 + input2 + output = torch.fmod(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, input3): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = input3.to("npu") + torch.fmod(input1, input2, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + + def test_fmod_scalar_float32(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 3), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, 1) + npu_output = self.npu_op_exec_scalar(npu_input1, 1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_fmod_uncontiguous_float32_scalar(self, device): + def cpu_uncontiguous_op_exec_scalar(input1, input2): + input1 = input1.as_strided([2, 2], [1, 2], 1) + output = torch.fmod(input1, input2) + output = output.numpy() + return output + + def npu_uncontiguous_op_exec_scalar(input1, input2): + input1 = input1.to("cpu") + input1 = input1.as_strided([2, 2], [1, 2], 1) + output = torch.fmod(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + npu_input1, npu_input2 = self.generate_data(0, 100, (4, 3), np.float32) + cpu_input1 = copy.deepcopy(npu_input1) + cpu_output = cpu_uncontiguous_op_exec_scalar(cpu_input1, 2) + npu_output = npu_uncontiguous_op_exec_scalar(npu_input1, 2) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestFmod, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:7") run_tests() \ No newline at end of file diff --git a/test/test_npu/test_hardtanh_backward.py b/test/test_npu/test_hardtanh_backward.py index 1e36d1876fa8cddaaea97451f094fe8d28b8784b..fc5e1b7c288299b6bcc421f99a88e0aa31020718 100644 --- a/test/test_npu/test_hardtanh_backward.py +++ b/test/test_npu/test_hardtanh_backward.py @@ -1,65 +1,65 @@ -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestHardtanhBackward(TestCase): - - def cpu_op_exec(self, input_x, min_val, max_val): - input_x.requires_grad_(True) - m = torch.nn.Hardtanh(min_val, max_val) - output = m(input_x) - w = torch.ones_like(output) - output.backward(w) - out = input_x.grad - return out - - def npu_op_exec(self, input_x, min_val, max_val): - input_x.requires_grad_(True) - m = torch.nn.Hardtanh(min_val, max_val) - output = m(input_x) - w = torch.ones_like(output) - w = w.to("npu") - output.backward(w) - out = input_x.grad.to('cpu') - return out - - def test_hardtanh_backwardfloat32(self, device): - shape_format = [ - [[np.float32, 0, (10, 10)], -1, 1], [[np.float32, 0, (5, 6, 7)], -1, 1], - [[np.float32, -1, (6, 6, 6)], -1, 3], [[np.float32, 3, (8, 6, 4)], -2, 2], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], -2, 2) - cpu_output = self.cpu_op_exec(cpu_input, item[1], item[2]) - npu_output = self.npu_op_exec(npu_input, item[1], item[2]) - self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) - - def test_hardtanh_backwardfloat16(self, device): - shape_format = [ - [[np.float16, 0, (10, 10, 10)], -1, 1], [[np.float16, 0, (7, 7, 7)], -1, 1], - [[np.float16, -1, (6, 6, 6, 6)], -3, 1], [[np.float16, 3, (10, 10, 10, 10)], -1, 3], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], -2, 2) - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input, item[1], item[2]) - npu_output = self.npu_op_exec(npu_input, item[1], item[2]) - cpu_output = cpu_output.to(torch.float16) - self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) - -instantiate_device_type_tests(TestHardtanhBackward, globals(), except_for="cpu") -if __name__ == "__main__": - torch.npu.set_device("npu:5") +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestHardtanhBackward(TestCase): + + def cpu_op_exec(self, input_x, min_val, max_val): + input_x.requires_grad_(True) + m = torch.nn.Hardtanh(min_val, max_val) + output = m(input_x) + w = torch.ones_like(output) + output.backward(w) + out = input_x.grad + return out + + def npu_op_exec(self, input_x, min_val, max_val): + input_x.requires_grad_(True) + m = torch.nn.Hardtanh(min_val, max_val) + output = m(input_x) + w = torch.ones_like(output) + w = w.to("npu") + output.backward(w) + out = input_x.grad.to('cpu') + return out + + def test_hardtanh_backwardfloat32(self, device): + shape_format = [ + [[np.float32, 0, (10, 10)], -1, 1], [[np.float32, 0, (5, 6, 7)], -1, 1], + [[np.float32, -1, (6, 6, 6)], -1, 3], [[np.float32, 3, (8, 6, 4)], -2, 2], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], -2, 2) + cpu_output = self.cpu_op_exec(cpu_input, item[1], item[2]) + npu_output = self.npu_op_exec(npu_input, item[1], item[2]) + self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) + + def test_hardtanh_backwardfloat16(self, device): + shape_format = [ + [[np.float16, 0, (10, 10, 10)], -1, 1], [[np.float16, 0, (7, 7, 7)], -1, 1], + [[np.float16, -1, (6, 6, 6, 6)], -3, 1], [[np.float16, 3, (10, 10, 10, 10)], -1, 3], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], -2, 2) + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input, item[1], item[2]) + npu_output = self.npu_op_exec(npu_input, item[1], item[2]) + cpu_output = cpu_output.to(torch.float16) + self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) + +instantiate_device_type_tests(TestHardtanhBackward, globals(), except_for="cpu") +if __name__ == "__main__": + torch.npu.set_device("npu:5") run_tests() \ No newline at end of file diff --git a/test/test_npu/test_isfinite.py b/test/test_npu/test_isfinite.py index 8e948c199e11855ff36f2fd3d702f522b9e239b4..18a2debae0060e5a2a7bef6f077e7e204d9ec08f 100644 --- a/test/test_npu/test_isfinite.py +++ b/test/test_npu/test_isfinite.py @@ -1,70 +1,70 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestIsfinite(TestCase): - - def generate_data(self, minValue, maxValue, shape, dtype): - input1 = np.random.uniform(minValue, maxValue, shape).astype(dtype) - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - return npu_input1 - - - def cpu_op_exec(self, input1): - output = torch.isfinite(input1); - output = output.numpy() - return output - - - def npu_op_exec(self, input1): - input1 = input1.to("npu") - output = torch.isfinite(input1); - output = output.to("cpu") - output = output.numpy() - return output - - - def test_isfinite_common_shape_format(self, device): - shape_format = [ - [[np.bool, -1, (4, 3, 1)]], - [[np.int32, -1, (4, 3, 1)]], - [[np.int8, -1, (2, 3)]], - [[np.int16, -1, (2, 3)]], - [[np.int64, -1, (2, 3)]], - [[np.float32, -1, (4, 3, 1)]], - [[np.float64, -1, (4, 3, 1)]], - [[np.uint8, -1, (4, 3, 1)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestIsfinite, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:3") +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestIsfinite(TestCase): + + def generate_data(self, minValue, maxValue, shape, dtype): + input1 = np.random.uniform(minValue, maxValue, shape).astype(dtype) + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + return npu_input1 + + + def cpu_op_exec(self, input1): + output = torch.isfinite(input1); + output = output.numpy() + return output + + + def npu_op_exec(self, input1): + input1 = input1.to("npu") + output = torch.isfinite(input1); + output = output.to("cpu") + output = output.numpy() + return output + + + def test_isfinite_common_shape_format(self, device): + shape_format = [ + [[np.bool, -1, (4, 3, 1)]], + [[np.int32, -1, (4, 3, 1)]], + [[np.int8, -1, (2, 3)]], + [[np.int16, -1, (2, 3)]], + [[np.int64, -1, (2, 3)]], + [[np.float32, -1, (4, 3, 1)]], + [[np.float64, -1, (4, 3, 1)]], + [[np.uint8, -1, (4, 3, 1)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestIsfinite, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:3") run_tests() \ No newline at end of file diff --git a/test/test_npu/test_le.py b/test/test_npu/test_le.py index 293d23b63f92e93e1ba78ec531c2aade385e4ca0..2eb4be77fe1ddd484897582acb4f4036fe1ff293 100644 --- a/test/test_npu/test_le.py +++ b/test/test_npu/test_le.py @@ -1,106 +1,106 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestLe(TestCase): - - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - - return npu_input1, npu_input2 - - def generate_single_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def cpu_op_exec(self, input1, input2): - output = torch.le(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.le(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar(self, input1, input2): - input1 = input1.to("npu") - output = torch.le(input1,input2) - output = output.to("cpu") - output = output.numpy() - return output - - - def test_le_float16(self, device): - def cpu_op_exec_fp16(input1, input2): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - output = torch.le(input1, input2) - output = output.numpy() - return output - npu_input1, npu_input2 = self.generate_data(0, 100, (5, 3), np.float16) - cpu_output = cpu_op_exec_fp16(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - - def test_le_float32(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (4, 3), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - - - def test_le_float32_broadcast(self, device): - npu_input1 = self.generate_single_data(0, 100, (4, 3, 1), np.float32) - npu_input2 = self.generate_single_data(0, 100, (4, 1, 5), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - - def test_less_scalar_float32(self, device): - npu_input1, _= self.generate_data(0, 100, (2,3), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, 1) - npu_output = self.npu_op_exec_scalar(npu_input1, 1) - self.assertRtolEqual(cpu_output, npu_output) - - - def test_le_int32(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestLe, globals(), except_for='cpu') -if __name__ == '__main__': - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestLe(TestCase): + + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + + return npu_input1, npu_input2 + + def generate_single_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def cpu_op_exec(self, input1, input2): + output = torch.le(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.le(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar(self, input1, input2): + input1 = input1.to("npu") + output = torch.le(input1,input2) + output = output.to("cpu") + output = output.numpy() + return output + + + def test_le_float16(self, device): + def cpu_op_exec_fp16(input1, input2): + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + output = torch.le(input1, input2) + output = output.numpy() + return output + npu_input1, npu_input2 = self.generate_data(0, 100, (5, 3), np.float16) + cpu_output = cpu_op_exec_fp16(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + + def test_le_float32(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (4, 3), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + + + def test_le_float32_broadcast(self, device): + npu_input1 = self.generate_single_data(0, 100, (4, 3, 1), np.float32) + npu_input2 = self.generate_single_data(0, 100, (4, 1, 5), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + + def test_less_scalar_float32(self, device): + npu_input1, _= self.generate_data(0, 100, (2,3), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, 1) + npu_output = self.npu_op_exec_scalar(npu_input1, 1) + self.assertRtolEqual(cpu_output, npu_output) + + + def test_le_int32(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestLe, globals(), except_for='cpu') +if __name__ == '__main__': + run_tests() diff --git a/test/test_npu/test_log2.py b/test/test_npu/test_log2.py index 6eb03210b6689527445364d7760b003fcf65206c..e6e34271e56cfe0f29536ff10bdfa514276cef8d 100644 --- a/test/test_npu/test_log2.py +++ b/test/test_npu/test_log2.py @@ -1,101 +1,101 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestLog2(TestCase): - - def cpu_op_exec(self,input1): - output = torch.log2(input1) - output = output.numpy() - return output - - def npu_op_exec(self,input1): - output = torch.log2(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_(self,input1): - output = torch.log2_(input1) - output = input1.numpy() - return output - - def npu_op_exec_(self,input1): - output = torch.log2_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_out(self,input1,cpu_out): - output = torch.log2(input1, out = cpu_out) - output = cpu_out.numpy() - return output - - def npu_op_exec_out(self,input1,npu_out): - output = torch.log2(input1, out = npu_out) - output = npu_out.to("cpu") - output = output.numpy() - return output - - def test_log2_float32_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (1)]], - [[np.float32, -1, (4, 23)]], - [[np.float32, -1, (2, 3)]], - [[np.float32, -1, (12, 23)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log2_float321_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (3)]], - [[np.float32, -1, (4, 3)]], - [[np.float32, -1, (12, 32)]], - [[np.float32, -1, (22, 38)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec_(cpu_input1) - npu_output = self.npu_op_exec_(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log2_out_float32_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (4)]], - [[np.float32, -1, (4, 1, 5)]], - [[np.float32, -1, (2, 3, 8)]], - [[np.float32, -1, (2, 13, 56)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_out, npu_out = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec_out(cpu_input1,cpu_out) - npu_output = self.npu_op_exec_out(npu_input1,npu_out) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestLog2, globals(), except_for="cpu") - -if __name__ == "__main__": - torch.npu.set_device("npu:6") +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestLog2(TestCase): + + def cpu_op_exec(self,input1): + output = torch.log2(input1) + output = output.numpy() + return output + + def npu_op_exec(self,input1): + output = torch.log2(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_(self,input1): + output = torch.log2_(input1) + output = input1.numpy() + return output + + def npu_op_exec_(self,input1): + output = torch.log2_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_out(self,input1,cpu_out): + output = torch.log2(input1, out = cpu_out) + output = cpu_out.numpy() + return output + + def npu_op_exec_out(self,input1,npu_out): + output = torch.log2(input1, out = npu_out) + output = npu_out.to("cpu") + output = output.numpy() + return output + + def test_log2_float32_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (1)]], + [[np.float32, -1, (4, 23)]], + [[np.float32, -1, (2, 3)]], + [[np.float32, -1, (12, 23)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log2_float321_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (3)]], + [[np.float32, -1, (4, 3)]], + [[np.float32, -1, (12, 32)]], + [[np.float32, -1, (22, 38)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_output = self.cpu_op_exec_(cpu_input1) + npu_output = self.npu_op_exec_(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log2_out_float32_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (4)]], + [[np.float32, -1, (4, 1, 5)]], + [[np.float32, -1, (2, 3, 8)]], + [[np.float32, -1, (2, 13, 56)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_out, npu_out = create_common_tensor(item[0], 1, 100) + cpu_output = self.cpu_op_exec_out(cpu_input1,cpu_out) + npu_output = self.npu_op_exec_out(npu_input1,npu_out) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestLog2, globals(), except_for="cpu") + +if __name__ == "__main__": + torch.npu.set_device("npu:6") run_tests() \ No newline at end of file diff --git a/test/test_npu/test_logdet.py b/test/test_npu/test_logdet.py index 39a0757fae6de337d790fc12e506113388387701..a8b3e203cc45b5196be1f5ee93b3d3abc54327ee 100644 --- a/test/test_npu/test_logdet.py +++ b/test/test_npu/test_logdet.py @@ -1,186 +1,186 @@ - -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLogDet(TestCase): - - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - - return npu_input1, npu_input2 - - def generate_single_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def generate_three_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input3 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - npu_input3 = torch.from_numpy(input3) - - return npu_input1, npu_input2, npu_input3 - - def generate_scalar(self, min_d, max_d): - scalar = np.random.uniform(min_d, max_d) - return scalar - - def generate_int_scalar(self, min_d, max_d): - scalar = np.random.randint(min_d, max_d) - return scalar - - def cpu_op_exec(self, input1): - output = torch.logdet(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = torch.logdet(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_tensor_need_to_npu(self, input1): - input1 = input1.to("npu") - output = torch.logdet(input1) - - output = output.to("cpu") - output = output.numpy() - return output - - def register_tensor(self, item,min_val,max_val): - res = [] - cpu_input, npu_input = create_common_tensor(item, min_val , max_val) - det_result = torch.det(cpu_input) - for i in range(len(det_result)): - if det_result[i] > 0: - res.append(cpu_input[i]) - res = torch.stack(res) - return res, len(res) - - def register_tensor_fp16(self, item,min_val,max_val): - res = [] - cpu_input, npu_input = create_common_tensor(item, min_val , max_val) - cpu_input_tmp = cpu_input.to(torch.float32) - det_result = torch.det(cpu_input_tmp) - for i in range(len(det_result)): - if det_result[i] > 0: - res.append(cpu_input[i]) - res = torch.stack(res) - return res, len(res) - - def create_det_tensor(self, input_tensor): - cpu_input = input_tensor - npu_input = input_tensor.to("npu") - return cpu_input, npu_input - - def test_logdet_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (6, 2, 2)], 100, 200], - [[np.float32, -1, (24, 5, 5)], 100, 200], - [[np.float32, -1, (14, 5, 5)], 21, 22], - [[np.float32, -1, (74,4,4)], 21205, 22225], - [[np.float32, -1, (58,4,4)], -30,30], - [[np.float32, -1, (30,16,16)], -30,30], - [[np.float32, -1, (58, 4, 4)], 0.3219780311757745 , 92], - [[np.float32, -1, (32, 16, 16)], 0.4820305734500543 , 28], - [[np.float32, -1, (28, 8, 8)], 0.8563874665918477 , 98], - [[np.float32, -1, (42, 6, 6)], 0.0694198357720135 , 50], - [[np.float32, -1, (12, 10, 10)], 0.3316939248453338 , 17], - [[np.float32, -1, (6, 10, 10)], 0.6447298684351989 , 95], - [[np.float32, -1, (6, 9, 9)],0.8723538084975545 , 85], - [[np.float32, -1, (10, 5, 5)], 0.8283759153463854 , 71], - [[np.float32, -1, (10, 1, 1)], 0.24718684227306953 , 1], - [[np.float32, -1, (6,1,1)], 0.0694198357720135, 0.24718684227306953], - [[np.float32, -1, (8, 10, 10)], 0.7866457165672994 , 5], - [[np.float32, -1, (6, 14, 14)], 0.9956475043306917 , 28], - [[np.float32, -1, (6, 7, 7)],0.3793216987112159 , 39], - [[np.float32, -1, (14, 10, 10)], 0.769565434387681 , 9], - [[np.float32, -1, (16, 10, 10)], 0.8039978883789274 , 22], - [[np.float32, -1, (30, 3, 3)], 0.03133650248813469 , 37], - [[np.float32, -1, (4, 1, 1)], 0.853775978441379 , 34 ], - [[np.float32, -1, (18, 6, 6)], 0.503285855595573 , 35], - [[np.float32, -1, (6, 3, 3)], 1, 10], - ] - for item in shape_format: - input_shape = item[0][2] - res, tmp_shape0 = self.register_tensor(item[0],item[1],item[2]) - cpu_input1, npu_input1 = self.create_det_tensor(res) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - input_shape = list(input_shape) - input_shape[0] = tmp_shape0 - self.assertRtolEqual(cpu_output, npu_output) - - def test_logdet_float16_shape_format(self, device): - def cpu_op_exec_fp16(input1): - input1 = input1.to(torch.float32) - output = torch.logdet(input1) - output = output.numpy() - output = output.astype(np.float16) - return output - - shape_format = [ - [[np.float16, -1, (9, 5, 5)],-2,2], - [[np.float16, -1, (60,4,4)],-10,12], - [[np.float16, -1, (12,5,5)], 5,10], - [[np.float16, -1, (14, 5, 5)], 0.9283381566708346 , 10], - [[np.float16, -1, (71, 2, 2)], 0.6234465730020081 , 13], - [[np.float16, -1, (10, 5, 5)], 0.7440899332166594 , 1], - [[np.float16, -1, (13, 5, 5)], 0.9790231845699171 , 9], - [[np.float16, -1, (10, 7, 7)], 0.7852605507867441 , 8], - [[np.float16, -1, (18, 2, 2)], 0.8758750778305631 , 9], - [[np.float16, -1, (10, 6, 6)], 0.7570129172808612 , 5], - [[np.float16, -1, (7, 7, 7)], 0 , 2], - [[np.float16, -1, (9, 5, 5)], 1 , 2], - [[np.float16, -1, (12, 4, 4)], 0.7349293532899402 , 19], - [[np.float16, -1, (15, 8, 8)], 0.9583309378850908 , 3], - [[np.float16, -1, (11, 2, 2)],0.3560076034004038 , 25], - ] - - for item in shape_format: - input_shape = item[0][2] - res, tmp_shape0 = self.register_tensor_fp16(item[0],item[1],item[2]) - cpu_input1,npu_input1 = self.create_det_tensor(res) - cpu_output = cpu_op_exec_fp16(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - input_shape = list(input_shape) - input_shape[0] = tmp_shape0 - self.assertRtolEqual(cpu_output, npu_output,prec=1e-3) - - -instantiate_device_type_tests(TestLogDet, globals(), except_for='cpu') - -if __name__ == "__main__": - torch.npu.set_device("npu:6") - run_tests() + +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLogDet(TestCase): + + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + + return npu_input1, npu_input2 + + def generate_single_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def generate_three_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input3 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + npu_input3 = torch.from_numpy(input3) + + return npu_input1, npu_input2, npu_input3 + + def generate_scalar(self, min_d, max_d): + scalar = np.random.uniform(min_d, max_d) + return scalar + + def generate_int_scalar(self, min_d, max_d): + scalar = np.random.randint(min_d, max_d) + return scalar + + def cpu_op_exec(self, input1): + output = torch.logdet(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = torch.logdet(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_tensor_need_to_npu(self, input1): + input1 = input1.to("npu") + output = torch.logdet(input1) + + output = output.to("cpu") + output = output.numpy() + return output + + def register_tensor(self, item,min_val,max_val): + res = [] + cpu_input, npu_input = create_common_tensor(item, min_val , max_val) + det_result = torch.det(cpu_input) + for i in range(len(det_result)): + if det_result[i] > 0: + res.append(cpu_input[i]) + res = torch.stack(res) + return res, len(res) + + def register_tensor_fp16(self, item,min_val,max_val): + res = [] + cpu_input, npu_input = create_common_tensor(item, min_val , max_val) + cpu_input_tmp = cpu_input.to(torch.float32) + det_result = torch.det(cpu_input_tmp) + for i in range(len(det_result)): + if det_result[i] > 0: + res.append(cpu_input[i]) + res = torch.stack(res) + return res, len(res) + + def create_det_tensor(self, input_tensor): + cpu_input = input_tensor + npu_input = input_tensor.to("npu") + return cpu_input, npu_input + + def test_logdet_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (6, 2, 2)], 100, 200], + [[np.float32, -1, (24, 5, 5)], 100, 200], + [[np.float32, -1, (14, 5, 5)], 21, 22], + [[np.float32, -1, (74,4,4)], 21205, 22225], + [[np.float32, -1, (58,4,4)], -30,30], + [[np.float32, -1, (30,16,16)], -30,30], + [[np.float32, -1, (58, 4, 4)], 0.3219780311757745 , 92], + [[np.float32, -1, (32, 16, 16)], 0.4820305734500543 , 28], + [[np.float32, -1, (28, 8, 8)], 0.8563874665918477 , 98], + [[np.float32, -1, (42, 6, 6)], 0.0694198357720135 , 50], + [[np.float32, -1, (12, 10, 10)], 0.3316939248453338 , 17], + [[np.float32, -1, (6, 10, 10)], 0.6447298684351989 , 95], + [[np.float32, -1, (6, 9, 9)],0.8723538084975545 , 85], + [[np.float32, -1, (10, 5, 5)], 0.8283759153463854 , 71], + [[np.float32, -1, (10, 1, 1)], 0.24718684227306953 , 1], + [[np.float32, -1, (6,1,1)], 0.0694198357720135, 0.24718684227306953], + [[np.float32, -1, (8, 10, 10)], 0.7866457165672994 , 5], + [[np.float32, -1, (6, 14, 14)], 0.9956475043306917 , 28], + [[np.float32, -1, (6, 7, 7)],0.3793216987112159 , 39], + [[np.float32, -1, (14, 10, 10)], 0.769565434387681 , 9], + [[np.float32, -1, (16, 10, 10)], 0.8039978883789274 , 22], + [[np.float32, -1, (30, 3, 3)], 0.03133650248813469 , 37], + [[np.float32, -1, (4, 1, 1)], 0.853775978441379 , 34 ], + [[np.float32, -1, (18, 6, 6)], 0.503285855595573 , 35], + [[np.float32, -1, (6, 3, 3)], 1, 10], + ] + for item in shape_format: + input_shape = item[0][2] + res, tmp_shape0 = self.register_tensor(item[0],item[1],item[2]) + cpu_input1, npu_input1 = self.create_det_tensor(res) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + input_shape = list(input_shape) + input_shape[0] = tmp_shape0 + self.assertRtolEqual(cpu_output, npu_output) + + def test_logdet_float16_shape_format(self, device): + def cpu_op_exec_fp16(input1): + input1 = input1.to(torch.float32) + output = torch.logdet(input1) + output = output.numpy() + output = output.astype(np.float16) + return output + + shape_format = [ + [[np.float16, -1, (9, 5, 5)],-2,2], + [[np.float16, -1, (60,4,4)],-10,12], + [[np.float16, -1, (12,5,5)], 5,10], + [[np.float16, -1, (14, 5, 5)], 0.9283381566708346 , 10], + [[np.float16, -1, (71, 2, 2)], 0.6234465730020081 , 13], + [[np.float16, -1, (10, 5, 5)], 0.7440899332166594 , 1], + [[np.float16, -1, (13, 5, 5)], 0.9790231845699171 , 9], + [[np.float16, -1, (10, 7, 7)], 0.7852605507867441 , 8], + [[np.float16, -1, (18, 2, 2)], 0.8758750778305631 , 9], + [[np.float16, -1, (10, 6, 6)], 0.7570129172808612 , 5], + [[np.float16, -1, (7, 7, 7)], 0 , 2], + [[np.float16, -1, (9, 5, 5)], 1 , 2], + [[np.float16, -1, (12, 4, 4)], 0.7349293532899402 , 19], + [[np.float16, -1, (15, 8, 8)], 0.9583309378850908 , 3], + [[np.float16, -1, (11, 2, 2)],0.3560076034004038 , 25], + ] + + for item in shape_format: + input_shape = item[0][2] + res, tmp_shape0 = self.register_tensor_fp16(item[0],item[1],item[2]) + cpu_input1,npu_input1 = self.create_det_tensor(res) + cpu_output = cpu_op_exec_fp16(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + input_shape = list(input_shape) + input_shape[0] = tmp_shape0 + self.assertRtolEqual(cpu_output, npu_output,prec=1e-3) + + +instantiate_device_type_tests(TestLogDet, globals(), except_for='cpu') + +if __name__ == "__main__": + torch.npu.set_device("npu:6") + run_tests() diff --git a/test/test_npu/test_logical_and.py b/test/test_npu/test_logical_and.py index 743b2e2594b7d1b6e887857006831f8f0347067f..2964665c31bb22250f4f307c5b27cfdd310a9466 100644 --- a/test/test_npu/test_logical_and.py +++ b/test/test_npu/test_logical_and.py @@ -1,176 +1,176 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLogicalAnd(TestCase): - - def generate_single_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - #modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - - return npu_input1, npu_input2 - - def generate_three_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input3 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - #modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - npu_input3 = torch.from_numpy(input3) - - return npu_input1, npu_input2, npu_input3 - - def cpu_op_exec(self, input1, input2): - output = torch.logical_and(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.logical_and(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_out(self, input1, input2, input3): - torch.logical_and(input1, input2, out=input3) - output = input3.numpy() - return output - - def npu_op_exec_out(self, input1, input2, input3): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = input3.to("npu") - torch.logical_and(input1, input2, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_(self, input1, input2): - output = torch.Tensor.logical_and_(input1, input2) - output = output.numpy() - return output - - def npu_op_exec_(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.Tensor.logical_and_(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def test_logical_and_int8(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.int8) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_uint8(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.uint8) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_int32(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_bool(self, device): - npu_input1, npu_input2 = self.generate_data(0, 2, (2, 5), np.bool) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_float16(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_float32(self, device): - npu_input1, npu_input2 = self.generate_data(0, 5, (2, 5), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_or_float32_broadcast(self, device): - npu_input1 = self.generate_single_data(0, 2, (4, 3, 1), np.float32) - npu_input2 = self.generate_single_data(0, 2, (4, 1, 5), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_inplace_uint8(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.uint8) - cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_inplace_int8(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.int8) - cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_inplace_int32(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.int32) - cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_inplace_bool(self, device): - npu_input1, npu_input2 = self.generate_data(0, 2, (2, 5), np.bool) - cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_inplace_float16(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.float16) - cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_inplace_float32(self, device): - npu_input1, npu_input2 = self.generate_data(0, 5, (2, 5), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestLogicalAnd, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:0") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLogicalAnd(TestCase): + + def generate_single_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + #modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + + return npu_input1, npu_input2 + + def generate_three_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input3 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + #modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + npu_input3 = torch.from_numpy(input3) + + return npu_input1, npu_input2, npu_input3 + + def cpu_op_exec(self, input1, input2): + output = torch.logical_and(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.logical_and(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_out(self, input1, input2, input3): + torch.logical_and(input1, input2, out=input3) + output = input3.numpy() + return output + + def npu_op_exec_out(self, input1, input2, input3): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = input3.to("npu") + torch.logical_and(input1, input2, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_(self, input1, input2): + output = torch.Tensor.logical_and_(input1, input2) + output = output.numpy() + return output + + def npu_op_exec_(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.Tensor.logical_and_(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def test_logical_and_int8(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.int8) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_uint8(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.uint8) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_int32(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_bool(self, device): + npu_input1, npu_input2 = self.generate_data(0, 2, (2, 5), np.bool) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_float16(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_float32(self, device): + npu_input1, npu_input2 = self.generate_data(0, 5, (2, 5), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_or_float32_broadcast(self, device): + npu_input1 = self.generate_single_data(0, 2, (4, 3, 1), np.float32) + npu_input2 = self.generate_single_data(0, 2, (4, 1, 5), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_inplace_uint8(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.uint8) + cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_inplace_int8(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.int8) + cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_inplace_int32(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.int32) + cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_inplace_bool(self, device): + npu_input1, npu_input2 = self.generate_data(0, 2, (2, 5), np.bool) + cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_inplace_float16(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 5), np.float16) + cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_inplace_float32(self, device): + npu_input1, npu_input2 = self.generate_data(0, 5, (2, 5), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestLogicalAnd, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:0") + run_tests() diff --git a/test/test_npu/test_masked_scale.py b/test/test_npu/test_masked_scale.py index 85c26cc1307fb4d2cbea647faa40d2ad1f79dece..386f9136fa625ce03d8fa9f4f4cd69637762fd78 100644 --- a/test/test_npu/test_masked_scale.py +++ b/test/test_npu/test_masked_scale.py @@ -1,79 +1,79 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestMaskedScale(TestCase): - def generate_data(self, dtype, shape, min_d, max_d): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - cpu_input = input1 - npu_input = torch.from_numpy(input1).to("npu") - return cpu_input, npu_input - - def generate_mask(self, shape): - mask = torch.empty(shape,dtype=torch.int8).random_(2) - cpu_mask = mask.numpy() - return cpu_mask, mask - - def dynamic_generate_data(self, data_type): - format_list = [] - shape_range = [2,5] - min_value_range = [-100, 0, 1] - max_value_range = [100, 1, 1000] - for shape in shape_range: - for min_v, max_v in zip(min_value_range, max_value_range): - shape_v = [np.random.randint(1, 50) for _ in range(shape)] - format_list.append( - [data_type, shape_v, min_v, max_v] - ) - return format_list - - def numpy_op_exec_masked_scale(self, input1, mask, value): - res = input1 * mask * value - return res - - def npu_op_exec_masked_scale(self, input1, mask, value): - input1 = input1.npu() - mask = mask.npu() - value = torch.tensor(value) - res = torch._masked_scale(input1, mask, value) - res = res.to("cpu") - res = res.detach().numpy() - return res - - def test_masked_scale_format_fp16(self,device): - self._test_masked_scale_format(device, np.float16) - - def test_masked_scale_format_fp32(self,device): - self._test_masked_scale_format(device, np.float32) - - def _test_masked_scale_format(self, device, dtype): - format_list = self.dynamic_generate_data(dtype) - for item in format_list: - cpu_input, npu_input = self.generate_data(*item) - cpu_mask, npu_mask = self.generate_mask(item[1]) - scale = np.random.uniform(0,1) - cpu_output = self.numpy_op_exec_masked_scale(cpu_input,cpu_mask,scale) - npu_output = self.npu_op_exec_masked_scale(npu_input,npu_mask,scale) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestMaskedScale, globals(), except_for='cpu') -if __name__ == '__main__': - torch.npu.set_device("npu:0") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestMaskedScale(TestCase): + def generate_data(self, dtype, shape, min_d, max_d): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + cpu_input = input1 + npu_input = torch.from_numpy(input1).to("npu") + return cpu_input, npu_input + + def generate_mask(self, shape): + mask = torch.empty(shape,dtype=torch.int8).random_(2) + cpu_mask = mask.numpy() + return cpu_mask, mask + + def dynamic_generate_data(self, data_type): + format_list = [] + shape_range = [2,5] + min_value_range = [-100, 0, 1] + max_value_range = [100, 1, 1000] + for shape in shape_range: + for min_v, max_v in zip(min_value_range, max_value_range): + shape_v = [np.random.randint(1, 50) for _ in range(shape)] + format_list.append( + [data_type, shape_v, min_v, max_v] + ) + return format_list + + def numpy_op_exec_masked_scale(self, input1, mask, value): + res = input1 * mask * value + return res + + def npu_op_exec_masked_scale(self, input1, mask, value): + input1 = input1.npu() + mask = mask.npu() + value = torch.tensor(value) + res = torch._masked_scale(input1, mask, value) + res = res.to("cpu") + res = res.detach().numpy() + return res + + def test_masked_scale_format_fp16(self,device): + self._test_masked_scale_format(device, np.float16) + + def test_masked_scale_format_fp32(self,device): + self._test_masked_scale_format(device, np.float32) + + def _test_masked_scale_format(self, device, dtype): + format_list = self.dynamic_generate_data(dtype) + for item in format_list: + cpu_input, npu_input = self.generate_data(*item) + cpu_mask, npu_mask = self.generate_mask(item[1]) + scale = np.random.uniform(0,1) + cpu_output = self.numpy_op_exec_masked_scale(cpu_input,cpu_mask,scale) + npu_output = self.npu_op_exec_masked_scale(npu_input,npu_mask,scale) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestMaskedScale, globals(), except_for='cpu') +if __name__ == '__main__': + torch.npu.set_device("npu:0") + run_tests() diff --git a/test/test_npu/test_miopen_depthwise_convolution.py b/test/test_npu/test_miopen_depthwise_convolution.py index 1436b7f90767e227a0c32f0f310a409d519ef71a..fdf6ac202f6b1b02f725b4ab14681c288bdf3e18 100644 --- a/test/test_npu/test_miopen_depthwise_convolution.py +++ b/test/test_npu/test_miopen_depthwise_convolution.py @@ -1,197 +1,197 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestMiopenDepthwiseConvolution(TestCase): - - - def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True): - input1 = input - weight1 = weight - - bias1 = False - if bias != None: - bias1 = True - - m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias1, groups=in_channels) - m1.weight.data = weight1 - - cpuOutput = m1(input1) - tmp = torch.ones_like(cpuOutput) - - return cpuOutput - - def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True): - input1 = input - weight1 = weight - - - bias1 = False - if bias != None: - bias1 = True - - m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias1, groups=in_channels) - m1.weight.data = weight1 - m1 = m1.to("npu") - npuOutput = m1(input1) - npuOutput = npuOutput.to("cpu") - tmp = torch.ones_like(npuOutput) - return npuOutput - - def test_miopen_depthwise_convolution_input_range1(self, device): - shape_format = [ # input, weight, padding, stride, dilation, bias - [[np.float16, 3, [4, 3, 5, 5]], [np.float16, 0, [3, 1, 2, 2]], 0, 1, 1, None], - ] - - for item in shape_format: - - input_cpu, input_npu = create_common_tensor(item[0],-65504.0,65504.0) - input_cpu1, input_npu1 = create_common_tensor(item[0],-0.000030517578125,0.000030517578125) - input_cpu2, input_npu2 = create_common_tensor(item[0],-3402823500.0,3402823500.0) - input_cpu3, input_npu3 = create_common_tensor(item[0],-0.001953125,0.001953125) - - if input_cpu.dtype == torch.float16: - input_cpu = input_cpu.to(torch.float32) - weight_cpu, weight_npu = create_common_tensor(item[1], 0,1 ) - if weight_cpu.dtype == torch.float16: - weight_cpu = weight_cpu.to(torch.float32) - - if input_cpu1.dtype == torch.float16: - input_cpu1 = input_cpu1.to(torch.float32) - weight_cpu1, weight_npu1 = create_common_tensor(item[1], 0,1 ) - if weight_cpu1.dtype == torch.float16: - weight_cpu1 = weight_cpu1.to(torch.float32) - - if input_cpu2.dtype == torch.float16: - input_cpu2 = input_cpu2.to(torch.float32) - weight_cpu2, weight_npu2 = create_common_tensor(item[1], 0,1 ) - if weight_cpu2.dtype == torch.float16: - weight_cpu2 = weight_cpu2.to(torch.float32) - - if input_cpu3.dtype == torch.float16: - input_cpu3 = input_cpu3.to(torch.float32) - weight_cpu3, weight_npu3 = create_common_tensor(item[1], 0,1 ) - if weight_cpu3.dtype == torch.float16: - weight_cpu3= weight_cpu3.to(torch.float32) - - - kernel_size = (item[1][2][2], item[1][2][3]) - cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - weight_npu = weight_npu.to("cpu") - npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - cpu_output = cpu_output.to(npu_output.dtype) - - - cpu_output1 = self.op_exec_cpu(input_cpu1, weight_cpu1, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - weight_npu1 = weight_npu1.to("cpu") - npu_output1 = self.op_exec_npu(input_npu1, weight_npu1, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - cpu_output1 = cpu_output1.to(npu_output1.dtype) - - - cpu_output2 = self.op_exec_cpu(input_cpu2, weight_cpu2, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - weight_npu2 = weight_npu2.to("cpu") - npu_output2 = self.op_exec_npu(input_npu2, weight_npu2, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - cpu_output2 = cpu_output2.to(npu_output2.dtype) - - - cpu_output3 = self.op_exec_cpu(input_cpu3, weight_cpu3, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - weight_npu3 = weight_npu3.to("cpu") - npu_output3 = self.op_exec_npu(input_npu3, weight_npu3, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - cpu_output3 = cpu_output3.to(npu_output3.dtype) - - - print("===========cpu_output============") - print(cpu_output) - print("===========cpu_output1============") - print(cpu_output1) - print("===========cpu_output2============") - print(cpu_output2) - print("===========cpu_output3============") - print(cpu_output3) - - print("===========npu_output============") - print(npu_output) - print("===========npu_output1============") - print(npu_output1) - print("===========npu_output2============") - print(npu_output2) - print("===========npu_output3============") - print(npu_output3) - - - print("===========cpu_input&&npu_input==================") - print(input_cpu) - - self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) - - def test_miopen_depthwise_convolution_shape_format(self, device): - shape_format = [ # input, weight, padding, stride, dilation, bias - [[np.float16, 3, [256, 32, 112, 112]], [np.float16, 0, [32, 1, 3, 3]], 0, 1, 1, None], - [[np.float16, 0, [1024, 116, 28, 28]], [np.float16, 0, [116, 1, 3, 3]], 1, [2, 2], 1, None], - [[np.float16, 0, [1024, 232, 14, 14]], [np.float16, 0, [232, 1, 3, 3]], 1, [2, 2], 1, None], - [[np.float16, 3, [1024, 232, 7, 7]], [np.float16, 0, [232, 1, 3, 3]], 1, 1, 1, None], - [[np.float16, 3, [1024, 24, 56, 56]], [np.float16, 0, [24, 1, 3, 3]], 1, [2, 2], 1, None], - [[np.float16, 3, [1024, 116, 28, 28]], [np.float16, 0, [116, 1, 3, 3]], 1, [2, 2], 1, None], - [[np.float16, 3, [1024, 232, 14, 14]], [np.float16, 0, [232, 1, 3, 3]], 1, [2, 2], 1, None], - ] - - for item in shape_format: - - input_cpu, input_npu = create_common_tensor(item[0],-65504.0,65504.0) - if input_cpu.dtype == torch.float16: - input_cpu = input_cpu.to(torch.float32) - weight_cpu, weight_npu = create_common_tensor(item[1], 0,1 ) - if weight_cpu.dtype == torch.float16: - weight_cpu = weight_cpu.to(torch.float32) - kernel_size = (item[1][2][2], item[1][2][3]) - cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - weight_npu = weight_npu.to("cpu") - npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - cpu_output = cpu_output.to(npu_output.dtype) - - print("===========cpu_output============") - print(cpu_output) - - print("===========npu_output============") - print(npu_output) - - print("===========cpu_input&&npu_input==================") - print(input_cpu) - - self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) - - - -instantiate_device_type_tests(TestMiopenDepthwiseConvolution, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestMiopenDepthwiseConvolution(TestCase): + + + def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True): + input1 = input + weight1 = weight + + bias1 = False + if bias != None: + bias1 = True + + m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias1, groups=in_channels) + m1.weight.data = weight1 + + cpuOutput = m1(input1) + tmp = torch.ones_like(cpuOutput) + + return cpuOutput + + def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True): + input1 = input + weight1 = weight + + + bias1 = False + if bias != None: + bias1 = True + + m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias1, groups=in_channels) + m1.weight.data = weight1 + m1 = m1.to("npu") + npuOutput = m1(input1) + npuOutput = npuOutput.to("cpu") + tmp = torch.ones_like(npuOutput) + return npuOutput + + def test_miopen_depthwise_convolution_input_range1(self, device): + shape_format = [ # input, weight, padding, stride, dilation, bias + [[np.float16, 3, [4, 3, 5, 5]], [np.float16, 0, [3, 1, 2, 2]], 0, 1, 1, None], + ] + + for item in shape_format: + + input_cpu, input_npu = create_common_tensor(item[0],-65504.0,65504.0) + input_cpu1, input_npu1 = create_common_tensor(item[0],-0.000030517578125,0.000030517578125) + input_cpu2, input_npu2 = create_common_tensor(item[0],-3402823500.0,3402823500.0) + input_cpu3, input_npu3 = create_common_tensor(item[0],-0.001953125,0.001953125) + + if input_cpu.dtype == torch.float16: + input_cpu = input_cpu.to(torch.float32) + weight_cpu, weight_npu = create_common_tensor(item[1], 0,1 ) + if weight_cpu.dtype == torch.float16: + weight_cpu = weight_cpu.to(torch.float32) + + if input_cpu1.dtype == torch.float16: + input_cpu1 = input_cpu1.to(torch.float32) + weight_cpu1, weight_npu1 = create_common_tensor(item[1], 0,1 ) + if weight_cpu1.dtype == torch.float16: + weight_cpu1 = weight_cpu1.to(torch.float32) + + if input_cpu2.dtype == torch.float16: + input_cpu2 = input_cpu2.to(torch.float32) + weight_cpu2, weight_npu2 = create_common_tensor(item[1], 0,1 ) + if weight_cpu2.dtype == torch.float16: + weight_cpu2 = weight_cpu2.to(torch.float32) + + if input_cpu3.dtype == torch.float16: + input_cpu3 = input_cpu3.to(torch.float32) + weight_cpu3, weight_npu3 = create_common_tensor(item[1], 0,1 ) + if weight_cpu3.dtype == torch.float16: + weight_cpu3= weight_cpu3.to(torch.float32) + + + kernel_size = (item[1][2][2], item[1][2][3]) + cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + weight_npu = weight_npu.to("cpu") + npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + cpu_output = cpu_output.to(npu_output.dtype) + + + cpu_output1 = self.op_exec_cpu(input_cpu1, weight_cpu1, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + weight_npu1 = weight_npu1.to("cpu") + npu_output1 = self.op_exec_npu(input_npu1, weight_npu1, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + cpu_output1 = cpu_output1.to(npu_output1.dtype) + + + cpu_output2 = self.op_exec_cpu(input_cpu2, weight_cpu2, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + weight_npu2 = weight_npu2.to("cpu") + npu_output2 = self.op_exec_npu(input_npu2, weight_npu2, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + cpu_output2 = cpu_output2.to(npu_output2.dtype) + + + cpu_output3 = self.op_exec_cpu(input_cpu3, weight_cpu3, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + weight_npu3 = weight_npu3.to("cpu") + npu_output3 = self.op_exec_npu(input_npu3, weight_npu3, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + cpu_output3 = cpu_output3.to(npu_output3.dtype) + + + print("===========cpu_output============") + print(cpu_output) + print("===========cpu_output1============") + print(cpu_output1) + print("===========cpu_output2============") + print(cpu_output2) + print("===========cpu_output3============") + print(cpu_output3) + + print("===========npu_output============") + print(npu_output) + print("===========npu_output1============") + print(npu_output1) + print("===========npu_output2============") + print(npu_output2) + print("===========npu_output3============") + print(npu_output3) + + + print("===========cpu_input&&npu_input==================") + print(input_cpu) + + self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) + + def test_miopen_depthwise_convolution_shape_format(self, device): + shape_format = [ # input, weight, padding, stride, dilation, bias + [[np.float16, 3, [256, 32, 112, 112]], [np.float16, 0, [32, 1, 3, 3]], 0, 1, 1, None], + [[np.float16, 0, [1024, 116, 28, 28]], [np.float16, 0, [116, 1, 3, 3]], 1, [2, 2], 1, None], + [[np.float16, 0, [1024, 232, 14, 14]], [np.float16, 0, [232, 1, 3, 3]], 1, [2, 2], 1, None], + [[np.float16, 3, [1024, 232, 7, 7]], [np.float16, 0, [232, 1, 3, 3]], 1, 1, 1, None], + [[np.float16, 3, [1024, 24, 56, 56]], [np.float16, 0, [24, 1, 3, 3]], 1, [2, 2], 1, None], + [[np.float16, 3, [1024, 116, 28, 28]], [np.float16, 0, [116, 1, 3, 3]], 1, [2, 2], 1, None], + [[np.float16, 3, [1024, 232, 14, 14]], [np.float16, 0, [232, 1, 3, 3]], 1, [2, 2], 1, None], + ] + + for item in shape_format: + + input_cpu, input_npu = create_common_tensor(item[0],-65504.0,65504.0) + if input_cpu.dtype == torch.float16: + input_cpu = input_cpu.to(torch.float32) + weight_cpu, weight_npu = create_common_tensor(item[1], 0,1 ) + if weight_cpu.dtype == torch.float16: + weight_cpu = weight_cpu.to(torch.float32) + kernel_size = (item[1][2][2], item[1][2][3]) + cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + weight_npu = weight_npu.to("cpu") + npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + cpu_output = cpu_output.to(npu_output.dtype) + + print("===========cpu_output============") + print(cpu_output) + + print("===========npu_output============") + print(npu_output) + + print("===========cpu_input&&npu_input==================") + print(input_cpu) + + self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) + + + +instantiate_device_type_tests(TestMiopenDepthwiseConvolution, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_miopen_depthwise_convolution_backward.py b/test/test_npu/test_miopen_depthwise_convolution_backward.py index b0857b23055a4a4e3ba52c6dbe92e161c43a2557..9d557c2e3427d1b8cd613ae175e90b2528c70a4b 100644 --- a/test/test_npu/test_miopen_depthwise_convolution_backward.py +++ b/test/test_npu/test_miopen_depthwise_convolution_backward.py @@ -1,113 +1,113 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestMiopenDepthwiseConvolutionBackward(TestCase): - weight_grad = [] - input_grad = [] - - def getWeightGrad(self, grad): - self.weight_grad.append(grad.to("cpu")) - - def getInputGrad(self, grad): - self.input_grad.append(grad.to("cpu")) - - def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True): - input1 = input - weight1 = weight - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - - bias1 = False - if bias != None: - bias1 = True - - m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias1, groups=in_channels) - m1.weight.data = weight1 - m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) - cpuOutput = m1(input1) - tmp = torch.ones_like(cpuOutput) - cpuOutput.backward(tmp) - - return cpuOutput - - def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True): - input1 = input - weight1 = weight - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - - bias1 = False - if bias != None: - bias1 = True - - m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias1, groups=in_channels) - m1.weight.data = weight1 - m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) - m1 = m1.to("npu") - npuOutput = m1(input1) - npuOutput = npuOutput.to("cpu") - tmp = torch.ones_like(npuOutput) - npuOutput.backward(tmp) - - return npuOutput - - def test_miopen_depthwise_convolution_backward_shape_format(self, device): - shape_format = [ # input, weight, padding, stride, dilation, bias - [[np.float16, 3, [20, 3, 112, 112]], [np.float16, 0, [3, 1, 3, 3]], 0, 1, 1, None], - ] - - for item in shape_format: - self.weight_grad.clear() - self.input_grad.clear() - input_cpu, input_npu = create_common_tensor(item[0], 0, 10) - if input_cpu.dtype == torch.float16: - input_cpu = input_cpu.to(torch.float32) - weight_cpu, weight_npu = create_common_tensor(item[1], 0, 10) - if weight_cpu.dtype == torch.float16: - weight_cpu = weight_cpu.to(torch.float32) - kernel_size = (item[1][2][2], item[1][2][3]) - cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - weight_npu = weight_npu.to("cpu") - npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - cpu_output = cpu_output.to(npu_output.dtype) - - print("===========cpu_output============") - print(cpu_output) - - print("===========npu_output============") - print(npu_output) - - self.input_grad[0] = self.input_grad[0].to(self.input_grad[1].dtype) - self.weight_grad[0] = self.weight_grad[0].to(self.weight_grad[1].dtype) - - self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) - self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].numpy()) - self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].numpy()) - - -instantiate_device_type_tests(TestMiopenDepthwiseConvolutionBackward, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestMiopenDepthwiseConvolutionBackward(TestCase): + weight_grad = [] + input_grad = [] + + def getWeightGrad(self, grad): + self.weight_grad.append(grad.to("cpu")) + + def getInputGrad(self, grad): + self.input_grad.append(grad.to("cpu")) + + def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True): + input1 = input + weight1 = weight + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + + bias1 = False + if bias != None: + bias1 = True + + m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias1, groups=in_channels) + m1.weight.data = weight1 + m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) + cpuOutput = m1(input1) + tmp = torch.ones_like(cpuOutput) + cpuOutput.backward(tmp) + + return cpuOutput + + def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True): + input1 = input + weight1 = weight + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + + bias1 = False + if bias != None: + bias1 = True + + m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias1, groups=in_channels) + m1.weight.data = weight1 + m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) + m1 = m1.to("npu") + npuOutput = m1(input1) + npuOutput = npuOutput.to("cpu") + tmp = torch.ones_like(npuOutput) + npuOutput.backward(tmp) + + return npuOutput + + def test_miopen_depthwise_convolution_backward_shape_format(self, device): + shape_format = [ # input, weight, padding, stride, dilation, bias + [[np.float16, 3, [20, 3, 112, 112]], [np.float16, 0, [3, 1, 3, 3]], 0, 1, 1, None], + ] + + for item in shape_format: + self.weight_grad.clear() + self.input_grad.clear() + input_cpu, input_npu = create_common_tensor(item[0], 0, 10) + if input_cpu.dtype == torch.float16: + input_cpu = input_cpu.to(torch.float32) + weight_cpu, weight_npu = create_common_tensor(item[1], 0, 10) + if weight_cpu.dtype == torch.float16: + weight_cpu = weight_cpu.to(torch.float32) + kernel_size = (item[1][2][2], item[1][2][3]) + cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + weight_npu = weight_npu.to("cpu") + npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + cpu_output = cpu_output.to(npu_output.dtype) + + print("===========cpu_output============") + print(cpu_output) + + print("===========npu_output============") + print(npu_output) + + self.input_grad[0] = self.input_grad[0].to(self.input_grad[1].dtype) + self.weight_grad[0] = self.weight_grad[0].to(self.weight_grad[1].dtype) + + self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) + self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].numpy()) + self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].numpy()) + + +instantiate_device_type_tests(TestMiopenDepthwiseConvolutionBackward, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_ne.py b/test/test_npu/test_ne.py index 2e1cd67fa9c46a1e222aea71efa3b999151f6255..548c12ebef33cfc358eabd351fc05b36d27f489c 100644 --- a/test/test_npu/test_ne.py +++ b/test/test_npu/test_ne.py @@ -1,160 +1,160 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: utf-8 - -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestNe(TestCase): - - def cpu_op_exec_scalar(self, input1, other): - output = torch.ne(input1, other) - output = output.numpy() - return output - - def npu_op_exec_scalar(self,input1, other): - output = torch.ne(input1, other) - output1 = output.to("cpu") - output2 = output1.numpy() - return output2 - - def cpu_op_exec(self, input1, other): - output = torch.ne(input1, other) - output = output.numpy() - return output - - def npu_op_exec(self,input1, other): - output = torch.ne(input1, other) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_(self,input1, other): - torch.ne_(input1,other) - output = input1.numpy() - return output - - def npu_op_exec_(self,input1, other): - torch.ne_(input1, other) - output = input1.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_scalar_(self,input1, other): - torch.ne_(input1,other) - output = input1.numpy() - return output - - def npu_op_exec_scalar_(self,input1, other): - torch.ne_(input1, other) - output = input1.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_scalar_out(self,input1,other, out): - torch.ne(input1,other, out=out) - output = out.numpy() - return output - - def npu_op_exec_scalar_out(self,input1, other, out): - torch.ne(input1, other, out=out) - output = out.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_out(self,input1,other, out): - torch.ne(input1,other, out=out) - output = out.numpy() - return output - - def npu_op_exec_out(self,input1, other, out): - torch.ne(input1, other, out=out) - output = out.to("cpu") - output = output.numpy() - return output - - def test_ne_scalar_common_shape_format(self, device): - shape_format = [ - [[np.float32,0 , (2,4, 3)], 3], - [[np.float32, 3, (2, 3)], 2], - [[np.float32, 0, (3, 2)], 8], - [[np.int8, 0 , (4, 3)],3], - [[np.uint8, -1, (2,4, 3)],3], - [[np.int32, 0, (2, 6)],6] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) - cpu_output = self.cpu_op_exec_scalar(cpu_input1, item[1]) - npu_output = self.npu_op_exec_scalar(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_ne_common_shape_format(self, device): - shape_format = [ - [[np.float32,0 , (2, 4, 3)], [np.float32,0 , (2, 4, 3)]], - [[np.float32, 3, (2, 3)], [np.float32, 3, (2, 3)]], - [[np.float32, 0, (3, 2)], [np.float32, 0, (3, 2)]], - [[np.int8, 0 , (4, 3)], [np.int8, 0 , (4, 3)]], - [[np.uint8, -1, (2,4, 3)], [np.uint8, -1, (2,4, 3)]], - [[np.int32, 0, (2, 6)], [np.int32, 0, (2, 6)]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) - cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 10) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_ne_scalar_out_common_shape_format(self, device): - shape_format = [ - [[np.float32,0 , (2, 4, 3)], 2, [np.bool, 0 , (2, 4, 3)]], - [[np.float32, 3, (2, 3)], 3, [np.bool, -1, (2, 3)]], - [[np.float32, 0, (3, 2)], 4, [np.bool, 0, (3, 2)]], - - [[np.int8, 0 , (4, 3)], 5, [np.bool, 0 , (4, 3)]], - [[np.uint8, -1, (2,4, 3)], 6, [np.bool, -1, (2,4, 3)]], - [[np.int32, 0, (2, 6)], 7, [np.bool, 0, (2, 6)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) - cpu_out, npu_out = create_common_tensor(item[2], 1, 10) - cpu_output = self.cpu_op_exec_scalar_out(cpu_input1, item[1], cpu_out) - npu_output = self.npu_op_exec_scalar_out(npu_input1, item[1], npu_out) - self.assertRtolEqual(cpu_output, npu_output) - - def test_ne_out_common_shape_format(self, device): - shape_format = [ - [[np.float32,0 , (2, 4, 3)], [np.float32,0 , (2, 4, 3)], [np.bool, 0 , (2, 4, 3)]], - [[np.float32, 3, (2, 3)], [np.float32, 3, (2, 3)], [np.bool, -1, (2, 3)]], - [[np.float32, 0, (3, 2)], [np.float32, 0, (3, 2)], [np.bool, 0, (3, 2)]], - - [[np.int8, 0 , (4, 3)], [np.int8, 0 , (4, 3)], [np.bool, 0 , (4, 3)]], - [[np.uint8, -1, (2,4, 3)], [np.uint8, -1, (2,4, 3)], [np.bool, -1, (2,4, 3)]], - [[np.int32, 0, (2, 6)], [np.int32, 0, (2, 6)], [np.bool, 0, (2, 6)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) - cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 10) - cpu_out, npu_out = create_common_tensor(item[2], 1, 10) - cpu_output = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_out) - npu_output = self.npu_op_exec_out(npu_input1, npu_input2, npu_out) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestNe, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNe(TestCase): + + def cpu_op_exec_scalar(self, input1, other): + output = torch.ne(input1, other) + output = output.numpy() + return output + + def npu_op_exec_scalar(self,input1, other): + output = torch.ne(input1, other) + output1 = output.to("cpu") + output2 = output1.numpy() + return output2 + + def cpu_op_exec(self, input1, other): + output = torch.ne(input1, other) + output = output.numpy() + return output + + def npu_op_exec(self,input1, other): + output = torch.ne(input1, other) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_(self,input1, other): + torch.ne_(input1,other) + output = input1.numpy() + return output + + def npu_op_exec_(self,input1, other): + torch.ne_(input1, other) + output = input1.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_scalar_(self,input1, other): + torch.ne_(input1,other) + output = input1.numpy() + return output + + def npu_op_exec_scalar_(self,input1, other): + torch.ne_(input1, other) + output = input1.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_scalar_out(self,input1,other, out): + torch.ne(input1,other, out=out) + output = out.numpy() + return output + + def npu_op_exec_scalar_out(self,input1, other, out): + torch.ne(input1, other, out=out) + output = out.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_out(self,input1,other, out): + torch.ne(input1,other, out=out) + output = out.numpy() + return output + + def npu_op_exec_out(self,input1, other, out): + torch.ne(input1, other, out=out) + output = out.to("cpu") + output = output.numpy() + return output + + def test_ne_scalar_common_shape_format(self, device): + shape_format = [ + [[np.float32,0 , (2,4, 3)], 3], + [[np.float32, 3, (2, 3)], 2], + [[np.float32, 0, (3, 2)], 8], + [[np.int8, 0 , (4, 3)],3], + [[np.uint8, -1, (2,4, 3)],3], + [[np.int32, 0, (2, 6)],6] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) + cpu_output = self.cpu_op_exec_scalar(cpu_input1, item[1]) + npu_output = self.npu_op_exec_scalar(npu_input1, item[1]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_ne_common_shape_format(self, device): + shape_format = [ + [[np.float32,0 , (2, 4, 3)], [np.float32,0 , (2, 4, 3)]], + [[np.float32, 3, (2, 3)], [np.float32, 3, (2, 3)]], + [[np.float32, 0, (3, 2)], [np.float32, 0, (3, 2)]], + [[np.int8, 0 , (4, 3)], [np.int8, 0 , (4, 3)]], + [[np.uint8, -1, (2,4, 3)], [np.uint8, -1, (2,4, 3)]], + [[np.int32, 0, (2, 6)], [np.int32, 0, (2, 6)]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) + cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 10) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_ne_scalar_out_common_shape_format(self, device): + shape_format = [ + [[np.float32,0 , (2, 4, 3)], 2, [np.bool, 0 , (2, 4, 3)]], + [[np.float32, 3, (2, 3)], 3, [np.bool, -1, (2, 3)]], + [[np.float32, 0, (3, 2)], 4, [np.bool, 0, (3, 2)]], + + [[np.int8, 0 , (4, 3)], 5, [np.bool, 0 , (4, 3)]], + [[np.uint8, -1, (2,4, 3)], 6, [np.bool, -1, (2,4, 3)]], + [[np.int32, 0, (2, 6)], 7, [np.bool, 0, (2, 6)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) + cpu_out, npu_out = create_common_tensor(item[2], 1, 10) + cpu_output = self.cpu_op_exec_scalar_out(cpu_input1, item[1], cpu_out) + npu_output = self.npu_op_exec_scalar_out(npu_input1, item[1], npu_out) + self.assertRtolEqual(cpu_output, npu_output) + + def test_ne_out_common_shape_format(self, device): + shape_format = [ + [[np.float32,0 , (2, 4, 3)], [np.float32,0 , (2, 4, 3)], [np.bool, 0 , (2, 4, 3)]], + [[np.float32, 3, (2, 3)], [np.float32, 3, (2, 3)], [np.bool, -1, (2, 3)]], + [[np.float32, 0, (3, 2)], [np.float32, 0, (3, 2)], [np.bool, 0, (3, 2)]], + + [[np.int8, 0 , (4, 3)], [np.int8, 0 , (4, 3)], [np.bool, 0 , (4, 3)]], + [[np.uint8, -1, (2,4, 3)], [np.uint8, -1, (2,4, 3)], [np.bool, -1, (2,4, 3)]], + [[np.int32, 0, (2, 6)], [np.int32, 0, (2, 6)], [np.bool, 0, (2, 6)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) + cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 10) + cpu_out, npu_out = create_common_tensor(item[2], 1, 10) + cpu_output = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_out) + npu_output = self.npu_op_exec_out(npu_input1, npu_input2, npu_out) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestNe, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_neg.py b/test/test_npu/test_neg.py index 7a192768f0a57a50b9b264b11040c91cc790ea9a..6c98aa1ed59faf3c4073e12d938617e6e0f109a3 100644 --- a/test/test_npu/test_neg.py +++ b/test/test_npu/test_neg.py @@ -1,213 +1,213 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestNeg(TestCase): - def generate_single_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def cpu_op_exec(self, input1): - output = torch.neg(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - input1 = input1.to("npu") - output = torch.neg(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_neg_float16_1(self, device): - def cpu_op_exec_fp16(input1): - input1 = input1.to(torch.float32) - output = torch.neg(input1) - output = output.numpy() - output = output.astype(np.float16) - return output - - npu_input1 = self.generate_single_data(-2, 2, ((65535, 1, 1, 1)), np.float16) - cpu_output = cpu_op_exec_fp16(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float16_2(self, device): - def cpu_op_exec_fp16(input1): - input1 = input1.to(torch.float32) - output = torch.neg(input1) - output = output.numpy() - output = output.astype(np.float16) - return output - - npu_input1 = self.generate_single_data(-2, 2, ((1, 1, 1, 8192)), np.float16) - cpu_output = cpu_op_exec_fp16(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float16_3(self, device): - def cpu_op_exec_fp16(input1): - input1 = input1.to(torch.float32) - output = torch.neg(input1) - output = output.numpy() - output = output.astype(np.float16) - return output - - npu_input1 = self.generate_single_data(-2, 2, ((1, 1, 1, 65535)), np.float16) - cpu_output = cpu_op_exec_fp16(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float16_4(self, device): - def cpu_op_exec_fp16(input1): - input1 = input1.to(torch.float32) - output = torch.neg(input1) - output = output.numpy() - output = output.astype(np.float16) - return output - - npu_input1 = self.generate_single_data(-2, 2, ((1, 1, 1, 524288)), np.float16) - cpu_output = cpu_op_exec_fp16(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float16_5(self, device): - def cpu_op_exec_fp16(input1): - input1 = input1.to(torch.float32) - output = torch.neg(input1) - output = output.numpy() - output = output.astype(np.float16) - return output - - npu_input1 = self.generate_single_data(-2, 2, ((1, 1, 1, 786432)), np.float16) - cpu_output = cpu_op_exec_fp16(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_1(self, device): - npu_input1 = self.generate_single_data(-1.1754943508e-38, -1.1754943508e-38, ((1, 31, 149, 2)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_2(self, device): - npu_input1 = self.generate_single_data(-3402823500.0, 3402823500.0, ((1, 32, 31, 1)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertEqual(cpu_output, npu_output) - - def test_neg_float32_3(self, device): - npu_input1 = self.generate_single_data(-0.000030517578125, 0.000030517578125, ((2, 32, 149, 31)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_4(self, device): - npu_input1 = self.generate_single_data(3402823500, 3402800000, ((128)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_5(self, device): - npu_input1 = self.generate_single_data(-9.313225746154785e-10, 9.313225746154785e-10, ((184965, 1)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_6(self, device): - npu_input1 = self.generate_single_data(-3402823500.0, -3402823500.0, ((1, 31, 149, 2)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_7(self, device): - npu_input1 = self.generate_single_data(-3402823500.0, 3402823500.0, ((1, 31, 149, 2)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_8(self, device): - npu_input1 = self.generate_single_data(-9.313225746154785e-10, 9.313225746154785e-10, ((1, 31, 149, 2)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_9(self, device): - npu_input1 = self.generate_single_data(-3402823500.0, -3402823500.0, ((1, 31, 149, 2)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_10(self, device): - npu_input1 = self.generate_single_data(-0.000000000000000000000000000000000000011754943508, - 0.000000000000000000000000000000000000011754943508, ((2, 31, 149, 2)), - np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_11(self, device): - npu_input1 = self.generate_single_data(0.000000000000000000000000000000000000011754943508, - 0.000000000000000000000000000000000000011754943508, ((4, 31, 149, 2)), - np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_12(self, device): - npu_input1 = self.generate_single_data(-0.000000000000000000000000000000000000011754943508, - -0.000000000000000000000000000000000000011754943508, ((2048, 31, 1, 2)), - np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_float32_13(self, device): - npu_input1 = self.generate_single_data(-0.000000000000000000000000000000000000011754943508, - 0.000000000000000000000000000000000000011754943508, ((8, 7, 149)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_int32_1(self, device): - npu_input1 = self.generate_single_data(0, 100, (2, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_int32_2(self, device): - npu_input1 = self.generate_single_data(2147483647, 2147483648, (2, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_neg_int32_3(self, device): - npu_input1 = self.generate_single_data(-2147483648, -2147483647, (2, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestNeg, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:5") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestNeg(TestCase): + def generate_single_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def cpu_op_exec(self, input1): + output = torch.neg(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + input1 = input1.to("npu") + output = torch.neg(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def test_neg_float16_1(self, device): + def cpu_op_exec_fp16(input1): + input1 = input1.to(torch.float32) + output = torch.neg(input1) + output = output.numpy() + output = output.astype(np.float16) + return output + + npu_input1 = self.generate_single_data(-2, 2, ((65535, 1, 1, 1)), np.float16) + cpu_output = cpu_op_exec_fp16(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float16_2(self, device): + def cpu_op_exec_fp16(input1): + input1 = input1.to(torch.float32) + output = torch.neg(input1) + output = output.numpy() + output = output.astype(np.float16) + return output + + npu_input1 = self.generate_single_data(-2, 2, ((1, 1, 1, 8192)), np.float16) + cpu_output = cpu_op_exec_fp16(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float16_3(self, device): + def cpu_op_exec_fp16(input1): + input1 = input1.to(torch.float32) + output = torch.neg(input1) + output = output.numpy() + output = output.astype(np.float16) + return output + + npu_input1 = self.generate_single_data(-2, 2, ((1, 1, 1, 65535)), np.float16) + cpu_output = cpu_op_exec_fp16(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float16_4(self, device): + def cpu_op_exec_fp16(input1): + input1 = input1.to(torch.float32) + output = torch.neg(input1) + output = output.numpy() + output = output.astype(np.float16) + return output + + npu_input1 = self.generate_single_data(-2, 2, ((1, 1, 1, 524288)), np.float16) + cpu_output = cpu_op_exec_fp16(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float16_5(self, device): + def cpu_op_exec_fp16(input1): + input1 = input1.to(torch.float32) + output = torch.neg(input1) + output = output.numpy() + output = output.astype(np.float16) + return output + + npu_input1 = self.generate_single_data(-2, 2, ((1, 1, 1, 786432)), np.float16) + cpu_output = cpu_op_exec_fp16(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_1(self, device): + npu_input1 = self.generate_single_data(-1.1754943508e-38, -1.1754943508e-38, ((1, 31, 149, 2)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_2(self, device): + npu_input1 = self.generate_single_data(-3402823500.0, 3402823500.0, ((1, 32, 31, 1)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertEqual(cpu_output, npu_output) + + def test_neg_float32_3(self, device): + npu_input1 = self.generate_single_data(-0.000030517578125, 0.000030517578125, ((2, 32, 149, 31)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_4(self, device): + npu_input1 = self.generate_single_data(3402823500, 3402800000, ((128)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_5(self, device): + npu_input1 = self.generate_single_data(-9.313225746154785e-10, 9.313225746154785e-10, ((184965, 1)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_6(self, device): + npu_input1 = self.generate_single_data(-3402823500.0, -3402823500.0, ((1, 31, 149, 2)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_7(self, device): + npu_input1 = self.generate_single_data(-3402823500.0, 3402823500.0, ((1, 31, 149, 2)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_8(self, device): + npu_input1 = self.generate_single_data(-9.313225746154785e-10, 9.313225746154785e-10, ((1, 31, 149, 2)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_9(self, device): + npu_input1 = self.generate_single_data(-3402823500.0, -3402823500.0, ((1, 31, 149, 2)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_10(self, device): + npu_input1 = self.generate_single_data(-0.000000000000000000000000000000000000011754943508, + 0.000000000000000000000000000000000000011754943508, ((2, 31, 149, 2)), + np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_11(self, device): + npu_input1 = self.generate_single_data(0.000000000000000000000000000000000000011754943508, + 0.000000000000000000000000000000000000011754943508, ((4, 31, 149, 2)), + np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_12(self, device): + npu_input1 = self.generate_single_data(-0.000000000000000000000000000000000000011754943508, + -0.000000000000000000000000000000000000011754943508, ((2048, 31, 1, 2)), + np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_float32_13(self, device): + npu_input1 = self.generate_single_data(-0.000000000000000000000000000000000000011754943508, + 0.000000000000000000000000000000000000011754943508, ((8, 7, 149)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_int32_1(self, device): + npu_input1 = self.generate_single_data(0, 100, (2, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_int32_2(self, device): + npu_input1 = self.generate_single_data(2147483647, 2147483648, (2, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_neg_int32_3(self, device): + npu_input1 = self.generate_single_data(-2147483648, -2147483647, (2, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestNeg, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:5") + run_tests() diff --git a/test/test_npu/test_network_ops/test___ior__.py b/test/test_npu/test_network_ops/test___ior__.py index c28a0bd8e0eebc166ed817d421c681843417f3d4..20ce8bca8a7c6139ae6afa8335a3aadde9255f3e 100644 --- a/test/test_npu/test_network_ops/test___ior__.py +++ b/test/test_npu/test_network_ops/test___ior__.py @@ -1,226 +1,226 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestIor(TestCase): - #pylint: disable=unused-argument - def generate_bool_data(self, shape): - input1 = np.random.uniform(0, 1, shape).astype(np.float32) - input1 = input1 < 0.5 - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - - return npu_input1, npu_input2 - - def generate_single_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - return npu_input1 - - def generate_int_scalar(self, min_d, max_d): - scalar = np.random.randint(min_d, max_d) - return scalar - - def cpu_op_exec(self, input1, input2): - output = input1.__ior__(input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = input1.__ior__(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar(self, input1, input2): - input1 = input1.to("npu") - output = input1.__ior__(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def test___ior___bool(self, device): - npu_input1 = self.generate_bool_data((1, 31, 149, 2)) - npu_input2 = self.generate_bool_data((1, 31, 149, 2)) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___bool_scalar(self, device): - npu_input1 = self.generate_bool_data((1, 31, 149, 2)) - npu_input2 = False - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec_scalar(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___uint8(self, device): - npu_input1, npu_input2 = self.generate_data(0, 255, (1, 31, 149, 2), np.uint8) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int8(self, device): - npu_input1, npu_input2 = self.generate_data(-128, 127, (1, 31, 149, 2), np.int8) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_001(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, -2147483648, (1, 31, 149, 2), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_002(self, device): - npu_input1, npu_input2 = self.generate_data(2147483647, 2147483647, (128), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_003(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (184965, 1), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_004(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1, 31, 149, 2), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_005(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (2, 31, 149, 2), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_006(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (4, 31, 149, 2), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_007(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (2048, 31, 1, 2), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_008(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (8, 7, 149), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_009(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (65535,1,1,1), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_010(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,8192), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_011(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,16384), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_012(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,32768), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_013(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,65535), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_014(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,131072), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_015(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,196608), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_016(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,262144), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_017(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,393216), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_018(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,524288), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_019(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,655360), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int32_020(self, device): - npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,786432), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test___ior___int_scalar(self, device): - npu_input1 = self.generate_single_data(-2147483648, 2147483647, (1,31,149,2), np.int32) - npu_input2 = self.generate_int_scalar(-2147483648, 2147483647) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec_scalar(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestIor, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestIor(TestCase): + #pylint: disable=unused-argument + def generate_bool_data(self, shape): + input1 = np.random.uniform(0, 1, shape).astype(np.float32) + input1 = input1 < 0.5 + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + + return npu_input1, npu_input2 + + def generate_single_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + return npu_input1 + + def generate_int_scalar(self, min_d, max_d): + scalar = np.random.randint(min_d, max_d) + return scalar + + def cpu_op_exec(self, input1, input2): + output = input1.__ior__(input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = input1.__ior__(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar(self, input1, input2): + input1 = input1.to("npu") + output = input1.__ior__(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def test___ior___bool(self, device): + npu_input1 = self.generate_bool_data((1, 31, 149, 2)) + npu_input2 = self.generate_bool_data((1, 31, 149, 2)) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___bool_scalar(self, device): + npu_input1 = self.generate_bool_data((1, 31, 149, 2)) + npu_input2 = False + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec_scalar(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___uint8(self, device): + npu_input1, npu_input2 = self.generate_data(0, 255, (1, 31, 149, 2), np.uint8) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int8(self, device): + npu_input1, npu_input2 = self.generate_data(-128, 127, (1, 31, 149, 2), np.int8) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_001(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, -2147483648, (1, 31, 149, 2), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_002(self, device): + npu_input1, npu_input2 = self.generate_data(2147483647, 2147483647, (128), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_003(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (184965, 1), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_004(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1, 31, 149, 2), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_005(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (2, 31, 149, 2), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_006(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (4, 31, 149, 2), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_007(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (2048, 31, 1, 2), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_008(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (8, 7, 149), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_009(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (65535,1,1,1), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_010(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,8192), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_011(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,16384), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_012(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,32768), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_013(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,65535), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_014(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,131072), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_015(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,196608), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_016(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,262144), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_017(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,393216), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_018(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,524288), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_019(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,655360), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int32_020(self, device): + npu_input1, npu_input2 = self.generate_data(-2147483648, 2147483647, (1,1,1,786432), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test___ior___int_scalar(self, device): + npu_input1 = self.generate_single_data(-2147483648, 2147483647, (1,31,149,2), np.int32) + npu_input2 = self.generate_int_scalar(-2147483648, 2147483647) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec_scalar(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestIor, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_abs.py b/test/test_npu/test_network_ops/test_abs.py old mode 100644 new mode 100755 index 163c3fe304d4e2710bfa19cc614912c2ed84e126..607a7ea4e58454da14d8bb6667dfff0ffb56dd95 --- a/test/test_npu/test_network_ops/test_abs.py +++ b/test/test_npu/test_network_ops/test_abs.py @@ -1,64 +1,64 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestAbs(TestCase): - def cpu_op_exec(self, input): - output = torch.abs(input) - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = torch.abs(input) - output = output.to("cpu") - output = output.numpy() - return output - - def test_abs_shape_format_fp16(self, device): - format_list = [0, 3] - shape_list = [[5], [5, 10], [1, 3, 2], [52, 15, 15, 20]] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - # print(item) - cpu_input, npu_input = create_common_tensor(item, -10, 10) - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_abs_shape_format_fp32(self, device): - format_list = [0, 3] - shape_list = [[5], [5, 10], [1, 3, 2], [52, 15, 15, 20]] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - # print(item) - cpu_input, npu_input = create_common_tensor(item, -10, 10) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestAbs, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestAbs(TestCase): + def cpu_op_exec(self, input): + output = torch.abs(input) + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = torch.abs(input) + output = output.to("cpu") + output = output.numpy() + return output + + def test_abs_shape_format_fp16(self, device): + format_list = [0, 3] + shape_list = [[5], [5, 10], [1, 3, 2], [52, 15, 15, 20]] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + # print(item) + cpu_input, npu_input = create_common_tensor(item, -10, 10) + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_abs_shape_format_fp32(self, device): + format_list = [0, 3] + shape_list = [[5], [5, 10], [1, 3, 2], [52, 15, 15, 20]] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + # print(item) + cpu_input, npu_input = create_common_tensor(item, -10, 10) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestAbs, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_adaptive_avg_pool1d.py b/test/test_npu/test_network_ops/test_adaptive_avg_pool1d.py index 662cae2af3231941d335f5aa27f24af512b06a66..16e66b88372aff6c6824c1760d88bb5a77da3016 100644 --- a/test/test_npu/test_network_ops/test_adaptive_avg_pool1d.py +++ b/test/test_npu/test_network_ops/test_adaptive_avg_pool1d.py @@ -1,63 +1,63 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestAdaptiveAvgPool1d(TestCase): - def cpu_op_exec(self, input, output_size): - m = nn.AdaptiveAvgPool1d(output_size) - output= m(input) - return output.numpy() - - def npu_op_exec(self, input, output_size): - m = nn.AdaptiveAvgPool1d(output_size).npu() - output = m(input) - return output.cpu().numpy() - - def test_AdaptiveAvgPool1d_shape_format_fp16(self, device): - shape_format = [ - [np.float16, 0, (64, 10, 16)], - [np.float16, -1, (256, 2048, 8)], - [np.float16, 3, (32, 16, 16)] - ] - output_list = [(4), (3)] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 10) - for output_size in output_list: - cpu_output = self.cpu_op_exec(cpu_input, output_size) - npu_output = self.npu_op_exec(npu_input, output_size) - self.assertRtolEqual(cpu_output, npu_output, prec16=0.002) - - def test_AdaptiveAvgPool1d_shape_format_fp32(self, device): - shape_format = [ - [np.float32, 0, (64, 10, 16)], - [np.float32, -1, (256, 2048, 8)], - [np.float32, 3, (32, 16, 16)] - ] - output_list = [(4), (3), (1)] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 10) - for output_size in output_list: - cpu_output = self.cpu_op_exec(cpu_input, output_size) - npu_output = self.npu_op_exec(npu_input, output_size) - self.assertRtolEqual(cpu_output, npu_output, 0.001) - -instantiate_device_type_tests(TestAdaptiveAvgPool1d, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestAdaptiveAvgPool1d(TestCase): + def cpu_op_exec(self, input, output_size): + m = nn.AdaptiveAvgPool1d(output_size) + output= m(input) + return output.numpy() + + def npu_op_exec(self, input, output_size): + m = nn.AdaptiveAvgPool1d(output_size).npu() + output = m(input) + return output.cpu().numpy() + + def test_AdaptiveAvgPool1d_shape_format_fp16(self, device): + shape_format = [ + [np.float16, 0, (64, 10, 16)], + [np.float16, -1, (256, 2048, 8)], + [np.float16, 3, (32, 16, 16)] + ] + output_list = [(4), (3)] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 10) + for output_size in output_list: + cpu_output = self.cpu_op_exec(cpu_input, output_size) + npu_output = self.npu_op_exec(npu_input, output_size) + self.assertRtolEqual(cpu_output, npu_output, prec16=0.002) + + def test_AdaptiveAvgPool1d_shape_format_fp32(self, device): + shape_format = [ + [np.float32, 0, (64, 10, 16)], + [np.float32, -1, (256, 2048, 8)], + [np.float32, 3, (32, 16, 16)] + ] + output_list = [(4), (3), (1)] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 10) + for output_size in output_list: + cpu_output = self.cpu_op_exec(cpu_input, output_size) + npu_output = self.npu_op_exec(npu_input, output_size) + self.assertRtolEqual(cpu_output, npu_output, 0.001) + +instantiate_device_type_tests(TestAdaptiveAvgPool1d, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_adaptive_avg_pool3d.py b/test/test_npu/test_network_ops/test_adaptive_avg_pool3d.py index 859cccf3cbb1470177b87472a518fe4d0c06f870..27250968d7bc731b48b9d24d72ac4aedf05e9e83 100644 --- a/test/test_npu/test_network_ops/test_adaptive_avg_pool3d.py +++ b/test/test_npu/test_network_ops/test_adaptive_avg_pool3d.py @@ -1,69 +1,69 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestAdaptiveAvgPool3d(TestCase): - def cpu_op_exec(self, input, output_size): - m = nn.AdaptiveAvgPool3d(output_size) - output= m(input) - return output.numpy() - - def npu_op_exec(self, input, output_size): - m = nn.AdaptiveAvgPool3d(output_size) - output= m(input).cpu() - return output.numpy() - - def test_AdaptiveAvgPool3d_shape_format_fp16(self, device): - shape_format = [ - [np.float16, -1, (64, 10, 16, 32)], - [np.float16, -1, (4, 16, 8, 4, 2)], - [np.float16, -1, (2, 16, 4, 32)], - [np.float16, -1, (4, 16, 8, 4, 16)] - ] - # output_list = [(4, 2, 4), (2, 2, 2), (2, 4, 4), (4, 4, 2)] - output_list = [(1, 1, 1)] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 10) - cpu_input = cpu_input.to(torch.float32) - for output_size in output_list: - cpu_output = self.cpu_op_exec(cpu_input, output_size) - npu_output = self.npu_op_exec(npu_input, output_size) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_AdaptiveAvgPool3d_shape_format_fp32(self, device): - shape_format = [ - [np.float32, -1, (64, 10, 16, 32)], - [np.float32, -1, (4, 2, 2, 4, 316)], - [np.float32, -1, (2, 16, 4, 32)], - [np.float32, -1, (4, 16, 8, 4, 16)] - ] - # output_list = [(4, 2, 4), (2, 2, 2), (2, 4, 4), (4, 4, 2)] - output_list = [(1, 1, 1)] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 10) - for output_size in output_list: - cpu_output = self.cpu_op_exec(cpu_input, output_size) - npu_output = self.npu_op_exec(npu_input, output_size) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestAdaptiveAvgPool3d, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestAdaptiveAvgPool3d(TestCase): + def cpu_op_exec(self, input, output_size): + m = nn.AdaptiveAvgPool3d(output_size) + output= m(input) + return output.numpy() + + def npu_op_exec(self, input, output_size): + m = nn.AdaptiveAvgPool3d(output_size) + output= m(input).cpu() + return output.numpy() + + def test_AdaptiveAvgPool3d_shape_format_fp16(self, device): + shape_format = [ + [np.float16, -1, (64, 10, 16, 32)], + [np.float16, -1, (4, 16, 8, 4, 2)], + [np.float16, -1, (2, 16, 4, 32)], + [np.float16, -1, (4, 16, 8, 4, 16)] + ] + # output_list = [(4, 2, 4), (2, 2, 2), (2, 4, 4), (4, 4, 2)] + output_list = [(1, 1, 1)] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 10) + cpu_input = cpu_input.to(torch.float32) + for output_size in output_list: + cpu_output = self.cpu_op_exec(cpu_input, output_size) + npu_output = self.npu_op_exec(npu_input, output_size) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_AdaptiveAvgPool3d_shape_format_fp32(self, device): + shape_format = [ + [np.float32, -1, (64, 10, 16, 32)], + [np.float32, -1, (4, 2, 2, 4, 316)], + [np.float32, -1, (2, 16, 4, 32)], + [np.float32, -1, (4, 16, 8, 4, 16)] + ] + # output_list = [(4, 2, 4), (2, 2, 2), (2, 4, 4), (4, 4, 2)] + output_list = [(1, 1, 1)] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 10) + for output_size in output_list: + cpu_output = self.cpu_op_exec(cpu_input, output_size) + npu_output = self.npu_op_exec(npu_input, output_size) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestAdaptiveAvgPool3d, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_adaptive_avg_pool3d_backward.py b/test/test_npu/test_network_ops/test_adaptive_avg_pool3d_backward.py index c3dc9a48430dbc337faa1ac4895b7563883584e2..01d1ce2a115c1f8c42fe67ad360b3897d93ef434 100644 --- a/test/test_npu/test_network_ops/test_adaptive_avg_pool3d_backward.py +++ b/test/test_npu/test_network_ops/test_adaptive_avg_pool3d_backward.py @@ -1,66 +1,66 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -from torch.nn import functional as F -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestAdaptiveAvgPool3dBackward(TestCase): - - def cpu_op_exec(self, input_x, output_size): - input_x.requires_grad_(True) - m = torch.nn.AdaptiveAvgPool3d(output_size) - output = m(input_x) - ones = torch.ones_like(output) - output.backward(ones) - out = input_x.grad - return out.numpy() - - def npu_op_exec(self, input_x, output_size): - input_x.requires_grad_(True) - m = torch.nn.AdaptiveAvgPool3d( output_size) - output = m(input_x) - ones = torch.ones_like(output) - output.backward(ones) - out = input_x.grad.cpu() - return out.numpy() - - def test_adaptiveAvgPool3d_backward(self, device): - dtype_list = [np.float16, np.float32] - format_list = [-1] - shape_list = [ - [2, 3, 7, 7], - [1, 2, 3, 6, 6], - [6, 5, 8, 10], - [2, 5, 6, 8, 9] - ] - shape_format = [ - [i, j, k] for i in dtype_list for j in format_list for k in shape_list - ] - output_sizes = [[1, 1, 1]] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 10) - for output_size in output_sizes: - cpu_output = self.cpu_op_exec(cpu_input, output_size) - npu_output = self.npu_op_exec(npu_input, output_size) - - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestAdaptiveAvgPool3dBackward, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +from torch.nn import functional as F +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestAdaptiveAvgPool3dBackward(TestCase): + + def cpu_op_exec(self, input_x, output_size): + input_x.requires_grad_(True) + m = torch.nn.AdaptiveAvgPool3d(output_size) + output = m(input_x) + ones = torch.ones_like(output) + output.backward(ones) + out = input_x.grad + return out.numpy() + + def npu_op_exec(self, input_x, output_size): + input_x.requires_grad_(True) + m = torch.nn.AdaptiveAvgPool3d( output_size) + output = m(input_x) + ones = torch.ones_like(output) + output.backward(ones) + out = input_x.grad.cpu() + return out.numpy() + + def test_adaptiveAvgPool3d_backward(self, device): + dtype_list = [np.float16, np.float32] + format_list = [-1] + shape_list = [ + [2, 3, 7, 7], + [1, 2, 3, 6, 6], + [6, 5, 8, 10], + [2, 5, 6, 8, 9] + ] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + output_sizes = [[1, 1, 1]] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 10) + for output_size in output_sizes: + cpu_output = self.cpu_op_exec(cpu_input, output_size) + npu_output = self.npu_op_exec(npu_input, output_size) + + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestAdaptiveAvgPool3dBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_adaptive_max_pool2d.py b/test/test_npu/test_network_ops/test_adaptive_max_pool2d.py index 877f50c11c26fb787a491cae9fcfc7b2957db0a9..fbd6bf65e7a961b55b2dd7487b14a0f5509e869f 100644 --- a/test/test_npu/test_network_ops/test_adaptive_max_pool2d.py +++ b/test/test_npu/test_network_ops/test_adaptive_max_pool2d.py @@ -1,53 +1,53 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestAdaptiveMaxPool2d(TestCase): - def cpu_op_exec(self, input, output_size): - m = nn.AdaptiveMaxPool2d(output_size) - output = m(input) - return output.numpy() - - def npu_op_exec(self, input, output_size): - m = nn.AdaptiveMaxPool2d(output_size).npu() - output = m(input) - return output.cpu().numpy() - - def test_adaptiveMaxPool2d_shape_format_fp32_6(self, device): - format_list = [-1] - # (1, 8, 9) IndexError - shape_list = [(1, 5, 9, 9)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - output_list = [(3, 3)] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - for output_size in output_list: - cpu_output = self.cpu_op_exec(cpu_input, output_size) - npu_output = self.npu_op_exec(npu_input, output_size) - - self.assertRtolEqual(cpu_output, npu_output, 0.0004) - - -instantiate_device_type_tests(TestAdaptiveMaxPool2d, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestAdaptiveMaxPool2d(TestCase): + def cpu_op_exec(self, input, output_size): + m = nn.AdaptiveMaxPool2d(output_size) + output = m(input) + return output.numpy() + + def npu_op_exec(self, input, output_size): + m = nn.AdaptiveMaxPool2d(output_size).npu() + output = m(input) + return output.cpu().numpy() + + def test_adaptiveMaxPool2d_shape_format_fp32_6(self, device): + format_list = [-1] + # (1, 8, 9) IndexError + shape_list = [(1, 5, 9, 9)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + output_list = [(3, 3)] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + for output_size in output_list: + cpu_output = self.cpu_op_exec(cpu_input, output_size) + npu_output = self.npu_op_exec(npu_input, output_size) + + self.assertRtolEqual(cpu_output, npu_output, 0.0004) + + +instantiate_device_type_tests(TestAdaptiveMaxPool2d, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_add.py b/test/test_npu/test_network_ops/test_add.py old mode 100644 new mode 100755 index af1963946098a7f5a66cc43b722ab421e6db7ee5..34235b05e7cb5f8b60b880c917dc0fbef6b40a19 --- a/test/test_npu/test_network_ops/test_add.py +++ b/test/test_npu/test_network_ops/test_add.py @@ -1,398 +1,398 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestAdd(TestCase): - def cpu_op_out_exec(self, input1, input2, output): - torch.add(input1, input2, alpha = 1, out = output) - output = output.numpy() - return output - - def npu_op_out_exec_new(self, input1, input2, output): - torch.add(input1, input2, alpha = 1, out = output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec(self, input1, input2): - output = torch.add(input1, input2, alpha = 1) - output = output.numpy() - return output - - def npu_op_exec_new(self, input1, input2): - output = torch.add(input1, input2, alpha = 1) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_alpha(self, input1, input2): - output = torch.add(input1, input2, alpha = 3) - output = output.numpy() - return output - - def npu_op_exec_new_alpha(self, input1, input2): - output = torch.add(input1, input2, alpha = 3) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_scalar_exec(self, input1, scalar): - output = torch.add(input1, scalar, alpha = 1) - output = output.numpy() - return output - - def npu_op_scalar_exec_new(self, input1, scalar): - output = torch.add(input1, scalar, alpha = 1) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_scalar_exec_alpha(self, input1, scalar): - output = torch.add(input1, scalar, alpha = 3) - output = output.numpy() - return output - - def npu_op_scalar_exec_new_alpha(self, input1, scalar): - output = torch.add(input1, scalar, alpha = 3) - output = output.to("cpu") - output = output.numpy() - return output - - def add_scalar_result(self, shape_format): - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_scalar_exec(cpu_input, item[1]) - npu_output = self.npu_op_exec_new(npu_input, item[1]) - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - - def add_scalar_alpha_result(self, shape_format): - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_scalar_exec_alpha(cpu_input, item[1]) - npu_output = self.npu_op_scalar_exec_new_alpha(npu_input, item[1]) - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - - def add_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec_new(npu_input1, npu_input2) - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - - def add_out_result(self, shape_format): - for item in shape_format: - cpuout = torch.randn(3) - npuout = torch.randn(3).to("npu") - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - - cpu_output = self.cpu_op_out_exec(cpu_input1, cpu_input2,cpuout) - npu_output = self.npu_op_out_exec_new(npu_input1, npu_input2, npuout) - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - - def add_alpha_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - - cpu_output = self.cpu_op_exec_alpha(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec_new_alpha(npu_input1, npu_input2) - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - - def test_add_scalar_shape_format_fp16_1d(self, device): - format_list = [0, 3] - scalar_list = [0,1] - shape_format = [ - [[np.float16, i, [18]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_result(shape_format) - - def test_add_scalar_shape_format_fp32_1d(self, device): - format_list = [0, 3] - scalar_list = [0,1] - shape_format = [ - [[np.float32, i, [18]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_result(shape_format) - - def test_add_scalar_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float16, i, [5, 256]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_result(shape_format) - - def test_add_scalar_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float32, i, [5, 256]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_result(shape_format) - - def test_add_scalar_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float16, i, [32, 3, 3]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_result(shape_format) - - def test_add_scalar_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float32, i, [32, 3, 3]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_result(shape_format) - - def test_add_scalar_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float16, i, [64, 112, 7, 7]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_result(shape_format) - - def test_add_scalar_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float32, i, [64, 112, 7, 7]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_result(shape_format) - - def test_add_scalar_shape_format_fp16_1d(self, device): - format_list = [0, 3] - scalar_list = [0,1] - shape_format = [ - [[np.float16, i, [18]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_alpha_result(shape_format) - - def test_add_scalar_shape_format_fp32_1d(self, device): - format_list = [0, 3] - scalar_list = [0,1] - shape_format = [ - [[np.float32, i, [18]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_alpha_result(shape_format) - - def test_add_scalar_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float16, i, [5, 256]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_alpha_result(shape_format) - - def test_add_scalar_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float32, i, [5, 256]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_alpha_result(shape_format) - - def test_add_scalar_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float16, i, [32, 3, 3]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_alpha_result(shape_format) - - def test_add_scalar_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float32, i, [32, 3, 3]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_alpha_result(shape_format) - - def test_add_scalar_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float16, i, [64, 112, 7, 7]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_alpha_result(shape_format) - - def test_add_scalar_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - scalar_list = [0,1] - shape_format = [ - [[np.float32, i, [64, 112, 7, 7]], k] for i in format_list for k in scalar_list - ] - self.add_scalar_alpha_result(shape_format) - - def test_add_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [64]] for i in format_list - ] - self.add_result(shape_format) - - def test_add_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [64]] for i in format_list - ] - self.add_result(shape_format) - - def test_add_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [5, 256]] for i in format_list - ] - self.add_result(shape_format) - - def test_add_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [5, 256]] for i in format_list - ] - self.add_result(shape_format) - - def test_add_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [32, 3, 3]] for i in format_list - ] - self.add_result(shape_format) - - def test_add_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [32, 3, 3]] for i in format_list - ] - self.add_result(shape_format) - - def test_add_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [64, 112, 7, 7]] for i in format_list - ] - self.add_result(shape_format) - - def test_add_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [64, 112, 7, 7]] for i in format_list - ] - self.add_result(shape_format) - - def test_add_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [64]] for i in format_list - ] - self.add_alpha_result(shape_format) - - def test_add_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [64]] for i in format_list - ] - self.add_alpha_result(shape_format) - - def test_add_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [5, 256]] for i in format_list - ] - self.add_alpha_result(shape_format) - - def test_add_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [5, 256]] for i in format_list - ] - self.add_alpha_result(shape_format) - - def test_add_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [32, 3, 3]] for i in format_list - ] - self.add_alpha_result(shape_format) - - def test_add_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [32, 3, 3]] for i in format_list - ] - self.add_alpha_result(shape_format) - - def test_add_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [64, 112, 7, 7]] for i in format_list - ] - self.add_alpha_result(shape_format) - - def test_add_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [64, 112, 7, 7]] for i in format_list - ] - self.add_alpha_result(shape_format) - - def test_add_mix_dtype(self, device): - cpu_input1, npu_input1 = create_common_tensor([np.int32, 0, (2, 3)], 1, 100) - cpu_input2, npu_input2 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - cpu_output = torch.add(cpu_input1, cpu_input2) - npu_output = torch.add(npu_input1, npu_input2) - npu_output = npu_output.to("cpu") - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestAdd, globals(), except_for="cpu") - -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestAdd(TestCase): + def cpu_op_out_exec(self, input1, input2, output): + torch.add(input1, input2, alpha = 1, out = output) + output = output.numpy() + return output + + def npu_op_out_exec_new(self, input1, input2, output): + torch.add(input1, input2, alpha = 1, out = output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec(self, input1, input2): + output = torch.add(input1, input2, alpha = 1) + output = output.numpy() + return output + + def npu_op_exec_new(self, input1, input2): + output = torch.add(input1, input2, alpha = 1) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_alpha(self, input1, input2): + output = torch.add(input1, input2, alpha = 3) + output = output.numpy() + return output + + def npu_op_exec_new_alpha(self, input1, input2): + output = torch.add(input1, input2, alpha = 3) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_scalar_exec(self, input1, scalar): + output = torch.add(input1, scalar, alpha = 1) + output = output.numpy() + return output + + def npu_op_scalar_exec_new(self, input1, scalar): + output = torch.add(input1, scalar, alpha = 1) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_scalar_exec_alpha(self, input1, scalar): + output = torch.add(input1, scalar, alpha = 3) + output = output.numpy() + return output + + def npu_op_scalar_exec_new_alpha(self, input1, scalar): + output = torch.add(input1, scalar, alpha = 3) + output = output.to("cpu") + output = output.numpy() + return output + + def add_scalar_result(self, shape_format): + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_scalar_exec(cpu_input, item[1]) + npu_output = self.npu_op_exec_new(npu_input, item[1]) + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + + def add_scalar_alpha_result(self, shape_format): + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_scalar_exec_alpha(cpu_input, item[1]) + npu_output = self.npu_op_scalar_exec_new_alpha(npu_input, item[1]) + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + + def add_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec_new(npu_input1, npu_input2) + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + + def add_out_result(self, shape_format): + for item in shape_format: + cpuout = torch.randn(3) + npuout = torch.randn(3).to("npu") + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + + cpu_output = self.cpu_op_out_exec(cpu_input1, cpu_input2,cpuout) + npu_output = self.npu_op_out_exec_new(npu_input1, npu_input2, npuout) + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + + def add_alpha_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + + cpu_output = self.cpu_op_exec_alpha(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec_new_alpha(npu_input1, npu_input2) + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + + def test_add_scalar_shape_format_fp16_1d(self, device): + format_list = [0, 3] + scalar_list = [0,1] + shape_format = [ + [[np.float16, i, [18]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_result(shape_format) + + def test_add_scalar_shape_format_fp32_1d(self, device): + format_list = [0, 3] + scalar_list = [0,1] + shape_format = [ + [[np.float32, i, [18]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_result(shape_format) + + def test_add_scalar_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float16, i, [5, 256]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_result(shape_format) + + def test_add_scalar_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float32, i, [5, 256]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_result(shape_format) + + def test_add_scalar_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float16, i, [32, 3, 3]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_result(shape_format) + + def test_add_scalar_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float32, i, [32, 3, 3]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_result(shape_format) + + def test_add_scalar_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float16, i, [64, 112, 7, 7]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_result(shape_format) + + def test_add_scalar_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float32, i, [64, 112, 7, 7]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_result(shape_format) + + def test_add_scalar_shape_format_fp16_1d(self, device): + format_list = [0, 3] + scalar_list = [0,1] + shape_format = [ + [[np.float16, i, [18]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_alpha_result(shape_format) + + def test_add_scalar_shape_format_fp32_1d(self, device): + format_list = [0, 3] + scalar_list = [0,1] + shape_format = [ + [[np.float32, i, [18]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_alpha_result(shape_format) + + def test_add_scalar_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float16, i, [5, 256]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_alpha_result(shape_format) + + def test_add_scalar_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float32, i, [5, 256]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_alpha_result(shape_format) + + def test_add_scalar_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float16, i, [32, 3, 3]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_alpha_result(shape_format) + + def test_add_scalar_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float32, i, [32, 3, 3]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_alpha_result(shape_format) + + def test_add_scalar_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float16, i, [64, 112, 7, 7]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_alpha_result(shape_format) + + def test_add_scalar_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + scalar_list = [0,1] + shape_format = [ + [[np.float32, i, [64, 112, 7, 7]], k] for i in format_list for k in scalar_list + ] + self.add_scalar_alpha_result(shape_format) + + def test_add_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [64]] for i in format_list + ] + self.add_result(shape_format) + + def test_add_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [64]] for i in format_list + ] + self.add_result(shape_format) + + def test_add_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [5, 256]] for i in format_list + ] + self.add_result(shape_format) + + def test_add_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [5, 256]] for i in format_list + ] + self.add_result(shape_format) + + def test_add_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [32, 3, 3]] for i in format_list + ] + self.add_result(shape_format) + + def test_add_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [32, 3, 3]] for i in format_list + ] + self.add_result(shape_format) + + def test_add_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [64, 112, 7, 7]] for i in format_list + ] + self.add_result(shape_format) + + def test_add_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [64, 112, 7, 7]] for i in format_list + ] + self.add_result(shape_format) + + def test_add_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [64]] for i in format_list + ] + self.add_alpha_result(shape_format) + + def test_add_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [64]] for i in format_list + ] + self.add_alpha_result(shape_format) + + def test_add_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [5, 256]] for i in format_list + ] + self.add_alpha_result(shape_format) + + def test_add_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [5, 256]] for i in format_list + ] + self.add_alpha_result(shape_format) + + def test_add_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [32, 3, 3]] for i in format_list + ] + self.add_alpha_result(shape_format) + + def test_add_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [32, 3, 3]] for i in format_list + ] + self.add_alpha_result(shape_format) + + def test_add_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [64, 112, 7, 7]] for i in format_list + ] + self.add_alpha_result(shape_format) + + def test_add_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [64, 112, 7, 7]] for i in format_list + ] + self.add_alpha_result(shape_format) + + def test_add_mix_dtype(self, device): + cpu_input1, npu_input1 = create_common_tensor([np.int32, 0, (2, 3)], 1, 100) + cpu_input2, npu_input2 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + cpu_output = torch.add(cpu_input1, cpu_input2) + npu_output = torch.add(npu_input1, npu_input2) + npu_output = npu_output.to("cpu") + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestAdd, globals(), except_for="cpu") + +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_addmm.py b/test/test_npu/test_network_ops/test_addmm.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_all.py b/test/test_npu/test_network_ops/test_all.py old mode 100644 new mode 100755 index 12cf8284caf24bf24f2ae090be190166117c009b..71373287f9cc9ce231f14f5eabe9288f10288711 --- a/test/test_npu/test_network_ops/test_all.py +++ b/test/test_npu/test_network_ops/test_all.py @@ -1,88 +1,88 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestAll(TestCase): - def create_bool_tensor(self, shape, minValue, maxValue): - input1 = np.random.uniform(minValue, maxValue, shape) - input1 = input1 > 0.5 - cpu_input = torch.from_numpy(input1) - npu_input = torch.from_numpy(input1).to("npu") - return cpu_input, npu_input - - def cpu_op_exec(self, input): - output = input.all() - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = input.all() - output = output.to("cpu") - output = output.numpy() - return output - - def test_all_shape_format(self, device): - shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024], [2, 0, 2]] - for item in shape_list: - cpu_input, npu_input = self.create_bool_tensor(item, 0, 1) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual( - cpu_output.astype( - np.int32), npu_output.astype( - np.int32)) - - def cpu_op_exec1(self, input, dim): - output = input.all(dim=dim) - output = output.numpy() - return output - - def npu_op_exec1(self, input, dim): - output = input.all(dim=dim) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_out_exec1(self, input, dim): - shape = list(input.shape) - output0 = torch.randn(shape) > 0 - output1 = torch.randn(shape.pop()) > 0 - output0 = output0.npu() - output1 = output1.npu() - torch.all(input, dim=dim, keepdim = False, out = output0) - torch.all(input, dim=dim, keepdim = False, out = output1) - output0 = output0.to("cpu").numpy() - output1 = output1.to("cpu").numpy() - return output0, output1 - - def test_alld_shape_format(self, device): - shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024]] - for item in shape_list: - cpu_input, npu_input = self.create_bool_tensor(item, 0, 1) - cpu_output = self.cpu_op_exec1(cpu_input, 0) - npu_output = self.npu_op_exec1(npu_input, 0) - npu_out0, npu_out1 = self.npu_op_out_exec1(npu_input, 0) - self.assertRtolEqual(cpu_output.astype(np.int32), npu_output.astype(np.int32)) - self.assertRtolEqual(cpu_output.astype(np.int32), npu_out0.astype(np.int32)) - self.assertRtolEqual(cpu_output.astype(np.int32), npu_out1.astype(np.int32)) - - -instantiate_device_type_tests(TestAll, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestAll(TestCase): + def create_bool_tensor(self, shape, minValue, maxValue): + input1 = np.random.uniform(minValue, maxValue, shape) + input1 = input1 > 0.5 + cpu_input = torch.from_numpy(input1) + npu_input = torch.from_numpy(input1).to("npu") + return cpu_input, npu_input + + def cpu_op_exec(self, input): + output = input.all() + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = input.all() + output = output.to("cpu") + output = output.numpy() + return output + + def test_all_shape_format(self, device): + shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024], [2, 0, 2]] + for item in shape_list: + cpu_input, npu_input = self.create_bool_tensor(item, 0, 1) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual( + cpu_output.astype( + np.int32), npu_output.astype( + np.int32)) + + def cpu_op_exec1(self, input, dim): + output = input.all(dim=dim) + output = output.numpy() + return output + + def npu_op_exec1(self, input, dim): + output = input.all(dim=dim) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_out_exec1(self, input, dim): + shape = list(input.shape) + output0 = torch.randn(shape) > 0 + output1 = torch.randn(shape.pop()) > 0 + output0 = output0.npu() + output1 = output1.npu() + torch.all(input, dim=dim, keepdim = False, out = output0) + torch.all(input, dim=dim, keepdim = False, out = output1) + output0 = output0.to("cpu").numpy() + output1 = output1.to("cpu").numpy() + return output0, output1 + + def test_alld_shape_format(self, device): + shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024]] + for item in shape_list: + cpu_input, npu_input = self.create_bool_tensor(item, 0, 1) + cpu_output = self.cpu_op_exec1(cpu_input, 0) + npu_output = self.npu_op_exec1(npu_input, 0) + npu_out0, npu_out1 = self.npu_op_out_exec1(npu_input, 0) + self.assertRtolEqual(cpu_output.astype(np.int32), npu_output.astype(np.int32)) + self.assertRtolEqual(cpu_output.astype(np.int32), npu_out0.astype(np.int32)) + self.assertRtolEqual(cpu_output.astype(np.int32), npu_out1.astype(np.int32)) + + +instantiate_device_type_tests(TestAll, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_amp_non_finite_check_and_unscale_.py b/test/test_npu/test_network_ops/test_amp_non_finite_check_and_unscale_.py index 7660cb245f36afec495ce9ce25dda06a886ab3ce..b34bbe53b65a71fb068129c0ad3941b901bf5968 100644 --- a/test/test_npu/test_network_ops/test_amp_non_finite_check_and_unscale_.py +++ b/test/test_npu/test_network_ops/test_amp_non_finite_check_and_unscale_.py @@ -1,80 +1,80 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class Test_AmpNonFiniteCheckAndUnscale_(TestCase): - def generate_data(self, min_d, max_d, shape, dtype, input3): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input1 = torch.from_numpy(input1) - input2 = np.array([0.0]).astype(dtype) - input2 = torch.from_numpy(input2) - input3 = np.array([input3]).astype(dtype) - input3 = torch.from_numpy(input3) - return input1, input2, input3 - - def cpu_op_exec(self, input1, input2, input3): - input1 = input1.numpy() - input2 = input2.numpy() - input3 = input3.numpy() - input1 = np.multiply(input1, input3) - return input1 - - def npu_op_exec(self, input1, input2, input3): - input1 = input1.to("npu") - input2 = input2.to("npu") - input3 = input3.to("npu") - torch._amp_non_finite_check_and_unscale_(input1,input2,input3) - input1 = input1.to("cpu") - input1 = input1.numpy() - return input1 - - def test_AmpNonFiniteCheckAndUnscale_float32_case1(self, device): - input1, input2, input3 = self.generate_data(0, 100, (4, 3), np.float32, 1.5) - cpu_output = self.cpu_op_exec(input1, input2, input3) - npu_output = self.npu_op_exec(input1, input2, input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_AmpNonFiniteCheckAndUnscale_float32_case2(self, device): - input1, input2, input3 = self.generate_data(0, 100, (2, 5, 6), np.float32, 3.7) - cpu_output = self.cpu_op_exec(input1, input2, input3) - npu_output = self.npu_op_exec(input1, input2, input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_AmpNonFiniteCheckAndUnscale_float16_case1(self, device): - input1, input2, input3 = self.generate_data(0, 100, (5, 7), np.float16, 1.9) - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - input3 = input3.to(torch.float32) - cpu_output = self.cpu_op_exec(input1, input2, input3) - npu_output = self.npu_op_exec(input1, input2, input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_AmpNonFiniteCheckAndUnscale_float16_case2(self, device): - input1, input2, input3 = self.generate_data(0, 100, (2, 8, 1), np.float16, 3.2) - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - input3 = input3.to(torch.float32) - cpu_output = self.cpu_op_exec(input1, input2, input3) - npu_output = self.npu_op_exec(input1, input2, input3) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(Test_AmpNonFiniteCheckAndUnscale_, globals(), except_for='cpu') -if __name__ == '__main__': +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class Test_AmpNonFiniteCheckAndUnscale_(TestCase): + def generate_data(self, min_d, max_d, shape, dtype, input3): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input1 = torch.from_numpy(input1) + input2 = np.array([0.0]).astype(dtype) + input2 = torch.from_numpy(input2) + input3 = np.array([input3]).astype(dtype) + input3 = torch.from_numpy(input3) + return input1, input2, input3 + + def cpu_op_exec(self, input1, input2, input3): + input1 = input1.numpy() + input2 = input2.numpy() + input3 = input3.numpy() + input1 = np.multiply(input1, input3) + return input1 + + def npu_op_exec(self, input1, input2, input3): + input1 = input1.to("npu") + input2 = input2.to("npu") + input3 = input3.to("npu") + torch._amp_non_finite_check_and_unscale_(input1,input2,input3) + input1 = input1.to("cpu") + input1 = input1.numpy() + return input1 + + def test_AmpNonFiniteCheckAndUnscale_float32_case1(self, device): + input1, input2, input3 = self.generate_data(0, 100, (4, 3), np.float32, 1.5) + cpu_output = self.cpu_op_exec(input1, input2, input3) + npu_output = self.npu_op_exec(input1, input2, input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_AmpNonFiniteCheckAndUnscale_float32_case2(self, device): + input1, input2, input3 = self.generate_data(0, 100, (2, 5, 6), np.float32, 3.7) + cpu_output = self.cpu_op_exec(input1, input2, input3) + npu_output = self.npu_op_exec(input1, input2, input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_AmpNonFiniteCheckAndUnscale_float16_case1(self, device): + input1, input2, input3 = self.generate_data(0, 100, (5, 7), np.float16, 1.9) + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + input3 = input3.to(torch.float32) + cpu_output = self.cpu_op_exec(input1, input2, input3) + npu_output = self.npu_op_exec(input1, input2, input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_AmpNonFiniteCheckAndUnscale_float16_case2(self, device): + input1, input2, input3 = self.generate_data(0, 100, (2, 8, 1), np.float16, 3.2) + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + input3 = input3.to(torch.float32) + cpu_output = self.cpu_op_exec(input1, input2, input3) + npu_output = self.npu_op_exec(input1, input2, input3) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(Test_AmpNonFiniteCheckAndUnscale_, globals(), except_for='cpu') +if __name__ == '__main__': run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_anchor_response_flags.py b/test/test_npu/test_network_ops/test_anchor_response_flags.py index 06c2e33a105939c9d851c03a43bc32049dd2f763..ea0bc819e8af685a2583287195a3aada2a1c94fa 100644 --- a/test/test_npu/test_network_ops/test_anchor_response_flags.py +++ b/test/test_npu/test_network_ops/test_anchor_response_flags.py @@ -1,60 +1,60 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestAnchorResponseFlags(TestCase): - def cpu_op_exec(self, gt_bboxes, featmap_size, strides, num_base_anchors): - feat_h, feat_w = featmap_size - gt_bboxes_cx = ((gt_bboxes[:, 0] + gt_bboxes[:, 2]) * 0.5) - gt_bboxes_cy = ((gt_bboxes[:, 1] + gt_bboxes[:, 3]) * 0.5) - gt_bboxes_grid_x = torch.floor(gt_bboxes_cx / strides[0]).int() - gt_bboxes_grid_y = torch.floor(gt_bboxes_cy / strides[1]).int() - gt_bboxes_grid_idx = gt_bboxes_grid_y * feat_w + gt_bboxes_grid_x - responsible_grid = torch.zeros(feat_h * feat_w, dtype=torch.uint8) - gt_bboxes_grid_idx = gt_bboxes_grid_idx.long() - responsible_grid[gt_bboxes_grid_idx] = 1 - responsible_grid = responsible_grid[:, None].expand( - responsible_grid.size(0), num_base_anchors).contiguous().view(-1) - return responsible_grid.numpy() - - def npu_op_exec(self, input_npu, featmap_size, strides, num_base_anchors): - out = torch.npu_anchor_response_flags(input_npu, featmap_size, strides, num_base_anchors) - out = out.to("cpu") - return out.detach().numpy() - - - def test_anchor_response_flags(self, device): - shape_format = [ - [[np.float32, -1, [100, 4]], [60, 60], [2, 2], 9], - [[np.float16, -1, [200, 4]], [10, 10], [32, 32], 3], - [[np.float16, -1, [500, 4]], [32, 32], [16, 16], 5] - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input, *item[1:]) - npu_output = self.npu_op_exec(npu_input, *item[1:]) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestAnchorResponseFlags, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestAnchorResponseFlags(TestCase): + def cpu_op_exec(self, gt_bboxes, featmap_size, strides, num_base_anchors): + feat_h, feat_w = featmap_size + gt_bboxes_cx = ((gt_bboxes[:, 0] + gt_bboxes[:, 2]) * 0.5) + gt_bboxes_cy = ((gt_bboxes[:, 1] + gt_bboxes[:, 3]) * 0.5) + gt_bboxes_grid_x = torch.floor(gt_bboxes_cx / strides[0]).int() + gt_bboxes_grid_y = torch.floor(gt_bboxes_cy / strides[1]).int() + gt_bboxes_grid_idx = gt_bboxes_grid_y * feat_w + gt_bboxes_grid_x + responsible_grid = torch.zeros(feat_h * feat_w, dtype=torch.uint8) + gt_bboxes_grid_idx = gt_bboxes_grid_idx.long() + responsible_grid[gt_bboxes_grid_idx] = 1 + responsible_grid = responsible_grid[:, None].expand( + responsible_grid.size(0), num_base_anchors).contiguous().view(-1) + return responsible_grid.numpy() + + def npu_op_exec(self, input_npu, featmap_size, strides, num_base_anchors): + out = torch.npu_anchor_response_flags(input_npu, featmap_size, strides, num_base_anchors) + out = out.to("cpu") + return out.detach().numpy() + + + def test_anchor_response_flags(self, device): + shape_format = [ + [[np.float32, -1, [100, 4]], [60, 60], [2, 2], 9], + [[np.float16, -1, [200, 4]], [10, 10], [32, 32], 3], + [[np.float16, -1, [500, 4]], [32, 32], [16, 16], 5] + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input, *item[1:]) + npu_output = self.npu_op_exec(npu_input, *item[1:]) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestAnchorResponseFlags, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_any.py b/test/test_npu/test_network_ops/test_any.py old mode 100644 new mode 100755 index ded18ccb5cfb579b74e17bf14560732f12a0b649..2bde503e0641f48fc1221c8a179359dd14ebd061 --- a/test/test_npu/test_network_ops/test_any.py +++ b/test/test_npu/test_network_ops/test_any.py @@ -1,94 +1,94 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestAny(TestCase): - def create_bool_tensor(self, shape, minValue, maxValue): - input1 = np.random.uniform(minValue, maxValue, shape) - cpu_input = torch.from_numpy(input1) > 0.5 - npu_input = (torch.from_numpy(input1) > 0.5).to("npu") - return cpu_input, npu_input - - def cpu_op_exec(self, input): - output = input.any() - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = input.any() - output = output.to("cpu") - output = output.numpy() - return output - - def test_any_shape_format(self, device): - shape_list = [[], - [1024], - [32, 1024], - [32, 8, 1024], - [128, 32, 8, 1024]] - - for item in shape_list: - cpu_input, npu_input = self.create_bool_tensor(item, 0, 1) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual( - cpu_output.astype(np.int32), - npu_output.astype(np.int32)) - - def cpu_op_exec1(self, input, dim, keepdim): - output = input.any(dim=dim, keepdim=keepdim) - output = output.numpy() - return output - - def npu_op_exec1(self, input, dim, keepdim): - output = input.any(dim=dim, keepdim=keepdim) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_out_exec1(self, input, dim, keepdim): - shape = list(input.shape) - output0 = torch.randn(shape)>0 - output1 = torch.randn(shape.pop())>0 - output0 = output0.npu() - output1 = output1.npu() - torch.any(input, dim=dim, keepdim=keepdim, out=output0) - torch.any(input, dim=dim, keepdim=keepdim, out=output1) - output0 = output0.to("cpu").numpy() - output1 = output1.to("cpu").numpy() - return output0, output1 - - def test_anyd_shape_format(self, device): - shape_list = [[ [1024], 0, False], - [ [32, 1024], 1, False], - [ [32, 8, 1024], 2, True ], - [ [128, 32, 8, 1024], 3, True ]] - - for item in shape_list: - cpu_input, npu_input = self.create_bool_tensor(item[0], 0, 1) - cpu_output = self.cpu_op_exec1(cpu_input, item[1], item[2]) - npu_output = self.npu_op_exec1(npu_input, item[1], item[2]) - npu_out0, npu_out1 = self.npu_op_out_exec1(npu_input, item[1], item[2]) - self.assertRtolEqual(cpu_output.astype(np.int32),npu_output.astype(np.int32)) - self.assertRtolEqual(cpu_output.astype(np.int32),npu_out0.astype(np.int32)) - self.assertRtolEqual(cpu_output.astype(np.int32),npu_out1.astype(np.int32)) - -instantiate_device_type_tests(TestAny, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestAny(TestCase): + def create_bool_tensor(self, shape, minValue, maxValue): + input1 = np.random.uniform(minValue, maxValue, shape) + cpu_input = torch.from_numpy(input1) > 0.5 + npu_input = (torch.from_numpy(input1) > 0.5).to("npu") + return cpu_input, npu_input + + def cpu_op_exec(self, input): + output = input.any() + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = input.any() + output = output.to("cpu") + output = output.numpy() + return output + + def test_any_shape_format(self, device): + shape_list = [[], + [1024], + [32, 1024], + [32, 8, 1024], + [128, 32, 8, 1024]] + + for item in shape_list: + cpu_input, npu_input = self.create_bool_tensor(item, 0, 1) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual( + cpu_output.astype(np.int32), + npu_output.astype(np.int32)) + + def cpu_op_exec1(self, input, dim, keepdim): + output = input.any(dim=dim, keepdim=keepdim) + output = output.numpy() + return output + + def npu_op_exec1(self, input, dim, keepdim): + output = input.any(dim=dim, keepdim=keepdim) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_out_exec1(self, input, dim, keepdim): + shape = list(input.shape) + output0 = torch.randn(shape)>0 + output1 = torch.randn(shape.pop())>0 + output0 = output0.npu() + output1 = output1.npu() + torch.any(input, dim=dim, keepdim=keepdim, out=output0) + torch.any(input, dim=dim, keepdim=keepdim, out=output1) + output0 = output0.to("cpu").numpy() + output1 = output1.to("cpu").numpy() + return output0, output1 + + def test_anyd_shape_format(self, device): + shape_list = [[ [1024], 0, False], + [ [32, 1024], 1, False], + [ [32, 8, 1024], 2, True ], + [ [128, 32, 8, 1024], 3, True ]] + + for item in shape_list: + cpu_input, npu_input = self.create_bool_tensor(item[0], 0, 1) + cpu_output = self.cpu_op_exec1(cpu_input, item[1], item[2]) + npu_output = self.npu_op_exec1(npu_input, item[1], item[2]) + npu_out0, npu_out1 = self.npu_op_out_exec1(npu_input, item[1], item[2]) + self.assertRtolEqual(cpu_output.astype(np.int32),npu_output.astype(np.int32)) + self.assertRtolEqual(cpu_output.astype(np.int32),npu_out0.astype(np.int32)) + self.assertRtolEqual(cpu_output.astype(np.int32),npu_out1.astype(np.int32)) + +instantiate_device_type_tests(TestAny, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_arange.py b/test/test_npu/test_network_ops/test_arange.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_argmax.py b/test/test_npu/test_network_ops/test_argmax.py old mode 100644 new mode 100755 index 0b00aba0f60c1c00c6c084ffe327409680254398..70b08c7a0f95922ad3c720836400c83f135c6cb9 --- a/test/test_npu/test_network_ops/test_argmax.py +++ b/test/test_npu/test_network_ops/test_argmax.py @@ -1,101 +1,101 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestArgmax(TestCase): - def cpu_op_exec(self, input): - output = torch.argmax(input) - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = torch.argmax(input) - output = output.to("cpu") - output = output.numpy() - return output - - def test_argmax_shape_format_fp16(self, device): - format_list = [0] - shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, -10, 10) - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_argmax_shape_format_fp32(self, device): - format_list = [0] - shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, -10, 10) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def cpu_op_exec1(self, input, dim): - output = torch.argmax(input, dim) - output = output.numpy() - return output - - def npu_op_exec1(self, input, dim): - output = torch.argmax(input, dim) - output = output.to("cpu") - output = output.numpy() - return output - - def test_argmaxd_shape_format_fp16(self, device): - format_list = [0] - shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, -10, 10) - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec1(cpu_input, -1) - npu_output = self.npu_op_exec1(npu_input, -1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_argmaxd_shape_format_fp32(self, device): - format_list = [0] - shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, -10, 10) - cpu_output = self.cpu_op_exec1(cpu_input, -1) - npu_output = self.npu_op_exec1(npu_input, -1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestArgmax, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestArgmax(TestCase): + def cpu_op_exec(self, input): + output = torch.argmax(input) + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = torch.argmax(input) + output = output.to("cpu") + output = output.numpy() + return output + + def test_argmax_shape_format_fp16(self, device): + format_list = [0] + shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, -10, 10) + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_argmax_shape_format_fp32(self, device): + format_list = [0] + shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, -10, 10) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def cpu_op_exec1(self, input, dim): + output = torch.argmax(input, dim) + output = output.numpy() + return output + + def npu_op_exec1(self, input, dim): + output = torch.argmax(input, dim) + output = output.to("cpu") + output = output.numpy() + return output + + def test_argmaxd_shape_format_fp16(self, device): + format_list = [0] + shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, -10, 10) + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec1(cpu_input, -1) + npu_output = self.npu_op_exec1(npu_input, -1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_argmaxd_shape_format_fp32(self, device): + format_list = [0] + shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, -10, 10) + cpu_output = self.cpu_op_exec1(cpu_input, -1) + npu_output = self.npu_op_exec1(npu_input, -1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestArgmax, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_argsort.py b/test/test_npu/test_network_ops/test_argsort.py index be90fd3c8e9d458e30257fb62e43853489e51091..73fc5ecc4e364ac24e88a1e6e77e6484feef9fb2 100644 --- a/test/test_npu/test_network_ops/test_argsort.py +++ b/test/test_npu/test_network_ops/test_argsort.py @@ -1,80 +1,80 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestArgSort(TestCase): - def cpu_op_exec(self, input1, dim, descending): - output = torch.argsort(input1, dim=dim, descending=descending) - return output.numpy() - - def npu_op_exec(self, input1, dim, descending): - output = torch.argsort(input1, dim=dim, descending=descending) - - return output.cpu().numpy() - - def cpu_default_op_exec(self, input1): - output = torch.argsort(input1) - return output.numpy() - - def npu_default_op_exec(self, input1): - output = torch.argsort(input1) - return output.cpu().numpy() - - def test_sort_shape_format_fp32(self, device): - shape_format = [ - [[np.float32, 0, (8, 4, 3, 9)], 2, False], - [[np.float32, 0, (2, 3)]], - [[np.float32, 0, (1, 7)], 0, True], - [[np.float32, 0, (1, 5, 6)], 1, False], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - if len(item) > 1: - cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) - else: - cpu_output = self.cpu_default_op_exec(cpu_input1) - npu_output = self.npu_default_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sort_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, 0, (8, 4, 3, 9)], 2, False], - [[np.float16, 0, (2, 3)]], - [[np.float16, 0, (1, 7)], 0, True], - [[np.float16, 0, (1, 5, 6)], 1, False], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - if len(item) > 1: - cpu_output = self.cpu_op_exec(cpu_input1.to(torch.float32), item[1], item[2]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) - else: - cpu_output = self.cpu_default_op_exec(cpu_input1.to(torch.float32)) - npu_output = self.npu_default_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestArgSort, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestArgSort(TestCase): + def cpu_op_exec(self, input1, dim, descending): + output = torch.argsort(input1, dim=dim, descending=descending) + return output.numpy() + + def npu_op_exec(self, input1, dim, descending): + output = torch.argsort(input1, dim=dim, descending=descending) + + return output.cpu().numpy() + + def cpu_default_op_exec(self, input1): + output = torch.argsort(input1) + return output.numpy() + + def npu_default_op_exec(self, input1): + output = torch.argsort(input1) + return output.cpu().numpy() + + def test_sort_shape_format_fp32(self, device): + shape_format = [ + [[np.float32, 0, (8, 4, 3, 9)], 2, False], + [[np.float32, 0, (2, 3)]], + [[np.float32, 0, (1, 7)], 0, True], + [[np.float32, 0, (1, 5, 6)], 1, False], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + if len(item) > 1: + cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) + else: + cpu_output = self.cpu_default_op_exec(cpu_input1) + npu_output = self.npu_default_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sort_shape_format_fp16(self, device): + shape_format = [ + [[np.float16, 0, (8, 4, 3, 9)], 2, False], + [[np.float16, 0, (2, 3)]], + [[np.float16, 0, (1, 7)], 0, True], + [[np.float16, 0, (1, 5, 6)], 1, False], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + if len(item) > 1: + cpu_output = self.cpu_op_exec(cpu_input1.to(torch.float32), item[1], item[2]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) + else: + cpu_output = self.cpu_default_op_exec(cpu_input1.to(torch.float32)) + npu_output = self.npu_default_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestArgSort, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_as_strided.py b/test/test_npu/test_network_ops/test_as_strided.py index f8da429613197b599b907640daaea11f4d474ecb..5ffeaace5115d0a606aa81a9f0ff9468b764902d 100644 --- a/test/test_npu/test_network_ops/test_as_strided.py +++ b/test/test_npu/test_network_ops/test_as_strided.py @@ -1,53 +1,53 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestAsStrided(TestCase): - def cpu_op_exec(self, input1, size, stride, storage_offset): - output = torch.as_strided(input1, size, stride, storage_offset) - output = output.numpy() - return output - - def npu_op_exec(self,input1, size, stride, storage_offset): - output = torch.as_strided(input1, size, stride, storage_offset) - output = output.cpu().numpy() - return output - - def test_as_strided(self, device): - shape_format = [ - [[np.float32, 0, [3, 3]], (2, 2), (1, 2), 0], - [[np.float16, 0, [13, 23]], (10, 15), (1, 2), 1], - [[np.int32, 0, [5, 5]], (3, 3), (1, 2), 1], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2], item[3]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3]) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestAsStrided, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestAsStrided(TestCase): + def cpu_op_exec(self, input1, size, stride, storage_offset): + output = torch.as_strided(input1, size, stride, storage_offset) + output = output.numpy() + return output + + def npu_op_exec(self,input1, size, stride, storage_offset): + output = torch.as_strided(input1, size, stride, storage_offset) + output = output.cpu().numpy() + return output + + def test_as_strided(self, device): + shape_format = [ + [[np.float32, 0, [3, 3]], (2, 2), (1, 2), 0], + [[np.float16, 0, [13, 23]], (10, 15), (1, 2), 1], + [[np.int32, 0, [5, 5]], (3, 3), (1, 2), 1], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2], item[3]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3]) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestAsStrided, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_atan.py b/test/test_npu/test_network_ops/test_atan.py index 32292bea7bbb09df0d5a97d077451a5ef8a1a698..65451c008735c2c543086f4ed21459fb43114f32 100644 --- a/test/test_npu/test_network_ops/test_atan.py +++ b/test/test_npu/test_network_ops/test_atan.py @@ -1,49 +1,49 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestAtan(TestCase): - def cpu_op_exec(self, input): - output = torch.atan(input) - return output - - def npu_op_exec(self, input): - output = torch.atan(input) - output = output.to("cpu") - return output - - def test_atan_shape_format(self, device): - shape_format = [ - [[np.float32, 0, 1]], - [[np.float32, 0, (64, 10)]], - [[np.float32, 3, (256, 2048, 7, 7)]], - [[np.float32, 4, (32, 1, 3, 3)]], - [[np.float32, 29, (10, 128)]] - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], -1, 1) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestAtan, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestAtan(TestCase): + def cpu_op_exec(self, input): + output = torch.atan(input) + return output + + def npu_op_exec(self, input): + output = torch.atan(input) + output = output.to("cpu") + return output + + def test_atan_shape_format(self, device): + shape_format = [ + [[np.float32, 0, 1]], + [[np.float32, 0, (64, 10)]], + [[np.float32, 3, (256, 2048, 7, 7)]], + [[np.float32, 4, (32, 1, 3, 3)]], + [[np.float32, 29, (10, 128)]] + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], -1, 1) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestAtan, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_avg_pool2d.py b/test/test_npu/test_network_ops/test_avg_pool2d.py index 6042069f8de3840e11a894f355da879a2319098a..ff218cd5dc7d0ba00e81081f76031195da8df5ca 100644 --- a/test/test_npu/test_network_ops/test_avg_pool2d.py +++ b/test/test_npu/test_network_ops/test_avg_pool2d.py @@ -1,63 +1,63 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestAvgPool2d(TestCase): - def cpu_op_exec(self, input, ceil_mode): - m = nn.AvgPool2d(3, stride=(6, 5), padding=0, ceil_mode=ceil_mode) - output = m(input) - output = output.detach().numpy() - return output - - def npu_op_exec(self, input, ceil_mode): - m = nn.AvgPool2d(3, stride=(6, 5), padding=0, ceil_mode=ceil_mode).npu() - output = m(input) - output = output.to("cpu") - output = output.detach().numpy() - return output - - def test_avg_pool2d_backward_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, 0, (1, 3, 147, 147)], True], - [[np.float16, 0, (1, 3, 147, 147)], True] - ] - - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input.float(), item[1]).astype(np.float16) - npu_output = self.npu_op_exec(npu_input, item[1]) - self.assertRtolEqual(cpu_output, npu_output, prec16=0.002) - - def test_avg_pool2d_backward_shape_format_fp32(self, device): - shape_format = [ - [[np.float32, 0, (1, 3, 147, 147)], True], - [[np.float32, 0, (1, 3, 147, 147)], True] - ] - - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input, item[1]) - npu_output = self.npu_op_exec(npu_input, item[1]) - self.assertRtolEqual(cpu_output, npu_output, 0.0009) - -instantiate_device_type_tests(TestAvgPool2d, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestAvgPool2d(TestCase): + def cpu_op_exec(self, input, ceil_mode): + m = nn.AvgPool2d(3, stride=(6, 5), padding=0, ceil_mode=ceil_mode) + output = m(input) + output = output.detach().numpy() + return output + + def npu_op_exec(self, input, ceil_mode): + m = nn.AvgPool2d(3, stride=(6, 5), padding=0, ceil_mode=ceil_mode).npu() + output = m(input) + output = output.to("cpu") + output = output.detach().numpy() + return output + + def test_avg_pool2d_backward_shape_format_fp16(self, device): + shape_format = [ + [[np.float16, 0, (1, 3, 147, 147)], True], + [[np.float16, 0, (1, 3, 147, 147)], True] + ] + + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input.float(), item[1]).astype(np.float16) + npu_output = self.npu_op_exec(npu_input, item[1]) + self.assertRtolEqual(cpu_output, npu_output, prec16=0.002) + + def test_avg_pool2d_backward_shape_format_fp32(self, device): + shape_format = [ + [[np.float32, 0, (1, 3, 147, 147)], True], + [[np.float32, 0, (1, 3, 147, 147)], True] + ] + + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, item[1]) + npu_output = self.npu_op_exec(npu_input, item[1]) + self.assertRtolEqual(cpu_output, npu_output, 0.0009) + +instantiate_device_type_tests(TestAvgPool2d, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_avg_pool2d_backward.py b/test/test_npu/test_network_ops/test_avg_pool2d_backward.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_avg_pool3d.py b/test/test_npu/test_network_ops/test_avg_pool3d.py index cd33a713467660c06b8ad36e458f038296be54b9..fba81c961671794b9ee63c8851f57e1c29a7ac7c 100644 --- a/test/test_npu/test_network_ops/test_avg_pool3d.py +++ b/test/test_npu/test_network_ops/test_avg_pool3d.py @@ -1,76 +1,76 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestAvgPool3D(TestCase): - - def cpu_op_exec(self, kernel_size, stride, input1): - m = torch.nn.AvgPool3d(kernel_size, stride) - output_data = m(input1) - return output_data - - def cpu_op_exec_fp16(self, kernel_size, stride, input1): - m = torch.nn.AvgPool3d(kernel_size, stride) - output_data = m(input1.float()) - return output_data.half() - - def npu_op_exec(self, kernel_size, stride, input1): - m = torch.nn.AvgPool3d(kernel_size, stride).npu() - output_data = m(input1) - return output_data - - def test_avg_pool_3d_fp32(self, device): - # shape_format:[[dtype, (input_shape)], kernel_size, stride] - shape_format = [ - [[np.float32, -1, (20, 16, 50, 44, 31)], (3, 2, 2), (2, 1, 2)], - [[np.float32, -1, (2, 1, 4, 4, 4)], 3, 2], - [[np.float32, -1, (2, 1, 4, 4, 4)], 2, 2], - [[np.float32, -1, (2, 4 , 4, 4)], 2, 2] - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor( item[0], 1, 100 ) - npu_output = self.npu_op_exec(item[1], item[2], npu_input1) - cpu_output = self.cpu_op_exec(item[1], item[2], cpu_input1) - self.assertRtolEqual(cpu_output, npu_output.cpu(), 1.e-3) - - def test_avg_pool_3d_fp16(self, device): - # shape_format:[[dtype, (input_shape)], kernel_size, stride] - shape_format = [ - [[np.float16, -1, (20, 16, 50, 44, 31)], (3, 2, 2), (2, 1, 2)], - [[np.float16, -1, (2, 1, 4, 4, 4)], 3, 2], - [[np.float16, -1, (2, 1, 4, 4, 4)], 2, 2], - [[np.float16, -1, (2, 4 , 4, 4)], 2, 2] - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor( item[0], 1, 100 ) - npu_output = self.npu_op_exec(item[1], item[2], npu_input1) - cpu_output = self.cpu_op_exec_fp16(item[1], item[2], cpu_input1) - self.assertRtolEqual(cpu_output, npu_output.cpu()) - -instantiate_device_type_tests(TestAvgPool3D, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() - +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestAvgPool3D(TestCase): + + def cpu_op_exec(self, kernel_size, stride, input1): + m = torch.nn.AvgPool3d(kernel_size, stride) + output_data = m(input1) + return output_data + + def cpu_op_exec_fp16(self, kernel_size, stride, input1): + m = torch.nn.AvgPool3d(kernel_size, stride) + output_data = m(input1.float()) + return output_data.half() + + def npu_op_exec(self, kernel_size, stride, input1): + m = torch.nn.AvgPool3d(kernel_size, stride).npu() + output_data = m(input1) + return output_data + + def test_avg_pool_3d_fp32(self, device): + # shape_format:[[dtype, (input_shape)], kernel_size, stride] + shape_format = [ + [[np.float32, -1, (20, 16, 50, 44, 31)], (3, 2, 2), (2, 1, 2)], + [[np.float32, -1, (2, 1, 4, 4, 4)], 3, 2], + [[np.float32, -1, (2, 1, 4, 4, 4)], 2, 2], + [[np.float32, -1, (2, 4 , 4, 4)], 2, 2] + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor( item[0], 1, 100 ) + npu_output = self.npu_op_exec(item[1], item[2], npu_input1) + cpu_output = self.cpu_op_exec(item[1], item[2], cpu_input1) + self.assertRtolEqual(cpu_output, npu_output.cpu(), 1.e-3) + + def test_avg_pool_3d_fp16(self, device): + # shape_format:[[dtype, (input_shape)], kernel_size, stride] + shape_format = [ + [[np.float16, -1, (20, 16, 50, 44, 31)], (3, 2, 2), (2, 1, 2)], + [[np.float16, -1, (2, 1, 4, 4, 4)], 3, 2], + [[np.float16, -1, (2, 1, 4, 4, 4)], 2, 2], + [[np.float16, -1, (2, 4 , 4, 4)], 2, 2] + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor( item[0], 1, 100 ) + npu_output = self.npu_op_exec(item[1], item[2], npu_input1) + cpu_output = self.cpu_op_exec_fp16(item[1], item[2], cpu_input1) + self.assertRtolEqual(cpu_output, npu_output.cpu()) + +instantiate_device_type_tests(TestAvgPool3D, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() + diff --git a/test/test_npu/test_network_ops/test_batchnorm_backward_eval.py b/test/test_npu/test_network_ops/test_batchnorm_backward_eval.py index 69b82acc880b9e2368567438f6e05df5bab3b217..81120f2ef927c0ffa28337bcb038d5ec035e653f 100644 --- a/test/test_npu/test_network_ops/test_batchnorm_backward_eval.py +++ b/test/test_npu/test_network_ops/test_batchnorm_backward_eval.py @@ -1,83 +1,83 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import torch.nn as nn - -class Model(nn.Module): - def __init__(self, in_channels): - super(Model, self).__init__() - self.op1 = nn.Conv2d(in_channels, in_channels, 1) - self.op2 = nn.BatchNorm2d(in_channels) - self.op2.running_mean = torch.tensor([i/1000 for i in range(in_channels)]) - self.op2.running_var = torch.tensor([i/1000 for i in range(in_channels)]) - self.op3 = nn.Conv2d(in_channels, in_channels, 1) - - def forward(self, x): - self.op2.eval() - x = self.op1(x) - x = self.op2(x) - x = self.op3(x) - return x - -class TestBn2dEval(TestCase): - def test_batchnorm_backward_eval(self, device): - model = Model(in_channels=256) - cpu_tensor = torch.randn(32,256,14,14) - npu_tensor = cpu_tensor.npu() - cpu_tensor.requires_grad = True - npu_tensor.requires_grad = True - - for i in range(1): - out = model(cpu_tensor) - loss = out.sum() - loss.backward() - cpuout = out - cpu_grad_list = [] - for name, module in model.named_parameters(): - cpu_grad_list.append(module.grad) - module.grad = None - - model = model.npu() - out = model(npu_tensor) - loss = out.sum() - loss.backward() - npuout = out - npu_grad_list = [] - for name, module in model.named_parameters(): - npu_grad_list.append(module.grad.cpu()) - - #print(cpu_tensor.grad, npu_tensor.grad) - cpu_grad = cpu_tensor.grad - npu_grad = npu_tensor.grad - # TODO(ascend): Insufficient precision - #精度未满足 self.assertRtolEqual(cpu_grad.numpy(), npu_grad.cpu().numpy()) - self.assertRtolEqual(cpu_grad.numpy(), npu_grad.cpu().numpy(), 0.01) - - for cpu_grad, npu_grad in zip(cpu_grad_list, npu_grad_list): - #print(cpu_grad, npu_grad) - # TODO(ascend): Insufficient precision - #精度未满足 self.assertRtolEqual(cpu_grad.numpy(), npu_grad.numpy()) - self.assertRtolEqual(cpu_grad.numpy(), npu_grad.numpy(), 0.1) - -instantiate_device_type_tests(TestBn2dEval, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +import torch.nn as nn + +class Model(nn.Module): + def __init__(self, in_channels): + super(Model, self).__init__() + self.op1 = nn.Conv2d(in_channels, in_channels, 1) + self.op2 = nn.BatchNorm2d(in_channels) + self.op2.running_mean = torch.tensor([i/1000 for i in range(in_channels)]) + self.op2.running_var = torch.tensor([i/1000 for i in range(in_channels)]) + self.op3 = nn.Conv2d(in_channels, in_channels, 1) + + def forward(self, x): + self.op2.eval() + x = self.op1(x) + x = self.op2(x) + x = self.op3(x) + return x + +class TestBn2dEval(TestCase): + def test_batchnorm_backward_eval(self, device): + model = Model(in_channels=256) + cpu_tensor = torch.randn(32,256,14,14) + npu_tensor = cpu_tensor.npu() + cpu_tensor.requires_grad = True + npu_tensor.requires_grad = True + + for i in range(1): + out = model(cpu_tensor) + loss = out.sum() + loss.backward() + cpuout = out + cpu_grad_list = [] + for name, module in model.named_parameters(): + cpu_grad_list.append(module.grad) + module.grad = None + + model = model.npu() + out = model(npu_tensor) + loss = out.sum() + loss.backward() + npuout = out + npu_grad_list = [] + for name, module in model.named_parameters(): + npu_grad_list.append(module.grad.cpu()) + + #print(cpu_tensor.grad, npu_tensor.grad) + cpu_grad = cpu_tensor.grad + npu_grad = npu_tensor.grad + # TODO(ascend): Insufficient precision + #精度未满足 self.assertRtolEqual(cpu_grad.numpy(), npu_grad.cpu().numpy()) + self.assertRtolEqual(cpu_grad.numpy(), npu_grad.cpu().numpy(), 0.01) + + for cpu_grad, npu_grad in zip(cpu_grad_list, npu_grad_list): + #print(cpu_grad, npu_grad) + # TODO(ascend): Insufficient precision + #精度未满足 self.assertRtolEqual(cpu_grad.numpy(), npu_grad.numpy()) + self.assertRtolEqual(cpu_grad.numpy(), npu_grad.numpy(), 0.1) + +instantiate_device_type_tests(TestBn2dEval, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_bernoulli.py b/test/test_npu/test_network_ops/test_bernoulli.py index b161b2696a93e299d41cb792038db6e3a2326961..05c70b03ba3a331369320d57b9aba6839610334c 100644 --- a/test/test_npu/test_network_ops/test_bernoulli.py +++ b/test/test_npu/test_network_ops/test_bernoulli.py @@ -1,116 +1,116 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestBernoulli(TestCase): - def cpu_op_exec(self, input): - output = torch.bernoulli(input) - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = torch.bernoulli(input) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_tensor_exec(self, input, p): - output = input.bernoulli_(p) - output = output.numpy() - return output - - def npu_op_inplace_tensor_exec(self, input, p): - output = input.bernoulli_(p) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_float_exec(self, input): - output = input.bernoulli_(0.5) - output = output.numpy() - return output - - def npu_op_inplace_float_exec(self, input): - output = input.bernoulli_(0.5) - output = output.to("cpu") - output = output.numpy() - return output - - def test_bernoulli_float32(self, device): - format_list = [0, 3] - shape_list = [(2, 3, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 1) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - print(cpu_output, npu_output) - #self.assertEqual(cpu_output, npu_output) - #生成随机值,无法对比cpu值 - - def test_bernoulli_float16(self, device): - format_list = [0, 3] - shape_list = [(2, 3, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 1) - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - cpu_output = cpu_output.astype(np.float16) - print(cpu_output, npu_output) - #self.assertEqual(cpu_output, npu_output) - - def test_bernoulli_tensor_p(self, device): - format_list = [0, 3] - shape_list = [(2, 3, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 1) - cpu_input_p, npu_input_p = create_common_tensor(item, 0, 1) - cpu_output = self.cpu_op_inplace_tensor_exec(cpu_input, cpu_input_p) - npu_output = self.npu_op_inplace_tensor_exec(npu_input, npu_input_p) - print(cpu_output, npu_output) - #self.assertEqual(cpu_output, npu_output) - - def test_bernoulli_float_p(self, device): - format_list = [0, 3] - shape_list = [(2, 3, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 1) - cpu_output = self.cpu_op_inplace_float_exec(cpu_input) - npu_output = self.npu_op_inplace_float_exec(npu_input) - print(cpu_output, npu_output) - #self.assertEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestBernoulli, globals(), except_for="cpu") -if __name__ == '__main__': - run_tests() - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestBernoulli(TestCase): + def cpu_op_exec(self, input): + output = torch.bernoulli(input) + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = torch.bernoulli(input) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_tensor_exec(self, input, p): + output = input.bernoulli_(p) + output = output.numpy() + return output + + def npu_op_inplace_tensor_exec(self, input, p): + output = input.bernoulli_(p) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_float_exec(self, input): + output = input.bernoulli_(0.5) + output = output.numpy() + return output + + def npu_op_inplace_float_exec(self, input): + output = input.bernoulli_(0.5) + output = output.to("cpu") + output = output.numpy() + return output + + def test_bernoulli_float32(self, device): + format_list = [0, 3] + shape_list = [(2, 3, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 1) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + print(cpu_output, npu_output) + #self.assertEqual(cpu_output, npu_output) + #生成随机值,无法对比cpu值 + + def test_bernoulli_float16(self, device): + format_list = [0, 3] + shape_list = [(2, 3, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 1) + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + cpu_output = cpu_output.astype(np.float16) + print(cpu_output, npu_output) + #self.assertEqual(cpu_output, npu_output) + + def test_bernoulli_tensor_p(self, device): + format_list = [0, 3] + shape_list = [(2, 3, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 1) + cpu_input_p, npu_input_p = create_common_tensor(item, 0, 1) + cpu_output = self.cpu_op_inplace_tensor_exec(cpu_input, cpu_input_p) + npu_output = self.npu_op_inplace_tensor_exec(npu_input, npu_input_p) + print(cpu_output, npu_output) + #self.assertEqual(cpu_output, npu_output) + + def test_bernoulli_float_p(self, device): + format_list = [0, 3] + shape_list = [(2, 3, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 1) + cpu_output = self.cpu_op_inplace_float_exec(cpu_input) + npu_output = self.npu_op_inplace_float_exec(npu_input) + print(cpu_output, npu_output) + #self.assertEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestBernoulli, globals(), except_for="cpu") +if __name__ == '__main__': + run_tests() + diff --git a/test/test_npu/test_bilinear.py b/test/test_npu/test_network_ops/test_bilinear.py similarity index 74% rename from test/test_npu/test_bilinear.py rename to test/test_npu/test_network_ops/test_bilinear.py index dbb919e5a7466f0848326adbb34125a04fc0b34e..4bfdb837f9827f178042539e0e161167a0a81859 100644 --- a/test/test_npu/test_bilinear.py +++ b/test/test_npu/test_network_ops/test_bilinear.py @@ -33,7 +33,7 @@ class test_bilinear(TestCase): outputs = outputs.cpu().detach().numpy() return outputs - def test_add_common_shape_format1(self, device): + def test_bilinear_common_shape_format1(self, device): shape_format = [ [[np.float32, -1, (10,30)], [np.float32, -1, (10, 40)], [np.float32, -1, (5, 30, 40)], [np.float32, -1, (5,)]], @@ -43,12 +43,12 @@ class test_bilinear(TestCase): [[np.float32, -1, (10, 30, 40, 30)], [np.float32, -1, (10, 30, 40, 30)], [np.float32, -1, (30, 30, 30)], [np.float32, -1, (30,)]], - [[np.float32, -1, (100,3)], [np.float32, -1, (1000, 4)], [np.float32, -1, (5, 3, 4)], + [[np.float32, -1, (100,3)], [np.float32, -1, (100, 4)], [np.float32, -1, (5, 3, 4)], [np.float32, -1, (5,)]], [[np.float16, -1, (2, 1, 1, 1)], [np.float16, -1, (2, 1, 1, 1)], [np.float16, -1, (5, 1, 1)], [np.float16, -1, (5,)]], [[np.float16, -1, (2, 50)], [np.float16, -1, (2, 50)], [np.float16, -1, (5, 50, 50)], - [np.float16, -1, (2, 4)]], + [np.float16, -1, (5)]], [[np.float16, -1, (2, 3)], [np.float16, -1, (2, 4)], [np.float16, -1, (2, 3, 4)],], [[np.float16, -1, (2, 3)], [np.float16, -1, (2, 4)], [np.float16, -1, (4, 3, 4)], [np.float16, -1, (4,)]], @@ -61,11 +61,19 @@ class test_bilinear(TestCase): if len(item)>3: cpu_input4, npu_input4 = create_common_tensor(item[3], 0, 1) bias = [cpu_input4, npu_input4] - cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) + if cpu_input1.dtype == torch.float16: + if bias[0] != None: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0].float()).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0]).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) npu_outputs = self.npu_op_exec(npu_input1, npu_input2, npu_input3, bias[1]) self.assertRtolEqual(cpu_outputs, npu_outputs) - def test_add_common_shape_format2(self, device): + def test_bilinear_common_shape_format2(self, device): shape_format = [ [[np.int32, -1, (10,30)], [np.int32, -1, (10, 40)], [np.int32, -1, (5, 30, 40)], [np.int32, -1, (5,)]], @@ -87,7 +95,7 @@ class test_bilinear(TestCase): npu_outputs = self.npu_op_exec(npu_input1, npu_input2, npu_input3, bias[1]) self.assertRtolEqual(cpu_outputs, npu_outputs) - def test_add_common_shape_format3(self, device): + def test_bilinear_common_shape_format3(self, device): shape_format = [ [[np.float32, 0, (10,30)], [np.float32, 0, (10, 40)], [np.float32, 0, (5, 30, 40)], [np.float32, 0, (5,)]], @@ -97,12 +105,12 @@ class test_bilinear(TestCase): [[np.float32, 0, (10, 30, 40, 30)], [np.float32, 0, (10, 30, 40, 30)], [np.float32, 0, (30, 30, 30)], [np.float32, 0, (30,)]], - [[np.float32, 0, (100,3)], [np.float32, 0, (1000, 4)], [np.float32, 0, (5, 3, 4)], + [[np.float32, 0, (100,3)], [np.float32, 0, (100, 4)], [np.float32, 0, (5, 3, 4)], [np.float32, 0, (5,)]], [[np.float16, 0, (2, 1, 1, 1)], [np.float16, 0, (2, 1, 1, 1)], [np.float16, 0, (5, 1, 1)], [np.float16, 0, (5,)]], [[np.float16, 0, (2, 50)], [np.float16, 0, (2, 50)], [np.float16, 0, (5, 50, 50)], - [np.float16, 0, (2, 4)]], + [np.float16, 0, (5)]], [[np.float16, 0, (2, 3)], [np.float16, 0, (2, 4)], [np.float16, 0, (2, 3, 4)],], [[np.float16, 0, (2, 3)], [np.float16, 0, (2, 4)], [np.float16, 0, (4, 3, 4)], [np.float16, 0, (4,)]], @@ -115,11 +123,19 @@ class test_bilinear(TestCase): if len(item)>3: cpu_input4, npu_input4 = create_common_tensor(item[3], 0, 1) bias = [cpu_input4, npu_input4] - cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) + if cpu_input1.dtype == torch.float16: + if bias[0] != None: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0].float()).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0]).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) npu_outputs = self.npu_op_exec(npu_input1, npu_input2, npu_input3, bias[1]) self.assertRtolEqual(cpu_outputs, npu_outputs) - def test_add_common_shape_format4(self, device): + def test_bilinear_common_shape_format4(self, device): shape_format = [ [[np.float32, 3, (10,30)], [np.float32, 3, (10, 40)], [np.float32, 3, (5, 30, 40)], [np.float32, 3, (5,)]], @@ -129,15 +145,15 @@ class test_bilinear(TestCase): [[np.float32, 3, (10, 30, 40, 30)], [np.float32, 3, (10, 30, 40, 30)], [np.float32, 3, (30, 30, 30)], [np.float32, 3, (30,)]], - [[np.float32, 29, (100,3)], [np.float32, 29, (1000, 4)], [np.float32, 29, (5, 3, 4)], - [np.float32, 29, (5,)]], - [[np.float16, 29, (2, 1, 1, 1)], [np.float16, 29, (2, 1, 1, 1)], [np.float16, 29, (5, 1, 1)], - [np.float16, 29, (5,)]], - [[np.float16, 29, (2, 50)], [np.float16, 29, (2, 50)], [np.float16, 29, (5, 50, 50)], - [np.float16, 29, (2, 4)]], - [[np.float16, 29, (2, 3)], [np.float16, 29, (2, 4)], [np.float16, 29, (2, 3, 4)],], - [[np.float16, 29, (2, 3)], [np.float16, 29, (2, 4)], [np.float16, 29, (4, 3, 4)], - [np.float16, 29, (4,)]], + [[np.float32, 2, (100,3)], [np.float32, 2, (100, 4)], [np.float32, 2, (5, 3, 4)], + [np.float32, 2, (5,)]], + [[np.float16, 2, (2, 1, 1, 1)], [np.float16, 2, (2, 1, 1, 1)], [np.float16, 2, (5, 1, 1)], + [np.float16, 2, (5,)]], + [[np.float16, 2, (2, 50)], [np.float16, 2, (2, 50)], [np.float16, 2, (5, 50, 50)], + [np.float16, 2, (5)]], + [[np.float16, 2, (2, 3)], [np.float16, 2, (2, 4)], [np.float16, 2, (2, 3, 4)],], + [[np.float16, 2, (2, 3)], [np.float16, 2, (2, 4)], [np.float16, 2, (4, 3, 4)], + [np.float16, 2, (4,)]], ] for item in shape_format: bias = [None, None] @@ -147,11 +163,18 @@ class test_bilinear(TestCase): if len(item)>3: cpu_input4, npu_input4 = create_common_tensor(item[3], 0, 1) bias = [cpu_input4, npu_input4] - cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) + if cpu_input1.dtype == torch.float16: + if bias[0] != None: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0].float()).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0]).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) npu_outputs = self.npu_op_exec(npu_input1, npu_input2, npu_input3, bias[1]) self.assertRtolEqual(cpu_outputs, npu_outputs) instantiate_device_type_tests(test_bilinear, globals(), except_for='cpu') if __name__ == "__main__": - torch.npu.set_device("npu:5") run_tests() diff --git a/test/test_npu/test_network_ops/test_binary_cross_entropy_with_logits_backward.py b/test/test_npu/test_network_ops/test_binary_cross_entropy_with_logits_backward.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_bitwise_not.py b/test/test_npu/test_network_ops/test_bitwise_not.py index a10801063b1b47b54932dac4080ed43f9c79a6a5..0f6baa7c47c16a8b1e44e75613a34df79d7223b1 100644 --- a/test/test_npu/test_network_ops/test_bitwise_not.py +++ b/test/test_npu/test_network_ops/test_bitwise_not.py @@ -1,104 +1,104 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class Test_Bitwise_Not(TestCase): - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def generate_bool_data(self, shape): - input1 = np.random.randint(0, 2, shape).astype(np.bool_) - npu_input1 = torch.from_numpy(input1) - return npu_input1 - - def cpu_op_exec(self, input1): - output = torch.bitwise_not(input1) - if output.dtype not in [torch.int32, torch.int8, torch.bool]: - output = output.to(torch.int32) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - input1 = input1.to("npu") - output = torch.bitwise_not(input1) - output = output.to("cpu") - if output.dtype not in [torch.int32, torch.int8, torch.bool]: - output = output.to(torch.int32) - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - torch.bitwise_not(input1, out = input2) - output = input2.to("cpu") - if output.dtype not in [torch.int32, torch.int8, torch.bool]: - output = output.to(torch.int32) - output = output.numpy() - return output - - def test_bitwise_not_bool(self, device): - npu_input1 = self.generate_bool_data((2, 3)) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_not_int16(self, device): - npu_input1 = self.generate_data(0, 2342, (2, 3), np.int16) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_not_int32(self, device): - npu_input1 = self.generate_data(0, 34222, (2, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_not_int64(self, device): - npu_input1 = self.generate_data(0, 355553, (2, 3), np.int64) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_not_out(self, device): - shape_format = [ - [[0, 2342, [2, 3], np.int16], [0, 2342, [10, 20], np.int16]], - [[0, 34222, [2, 3], np.int32], [0, 34222, [10, 20], np.int32]], - [[0, 355553, [2, 3], np.int64], [0, 355553, [1, 1], np.int64]], - ] - for item in shape_format: - npu_input1 = self.generate_data(item[0][0], item[0][1], item[0][2], item[0][3]) - npu_input2 = self.generate_data(item[1][0], item[1][1], item[1][2], item[1][3]) - cpu_output = self.cpu_op_exec(npu_input1) - npu_output1 = self.npu_op_exec_out(npu_input1, npu_input1) - npu_output2 = self.npu_op_exec_out(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output1) - self.assertRtolEqual(cpu_output, npu_output1) - - -instantiate_device_type_tests(Test_Bitwise_Not, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class Test_Bitwise_Not(TestCase): + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def generate_bool_data(self, shape): + input1 = np.random.randint(0, 2, shape).astype(np.bool_) + npu_input1 = torch.from_numpy(input1) + return npu_input1 + + def cpu_op_exec(self, input1): + output = torch.bitwise_not(input1) + if output.dtype not in [torch.int32, torch.int8, torch.bool]: + output = output.to(torch.int32) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + input1 = input1.to("npu") + output = torch.bitwise_not(input1) + output = output.to("cpu") + if output.dtype not in [torch.int32, torch.int8, torch.bool]: + output = output.to(torch.int32) + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + torch.bitwise_not(input1, out = input2) + output = input2.to("cpu") + if output.dtype not in [torch.int32, torch.int8, torch.bool]: + output = output.to(torch.int32) + output = output.numpy() + return output + + def test_bitwise_not_bool(self, device): + npu_input1 = self.generate_bool_data((2, 3)) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_not_int16(self, device): + npu_input1 = self.generate_data(0, 2342, (2, 3), np.int16) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_not_int32(self, device): + npu_input1 = self.generate_data(0, 34222, (2, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_not_int64(self, device): + npu_input1 = self.generate_data(0, 355553, (2, 3), np.int64) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_not_out(self, device): + shape_format = [ + [[0, 2342, [2, 3], np.int16], [0, 2342, [10, 20], np.int16]], + [[0, 34222, [2, 3], np.int32], [0, 34222, [10, 20], np.int32]], + [[0, 355553, [2, 3], np.int64], [0, 355553, [1, 1], np.int64]], + ] + for item in shape_format: + npu_input1 = self.generate_data(item[0][0], item[0][1], item[0][2], item[0][3]) + npu_input2 = self.generate_data(item[1][0], item[1][1], item[1][2], item[1][3]) + cpu_output = self.cpu_op_exec(npu_input1) + npu_output1 = self.npu_op_exec_out(npu_input1, npu_input1) + npu_output2 = self.npu_op_exec_out(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output1) + self.assertRtolEqual(cpu_output, npu_output1) + + +instantiate_device_type_tests(Test_Bitwise_Not, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_bitwise_xor.py b/test/test_npu/test_network_ops/test_bitwise_xor.py index a23603b7b843cb11a99579993e200710f7f70f18..6d90d7d44b6d7c0379b40d98b2f749f811947456 100644 --- a/test/test_npu/test_network_ops/test_bitwise_xor.py +++ b/test/test_npu/test_network_ops/test_bitwise_xor.py @@ -1,202 +1,202 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import random - -class TestBitwiseXor(TestCase): - def generate_data(self, min, max, shape_x, shape_y, dtype): - input1 = np.random.randint(min, max, shape_x).astype(dtype) - input2 = np.random.randint(min, max, shape_y).astype(dtype) - - #can't convert np.uint16 to pytoch tensor, so convert np.uint16 to np.int32 first - if input1.dtype == np.uint16: - input1 = input1.astype(np.int32) - input2 = input2.astype(np.int32) - - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - return npu_input1, npu_input2 - - def cpu_op_exec(self, input1, input2): - output = torch.bitwise_xor(input1, input2) - if output.dtype not in [torch.int32, torch.bool]: - output = output.to(torch.int32) - output = output.numpy() - return output - - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.bitwise_xor(input1, input2) - output = output.to("cpu") - if output.dtype not in [torch.int32, torch.bool]: - output = output.to(torch.int32) - output = output.numpy() - return output - - def cpu_op_exec_scalar(self, input1, scalar): - output = torch.bitwise_xor(input1, scalar) - if output.dtype not in [torch.int32, torch.bool]: - output = output.to(torch.int32) - output = output.numpy() - return output - - def npu_op_exec_scalar(self, input1, input2): - input1 = input1.to("npu") - output = torch.bitwise_xor(input1, input2) - output = output.to("cpu") - if output.dtype not in [torch.int32, torch.bool]: - output = output.to(torch.int32) - output = output.numpy() - return output - - def npu_op_exec_scalar_out(self, input1, scalar, output): - input1 = input1.to("npu") - output = output.to("npu") - output = torch.bitwise_xor(input1, scalar, out = output) - output = output.to("cpu") - if output.dtype not in [torch.int32, torch.bool]: - output = output.to(torch.int32) - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, input3): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = input3.to("npu") - torch.bitwise_xor(input1, input2, out=output) - output = output.to("cpu") - if output.dtype not in [torch.int32, torch.bool]: - output = output.to(torch.int32) - output = output.numpy() - return output - - def bitwise_xor_tensor_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) - cpu_input3, npu_input3 = create_common_tensor(item[1], 0, 100) - cpu_output_out = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_bitwise_xor_tensor_out(self, device): - shape_format = [ - [[np.int16, 0, [128, 3, 224, 224]], [np.int16, 0, [3, 3, 3]]], - [[np.int16, 0, [128, 116, 14, 14]], [np.int16, 0, [128, 116, 14, 14]]], - [[np.int32, 0, [256, 128, 7, 7]], [np.int32, 0, [128, 256, 3, 3]]], - [[np.int32, 0, [2, 3, 3, 3]], [np.int32, 0, [3, 1, 3]]], - [[np.int32, 0, [128, 232, 7, 7]], [np.int32, 0, [128, 232, 7, 7]]], - ] - self.bitwise_xor_tensor_out_result(shape_format) - - def bitwise_xor_scalar_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - scalar = np.random.randint(1, 5) - cpu_output_out = self.cpu_op_exec_scalar(cpu_input1, scalar) - npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_bitwise_xor_scalar_out(self, device): - shape_format = [ - [[np.int16, 0, [16, 3, 1111, 1212]], [np.int16, 0, [3, 3, 3]]], - [[np.int16, 0, [128, 116, 14, 14]], [np.int16, 0, [128, 116, 14, 14]]], - [[np.int32, 0, [1313, 3, 3, 3]], [np.int32, 0, [3, 1, 3]]], - [[np.int32, 0, [128, 232, 7, 7]], [np.int32, 0, [128, 232, 7, 7]]], - ] - self.bitwise_xor_scalar_out_result(shape_format) - - def test_bitwise_xor_int16_3d(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), (3, 3, 3), np.int16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float32) - npu_output = npu_output.astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_xor_int16_1_1(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), (1, 1), np.int16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float32) - npu_output = npu_output.astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_xor_int16_1(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), 1, np.int16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float32) - npu_output = npu_output.astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_xor_int16(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), (), np.int16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float32) - npu_output = npu_output.astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_xor_int32(self, device): - npu_input1, npu_input2 = self.generate_data(0, 2, (1, 3), (1, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, True) - npu_output = self.npu_op_exec_scalar(npu_input1, True) - cpu_output = cpu_output.astype(np.float32) - npu_output = npu_output.astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_xor_bool(self, device): - npu_input1, npu_input2 = self.generate_data(0, 2, (1, 3), (1, 3), np.bool) - cpu_output = self.cpu_op_exec(npu_input1, True) - npu_output = self.npu_op_exec_scalar(npu_input1, True) - cpu_output = cpu_output.astype(np.float32) - npu_output = npu_output.astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_xor_uint16(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), (3, 3, 3), np.uint16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float32) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - npu_output = npu_output.astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_bitwise_xor_mix_dtype(self, device): - npu_input1, npu_input3 = self.generate_data(0, 100, (3, 3, 3), (), np.uint16) - npu_input2, npu_input4 = self.generate_data(0, 100, (3, 3, 3), (), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - - -instantiate_device_type_tests(TestBitwiseXor, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +import random + +class TestBitwiseXor(TestCase): + def generate_data(self, min, max, shape_x, shape_y, dtype): + input1 = np.random.randint(min, max, shape_x).astype(dtype) + input2 = np.random.randint(min, max, shape_y).astype(dtype) + + #can't convert np.uint16 to pytoch tensor, so convert np.uint16 to np.int32 first + if input1.dtype == np.uint16: + input1 = input1.astype(np.int32) + input2 = input2.astype(np.int32) + + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + return npu_input1, npu_input2 + + def cpu_op_exec(self, input1, input2): + output = torch.bitwise_xor(input1, input2) + if output.dtype not in [torch.int32, torch.bool]: + output = output.to(torch.int32) + output = output.numpy() + return output + + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.bitwise_xor(input1, input2) + output = output.to("cpu") + if output.dtype not in [torch.int32, torch.bool]: + output = output.to(torch.int32) + output = output.numpy() + return output + + def cpu_op_exec_scalar(self, input1, scalar): + output = torch.bitwise_xor(input1, scalar) + if output.dtype not in [torch.int32, torch.bool]: + output = output.to(torch.int32) + output = output.numpy() + return output + + def npu_op_exec_scalar(self, input1, input2): + input1 = input1.to("npu") + output = torch.bitwise_xor(input1, input2) + output = output.to("cpu") + if output.dtype not in [torch.int32, torch.bool]: + output = output.to(torch.int32) + output = output.numpy() + return output + + def npu_op_exec_scalar_out(self, input1, scalar, output): + input1 = input1.to("npu") + output = output.to("npu") + output = torch.bitwise_xor(input1, scalar, out = output) + output = output.to("cpu") + if output.dtype not in [torch.int32, torch.bool]: + output = output.to(torch.int32) + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, input3): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = input3.to("npu") + torch.bitwise_xor(input1, input2, out=output) + output = output.to("cpu") + if output.dtype not in [torch.int32, torch.bool]: + output = output.to(torch.int32) + output = output.numpy() + return output + + def bitwise_xor_tensor_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) + cpu_input3, npu_input3 = create_common_tensor(item[1], 0, 100) + cpu_output_out = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_bitwise_xor_tensor_out(self, device): + shape_format = [ + [[np.int16, 0, [128, 3, 224, 224]], [np.int16, 0, [3, 3, 3]]], + [[np.int16, 0, [128, 116, 14, 14]], [np.int16, 0, [128, 116, 14, 14]]], + [[np.int32, 0, [256, 128, 7, 7]], [np.int32, 0, [128, 256, 3, 3]]], + [[np.int32, 0, [2, 3, 3, 3]], [np.int32, 0, [3, 1, 3]]], + [[np.int32, 0, [128, 232, 7, 7]], [np.int32, 0, [128, 232, 7, 7]]], + ] + self.bitwise_xor_tensor_out_result(shape_format) + + def bitwise_xor_scalar_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + scalar = np.random.randint(1, 5) + cpu_output_out = self.cpu_op_exec_scalar(cpu_input1, scalar) + npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_bitwise_xor_scalar_out(self, device): + shape_format = [ + [[np.int16, 0, [16, 3, 1111, 1212]], [np.int16, 0, [3, 3, 3]]], + [[np.int16, 0, [128, 116, 14, 14]], [np.int16, 0, [128, 116, 14, 14]]], + [[np.int32, 0, [1313, 3, 3, 3]], [np.int32, 0, [3, 1, 3]]], + [[np.int32, 0, [128, 232, 7, 7]], [np.int32, 0, [128, 232, 7, 7]]], + ] + self.bitwise_xor_scalar_out_result(shape_format) + + def test_bitwise_xor_int16_3d(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), (3, 3, 3), np.int16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float32) + npu_output = npu_output.astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_xor_int16_1_1(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), (1, 1), np.int16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float32) + npu_output = npu_output.astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_xor_int16_1(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), 1, np.int16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float32) + npu_output = npu_output.astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_xor_int16(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), (), np.int16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float32) + npu_output = npu_output.astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_xor_int32(self, device): + npu_input1, npu_input2 = self.generate_data(0, 2, (1, 3), (1, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, True) + npu_output = self.npu_op_exec_scalar(npu_input1, True) + cpu_output = cpu_output.astype(np.float32) + npu_output = npu_output.astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_xor_bool(self, device): + npu_input1, npu_input2 = self.generate_data(0, 2, (1, 3), (1, 3), np.bool) + cpu_output = self.cpu_op_exec(npu_input1, True) + npu_output = self.npu_op_exec_scalar(npu_input1, True) + cpu_output = cpu_output.astype(np.float32) + npu_output = npu_output.astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_xor_uint16(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3, 3), (3, 3, 3), np.uint16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float32) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + npu_output = npu_output.astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_bitwise_xor_mix_dtype(self, device): + npu_input1, npu_input3 = self.generate_data(0, 100, (3, 3, 3), (), np.uint16) + npu_input2, npu_input4 = self.generate_data(0, 100, (3, 3, 3), (), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + + +instantiate_device_type_tests(TestBitwiseXor, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() + diff --git a/test/test_npu/test_network_ops/test_bmm.py b/test/test_npu/test_network_ops/test_bmm.py old mode 100644 new mode 100755 index 68eccbf83b39104d43b4ded2535476b2d198fd24..62cd0be0ef9ff88fe987e7034a9f19924832649f --- a/test/test_npu/test_network_ops/test_bmm.py +++ b/test/test_npu/test_network_ops/test_bmm.py @@ -1,72 +1,72 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestBatchMatMul(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.bmm(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.bmm(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def bmm_auto_list_exec(self, shape): - for item in shape: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 10) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 10) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_batchmatmul_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_list = [(1, 3, 2)] - shape_format1 = [[np.float16, i, j] - for i in format_list for j in shape_list] - format_list = [0, 3, 29] - shape_list = [(1, 2, 3)] - shape_format2 = [[np.float16, i, j] - for i in format_list for j in shape_list] - shape_format = [[i, j] for i in shape_format1 for j in shape_format2] - self.bmm_auto_list_exec(shape_format) - - def test_batchmatmul_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_list = [(1, 3, 2)] - shape_format1 = [[np.float32, i, j] - for i in format_list for j in shape_list] - format_list = [0, 3, 29] - shape_list = [(1, 2, 3)] - shape_format2 = [[np.float32, i, j] - for i in format_list for j in shape_list] - shape_format = [[i, j] for i in shape_format1 for j in shape_format2] - self.bmm_auto_list_exec(shape_format) - -instantiate_device_type_tests(TestBatchMatMul, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestBatchMatMul(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.bmm(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.bmm(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def bmm_auto_list_exec(self, shape): + for item in shape: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 10) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 10) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_batchmatmul_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_list = [(1, 3, 2)] + shape_format1 = [[np.float16, i, j] + for i in format_list for j in shape_list] + format_list = [0, 3, 29] + shape_list = [(1, 2, 3)] + shape_format2 = [[np.float16, i, j] + for i in format_list for j in shape_list] + shape_format = [[i, j] for i in shape_format1 for j in shape_format2] + self.bmm_auto_list_exec(shape_format) + + def test_batchmatmul_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_list = [(1, 3, 2)] + shape_format1 = [[np.float32, i, j] + for i in format_list for j in shape_list] + format_list = [0, 3, 29] + shape_list = [(1, 2, 3)] + shape_format2 = [[np.float32, i, j] + for i in format_list for j in shape_list] + shape_format = [[i, j] for i in shape_format1 for j in shape_format2] + self.bmm_auto_list_exec(shape_format) + +instantiate_device_type_tests(TestBatchMatMul, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_broadcastToD.py b/test/test_npu/test_network_ops/test_broadcastToD.py old mode 100644 new mode 100755 index 3a7ff68abd1c5954077359894ed5dc4e3820d1f1..7145d4e1a89a0eb0c67811e7a4f8d0028bab00f2 --- a/test/test_npu/test_network_ops/test_broadcastToD.py +++ b/test/test_npu/test_network_ops/test_broadcastToD.py @@ -1,40 +1,40 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests - -class TestBroadCastToD(TestCase): - @dtypes(torch.float, torch.float16, torch.int32, torch.int8, torch.uint8, torch.bool) - def test_broadcast(self, device, dtype): - shapes = [ - [[1], [5]], - [[ 1, 2], [3, 2]], - [[1, 2, 1], [1, 2, 3]], - ] - for item in shapes: - input1 = torch.randn(item[0]).to(dtype).npu() - output = input1.npu_broadcast(item[1]) - size1 = np.array(output.size(), dtype=np.int32) - size2 = np.array(item[1], dtype=np.int32) - self.assertRtolEqual(size1, size2) - - -instantiate_device_type_tests(TestBroadCastToD, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests + +class TestBroadCastToD(TestCase): + @dtypes(torch.float, torch.float16, torch.int32, torch.int8, torch.uint8, torch.bool) + def test_broadcast(self, device, dtype): + shapes = [ + [[1], [5]], + [[ 1, 2], [3, 2]], + [[1, 2, 1], [1, 2, 3]], + ] + for item in shapes: + input1 = torch.randn(item[0]).to(dtype).npu() + output = input1.npu_broadcast(item[1]) + size1 = np.array(output.size(), dtype=np.int32) + size2 = np.array(item[1], dtype=np.int32) + self.assertRtolEqual(size1, size2) + + +instantiate_device_type_tests(TestBroadCastToD, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_broadcast_tensors.py b/test/test_npu/test_network_ops/test_broadcast_tensors.py index dd24481b3fa8e7175ab6dac1f8a17b1da25859d7..b83d1c5417ce81f8a37241d73771bb25ddd3f027 100644 --- a/test/test_npu/test_network_ops/test_broadcast_tensors.py +++ b/test/test_npu/test_network_ops/test_broadcast_tensors.py @@ -1,54 +1,54 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestBroadCastTensors(TestCase): - - def cpu_op_exec(self, input1, input2): - output1, output2 = torch.broadcast_tensors(input1,input2); - return output1.numpy(), output2.numpy() - - def npu_op_exec(self, input1, input2): - input1 =input1.npu() - input2 =input2.npu() - output1, output2 = torch.broadcast_tensors(input1,input2); - return output1.cpu().numpy(), output2.cpu().numpy() - - def test_broadcast_tensors_common_shape_format(self, device): - shape_format = [ - [[1, 3], (2, 1), torch.float32], - [[1, 9], (5, 1), torch.float32], - [[3, 1], (1, 3), torch.float32], - ] - for item in shape_format: - cpu_input1 = torch.randn(item[0], dtype=item[2]) - cpu_input2 = torch.randn(item[1], dtype=item[2]) - cpu_output1, cpu_output2 = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output1, npu_output2 = self.npu_op_exec(cpu_input1, cpu_input2) - self.assertRtolEqual(cpu_output1, npu_output1) - self.assertRtolEqual(cpu_output2, npu_output2) - - -instantiate_device_type_tests(TestBroadCastTensors, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestBroadCastTensors(TestCase): + + def cpu_op_exec(self, input1, input2): + output1, output2 = torch.broadcast_tensors(input1,input2); + return output1.numpy(), output2.numpy() + + def npu_op_exec(self, input1, input2): + input1 =input1.npu() + input2 =input2.npu() + output1, output2 = torch.broadcast_tensors(input1,input2); + return output1.cpu().numpy(), output2.cpu().numpy() + + def test_broadcast_tensors_common_shape_format(self, device): + shape_format = [ + [[1, 3], (2, 1), torch.float32], + [[1, 9], (5, 1), torch.float32], + [[3, 1], (1, 3), torch.float32], + ] + for item in shape_format: + cpu_input1 = torch.randn(item[0], dtype=item[2]) + cpu_input2 = torch.randn(item[1], dtype=item[2]) + cpu_output1, cpu_output2 = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output1, npu_output2 = self.npu_op_exec(cpu_input1, cpu_input2) + self.assertRtolEqual(cpu_output1, npu_output1) + self.assertRtolEqual(cpu_output2, npu_output2) + + +instantiate_device_type_tests(TestBroadCastTensors, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_cat.py b/test/test_npu/test_network_ops/test_cat.py old mode 100644 new mode 100755 index 200ec9d8aa506ba2a5e96c8387c2e620b8b12c52..5e3acfbe8f9c44c3a33025b14f21ebcf144df660 --- a/test/test_npu/test_network_ops/test_cat.py +++ b/test/test_npu/test_network_ops/test_cat.py @@ -1,158 +1,158 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestCat(TestCase): - def cpu_op_exec(self, input1, input2, n): - output = torch.cat(input1 + input2, n) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2, n): - output = torch.cat(input1 + input2, n) - output = output.to("cpu") - output = output.numpy() - return output - - def test_cat_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_list = [(256, 32, 56)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) - npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_cat_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_list = [(256, 32, 56)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) - npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_cat_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_list = [(256, 32, 56, 56)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) - npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_cat_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_list = [(256, 32, 56, 56)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) - npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_cat_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_list = [(56, 56)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) - npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_cat_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_list = [(56, 56)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) - npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_cat_null_tensor(self, device): - x1 = torch.randn(15, 2, 1, 1) - x2 = torch.randn(0, 2, 1, 1) - x3 = torch.randn(0, 2, 3, 1) - y1_cpu = torch.cat([x1, x2], dim=0) - y2_cpu = torch.cat([x2, x3], dim=2) - y3_cpu = torch.cat([x2, x2, x2], dim=1) - x1 = x1.npu() - x2 = x2.npu() - x3 = x3.npu() - y1_npu = torch.cat([x1, x2], dim=0) - y2_npu = torch.cat([x2, x3], dim=2) - y3_npu = torch.cat([x2, x2, x2], dim=1) - self.assertRtolEqual(y1_cpu, y1_npu.cpu()) - self.assertRtolEqual(y2_cpu, y2_npu.cpu()) - self.assertRtolEqual(y3_cpu, y3_npu.cpu()) - - -instantiate_device_type_tests(TestCat, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestCat(TestCase): + def cpu_op_exec(self, input1, input2, n): + output = torch.cat(input1 + input2, n) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2, n): + output = torch.cat(input1 + input2, n) + output = output.to("cpu") + output = output.numpy() + return output + + def test_cat_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_list = [(256, 32, 56)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) + npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_cat_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_list = [(256, 32, 56)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) + npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_cat_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_list = [(256, 32, 56, 56)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) + npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_cat_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_list = [(256, 32, 56, 56)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) + npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_cat_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_list = [(56, 56)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) + npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_cat_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_list = [(56, 56)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec([cpu_input1], [cpu_input2], 1) + npu_output = self.npu_op_exec([npu_input1], [npu_input2], 1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_cat_null_tensor(self, device): + x1 = torch.randn(15, 2, 1, 1) + x2 = torch.randn(0, 2, 1, 1) + x3 = torch.randn(0, 2, 3, 1) + y1_cpu = torch.cat([x1, x2], dim=0) + y2_cpu = torch.cat([x2, x3], dim=2) + y3_cpu = torch.cat([x2, x2, x2], dim=1) + x1 = x1.npu() + x2 = x2.npu() + x3 = x3.npu() + y1_npu = torch.cat([x1, x2], dim=0) + y2_npu = torch.cat([x2, x3], dim=2) + y3_npu = torch.cat([x2, x2, x2], dim=1) + self.assertRtolEqual(y1_cpu, y1_npu.cpu()) + self.assertRtolEqual(y2_cpu, y2_npu.cpu()) + self.assertRtolEqual(y3_cpu, y3_npu.cpu()) + + +instantiate_device_type_tests(TestCat, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_clamp.py b/test/test_npu/test_network_ops/test_clamp.py old mode 100644 new mode 100755 index 8de1bbc7d2754c7f4a965d9b2061a16b5f0c763e..3c61dbd4dffaccbf16f8e8392d9da138376dfedc --- a/test/test_npu/test_network_ops/test_clamp.py +++ b/test/test_npu/test_network_ops/test_clamp.py @@ -1,199 +1,199 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestClamp(TestCase): - def generate_data(self, data): - input1 = np.random.uniform(data[0], data[1], data[2]).astype(data[3]) - - #modify from numpy.ndarray to torch.tensor - input1 = torch.from_numpy(input1) - - return input1 - - def npu_op_exec(self, input1, min_val, max_val): - input1 = input1.to("npu") - output = torch.clamp(input1, min_val, max_val) - output = output.to("cpu") - output = output.numpy() - - return output - - def cpu_op_exec(self, input1, min_val, max_val): - output = torch.clamp(input1, min_val,max_val) - output = output.numpy() - - return output - - def cpu_op_exec_float16(self, input1, min_val, max_val): - input1 = input1.to(torch.float32) - output = torch.clamp(input1, min_val, max_val).to(torch.float16) - output = output.numpy() - - return output - - def npu_inp_op_exec(self, input1, min_val, max_val): - input1 = input1.to("npu") - output = torch.clamp_(input1, min_val, max_val) - output = input1.to("cpu") - output = output.numpy() - - return output - - def cpu_inp_op_exec(self, input1, min_val, max_val): - output = torch.clamp_(input1, min_val, max_val) - output = output.numpy() - - return output - - def cpu_inp_op_exec_float16(self, input1, min_val, max_val): - input1 = input1.to(torch.float32) - output = torch.clamp_(input1, min_val, max_val).to(torch.float16) - output = output.numpy() - - return output - - def npu_op_exec_out(self, input1, min_val, max_val, input2): - input1 = input1.to("npu") - output = input2.to("npu") - torch.clamp(input1, min_val, max_val, out=output) - output = output.to("cpu") - output = output.numpy() - - return output - - def npu_inp_uncon_op_exec(self, input1, min_val, max_val): - input1 = input1.to("npu") - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.clamp_(input1, min_val, max_val) - output = input1.to("cpu") - output = output.numpy() - - return output - - def cpu_inp_uncon_op_exec(self, input1, min_val, max_val): - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.clamp(input1, min_val, max_val) - output = output.numpy() - - return output - - def cpu_inp_uncon_op_exec_float16(self, input1, min_val, max_val): - input1 = input1.to(torch.float32).as_strided([2, 2], [1, 2], 2) - output = torch.clamp(input1, min_val, max_val).to(torch.float16) - output = output.numpy() - - return output - - def test_clamp_common(self, device): - shape_format = [ - [1, 100, (4, 3), np.float32], - [1, 100, (4, 3), np.int32], - ] - for item in shape_format: - input1 = self.generate_data(item) - - cpu_output = self.cpu_op_exec(input1, 40, 60) - npu_output = self.npu_op_exec(input1, 40, 60) - - cpu_inp_output = self.cpu_inp_op_exec(input1, 40, 60) - npu_inp_output = self.npu_inp_op_exec(input1, 40, 60) - - input2 = self.generate_data(item) - npu_out_output = self.npu_op_exec_out(input1, 40, 60, input2) - - cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec(input1, 40, 60) - npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 40, 60) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_inp_output, npu_inp_output) - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) - - def test_clamp_common_out(self, device): - shape_format = [ - [[1, 100, (4, 3), np.float32], [1, 100, (10, 10), np.float32]], - [[1, 100, (4, 3), np.float32], [1, 100, (1, 1), np.float32]], - [[1, 100, (4, 3), np.int32], [1, 100, (10, 10), np.int32]], - [[1, 100, (4, 3), np.int32], [1, 100, (1, 1), np.int32]] - ] - for item in shape_format: - print(item) - input1 = self.generate_data(item[0]) - cpu_output = self.cpu_op_exec(input1, 40, 60) - - input2 = self.generate_data(item[0]) - npu_out_output = self.npu_op_exec_out(input1, 40, 60, input2) - input3 = self.generate_data(item[1]) - npu_out_output1 = self.npu_op_exec_out(input1, 40, 60, input3) - - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_output, npu_out_output1) - - def test_clamp_float16(self, device): - shape_format = [ - [1, 100, (4, 3), np.float16], - ] - for item in shape_format: - input1 = self.generate_data(item) - - cpu_output = self.cpu_op_exec_float16(input1, 40, 60) - npu_output = self.npu_op_exec(input1, 40, 60) - - cpu_inp_output = self.cpu_inp_op_exec_float16(input1, 40, 60) - npu_inp_output = self.npu_inp_op_exec(input1, 40, 60) - - input2 = self.generate_data(item) - npu_out_output = self.npu_op_exec_out(input1, 40, 60, input2) - - cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec_float16(input1, 40, 60) - npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 40, 60) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_inp_output, npu_inp_output) - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) - - def test_clamp_float16_out(self, device): - shape_format = [ - [[1, 100, (4, 3), np.float16], [1, 100, (10, 10), np.float16]], - [[1, 100, (4, 3), np.float16], [1, 100, (1, 1), np.float16]], - ] - for item in shape_format: - print(item) - input1 = self.generate_data(item[0]) - cpu_output = self.cpu_op_exec_float16(input1, 40, 60) - - input2 = self.generate_data(item[0]) - npu_out_output = self.npu_op_exec_out(input1, 40, 60, input2) - input3 = self.generate_data(item[1]) - npu_out_output1 = self.npu_op_exec_out(input1, 40, 60, input3) - - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_output, npu_out_output1) - - -instantiate_device_type_tests(TestClamp, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestClamp(TestCase): + def generate_data(self, data): + input1 = np.random.uniform(data[0], data[1], data[2]).astype(data[3]) + + #modify from numpy.ndarray to torch.tensor + input1 = torch.from_numpy(input1) + + return input1 + + def npu_op_exec(self, input1, min_val, max_val): + input1 = input1.to("npu") + output = torch.clamp(input1, min_val, max_val) + output = output.to("cpu") + output = output.numpy() + + return output + + def cpu_op_exec(self, input1, min_val, max_val): + output = torch.clamp(input1, min_val,max_val) + output = output.numpy() + + return output + + def cpu_op_exec_float16(self, input1, min_val, max_val): + input1 = input1.to(torch.float32) + output = torch.clamp(input1, min_val, max_val).to(torch.float16) + output = output.numpy() + + return output + + def npu_inp_op_exec(self, input1, min_val, max_val): + input1 = input1.to("npu") + output = torch.clamp_(input1, min_val, max_val) + output = input1.to("cpu") + output = output.numpy() + + return output + + def cpu_inp_op_exec(self, input1, min_val, max_val): + output = torch.clamp_(input1, min_val, max_val) + output = output.numpy() + + return output + + def cpu_inp_op_exec_float16(self, input1, min_val, max_val): + input1 = input1.to(torch.float32) + output = torch.clamp_(input1, min_val, max_val).to(torch.float16) + output = output.numpy() + + return output + + def npu_op_exec_out(self, input1, min_val, max_val, input2): + input1 = input1.to("npu") + output = input2.to("npu") + torch.clamp(input1, min_val, max_val, out=output) + output = output.to("cpu") + output = output.numpy() + + return output + + def npu_inp_uncon_op_exec(self, input1, min_val, max_val): + input1 = input1.to("npu") + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.clamp_(input1, min_val, max_val) + output = input1.to("cpu") + output = output.numpy() + + return output + + def cpu_inp_uncon_op_exec(self, input1, min_val, max_val): + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.clamp(input1, min_val, max_val) + output = output.numpy() + + return output + + def cpu_inp_uncon_op_exec_float16(self, input1, min_val, max_val): + input1 = input1.to(torch.float32).as_strided([2, 2], [1, 2], 2) + output = torch.clamp(input1, min_val, max_val).to(torch.float16) + output = output.numpy() + + return output + + def test_clamp_common(self, device): + shape_format = [ + [1, 100, (4, 3), np.float32], + [1, 100, (4, 3), np.int32], + ] + for item in shape_format: + input1 = self.generate_data(item) + + cpu_output = self.cpu_op_exec(input1, 40, 60) + npu_output = self.npu_op_exec(input1, 40, 60) + + cpu_inp_output = self.cpu_inp_op_exec(input1, 40, 60) + npu_inp_output = self.npu_inp_op_exec(input1, 40, 60) + + input2 = self.generate_data(item) + npu_out_output = self.npu_op_exec_out(input1, 40, 60, input2) + + cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec(input1, 40, 60) + npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 40, 60) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_inp_output, npu_inp_output) + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) + + def test_clamp_common_out(self, device): + shape_format = [ + [[1, 100, (4, 3), np.float32], [1, 100, (10, 10), np.float32]], + [[1, 100, (4, 3), np.float32], [1, 100, (1, 1), np.float32]], + [[1, 100, (4, 3), np.int32], [1, 100, (10, 10), np.int32]], + [[1, 100, (4, 3), np.int32], [1, 100, (1, 1), np.int32]] + ] + for item in shape_format: + print(item) + input1 = self.generate_data(item[0]) + cpu_output = self.cpu_op_exec(input1, 40, 60) + + input2 = self.generate_data(item[0]) + npu_out_output = self.npu_op_exec_out(input1, 40, 60, input2) + input3 = self.generate_data(item[1]) + npu_out_output1 = self.npu_op_exec_out(input1, 40, 60, input3) + + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_output, npu_out_output1) + + def test_clamp_float16(self, device): + shape_format = [ + [1, 100, (4, 3), np.float16], + ] + for item in shape_format: + input1 = self.generate_data(item) + + cpu_output = self.cpu_op_exec_float16(input1, 40, 60) + npu_output = self.npu_op_exec(input1, 40, 60) + + cpu_inp_output = self.cpu_inp_op_exec_float16(input1, 40, 60) + npu_inp_output = self.npu_inp_op_exec(input1, 40, 60) + + input2 = self.generate_data(item) + npu_out_output = self.npu_op_exec_out(input1, 40, 60, input2) + + cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec_float16(input1, 40, 60) + npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 40, 60) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_inp_output, npu_inp_output) + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) + + def test_clamp_float16_out(self, device): + shape_format = [ + [[1, 100, (4, 3), np.float16], [1, 100, (10, 10), np.float16]], + [[1, 100, (4, 3), np.float16], [1, 100, (1, 1), np.float16]], + ] + for item in shape_format: + print(item) + input1 = self.generate_data(item[0]) + cpu_output = self.cpu_op_exec_float16(input1, 40, 60) + + input2 = self.generate_data(item[0]) + npu_out_output = self.npu_op_exec_out(input1, 40, 60, input2) + input3 = self.generate_data(item[1]) + npu_out_output1 = self.npu_op_exec_out(input1, 40, 60, input3) + + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_output, npu_out_output1) + + +instantiate_device_type_tests(TestClamp, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_clamp_max.py b/test/test_npu/test_network_ops/test_clamp_max.py index aeaf3cf9d73bb7ced33f92cf930962d8f2acaff4..66148ae023173fe1a566f0c614f12c15cac49d65 100644 --- a/test/test_npu/test_network_ops/test_clamp_max.py +++ b/test/test_npu/test_network_ops/test_clamp_max.py @@ -1,161 +1,161 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestClampMax(TestCase): - def generate_data(self, data): - input1 = np.random.uniform(data[0], data[1], data[2]).astype(data[3]) - - #modify from numpy.ndarray to torch.tensor - input1 = torch.from_numpy(input1) - - return input1 - - def npu_op_exec(self, input1, max_val): - input1 = input1.to("npu") - output = torch.clamp_max(input1, max_val) - output = output.to("cpu") - output = output.numpy() - - return output - - def cpu_op_exec(self, input1, max_val): - output = torch.clamp_max(input1, max_val) - output = output.numpy() - - return output - - def cpu_op_exec_float16(self, input1, max_val): - input1 = input1.to(torch.float32) - output = torch.clamp_max(input1, max_val).to(torch.float16) - output = output.numpy() - - return output - - def npu_inp_op_exec(self, input1, max_val): - input1 = input1.to("npu") - output = torch.clamp_max_(input1, max_val) - output = input1.to("cpu") - output = output.numpy() - - return output - - def cpu_inp_op_exec(self, input1, max_val): - output = torch.clamp_max_(input1, max_val) - output = output.numpy() - - return output - - def cpu_inp_op_exec_float16(self, input1, max_val): - input1 = input1.to(torch.float32) - output = torch.clamp_max_(input1, max_val).to(torch.float16) - output = output.numpy() - - return output - - def npu_op_exec_out(self, input1, max_val, input2): - input1 = input1.to("npu") - output = input2.to("npu") - torch.clamp_max(input1, max_val, out=output) - output = output.to("cpu") - output = output.numpy() - - return output - - def npu_inp_uncon_op_exec(self, input1, max_val): - input1 = input1.to("npu") - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.clamp_max_(input1, max_val) - output = input1.to("cpu") - output = output.numpy() - - return output - - def cpu_inp_uncon_op_exec(self, input1, max_val): - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.clamp_max(input1, max_val) - output = output.numpy() - - return output - - def cpu_inp_uncon_op_exec_float16(self, input1, max_val): - input1 = input1.to(torch.float32).as_strided([2, 2], [1, 2], 2) - output = torch.clamp_max(input1, max_val).to(torch.float16) - output = output.numpy() - - return output - - def test_clamp_max_common(self, device): - shape_format = [ - [1, 100, (4, 3), np.float32], - [1, 100, (4, 3), np.int32], - ] - for item in shape_format: - input1 = self.generate_data(item) - - cpu_output = self.cpu_op_exec(input1, 50) - npu_output = self.npu_op_exec(input1, 50) - - cpu_inp_output = self.cpu_inp_op_exec(input1, 50) - npu_inp_output = self.npu_inp_op_exec(input1, 50) - - input2 = self.generate_data(item) - npu_out_output = self.npu_op_exec_out(input1, 50, input2) - - cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec(input1, 50) - npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 50) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_inp_output, npu_inp_output) - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) - - def test_clamp_max_float16(self, device): - shape_format = [ - [1, 100, (4, 3), np.float16], - ] - for item in shape_format: - input1 = self.generate_data(item) - - cpu_output = self.cpu_op_exec_float16(input1, 50) - npu_output = self.npu_op_exec(input1, 50) - - cpu_inp_output = self.cpu_inp_op_exec_float16(input1, 50) - npu_inp_output = self.npu_inp_op_exec(input1, 50) - - input2 = self.generate_data(item) - npu_out_output = self.npu_op_exec_out(input1, 50, input2) - - cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec_float16(input1, 50) - npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 50) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_inp_output, npu_inp_output) - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) - - -instantiate_device_type_tests(TestClampMax, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestClampMax(TestCase): + def generate_data(self, data): + input1 = np.random.uniform(data[0], data[1], data[2]).astype(data[3]) + + #modify from numpy.ndarray to torch.tensor + input1 = torch.from_numpy(input1) + + return input1 + + def npu_op_exec(self, input1, max_val): + input1 = input1.to("npu") + output = torch.clamp_max(input1, max_val) + output = output.to("cpu") + output = output.numpy() + + return output + + def cpu_op_exec(self, input1, max_val): + output = torch.clamp_max(input1, max_val) + output = output.numpy() + + return output + + def cpu_op_exec_float16(self, input1, max_val): + input1 = input1.to(torch.float32) + output = torch.clamp_max(input1, max_val).to(torch.float16) + output = output.numpy() + + return output + + def npu_inp_op_exec(self, input1, max_val): + input1 = input1.to("npu") + output = torch.clamp_max_(input1, max_val) + output = input1.to("cpu") + output = output.numpy() + + return output + + def cpu_inp_op_exec(self, input1, max_val): + output = torch.clamp_max_(input1, max_val) + output = output.numpy() + + return output + + def cpu_inp_op_exec_float16(self, input1, max_val): + input1 = input1.to(torch.float32) + output = torch.clamp_max_(input1, max_val).to(torch.float16) + output = output.numpy() + + return output + + def npu_op_exec_out(self, input1, max_val, input2): + input1 = input1.to("npu") + output = input2.to("npu") + torch.clamp_max(input1, max_val, out=output) + output = output.to("cpu") + output = output.numpy() + + return output + + def npu_inp_uncon_op_exec(self, input1, max_val): + input1 = input1.to("npu") + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.clamp_max_(input1, max_val) + output = input1.to("cpu") + output = output.numpy() + + return output + + def cpu_inp_uncon_op_exec(self, input1, max_val): + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.clamp_max(input1, max_val) + output = output.numpy() + + return output + + def cpu_inp_uncon_op_exec_float16(self, input1, max_val): + input1 = input1.to(torch.float32).as_strided([2, 2], [1, 2], 2) + output = torch.clamp_max(input1, max_val).to(torch.float16) + output = output.numpy() + + return output + + def test_clamp_max_common(self, device): + shape_format = [ + [1, 100, (4, 3), np.float32], + [1, 100, (4, 3), np.int32], + ] + for item in shape_format: + input1 = self.generate_data(item) + + cpu_output = self.cpu_op_exec(input1, 50) + npu_output = self.npu_op_exec(input1, 50) + + cpu_inp_output = self.cpu_inp_op_exec(input1, 50) + npu_inp_output = self.npu_inp_op_exec(input1, 50) + + input2 = self.generate_data(item) + npu_out_output = self.npu_op_exec_out(input1, 50, input2) + + cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec(input1, 50) + npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 50) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_inp_output, npu_inp_output) + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) + + def test_clamp_max_float16(self, device): + shape_format = [ + [1, 100, (4, 3), np.float16], + ] + for item in shape_format: + input1 = self.generate_data(item) + + cpu_output = self.cpu_op_exec_float16(input1, 50) + npu_output = self.npu_op_exec(input1, 50) + + cpu_inp_output = self.cpu_inp_op_exec_float16(input1, 50) + npu_inp_output = self.npu_inp_op_exec(input1, 50) + + input2 = self.generate_data(item) + npu_out_output = self.npu_op_exec_out(input1, 50, input2) + + cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec_float16(input1, 50) + npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 50) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_inp_output, npu_inp_output) + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) + + +instantiate_device_type_tests(TestClampMax, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_clamp_min.py b/test/test_npu/test_network_ops/test_clamp_min.py index 04f2370f0837f700f86611f2812a3895fc7bfaa0..d8f176897bfbd5c2f257a5d04fecd70d423a78c0 100644 --- a/test/test_npu/test_network_ops/test_clamp_min.py +++ b/test/test_npu/test_network_ops/test_clamp_min.py @@ -1,196 +1,196 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestClampMin(TestCase): - def generate_data(self, data): - input1 = np.random.uniform(data[0], data[1], data[2]).astype(data[3]) - - #modify from numpy.ndarray to torch.tensor - input1 = torch.from_numpy(input1) - - return input1 - - def npu_op_exec(self, input1, min_val): - input1 = input1.to("npu") - output = torch.clamp_min(input1, min_val) - output = output.to("cpu") - output = output.numpy() - - return output - - def cpu_op_exec(self, input1, min_val): - output = torch.clamp_min(input1, min_val) - output = output.numpy() - - return output - - def cpu_op_exec_float16(self, input1, min_val): - input1 = input1.to(torch.float32) - output = torch.clamp_min(input1, min_val).to(torch.float16) - output = output.numpy() - - return output - - def npu_inp_op_exec(self, input1, min_val): - input1 = input1.to("npu") - output = torch.clamp_min_(input1, min_val) - output = input1.to("cpu") - output = output.numpy() - - return output - - def cpu_inp_op_exec(self, input1, min_val): - output = torch.clamp_min_(input1, min_val) - output = output.numpy() - - return output - - def cpu_inp_op_exec_float16(self, input1, min_val): - input1 = input1.to(torch.float32) - output = torch.clamp_min_(input1, min_val).to(torch.float16) - output = output.numpy() - - return output - - def npu_op_exec_out(self, input1, min_val, input2): - input1 = input1.to("npu") - output = input2.to("npu") - torch.clamp_min(input1, min_val, out=output) - output = output.to("cpu") - output = output.numpy() - - return output - - def npu_inp_uncon_op_exec(self, input1, min_val): - input1 = input1.to("npu") - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.clamp_min_(input1, min_val) - output = input1.to("cpu") - output = output.numpy() - - return output - - def cpu_inp_uncon_op_exec(self, input1, min_val): - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.clamp_min(input1, min_val) - output = output.numpy() - - return output - - def cpu_inp_uncon_op_exec_float16(self, input1, min_val): - input1 = input1.to(torch.float32).as_strided([2, 2], [1, 2], 2) - output = torch.clamp_min(input1, min_val).to(torch.float16) - output = output.numpy() - - return output - - def test_clamp_min_common(self, device): - shape_format = [ - [1, 100, (4, 3), np.float32], - [1, 100, (4, 3), np.int32], - ] - for item in shape_format: - input1 = self.generate_data(item) - - cpu_output = self.cpu_op_exec(input1, 50) - npu_output = self.npu_op_exec(input1, 50) - - cpu_inp_output = self.cpu_inp_op_exec(input1, 50) - npu_inp_output = self.npu_inp_op_exec(input1, 50) - - input2 = self.generate_data(item) - npu_out_output = self.npu_op_exec_out(input1, 50, input2) - - cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec(input1, 50) - npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 50) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_inp_output, npu_inp_output) - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) - - def test_clamp_min_common_out(self, device): - shape_format = [ - [[1, 100, (4, 3), np.float32], [1, 100, (10, 10), np.float32]], - [[1, 100, (4, 3), np.float32], [1, 100, (1, 1), np.float32]], - [[1, 100, (4, 3), np.int32], [1, 100, (10, 10), np.int32]], - [[1, 100, (4, 3), np.int32], [1, 100, (1, 1), np.int32]] - ] - for item in shape_format: - print(item) - input1 = self.generate_data(item[0]) - cpu_output = self.cpu_op_exec(input1, 50) - input2 = self.generate_data(item[0]) - npu_out_output = self.npu_op_exec_out(input1, 50, input2) - input3 = self.generate_data(item[1]) - npu_out_output1 = self.npu_op_exec_out(input1, 50, input3) - - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_output, npu_out_output1) - - def test_clamp_min_float16(self, device): - shape_format = [ - [1, 100, (4, 3), np.float16], - ] - for item in shape_format: - input1 = self.generate_data(item) - - cpu_output = self.cpu_op_exec_float16(input1, 50) - npu_output = self.npu_op_exec(input1, 50) - - cpu_inp_output = self.cpu_inp_op_exec_float16(input1, 50) - npu_inp_output = self.npu_inp_op_exec(input1, 50) - - input2 = self.generate_data(item) - npu_out_output = self.npu_op_exec_out(input1, 50, input2) - - cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec_float16(input1, 50) - npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 50) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_inp_output, npu_inp_output) - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) - - def test_clamp_min_float16_out(self, device): - shape_format = [ - [[1, 100, (4, 3), np.float16], [1, 100, (10, 10), np.float16]], - [[1, 100, (4, 3), np.float16], [1, 100, (1, 1), np.float16]], - ] - for item in shape_format: - print(item) - input1 = self.generate_data(item[0]) - cpu_output = self.cpu_op_exec_float16(input1, 50) - input2 = self.generate_data(item[0]) - npu_out_output = self.npu_op_exec_out(input1, 50, input2) - input3 = self.generate_data(item[1]) - npu_out_output1 = self.npu_op_exec_out(input1, 50, input3) - - self.assertRtolEqual(cpu_output, npu_out_output) - self.assertRtolEqual(cpu_output, npu_out_output1) - -instantiate_device_type_tests(TestClampMin, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestClampMin(TestCase): + def generate_data(self, data): + input1 = np.random.uniform(data[0], data[1], data[2]).astype(data[3]) + + #modify from numpy.ndarray to torch.tensor + input1 = torch.from_numpy(input1) + + return input1 + + def npu_op_exec(self, input1, min_val): + input1 = input1.to("npu") + output = torch.clamp_min(input1, min_val) + output = output.to("cpu") + output = output.numpy() + + return output + + def cpu_op_exec(self, input1, min_val): + output = torch.clamp_min(input1, min_val) + output = output.numpy() + + return output + + def cpu_op_exec_float16(self, input1, min_val): + input1 = input1.to(torch.float32) + output = torch.clamp_min(input1, min_val).to(torch.float16) + output = output.numpy() + + return output + + def npu_inp_op_exec(self, input1, min_val): + input1 = input1.to("npu") + output = torch.clamp_min_(input1, min_val) + output = input1.to("cpu") + output = output.numpy() + + return output + + def cpu_inp_op_exec(self, input1, min_val): + output = torch.clamp_min_(input1, min_val) + output = output.numpy() + + return output + + def cpu_inp_op_exec_float16(self, input1, min_val): + input1 = input1.to(torch.float32) + output = torch.clamp_min_(input1, min_val).to(torch.float16) + output = output.numpy() + + return output + + def npu_op_exec_out(self, input1, min_val, input2): + input1 = input1.to("npu") + output = input2.to("npu") + torch.clamp_min(input1, min_val, out=output) + output = output.to("cpu") + output = output.numpy() + + return output + + def npu_inp_uncon_op_exec(self, input1, min_val): + input1 = input1.to("npu") + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.clamp_min_(input1, min_val) + output = input1.to("cpu") + output = output.numpy() + + return output + + def cpu_inp_uncon_op_exec(self, input1, min_val): + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.clamp_min(input1, min_val) + output = output.numpy() + + return output + + def cpu_inp_uncon_op_exec_float16(self, input1, min_val): + input1 = input1.to(torch.float32).as_strided([2, 2], [1, 2], 2) + output = torch.clamp_min(input1, min_val).to(torch.float16) + output = output.numpy() + + return output + + def test_clamp_min_common(self, device): + shape_format = [ + [1, 100, (4, 3), np.float32], + [1, 100, (4, 3), np.int32], + ] + for item in shape_format: + input1 = self.generate_data(item) + + cpu_output = self.cpu_op_exec(input1, 50) + npu_output = self.npu_op_exec(input1, 50) + + cpu_inp_output = self.cpu_inp_op_exec(input1, 50) + npu_inp_output = self.npu_inp_op_exec(input1, 50) + + input2 = self.generate_data(item) + npu_out_output = self.npu_op_exec_out(input1, 50, input2) + + cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec(input1, 50) + npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 50) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_inp_output, npu_inp_output) + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) + + def test_clamp_min_common_out(self, device): + shape_format = [ + [[1, 100, (4, 3), np.float32], [1, 100, (10, 10), np.float32]], + [[1, 100, (4, 3), np.float32], [1, 100, (1, 1), np.float32]], + [[1, 100, (4, 3), np.int32], [1, 100, (10, 10), np.int32]], + [[1, 100, (4, 3), np.int32], [1, 100, (1, 1), np.int32]] + ] + for item in shape_format: + print(item) + input1 = self.generate_data(item[0]) + cpu_output = self.cpu_op_exec(input1, 50) + input2 = self.generate_data(item[0]) + npu_out_output = self.npu_op_exec_out(input1, 50, input2) + input3 = self.generate_data(item[1]) + npu_out_output1 = self.npu_op_exec_out(input1, 50, input3) + + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_output, npu_out_output1) + + def test_clamp_min_float16(self, device): + shape_format = [ + [1, 100, (4, 3), np.float16], + ] + for item in shape_format: + input1 = self.generate_data(item) + + cpu_output = self.cpu_op_exec_float16(input1, 50) + npu_output = self.npu_op_exec(input1, 50) + + cpu_inp_output = self.cpu_inp_op_exec_float16(input1, 50) + npu_inp_output = self.npu_inp_op_exec(input1, 50) + + input2 = self.generate_data(item) + npu_out_output = self.npu_op_exec_out(input1, 50, input2) + + cpu_inp_uncon_output = self.cpu_inp_uncon_op_exec_float16(input1, 50) + npu_inp_uncon_output = self.npu_inp_uncon_op_exec(input1, 50) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_inp_output, npu_inp_output) + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_inp_uncon_output, npu_inp_uncon_output) + + def test_clamp_min_float16_out(self, device): + shape_format = [ + [[1, 100, (4, 3), np.float16], [1, 100, (10, 10), np.float16]], + [[1, 100, (4, 3), np.float16], [1, 100, (1, 1), np.float16]], + ] + for item in shape_format: + print(item) + input1 = self.generate_data(item[0]) + cpu_output = self.cpu_op_exec_float16(input1, 50) + input2 = self.generate_data(item[0]) + npu_out_output = self.npu_op_exec_out(input1, 50, input2) + input3 = self.generate_data(item[1]) + npu_out_output1 = self.npu_op_exec_out(input1, 50, input3) + + self.assertRtolEqual(cpu_output, npu_out_output) + self.assertRtolEqual(cpu_output, npu_out_output1) + +instantiate_device_type_tests(TestClampMin, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_confusion_transpose.py b/test/test_npu/test_network_ops/test_confusion_transpose.py index 7702496c95fe5441001121c1cbe272cd0ecce53e..50f91e881ddf37b0ff7e835483508830eaf2d5af 100644 --- a/test/test_npu/test_network_ops/test_confusion_transpose.py +++ b/test/test_npu/test_network_ops/test_confusion_transpose.py @@ -1,56 +1,56 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestConfusionTransposeD(TestCase): - def npu_op_exec(self, input1, shape, perm, transpose_first): - output = torch.npu_confusion_transpose(input1, perm, shape, transpose_first) - output = output.cpu().numpy() - return output - - def cpu_op_exec(self, input1, shape, perm, transpose_first): - if transpose_first: - output = input1.permute(*perm).contiguous().view(shape) - else: - output = input1.view(shape).permute(*perm) - output = output.numpy() - return output - - def test_confusion_transpose(self, device): - shape_format = [ - [[np.float32, 0, [1, 576, 2560]],[1, 576, 32, 80], (0, 2, 1, 3), False], - [[np.float32, 0, [1, 32, 576, 80]],[1, 576, 2560], (0, 2, 1, 3), True], - [[np.float16, 0, [1, 576, 2560]], [1, 576, 32, 80], (0, 2, 1, 3), False], - [[np.float16, 0, [1, 32, 576, 80]], [1, 576, 2560], (0, 2, 1, 3), True], - [[np.int, 0, [1, 576, 2560]], [1, 576, 32, 80], (0, 2, 1, 3), False], - [[np.int, 0, [1, 32, 576, 80]], [1, 576, 2560], (0, 2, 1, 3), True], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input, item[1], item[2], item[3]) - npu_output = self.npu_op_exec(npu_input, item[1], item[2], item[3]) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestConfusionTransposeD, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestConfusionTransposeD(TestCase): + def npu_op_exec(self, input1, shape, perm, transpose_first): + output = torch.npu_confusion_transpose(input1, perm, shape, transpose_first) + output = output.cpu().numpy() + return output + + def cpu_op_exec(self, input1, shape, perm, transpose_first): + if transpose_first: + output = input1.permute(*perm).contiguous().view(shape) + else: + output = input1.view(shape).permute(*perm) + output = output.numpy() + return output + + def test_confusion_transpose(self, device): + shape_format = [ + [[np.float32, 0, [1, 576, 2560]],[1, 576, 32, 80], (0, 2, 1, 3), False], + [[np.float32, 0, [1, 32, 576, 80]],[1, 576, 2560], (0, 2, 1, 3), True], + [[np.float16, 0, [1, 576, 2560]], [1, 576, 32, 80], (0, 2, 1, 3), False], + [[np.float16, 0, [1, 32, 576, 80]], [1, 576, 2560], (0, 2, 1, 3), True], + [[np.int, 0, [1, 576, 2560]], [1, 576, 32, 80], (0, 2, 1, 3), False], + [[np.int, 0, [1, 32, 576, 80]], [1, 576, 2560], (0, 2, 1, 3), True], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, item[1], item[2], item[3]) + npu_output = self.npu_op_exec(npu_input, item[1], item[2], item[3]) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestConfusionTransposeD, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_confusion_transpose_backward.py b/test/test_npu/test_network_ops/test_confusion_transpose_backward.py index debfc54ae8cddc984ade037ca457a6cafe80a638..37c65cbe631ff7101d3a3818fb11daac07bd931b 100644 --- a/test/test_npu/test_network_ops/test_confusion_transpose_backward.py +++ b/test/test_npu/test_network_ops/test_confusion_transpose_backward.py @@ -1,59 +1,59 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - -class TestConfusionTransposeDBackward(TestCase): - def npu_op_exec(self, input1, shape, perm, transpose_first): - input1.requires_grad_() - output = torch.npu_confusion_transpose(input1, perm, shape, transpose_first) - output.backward(torch.ones_like(output)) - output1 = output.detach().cpu().numpy() - output2 = input1.grad.cpu().numpy() - return output1, output2 - - def cpu_op_exec(self, input1, shape, perm, transpose_first): - input1.requires_grad_() - if transpose_first: - output = input1.permute(*perm).contiguous().view(shape) - else: - output = input1.view(shape).permute(*perm) - output.backward(torch.ones_like(output)) - output1 = output.detach().numpy() - output2 = input1.grad.numpy() - return output1, output2 - - def test_confusion_transpose_backward(self, device): - shape_format = [ - [[np.float32, 0, [1, 576, 2560]],[1, 576, 32, 80], (0, 2, 1, 3), False], - [[np.float32, 0, [1, 32, 576, 80]],[1, 576, 2560], (0, 2, 1, 3), True], - [[np.float16, 0, [1, 576, 2560]], [1, 576, 32, 80], (0, 2, 1, 3), False], - [[np.float16, 0, [1, 32, 576, 80]], [1, 576, 2560], (0, 2, 1, 3), True], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output1, cpu_output2 = self.cpu_op_exec(cpu_input, item[1], item[2], item[3]) - npu_output1, npu_output2 = self.npu_op_exec(npu_input, item[1], item[2], item[3]) - self.assertRtolEqual(cpu_output1, npu_output1) - self.assertRtolEqual(cpu_output2, npu_output2) - -instantiate_device_type_tests(TestConfusionTransposeDBackward, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + +class TestConfusionTransposeDBackward(TestCase): + def npu_op_exec(self, input1, shape, perm, transpose_first): + input1.requires_grad_() + output = torch.npu_confusion_transpose(input1, perm, shape, transpose_first) + output.backward(torch.ones_like(output)) + output1 = output.detach().cpu().numpy() + output2 = input1.grad.cpu().numpy() + return output1, output2 + + def cpu_op_exec(self, input1, shape, perm, transpose_first): + input1.requires_grad_() + if transpose_first: + output = input1.permute(*perm).contiguous().view(shape) + else: + output = input1.view(shape).permute(*perm) + output.backward(torch.ones_like(output)) + output1 = output.detach().numpy() + output2 = input1.grad.numpy() + return output1, output2 + + def test_confusion_transpose_backward(self, device): + shape_format = [ + [[np.float32, 0, [1, 576, 2560]],[1, 576, 32, 80], (0, 2, 1, 3), False], + [[np.float32, 0, [1, 32, 576, 80]],[1, 576, 2560], (0, 2, 1, 3), True], + [[np.float16, 0, [1, 576, 2560]], [1, 576, 32, 80], (0, 2, 1, 3), False], + [[np.float16, 0, [1, 32, 576, 80]], [1, 576, 2560], (0, 2, 1, 3), True], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output1, cpu_output2 = self.cpu_op_exec(cpu_input, item[1], item[2], item[3]) + npu_output1, npu_output2 = self.npu_op_exec(npu_input, item[1], item[2], item[3]) + self.assertRtolEqual(cpu_output1, npu_output1) + self.assertRtolEqual(cpu_output2, npu_output2) + +instantiate_device_type_tests(TestConfusionTransposeDBackward, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_conv1d.py b/test/test_npu/test_network_ops/test_conv1d.py index 6b3fe0a04cd5e11337f4633832f009e27721581d..4d7d8d90797deacfa9c6754aa9af921e0952215f 100644 --- a/test/test_npu/test_network_ops/test_conv1d.py +++ b/test/test_npu/test_network_ops/test_conv1d.py @@ -1,86 +1,86 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -import torch.nn.functional as F -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestConv1d(TestCase): - def cpu_op_exec(self, input1, weight, stride, pad): - input1.requires_grad = True - weight.requires_grad = True - out = F.conv1d(input1, weight, stride=stride, padding=pad) - out.backward(torch.ones_like(out)) - input_grad = input1.grad - weight_grad = weight.grad - out = out.detach() - return out, input_grad, weight_grad - - def npu_op_exec(self, input1, weight, stride, pad): - input1.requires_grad = True - weight.requires_grad = True - out = F.conv1d(input1, weight, stride=stride, padding=pad) - out.backward(torch.ones_like(out)) - input_grad = input1.grad.cpu() - weight_grad = weight.grad.cpu() - out = out.cpu().detach() - return out, input_grad, weight_grad - - def test_conv1d_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, 0, [4, 1, 166400]], [np.float16, 0, [514, 1, 400]], 400, 0] - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 1) - cpu_weight, npu_weight = create_common_tensor(item[1], 0, 1) - stride = item[2] - padding = item[3] - - cpu_output, cpu_input_grad, cpu_weight_grad = self.cpu_op_exec(cpu_input.float(), cpu_weight.float(), stride, padding) - cpu_output = cpu_output.half() - cpu_input_grad = cpu_input_grad.half() - cpu_weight_grad = cpu_weight_grad.half() - npu_output, npu_input_grad, npu_weight_grad = self.npu_op_exec(npu_input, npu_weight, stride, padding) - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_input_grad, npu_input_grad) - self.assertRtolEqual(cpu_weight_grad, npu_weight_grad) - - def test_conv1d_shape_format_fp32(self, device): - shape_format = [ - [[np.float32, 0, [4, 1, 166400]], [np.float32, 0, [514, 1, 400]], 400, 0] - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 1) - cpu_weight, npu_weight = create_common_tensor(item[1], 0, 1) - stride = item[2] - padding = item[3] - - cpu_output, cpu_input_grad, cpu_weight_grad = self.cpu_op_exec(cpu_input, cpu_weight, stride, padding) - npu_output, npu_input_grad, npu_weight_grad = self.npu_op_exec(npu_input, npu_weight, stride, padding) - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_input_grad, npu_input_grad) - self.assertRtolEqual(cpu_weight_grad, npu_weight_grad) - - -instantiate_device_type_tests(TestConv1d, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +import torch.nn.functional as F +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestConv1d(TestCase): + def cpu_op_exec(self, input1, weight, stride, pad): + input1.requires_grad = True + weight.requires_grad = True + out = F.conv1d(input1, weight, stride=stride, padding=pad) + out.backward(torch.ones_like(out)) + input_grad = input1.grad + weight_grad = weight.grad + out = out.detach() + return out, input_grad, weight_grad + + def npu_op_exec(self, input1, weight, stride, pad): + input1.requires_grad = True + weight.requires_grad = True + out = F.conv1d(input1, weight, stride=stride, padding=pad) + out.backward(torch.ones_like(out)) + input_grad = input1.grad.cpu() + weight_grad = weight.grad.cpu() + out = out.cpu().detach() + return out, input_grad, weight_grad + + def test_conv1d_shape_format_fp16(self, device): + shape_format = [ + [[np.float16, 0, [4, 1, 166400]], [np.float16, 0, [514, 1, 400]], 400, 0] + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 1) + cpu_weight, npu_weight = create_common_tensor(item[1], 0, 1) + stride = item[2] + padding = item[3] + + cpu_output, cpu_input_grad, cpu_weight_grad = self.cpu_op_exec(cpu_input.float(), cpu_weight.float(), stride, padding) + cpu_output = cpu_output.half() + cpu_input_grad = cpu_input_grad.half() + cpu_weight_grad = cpu_weight_grad.half() + npu_output, npu_input_grad, npu_weight_grad = self.npu_op_exec(npu_input, npu_weight, stride, padding) + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_input_grad, npu_input_grad) + self.assertRtolEqual(cpu_weight_grad, npu_weight_grad) + + def test_conv1d_shape_format_fp32(self, device): + shape_format = [ + [[np.float32, 0, [4, 1, 166400]], [np.float32, 0, [514, 1, 400]], 400, 0] + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 1) + cpu_weight, npu_weight = create_common_tensor(item[1], 0, 1) + stride = item[2] + padding = item[3] + + cpu_output, cpu_input_grad, cpu_weight_grad = self.cpu_op_exec(cpu_input, cpu_weight, stride, padding) + npu_output, npu_input_grad, npu_weight_grad = self.npu_op_exec(npu_input, npu_weight, stride, padding) + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_input_grad, npu_input_grad) + self.assertRtolEqual(cpu_weight_grad, npu_weight_grad) + + +instantiate_device_type_tests(TestConv1d, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_conv2d.py b/test/test_npu/test_network_ops/test_conv2d.py old mode 100644 new mode 100755 index 916e197f5320e310845bcd550c9055a3c3df63df..5a5f88f14d849ee787c2560c8ba413462cd6058d --- a/test/test_npu/test_network_ops/test_conv2d.py +++ b/test/test_npu/test_network_ops/test_conv2d.py @@ -1,210 +1,210 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import numpy as np -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestConv2d(TestCase): - weight_grad = [] - input_grad = [] - - def getWeightGrad(self, grad): - self.weight_grad.append(grad.to("cpu")) - - def getInputGrad(self, grad): - self.input_grad.append(grad.to("cpu")) - - def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True, groups=1): - input1 = input - weight1 = weight - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - - m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, groups=groups) - m1.weight.data = weight1 - m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) - cpuOutput = m1(input1) - tmp = torch.ones_like(cpuOutput) - cpuOutput.backward(tmp) - - return cpuOutput - - def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True, groups=1): - input1 = input - weight1 = weight - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - - m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, groups=groups) - m1.weight.data = weight1 - m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) - m1 = m1.to("npu") - npuOutput = m1(input1) - tmp = torch.ones_like(npuOutput) - npuOutput.backward(tmp) - - return npuOutput.to("cpu") - - def conv2d_backward_result(self, shape_format): - for item in shape_format: - self.weight_grad.clear() - self.input_grad.clear() - input_cpu, input_npu = create_common_tensor(item[0], -1, 1) - if input_cpu.dtype == torch.float16: - input_cpu = input_cpu.to(torch.float32) - weight_cpu, weight_npu = create_common_tensor(item[1], -1, 1) - if weight_cpu.dtype == torch.float16: - weight_cpu = weight_cpu.to(torch.float32) - kernel_size = (item[1][2][2], item[1][2][3]) - assert item[0][2][1]/item[6] == item[1][2][1], "ilegal parameters: con2d in_channels//groups must equal to weight.size[1]." - cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5], groups=item[6]) - weight_npu = weight_npu.to("cpu") - npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5], groups=item[6]) - - npu_output = npu_output.to(torch.float16) - cpu_output = cpu_output.to(torch.float16) - self.input_grad[0] = self.input_grad[0].to(torch.float16) - self.input_grad[1] = self.input_grad[1].to(torch.float16) - - self.weight_grad[0] = self.weight_grad[0].to(self.weight_grad[1].dtype) - - self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) - self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].numpy()) - self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].numpy()) - - def test_conv2d_backward_shape_format_fp16(self, device): - shape_format = [ # input, weight, padding, stride, dilation, bias, groups - # shuflenet - [[np.float16, 3, [1024, 232, 7, 7]], [np.float16, 4, [232, 232, 1, 1]], 0, 1, 1, None, 1], - [[np.float16, 0, [1024, 116, 14, 14]], [np.float16, 4, [116, 116, 1, 1]], 0, 1, 1, None, 1], - [[np.float16, 0, [4, 8, 300, 40]], [np.float16, 0, [16, 8, 3, 3]], [2,1], 1, 1, None, 1], - [[np.float16, 0, [4, 64, 150, 10]], [np.float16, 0, [32, 64, 1, 1]], 0, 1, 1, None, 1], - [[np.float16, 0, [4, 128, 75, 10]], [np.float16, 0, [64, 128, 1, 1]], 0, 1, 1, None, 1], - [[np.float16, 0, [4, 256, 75, 5]], [np.float16, 0, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], - [[np.float16, 0, [4, 384, 75, 1]], [np.float16, 0, [192, 384, 3, 1]], 0, 1, 1, None, 1], - [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 0, [192, 384, 1, 3]], 0, 1, 1, None, 1], - [[np.float16, 3, [4, 256, 75, 5]], [np.float16, 4, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], - [[np.float16, 3, [4, 384, 75, 1]], [np.float16, 4, [192, 384, 3, 1]], 0, 1, 1, None, 1], - [[np.float16, 3, [4, 384, 1, 75]], [np.float16, 4, [192, 384, 1, 3]], 0, 1, 1, None, 1], - [[np.float16, 0, [4, 256, 75, 5]], [np.float16, 4, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], - [[np.float16, 0, [4, 384, 75, 1]], [np.float16, 4, [192, 384, 3, 1]], 0, 1, 1, None, 1], - [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 4, [192, 384, 1, 3]], 0, 1, 1, None, 1], - # 当前不支持kernel_size_h >= padding_h*2 + input_h和kernel_size_w >= padding_w*2 + input_w, 预计330支持 - # [[np.float16, 0, [4, 384, 75, 1]], [np.float16, 0, [192, 384, 3, 3]], 0, 1, 1, None, 1], - # [[np.float16, 0, [4, 384, 75, 1]], [np.float16, 0, [192, 384, 3, 3]], [1,1], 1, 1, None, 1], - # [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 0, [192, 384, 3, 3]], 0, 1, 1, None, 1], - # [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 0, [192, 384, 3, 3]], [1,1], 1, 1, None, 1], - ] - self.conv2d_backward_result(shape_format) - - def test_conv2d_backward_shape_format_fp32(self, device): - shape_format = [ # input, weight, padding, stride, dilation, bias, groups - # mobilenet - [[np.float32, 3, [256, 960, 7, 7]], [np.float32, 0, [320, 960, 1, 1]], 0, 1, 1, None, 1], - [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [32, 3, 3, 3]], 1, 2, 1, None, 1], - [[np.float32, 0, [16, 3, 640, 640]], [np.float32, 4, [64, 3, 7, 7]], 3, 2, 1, None, 1], - [[np.float32, 0, [4, 8, 300, 40]], [np.float32, 0, [16, 8, 3, 3]], [2,1], 1, 1, None, 1], - [[np.float32, 0, [4, 64, 150, 10]], [np.float32, 0, [32, 64, 1, 1]], 0, 1, 1, None, 1], - [[np.float32, 0, [4, 128, 75, 10]], [np.float32, 0, [64, 128, 1, 1]], 0, 1, 1, None, 1], - [[np.float32, 0, [4, 256, 75, 5]], [np.float32, 0, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], - [[np.float32, 0, [4, 384, 75, 1]], [np.float32, 0, [192, 384, 3, 1]], 0, 1, 1, None, 1], - [[np.float32, 0, [4, 384, 1, 75]], [np.float32, 0, [192, 384, 1, 3]], 0, 1, 1, None, 1], - [[np.float32, 3, [4, 256, 75, 5]], [np.float32, 0, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], - [[np.float32, 3, [4, 384, 75, 1]], [np.float32, 0, [192, 384, 3, 1]], 0, 1, 1, None, 1], - [[np.float32, 3, [4, 384, 1, 75]], [np.float32, 0, [192, 384, 1, 3]], 0, 1, 1, None, 1], - [[np.float32, 0, [4, 256, 75, 5]], [np.float32, 4, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], - [[np.float32, 0, [4, 384, 75, 1]], [np.float32, 4, [192, 384, 3, 1]], 0, 1, 1, None, 1], - [[np.float32, 0, [4, 384, 1, 75]], [np.float32, 4, [192, 384, 1, 3]], 0, 1, 1, None, 1], - # 当前不支持kernel_size_h >= padding_h*2 + input_h和kernel_size_w >= padding_w*2 + input_w, 预计330支持 - # [[np.float32, 0, [4, 384, 75, 1]], [np.float32, 0, [192, 384, 3, 3]], 0, 1, 1, None, 1], - # [[np.float32, 0, [4, 384, 75, 1]], [np.float32, 0, [192, 384, 3, 3]], [1,1], 1, 1, None, 1], - # [[np.float32, 0, [4, 384, 1, 75]], [np.float32, 0, [192, 384, 3, 3]], 0, 1, 1, None, 1], - # [[np.float32, 0, [4, 384, 1, 75]], [np.float32, 0, [192, 384, 3, 3]], [1,1], 1, 1, None, 1], - ] - #conv类算子不支持fp32数据的精度要求 - #self.conv2d_backward_result(shape_format) - - def test_group_conv2d_backward_shape_format_fp16(self, device): - shape_format= [ # input, weight, padding, stride, dilation, bias, groups - # KDXF - [[np.float16, 0, [4, 64, 75, 10]], [np.float16, 0, [128, 16, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float16, 0, [4, 128, 75, 10]], [np.float16, 0, [64, 32, 1, 1]], 0, 1, 1, None, 4], - [[np.float16, 0, [4, 128, 75, 5]], [np.float16, 0, [256, 32, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float16, 0, [4, 256, 75, 1]], [np.float16, 0, [384, 64, 3, 1]], [1,0], 1, 1, None, 4], - [[np.float16, 0, [4, 192, 75, 1]], [np.float16, 0, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float16, 0, [4, 128, 75, 1]], [np.float16, 0, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float16, 0, [4, 128, 75, 5]], [np.float16, 0, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float16, 3, [4, 192, 75, 1]], [np.float16, 0, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float16, 3, [4, 128, 75, 1]], [np.float16, 0, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float16, 3, [4, 128, 75, 5]], [np.float16, 0, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float16, 3, [4, 192, 75, 1]], [np.float16, 4, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float16, 3, [4, 128, 75, 1]], [np.float16, 4, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float16, 3, [4, 128, 75, 5]], [np.float16, 4, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float16, 0, [4, 64, 75, 5]], [np.float16, 0, [64, 1, 3, 3]], [2,1], 1, 1, None, 64], - [[np.float16, 0, [4, 64, 75, 1]], [np.float16, 0, [64, 1, 3, 1]], 0, 1, 1, None, 64], - [[np.float16, 0, [4, 64, 1, 75]], [np.float16, 0, [64, 1, 1, 3]], 0, 1, 1, None, 64], - # 当前不支持kernel_size_h >= padding_h*2 + input_h和kernel_size_w >= padding_w*2 + input_w, 预计330支持 - # [[np.float16, 0, [4, 64, 75, 1]], [np.float16, 0, [128, 16, 3, 3]], 0, 1, 1, None, 4], - # [[np.float16, 0, [4, 64, 75, 1]], [np.float16, 0, [128, 16, 3, 3]], [1,1], 1, 1, None, 4], - # [[np.float16, 0, [4, 64, 1, 75]], [np.float16, 0, [128, 16, 3, 3]], 0, 1, 1, None, 4], - # [[np.float16, 0, [4, 64, 1, 75]], [np.float16, 0, [128, 16, 3, 3]], [1,1], 1, 1, None, 4], - # 当前不支持in_channel == groups != out_channel - # [[np.float32, 0, [4, 64, 75, 5]], [np.float32, 0, [128, 1, 3, 3]], [2,1], 1, 1, None, 64], - # [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [128, 1, 3, 1]], 0, 1, 1, None, 64], - # [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [128, 1, 1, 3]], 0, 1, 1, None, 64], - ] - - def test_group_conv2d_backward_shape_format_fp32(self, device): - shape_format= [ # input, weight, padding, stride, dilation, bias, groups - # KDXF - [[np.float32, 0, [4, 64, 75, 10]], [np.float32, 0, [128, 16, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float32, 0, [4, 128, 75, 10]], [np.float32, 0, [64, 32, 1, 1]], 0, 1, 1, None, 4], - [[np.float32, 0, [4, 128, 75, 5]], [np.float32, 0, [256, 32, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float32, 0, [4, 256, 75, 1]], [np.float32, 0, [384, 64, 3, 1]], [1,0], 1, 1, None, 4], - [[np.float32, 0, [4, 192, 75, 1]], [np.float32, 0, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float32, 0, [4, 128, 75, 1]], [np.float32, 0, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float32, 0, [4, 128, 75, 5]], [np.float32, 0, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float32, 3, [4, 192, 75, 1]], [np.float32, 0, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float32, 3, [4, 128, 75, 1]], [np.float32, 0, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float32, 3, [4, 128, 75, 5]], [np.float32, 0, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float32, 3, [4, 192, 75, 1]], [np.float32, 4, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float32, 3, [4, 128, 75, 1]], [np.float32, 4, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], - [[np.float32, 3, [4, 128, 75, 5]], [np.float32, 4, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], - [[np.float32, 0, [4, 64, 75, 5]], [np.float32, 0, [64, 1, 3, 3]], [2,1], 1, 1, None, 64], - [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [64, 1, 3, 1]], 0, 1, 1, None, 64], - [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [64, 1, 1, 3]], 0, 1, 1, None, 64], - # 当前不支持kernel_size_h >= padding_h*2 + input_h和kernel_size_w >= padding_w*2 + input_w - # [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [128, 16, 3, 3]], 0, 1, 1, None, 4], - # [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [128, 16, 3, 3]], [1,1], 1, 1, None, 4], - # [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [128, 16, 3, 3]], 0, 1, 1, None, 4], - # [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [128, 16, 3, 3]], [1,1], 1, 1, None, 4], - # 当前不支持in_channel == groups != out_channel - # [[np.float32, 0, [4, 64, 75, 5]], [np.float32, 0, [128, 1, 3, 3]], [2,1], 1, 1, None, 64], - # [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [128, 1, 3, 1]], 0, 1, 1, None, 64], - # [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [128, 1, 1, 3]], 0, 1, 1, None, 64], - ] - - -instantiate_device_type_tests(TestConv2d, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import numpy as np +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestConv2d(TestCase): + weight_grad = [] + input_grad = [] + + def getWeightGrad(self, grad): + self.weight_grad.append(grad.to("cpu")) + + def getInputGrad(self, grad): + self.input_grad.append(grad.to("cpu")) + + def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True, groups=1): + input1 = input + weight1 = weight + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + + m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, groups=groups) + m1.weight.data = weight1 + m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) + cpuOutput = m1(input1) + tmp = torch.ones_like(cpuOutput) + cpuOutput.backward(tmp) + + return cpuOutput + + def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True, groups=1): + input1 = input + weight1 = weight + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + + m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, groups=groups) + m1.weight.data = weight1 + m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) + m1 = m1.to("npu") + npuOutput = m1(input1) + tmp = torch.ones_like(npuOutput) + npuOutput.backward(tmp) + + return npuOutput.to("cpu") + + def conv2d_backward_result(self, shape_format): + for item in shape_format: + self.weight_grad.clear() + self.input_grad.clear() + input_cpu, input_npu = create_common_tensor(item[0], -1, 1) + if input_cpu.dtype == torch.float16: + input_cpu = input_cpu.to(torch.float32) + weight_cpu, weight_npu = create_common_tensor(item[1], -1, 1) + if weight_cpu.dtype == torch.float16: + weight_cpu = weight_cpu.to(torch.float32) + kernel_size = (item[1][2][2], item[1][2][3]) + assert item[0][2][1]/item[6] == item[1][2][1], "ilegal parameters: con2d in_channels//groups must equal to weight.size[1]." + cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5], groups=item[6]) + weight_npu = weight_npu.to("cpu") + npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5], groups=item[6]) + + npu_output = npu_output.to(torch.float16) + cpu_output = cpu_output.to(torch.float16) + self.input_grad[0] = self.input_grad[0].to(torch.float16) + self.input_grad[1] = self.input_grad[1].to(torch.float16) + + self.weight_grad[0] = self.weight_grad[0].to(self.weight_grad[1].dtype) + + self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) + self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].numpy()) + self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].numpy()) + + def test_conv2d_backward_shape_format_fp16(self, device): + shape_format = [ # input, weight, padding, stride, dilation, bias, groups + # shuflenet + [[np.float16, 3, [1024, 232, 7, 7]], [np.float16, 4, [232, 232, 1, 1]], 0, 1, 1, None, 1], + [[np.float16, 0, [1024, 116, 14, 14]], [np.float16, 4, [116, 116, 1, 1]], 0, 1, 1, None, 1], + [[np.float16, 0, [4, 8, 300, 40]], [np.float16, 0, [16, 8, 3, 3]], [2,1], 1, 1, None, 1], + [[np.float16, 0, [4, 64, 150, 10]], [np.float16, 0, [32, 64, 1, 1]], 0, 1, 1, None, 1], + [[np.float16, 0, [4, 128, 75, 10]], [np.float16, 0, [64, 128, 1, 1]], 0, 1, 1, None, 1], + [[np.float16, 0, [4, 256, 75, 5]], [np.float16, 0, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], + [[np.float16, 0, [4, 384, 75, 1]], [np.float16, 0, [192, 384, 3, 1]], 0, 1, 1, None, 1], + [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 0, [192, 384, 1, 3]], 0, 1, 1, None, 1], + [[np.float16, 3, [4, 256, 75, 5]], [np.float16, 4, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], + [[np.float16, 3, [4, 384, 75, 1]], [np.float16, 4, [192, 384, 3, 1]], 0, 1, 1, None, 1], + [[np.float16, 3, [4, 384, 1, 75]], [np.float16, 4, [192, 384, 1, 3]], 0, 1, 1, None, 1], + [[np.float16, 0, [4, 256, 75, 5]], [np.float16, 4, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], + [[np.float16, 0, [4, 384, 75, 1]], [np.float16, 4, [192, 384, 3, 1]], 0, 1, 1, None, 1], + [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 4, [192, 384, 1, 3]], 0, 1, 1, None, 1], + # 当前不支持kernel_size_h >= padding_h*2 + input_h和kernel_size_w >= padding_w*2 + input_w, 预计330支持 + # [[np.float16, 0, [4, 384, 75, 1]], [np.float16, 0, [192, 384, 3, 3]], 0, 1, 1, None, 1], + # [[np.float16, 0, [4, 384, 75, 1]], [np.float16, 0, [192, 384, 3, 3]], [1,1], 1, 1, None, 1], + # [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 0, [192, 384, 3, 3]], 0, 1, 1, None, 1], + # [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 0, [192, 384, 3, 3]], [1,1], 1, 1, None, 1], + ] + self.conv2d_backward_result(shape_format) + + def test_conv2d_backward_shape_format_fp32(self, device): + shape_format = [ # input, weight, padding, stride, dilation, bias, groups + # mobilenet + [[np.float32, 3, [256, 960, 7, 7]], [np.float32, 0, [320, 960, 1, 1]], 0, 1, 1, None, 1], + [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [32, 3, 3, 3]], 1, 2, 1, None, 1], + [[np.float32, 0, [16, 3, 640, 640]], [np.float32, 4, [64, 3, 7, 7]], 3, 2, 1, None, 1], + [[np.float32, 0, [4, 8, 300, 40]], [np.float32, 0, [16, 8, 3, 3]], [2,1], 1, 1, None, 1], + [[np.float32, 0, [4, 64, 150, 10]], [np.float32, 0, [32, 64, 1, 1]], 0, 1, 1, None, 1], + [[np.float32, 0, [4, 128, 75, 10]], [np.float32, 0, [64, 128, 1, 1]], 0, 1, 1, None, 1], + [[np.float32, 0, [4, 256, 75, 5]], [np.float32, 0, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], + [[np.float32, 0, [4, 384, 75, 1]], [np.float32, 0, [192, 384, 3, 1]], 0, 1, 1, None, 1], + [[np.float32, 0, [4, 384, 1, 75]], [np.float32, 0, [192, 384, 1, 3]], 0, 1, 1, None, 1], + [[np.float32, 3, [4, 256, 75, 5]], [np.float32, 0, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], + [[np.float32, 3, [4, 384, 75, 1]], [np.float32, 0, [192, 384, 3, 1]], 0, 1, 1, None, 1], + [[np.float32, 3, [4, 384, 1, 75]], [np.float32, 0, [192, 384, 1, 3]], 0, 1, 1, None, 1], + [[np.float32, 0, [4, 256, 75, 5]], [np.float32, 4, [128, 256, 3, 3]], [2,1], 1, 1, None, 1], + [[np.float32, 0, [4, 384, 75, 1]], [np.float32, 4, [192, 384, 3, 1]], 0, 1, 1, None, 1], + [[np.float32, 0, [4, 384, 1, 75]], [np.float32, 4, [192, 384, 1, 3]], 0, 1, 1, None, 1], + # 当前不支持kernel_size_h >= padding_h*2 + input_h和kernel_size_w >= padding_w*2 + input_w, 预计330支持 + # [[np.float32, 0, [4, 384, 75, 1]], [np.float32, 0, [192, 384, 3, 3]], 0, 1, 1, None, 1], + # [[np.float32, 0, [4, 384, 75, 1]], [np.float32, 0, [192, 384, 3, 3]], [1,1], 1, 1, None, 1], + # [[np.float32, 0, [4, 384, 1, 75]], [np.float32, 0, [192, 384, 3, 3]], 0, 1, 1, None, 1], + # [[np.float32, 0, [4, 384, 1, 75]], [np.float32, 0, [192, 384, 3, 3]], [1,1], 1, 1, None, 1], + ] + #conv类算子不支持fp32数据的精度要求 + #self.conv2d_backward_result(shape_format) + + def test_group_conv2d_backward_shape_format_fp16(self, device): + shape_format= [ # input, weight, padding, stride, dilation, bias, groups + # KDXF + [[np.float16, 0, [4, 64, 75, 10]], [np.float16, 0, [128, 16, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float16, 0, [4, 128, 75, 10]], [np.float16, 0, [64, 32, 1, 1]], 0, 1, 1, None, 4], + [[np.float16, 0, [4, 128, 75, 5]], [np.float16, 0, [256, 32, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float16, 0, [4, 256, 75, 1]], [np.float16, 0, [384, 64, 3, 1]], [1,0], 1, 1, None, 4], + [[np.float16, 0, [4, 192, 75, 1]], [np.float16, 0, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float16, 0, [4, 128, 75, 1]], [np.float16, 0, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float16, 0, [4, 128, 75, 5]], [np.float16, 0, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float16, 3, [4, 192, 75, 1]], [np.float16, 0, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float16, 3, [4, 128, 75, 1]], [np.float16, 0, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float16, 3, [4, 128, 75, 5]], [np.float16, 0, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float16, 3, [4, 192, 75, 1]], [np.float16, 4, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float16, 3, [4, 128, 75, 1]], [np.float16, 4, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float16, 3, [4, 128, 75, 5]], [np.float16, 4, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float16, 0, [4, 64, 75, 5]], [np.float16, 0, [64, 1, 3, 3]], [2,1], 1, 1, None, 64], + [[np.float16, 0, [4, 64, 75, 1]], [np.float16, 0, [64, 1, 3, 1]], 0, 1, 1, None, 64], + [[np.float16, 0, [4, 64, 1, 75]], [np.float16, 0, [64, 1, 1, 3]], 0, 1, 1, None, 64], + # 当前不支持kernel_size_h >= padding_h*2 + input_h和kernel_size_w >= padding_w*2 + input_w, 预计330支持 + # [[np.float16, 0, [4, 64, 75, 1]], [np.float16, 0, [128, 16, 3, 3]], 0, 1, 1, None, 4], + # [[np.float16, 0, [4, 64, 75, 1]], [np.float16, 0, [128, 16, 3, 3]], [1,1], 1, 1, None, 4], + # [[np.float16, 0, [4, 64, 1, 75]], [np.float16, 0, [128, 16, 3, 3]], 0, 1, 1, None, 4], + # [[np.float16, 0, [4, 64, 1, 75]], [np.float16, 0, [128, 16, 3, 3]], [1,1], 1, 1, None, 4], + # 当前不支持in_channel == groups != out_channel + # [[np.float32, 0, [4, 64, 75, 5]], [np.float32, 0, [128, 1, 3, 3]], [2,1], 1, 1, None, 64], + # [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [128, 1, 3, 1]], 0, 1, 1, None, 64], + # [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [128, 1, 1, 3]], 0, 1, 1, None, 64], + ] + + def test_group_conv2d_backward_shape_format_fp32(self, device): + shape_format= [ # input, weight, padding, stride, dilation, bias, groups + # KDXF + [[np.float32, 0, [4, 64, 75, 10]], [np.float32, 0, [128, 16, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float32, 0, [4, 128, 75, 10]], [np.float32, 0, [64, 32, 1, 1]], 0, 1, 1, None, 4], + [[np.float32, 0, [4, 128, 75, 5]], [np.float32, 0, [256, 32, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float32, 0, [4, 256, 75, 1]], [np.float32, 0, [384, 64, 3, 1]], [1,0], 1, 1, None, 4], + [[np.float32, 0, [4, 192, 75, 1]], [np.float32, 0, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float32, 0, [4, 128, 75, 1]], [np.float32, 0, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float32, 0, [4, 128, 75, 5]], [np.float32, 0, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float32, 3, [4, 192, 75, 1]], [np.float32, 0, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float32, 3, [4, 128, 75, 1]], [np.float32, 0, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float32, 3, [4, 128, 75, 5]], [np.float32, 0, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float32, 3, [4, 192, 75, 1]], [np.float32, 4, [384, 48, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float32, 3, [4, 128, 75, 1]], [np.float32, 4, [128, 32, 3, 1]], [2,0], 1, 1, None, 4], + [[np.float32, 3, [4, 128, 75, 5]], [np.float32, 4, [128, 32, 3, 3]], [2,1], 1, 1, None, 4], + [[np.float32, 0, [4, 64, 75, 5]], [np.float32, 0, [64, 1, 3, 3]], [2,1], 1, 1, None, 64], + [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [64, 1, 3, 1]], 0, 1, 1, None, 64], + [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [64, 1, 1, 3]], 0, 1, 1, None, 64], + # 当前不支持kernel_size_h >= padding_h*2 + input_h和kernel_size_w >= padding_w*2 + input_w + # [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [128, 16, 3, 3]], 0, 1, 1, None, 4], + # [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [128, 16, 3, 3]], [1,1], 1, 1, None, 4], + # [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [128, 16, 3, 3]], 0, 1, 1, None, 4], + # [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [128, 16, 3, 3]], [1,1], 1, 1, None, 4], + # 当前不支持in_channel == groups != out_channel + # [[np.float32, 0, [4, 64, 75, 5]], [np.float32, 0, [128, 1, 3, 3]], [2,1], 1, 1, None, 64], + # [[np.float32, 0, [4, 64, 75, 1]], [np.float32, 0, [128, 1, 3, 1]], 0, 1, 1, None, 64], + # [[np.float32, 0, [4, 64, 1, 75]], [np.float32, 0, [128, 1, 1, 3]], 0, 1, 1, None, 64], + ] + + +instantiate_device_type_tests(TestConv2d, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_conv3d.py b/test/test_npu/test_network_ops/test_conv3d.py index d06b3426e5c26dd9c0e1dc13313f27d53801bd1e..bb5b24bb96b89915bd84cfec2a71dcfddbf1b0bf 100644 --- a/test/test_npu/test_network_ops/test_conv3d.py +++ b/test/test_npu/test_network_ops/test_conv3d.py @@ -1,115 +1,115 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import numpy as np -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestConv3d(TestCase): - weight_grad = [] - input_grad = [] - - def getWeightGrad(self, grad): - self.weight_grad.append(grad.to("cpu")) - - def getInputGrad(self, grad): - self.input_grad.append(grad.to("cpu")) - - def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True, groups=1): - - input1 = input - weight1 = weight - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - - m1 = nn.Conv3d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, groups=groups) - m1.weight.data = weight1 - m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) - cpuOutput = m1(input1) - tmp = torch.ones_like(cpuOutput) - cpuOutput.backward(tmp) - return cpuOutput - - def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=False, groups=1): - - input1 = input - weight1 = weight - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - - m1 = nn.Conv3d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, groups=groups) - m1.weight.data = weight1 - m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) - m1 = m1.to("npu") - npuOutput = m1(input1) - tmp = torch.ones_like(npuOutput) - npuOutput.backward(tmp) - - return npuOutput.to("cpu") - - def conv3d_backward_result(self, shape_format): - for item in shape_format: - self.weight_grad.clear() - self.input_grad.clear() - input_cpu, input_npu = create_common_tensor(item[0], 0, 1) - if input_cpu.dtype == torch.float16: - input_cpu = input_cpu.to(torch.float32) - weight_cpu, weight_npu = create_common_tensor(item[1], 0, 1) - if weight_cpu.dtype == torch.float16: - weight_cpu = weight_cpu.to(torch.float32) - kernel_size = (item[1][2][2], item[1][2][3],item[1][2][4]) - #assert item[0][2][1]/item[6] == item[1][2][1], "ilegal parameters: con2d in_channels//groups must equal to weight.size[1]." - cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5], groups=item[6]) - weight_npu = weight_npu.to("cpu") - - npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5], groups=item[6]) - - npu_output = npu_output.to(torch.float16) - cpu_output = cpu_output.to(torch.float16) - self.input_grad[0] = self.input_grad[0].to(torch.float16) - self.input_grad[1] = self.input_grad[1].to(torch.float16) - self.weight_grad[0] = self.weight_grad[0].to(self.weight_grad[1].dtype) - self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.cpu().detach().numpy()) - self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].cpu().numpy()) - self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].cpu().numpy()) - - def test_conv3d_backward_shape_format_fp16(self, device): - shape_format = [ # input, weight, padding, stride, dilation, bias, groups - [[np.float16, 30, [128, 128, 4, 14, 14]], [np.float16, 30, [128, 128, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], - [[np.float16, 30, [128, 64, 4, 14, 14]], [np.float16, 30, [128, 64, 3, 3, 3]], [1,1,1], [2,2,2], 1, None, 1], - [[np.float16, 30, [128, 256, 2, 7, 7]], [np.float16, 30, [256, 256, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], - [[np.float16, 30, [128, 512, 1, 4, 4]], [np.float16, 30, [512, 512, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], - [[np.float16, 30, [128, 256, 2, 7, 7]], [np.float16, 30, [512, 256, 1, 1, 1]], 0, [2,2,2], 1, None, 1] - ] - self.conv3d_backward_result(shape_format) - - def test_conv3d_backward_shape_format_fp32(self, device): - shape_format = [ # input, weight, padding, stride, dilation, bias, groups - [[np.float32, 30, [128, 128, 4, 14, 14]], [np.float32, 30, [128, 128, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], - [[np.float32, 30, [128, 64, 4, 14, 14]], [np.float32, 30, [128, 64, 3, 3, 3]], [1,1,1], [2,2,2], 1, None, 1], - [[np.float32, 30, [128, 256, 2, 7, 7]], [np.float32, 30, [256, 256, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], - [[np.float32, 30, [128, 512, 1, 4, 4]], [np.float32, 30, [512, 512, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], - [[np.float32, 30, [128, 256, 2, 7, 7]], [np.float32, 30, [512, 256, 1, 1, 1]], 0, [2,2,2], 1, None, 1] - ] - self.conv3d_backward_result(shape_format) - -instantiate_device_type_tests(TestConv3d, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import numpy as np +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestConv3d(TestCase): + weight_grad = [] + input_grad = [] + + def getWeightGrad(self, grad): + self.weight_grad.append(grad.to("cpu")) + + def getInputGrad(self, grad): + self.input_grad.append(grad.to("cpu")) + + def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=True, groups=1): + + input1 = input + weight1 = weight + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + + m1 = nn.Conv3d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, groups=groups) + m1.weight.data = weight1 + m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) + cpuOutput = m1(input1) + tmp = torch.ones_like(cpuOutput) + cpuOutput.backward(tmp) + return cpuOutput + + def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, bias=False, groups=1): + + input1 = input + weight1 = weight + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + + m1 = nn.Conv3d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, groups=groups) + m1.weight.data = weight1 + m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) + m1 = m1.to("npu") + npuOutput = m1(input1) + tmp = torch.ones_like(npuOutput) + npuOutput.backward(tmp) + + return npuOutput.to("cpu") + + def conv3d_backward_result(self, shape_format): + for item in shape_format: + self.weight_grad.clear() + self.input_grad.clear() + input_cpu, input_npu = create_common_tensor(item[0], 0, 1) + if input_cpu.dtype == torch.float16: + input_cpu = input_cpu.to(torch.float32) + weight_cpu, weight_npu = create_common_tensor(item[1], 0, 1) + if weight_cpu.dtype == torch.float16: + weight_cpu = weight_cpu.to(torch.float32) + kernel_size = (item[1][2][2], item[1][2][3],item[1][2][4]) + #assert item[0][2][1]/item[6] == item[1][2][1], "ilegal parameters: con2d in_channels//groups must equal to weight.size[1]." + cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5], groups=item[6]) + weight_npu = weight_npu.to("cpu") + + npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5], groups=item[6]) + + npu_output = npu_output.to(torch.float16) + cpu_output = cpu_output.to(torch.float16) + self.input_grad[0] = self.input_grad[0].to(torch.float16) + self.input_grad[1] = self.input_grad[1].to(torch.float16) + self.weight_grad[0] = self.weight_grad[0].to(self.weight_grad[1].dtype) + self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.cpu().detach().numpy()) + self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].cpu().numpy()) + self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].cpu().numpy()) + + def test_conv3d_backward_shape_format_fp16(self, device): + shape_format = [ # input, weight, padding, stride, dilation, bias, groups + [[np.float16, 30, [128, 128, 4, 14, 14]], [np.float16, 30, [128, 128, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], + [[np.float16, 30, [128, 64, 4, 14, 14]], [np.float16, 30, [128, 64, 3, 3, 3]], [1,1,1], [2,2,2], 1, None, 1], + [[np.float16, 30, [128, 256, 2, 7, 7]], [np.float16, 30, [256, 256, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], + [[np.float16, 30, [128, 512, 1, 4, 4]], [np.float16, 30, [512, 512, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], + [[np.float16, 30, [128, 256, 2, 7, 7]], [np.float16, 30, [512, 256, 1, 1, 1]], 0, [2,2,2], 1, None, 1] + ] + self.conv3d_backward_result(shape_format) + + def test_conv3d_backward_shape_format_fp32(self, device): + shape_format = [ # input, weight, padding, stride, dilation, bias, groups + [[np.float32, 30, [128, 128, 4, 14, 14]], [np.float32, 30, [128, 128, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], + [[np.float32, 30, [128, 64, 4, 14, 14]], [np.float32, 30, [128, 64, 3, 3, 3]], [1,1,1], [2,2,2], 1, None, 1], + [[np.float32, 30, [128, 256, 2, 7, 7]], [np.float32, 30, [256, 256, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], + [[np.float32, 30, [128, 512, 1, 4, 4]], [np.float32, 30, [512, 512, 3, 3, 3]], [1,1,1], [1,1,1], 1, None, 1], + [[np.float32, 30, [128, 256, 2, 7, 7]], [np.float32, 30, [512, 256, 1, 1, 1]], 0, [2,2,2], 1, None, 1] + ] + self.conv3d_backward_result(shape_format) + +instantiate_device_type_tests(TestConv3d, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_conv_depthwise2d_backward.py b/test/test_npu/test_network_ops/test_conv_depthwise2d_backward.py old mode 100644 new mode 100755 index 535a8d0c0071be732055d19f842eb9daf5444c73..2ae37272167992347d4d4887530d7f93e5baaba6 --- a/test/test_npu/test_network_ops/test_conv_depthwise2d_backward.py +++ b/test/test_npu/test_network_ops/test_conv_depthwise2d_backward.py @@ -1,118 +1,118 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import numpy as np -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestConvDepthwise2d(TestCase): - weight_grad = [] - input_grad = [] - - def getWeightGrad(self, grad): - self.weight_grad.append(grad.to("cpu")) - - def getInputGrad(self, grad): - self.input_grad.append(grad.to("cpu")) - - def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, - bias=True): - input1 = input - weight1 = weight - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - - m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, - groups=in_channels) - m1.weight.data = weight1 - m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) - cpuOutput = m1(input1) - tmp = torch.ones_like(cpuOutput) - cpuOutput.backward(tmp) - - return cpuOutput - - def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, - bias=True): - input1 = input - weight1 = weight - input1.requires_grad = True - input1.register_hook(lambda grad: self.getInputGrad(grad)) - - m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, - groups=in_channels) - m1.weight.data = weight1 - m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) - m1 = m1.to("npu") - npuOutput = m1(input1) - npuOutput = npuOutput.to("cpu") - tmp = torch.ones_like(npuOutput) - npuOutput.backward(tmp) - - return npuOutput - - def conv_depthwise2d_backward_result(self, shape_format): - for item in shape_format: - self.weight_grad.clear() - self.input_grad.clear() - input_cpu, input_npu = create_common_tensor(item[0], -1, 1) - if input_cpu.dtype == torch.float16: - input_cpu = input_cpu.to(torch.float32) - weight_cpu, weight_npu = create_common_tensor(item[1], -1, 1) - if weight_cpu.dtype == torch.float16: - weight_cpu = weight_cpu.to(torch.float32) - kernel_size = (item[1][2][2], item[1][2][3]) - cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - weight_npu = weight_npu.to("cpu") - npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, - padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) - npu_output = npu_output.to(torch.float16) - cpu_output = cpu_output.to(torch.float16) - self.input_grad[0] = self.input_grad[0].to(torch.float16) - self.input_grad[1] = self.input_grad[1].to(torch.float16) - - self.weight_grad[0] = self.weight_grad[0].to(self.weight_grad[1].dtype) - - self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) - self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].numpy()) - self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].numpy()) - - def test_conv_depthwise2d_backward_shape_format_fp16(self, device): - shape_format = [ #input , weight, padding, stide, dilation, bias - # shuflenet - [[np.float16, 0, [1024, 116, 28, 28]], [np.float16, 0, [116, 1, 3, 3]], 1, 2, 1, 0], - [[np.float16, 3, [1024, 116, 14, 14]], [np.float16, 0, [116, 1, 3, 3]], 1, 1, 1, 0], - ] - self.conv_depthwise2d_backward_result(shape_format) - - def test_conv_depthwise2d_backward_shape_format_fp32(self, device): - shape_format = [ #input , weight, padding, stide, dilation, bias - # mobilenet - [[np.float32, 3, [256, 32, 112, 112]], [np.float32, 0, [32, 1, 3, 3]], 1, 1, 1, None], - [[np.float32, 3, [256, 96, 112, 112]], [np.float32, 0, [96, 1, 3, 3]], 1, 2, 1, None], - ] - #conv类算子不支持fp32数据的精度要求 - #self.conv_depthwise2d_backward_result(shape_format) - - -instantiate_device_type_tests(TestConvDepthwise2d, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import numpy as np +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestConvDepthwise2d(TestCase): + weight_grad = [] + input_grad = [] + + def getWeightGrad(self, grad): + self.weight_grad.append(grad.to("cpu")) + + def getInputGrad(self, grad): + self.input_grad.append(grad.to("cpu")) + + def op_exec_cpu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, + bias=True): + input1 = input + weight1 = weight + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + + m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, + groups=in_channels) + m1.weight.data = weight1 + m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) + cpuOutput = m1(input1) + tmp = torch.ones_like(cpuOutput) + cpuOutput.backward(tmp) + + return cpuOutput + + def op_exec_npu(self, input, weight, in_channels, out_channels, kernel_size, padding=0, stride=1, dilation=1, + bias=True): + input1 = input + weight1 = weight + input1.requires_grad = True + input1.register_hook(lambda grad: self.getInputGrad(grad)) + + m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, + groups=in_channels) + m1.weight.data = weight1 + m1.weight.register_hook(lambda grad: self.getWeightGrad(grad)) + m1 = m1.to("npu") + npuOutput = m1(input1) + npuOutput = npuOutput.to("cpu") + tmp = torch.ones_like(npuOutput) + npuOutput.backward(tmp) + + return npuOutput + + def conv_depthwise2d_backward_result(self, shape_format): + for item in shape_format: + self.weight_grad.clear() + self.input_grad.clear() + input_cpu, input_npu = create_common_tensor(item[0], -1, 1) + if input_cpu.dtype == torch.float16: + input_cpu = input_cpu.to(torch.float32) + weight_cpu, weight_npu = create_common_tensor(item[1], -1, 1) + if weight_cpu.dtype == torch.float16: + weight_cpu = weight_cpu.to(torch.float32) + kernel_size = (item[1][2][2], item[1][2][3]) + cpu_output = self.op_exec_cpu(input_cpu, weight_cpu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + weight_npu = weight_npu.to("cpu") + npu_output = self.op_exec_npu(input_npu, weight_npu, item[0][2][1], item[1][2][0], kernel_size=kernel_size, + padding=item[2], stride=item[3], dilation=item[4], bias=item[5]) + npu_output = npu_output.to(torch.float16) + cpu_output = cpu_output.to(torch.float16) + self.input_grad[0] = self.input_grad[0].to(torch.float16) + self.input_grad[1] = self.input_grad[1].to(torch.float16) + + self.weight_grad[0] = self.weight_grad[0].to(self.weight_grad[1].dtype) + + self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy()) + self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].numpy()) + self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].numpy()) + + def test_conv_depthwise2d_backward_shape_format_fp16(self, device): + shape_format = [ #input , weight, padding, stide, dilation, bias + # shuflenet + [[np.float16, 0, [1024, 116, 28, 28]], [np.float16, 0, [116, 1, 3, 3]], 1, 2, 1, 0], + [[np.float16, 3, [1024, 116, 14, 14]], [np.float16, 0, [116, 1, 3, 3]], 1, 1, 1, 0], + ] + self.conv_depthwise2d_backward_result(shape_format) + + def test_conv_depthwise2d_backward_shape_format_fp32(self, device): + shape_format = [ #input , weight, padding, stide, dilation, bias + # mobilenet + [[np.float32, 3, [256, 32, 112, 112]], [np.float32, 0, [32, 1, 3, 3]], 1, 1, 1, None], + [[np.float32, 3, [256, 96, 112, 112]], [np.float32, 0, [96, 1, 3, 3]], 1, 2, 1, None], + ] + #conv类算子不支持fp32数据的精度要求 + #self.conv_depthwise2d_backward_result(shape_format) + + +instantiate_device_type_tests(TestConvDepthwise2d, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_convolution_backward_weight.py b/test/test_npu/test_network_ops/test_convolution_backward_weight.py index beaf25c285e64c0f8c1c81f48ab61f10e3a7f369..e8924c141a0dc2bf27148830c2d8a06375f9b8cb 100644 --- a/test/test_npu/test_network_ops/test_convolution_backward_weight.py +++ b/test/test_npu/test_network_ops/test_convolution_backward_weight.py @@ -1,109 +1,109 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestCudnnConvolutionBackwardWeight(TestCase): - weight_grad = [] - - def getWeightGrad(self, grad): - self.weight_grad.append(grad.to("cpu")) - - def cpu_op_exec(self, input1, weight, stride, padding, dilation, groups): - weight.requires_grad = True - res_forward = torch._convolution(input1, - weight, - bias=None, - stride=stride, - padding=padding, - dilation=dilation, - transposed=False, - output_padding=(0, 0), - groups=groups, - benchmark=True, - deterministic=True, - cudnn_enabled=True) - grads = torch.ones_like(res_forward).float() - res_forward.backward(grads, retain_graph=True) - res_forward = res_forward.detach().numpy() - gradweight = weight.grad - return res_forward, gradweight - - def npu_op_exec(self, input1, weight, stride, padding, dilation, groups): - weight.requires_grad = True - input1 = input1.to("npu") - res_forward = torch._convolution(input1, - weight, - bias=None, - stride=stride, - padding=padding, - dilation=dilation, - transposed=False, - output_padding=(0, 0), - groups=groups, - benchmark=True, - deterministic=True, - cudnn_enabled=True) - grads = torch.ones_like(res_forward).float() - grads = grads.to("npu") - res_forward.backward(grads, retain_graph=True) - res_forward = res_forward.to("cpu") - res_forward = res_forward.detach().numpy() - gradweight = weight.grad.to("cpu") - return res_forward, gradweight - - def test_cudnn_convolution_backward_weight_shape_format(self, device): - shape_format = [ # input, weight, stride, padding, dilation, groups - [[np.float16, 0, (1, 4, 5, 5)], [np.float16, 0, (4, 4, 3, 3)], - (1, 1), (1, 1), (1, 1), 1], - [[np.float32, 0, [256, 3, 224, 224]], - [np.float32, 0, [32, 3, 3, 3]], [2, 2], [0, 0], [1, 1], 1], - [[np.float16, 3, (256, 8, 1, 1)], [np.float16, 3, (8, 8, 1, 1)], - (1, 1), (0, 0), (1, 1), 1], - [[np.float16, 3, [1024, 232, 7, 7]], - [np.float16, 4, [232, 232, 1, 1]], (1, 1), (0, 0), (1, 1), 1], - [[np.float32, 0, (1, 4, 5, 5)], [np.float32, 0, (4, 4, 3, 3)], - (1, 1), (1, 1), (1, 1), 1] - ] - - for item in shape_format: - self.weight_grad.clear() - cpu_input1, npu_input1 = create_common_tensor(item[0], -2, 2) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2, npu_input2 = create_common_tensor(item[1], -2, 2) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output, cpu_dweight = self.cpu_op_exec(cpu_input1, cpu_input2, item[2], - item[3], item[4], item[5]) - npu_output, npu_dweight = self.npu_op_exec(npu_input1, npu_input2, item[2], - item[3], item[4], item[5]) - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_dweight = cpu_dweight.to(npu_dweight.dtype) - self.assertRtolEqual(cpu_output, npu_output, 0.007) - self.assertRtolEqual(cpu_dweight, npu_dweight, 0.003) - - -instantiate_device_type_tests(TestCudnnConvolutionBackwardWeight, - globals(), - except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestCudnnConvolutionBackwardWeight(TestCase): + weight_grad = [] + + def getWeightGrad(self, grad): + self.weight_grad.append(grad.to("cpu")) + + def cpu_op_exec(self, input1, weight, stride, padding, dilation, groups): + weight.requires_grad = True + res_forward = torch._convolution(input1, + weight, + bias=None, + stride=stride, + padding=padding, + dilation=dilation, + transposed=False, + output_padding=(0, 0), + groups=groups, + benchmark=True, + deterministic=True, + cudnn_enabled=True) + grads = torch.ones_like(res_forward).float() + res_forward.backward(grads, retain_graph=True) + res_forward = res_forward.detach().numpy() + gradweight = weight.grad + return res_forward, gradweight + + def npu_op_exec(self, input1, weight, stride, padding, dilation, groups): + weight.requires_grad = True + input1 = input1.to("npu") + res_forward = torch._convolution(input1, + weight, + bias=None, + stride=stride, + padding=padding, + dilation=dilation, + transposed=False, + output_padding=(0, 0), + groups=groups, + benchmark=True, + deterministic=True, + cudnn_enabled=True) + grads = torch.ones_like(res_forward).float() + grads = grads.to("npu") + res_forward.backward(grads, retain_graph=True) + res_forward = res_forward.to("cpu") + res_forward = res_forward.detach().numpy() + gradweight = weight.grad.to("cpu") + return res_forward, gradweight + + def test_cudnn_convolution_backward_weight_shape_format(self, device): + shape_format = [ # input, weight, stride, padding, dilation, groups + [[np.float16, 0, (1, 4, 5, 5)], [np.float16, 0, (4, 4, 3, 3)], + (1, 1), (1, 1), (1, 1), 1], + [[np.float32, 0, [256, 3, 224, 224]], + [np.float32, 0, [32, 3, 3, 3]], [2, 2], [0, 0], [1, 1], 1], + [[np.float16, 3, (256, 8, 1, 1)], [np.float16, 3, (8, 8, 1, 1)], + (1, 1), (0, 0), (1, 1), 1], + [[np.float16, 3, [1024, 232, 7, 7]], + [np.float16, 4, [232, 232, 1, 1]], (1, 1), (0, 0), (1, 1), 1], + [[np.float32, 0, (1, 4, 5, 5)], [np.float32, 0, (4, 4, 3, 3)], + (1, 1), (1, 1), (1, 1), 1] + ] + + for item in shape_format: + self.weight_grad.clear() + cpu_input1, npu_input1 = create_common_tensor(item[0], -2, 2) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2, npu_input2 = create_common_tensor(item[1], -2, 2) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output, cpu_dweight = self.cpu_op_exec(cpu_input1, cpu_input2, item[2], + item[3], item[4], item[5]) + npu_output, npu_dweight = self.npu_op_exec(npu_input1, npu_input2, item[2], + item[3], item[4], item[5]) + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_dweight = cpu_dweight.to(npu_dweight.dtype) + self.assertRtolEqual(cpu_output, npu_output, 0.007) + self.assertRtolEqual(cpu_dweight, npu_dweight, 0.003) + + +instantiate_device_type_tests(TestCudnnConvolutionBackwardWeight, + globals(), + except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_copy_.py b/test/test_npu/test_network_ops/test_copy_.py index ec4aea8203021b8fb0aee615e3cba7c0caf34a05..8f6a5aee604b2cf8a5c3ecae988bd35ed4899d55 100644 --- a/test/test_npu/test_network_ops/test_copy_.py +++ b/test/test_npu/test_network_ops/test_copy_.py @@ -1,56 +1,56 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestCopy(TestCase): - - def cpu_op_exec(self, input1, input2): - output = input1.copy_(input2); - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = input1.copy_(input2); - output = output.to("cpu") - output = output.numpy() - return output - - def test_copy__(self, device): - format_list = [0] - shape_list = [(4, 1), (4, 3, 1)] - dtype_list = [np.float32, np.int32, np.float16] - shape_format = [ - [i, j, k] for i in dtype_list for j in format_list for k in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestCopy, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestCopy(TestCase): + + def cpu_op_exec(self, input1, input2): + output = input1.copy_(input2); + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = input1.copy_(input2); + output = output.to("cpu") + output = output.numpy() + return output + + def test_copy__(self, device): + format_list = [0] + shape_list = [(4, 1), (4, 3, 1)] + dtype_list = [np.float32, np.int32, np.float16] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestCopy, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_ctc_loss.py b/test/test_npu/test_network_ops/test_ctc_loss.py index 2821539914f52ef9f6f62fbe281d31672460fb3a..725fc42a933ad9799aab3d632cda103e55d6d968 100644 --- a/test/test_npu/test_network_ops/test_ctc_loss.py +++ b/test/test_npu/test_network_ops/test_ctc_loss.py @@ -1,93 +1,93 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestCtcLoss(TestCase): - def generate_data(self, item): - T = item[0][0] - C = item[0][1] - N = item[0][2] - S = item[0][3] - S_min = item[0][4] - dtype = item[1] - reduction_str = item[2] - blk = item[3] - - log_probs = np.random.uniform(-10, 10, (T, N, C)).astype(dtype) - targets = torch.randint(1, C, (N, S), dtype = torch.long) - input_lengths = torch.full((N,), T, dtype=torch.long) - target_lengths = torch.randint(S_min, S, (N,), dtype=torch.long) - - # modify from numpy.ndarray to torch.tensor - log_probs = torch.from_numpy(log_probs) - - ctc_loss = torch.nn.CTCLoss(blank= blk, zero_infinity=True, reduction=reduction_str) - - return ctc_loss, log_probs, targets, input_lengths, target_lengths - - def cpu_op_exec(self, ctc_loss, log_probs, targets, input_lengths, target_lengths): - if log_probs.dtype == torch.float16: - log_probs = log_probs.to(torch.float32) - - neg_log_likelihood = ctc_loss(log_probs.log_softmax(2), targets, input_lengths, target_lengths) - - neg_log_likelihood = neg_log_likelihood.numpy() - - return neg_log_likelihood - - def npu_op_exec(self, ctc_loss, log_probs, targets, input_lengths, target_lengths): - log_probs = log_probs.npu() - targets = targets.npu() - input_lengths = input_lengths.npu() - target_lengths = target_lengths.npu() - - neg_log_likelihood = ctc_loss(log_probs.log_softmax(2), targets, input_lengths, target_lengths) - - if neg_log_likelihood.dtype == torch.float16: - neg_log_likelihood = neg_log_likelihood.to(torch.float32) - - neg_log_likelihood = neg_log_likelihood.cpu().numpy() - - return neg_log_likelihood - - def test_ctc_loss(self, device): - sizes_list = [[50, 20, 16, 30, 10], [26, 37, 256, 18, 10]] - para_reduction = ["sum", "mean", "none"] - dtype = [np.float32, np.float16] - blank = [0, 9] - shape_format = [ - [i, j, k, l] for i in sizes_list for j in dtype for k in para_reduction for l in blank - ] - - for item in shape_format: - ctc_loss, log_probs, targets, input_lengths, target_lengths = self.generate_data(item) - - neg_log_likelihood_cpu = self.cpu_op_exec(ctc_loss, log_probs, targets, input_lengths, target_lengths) - neg_log_likelihood_npu = self.npu_op_exec(ctc_loss, log_probs, targets, input_lengths, target_lengths) - - self.assertRtolEqual(neg_log_likelihood_cpu, neg_log_likelihood_npu, 1e-3) - -instantiate_device_type_tests(TestCtcLoss, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestCtcLoss(TestCase): + def generate_data(self, item): + T = item[0][0] + C = item[0][1] + N = item[0][2] + S = item[0][3] + S_min = item[0][4] + dtype = item[1] + reduction_str = item[2] + blk = item[3] + + log_probs = np.random.uniform(-10, 10, (T, N, C)).astype(dtype) + targets = torch.randint(1, C, (N, S), dtype = torch.long) + input_lengths = torch.full((N,), T, dtype=torch.long) + target_lengths = torch.randint(S_min, S, (N,), dtype=torch.long) + + # modify from numpy.ndarray to torch.tensor + log_probs = torch.from_numpy(log_probs) + + ctc_loss = torch.nn.CTCLoss(blank= blk, zero_infinity=True, reduction=reduction_str) + + return ctc_loss, log_probs, targets, input_lengths, target_lengths + + def cpu_op_exec(self, ctc_loss, log_probs, targets, input_lengths, target_lengths): + if log_probs.dtype == torch.float16: + log_probs = log_probs.to(torch.float32) + + neg_log_likelihood = ctc_loss(log_probs.log_softmax(2), targets, input_lengths, target_lengths) + + neg_log_likelihood = neg_log_likelihood.numpy() + + return neg_log_likelihood + + def npu_op_exec(self, ctc_loss, log_probs, targets, input_lengths, target_lengths): + log_probs = log_probs.npu() + targets = targets.npu() + input_lengths = input_lengths.npu() + target_lengths = target_lengths.npu() + + neg_log_likelihood = ctc_loss(log_probs.log_softmax(2), targets, input_lengths, target_lengths) + + if neg_log_likelihood.dtype == torch.float16: + neg_log_likelihood = neg_log_likelihood.to(torch.float32) + + neg_log_likelihood = neg_log_likelihood.cpu().numpy() + + return neg_log_likelihood + + def test_ctc_loss(self, device): + sizes_list = [[50, 20, 16, 30, 10], [26, 37, 256, 18, 10]] + para_reduction = ["sum", "mean", "none"] + dtype = [np.float32, np.float16] + blank = [0, 9] + shape_format = [ + [i, j, k, l] for i in sizes_list for j in dtype for k in para_reduction for l in blank + ] + + for item in shape_format: + ctc_loss, log_probs, targets, input_lengths, target_lengths = self.generate_data(item) + + neg_log_likelihood_cpu = self.cpu_op_exec(ctc_loss, log_probs, targets, input_lengths, target_lengths) + neg_log_likelihood_npu = self.npu_op_exec(ctc_loss, log_probs, targets, input_lengths, target_lengths) + + self.assertRtolEqual(neg_log_likelihood_cpu, neg_log_likelihood_npu, 1e-3) + +instantiate_device_type_tests(TestCtcLoss, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_ctc_loss_backward.py b/test/test_npu/test_network_ops/test_ctc_loss_backward.py index 0929c060edaed47a0b84623d34196fae7e4839cf..1935b0b7630c8ea7ab7c2974e5a43a9c9d716a0d 100644 --- a/test/test_npu/test_network_ops/test_ctc_loss_backward.py +++ b/test/test_npu/test_network_ops/test_ctc_loss_backward.py @@ -1,100 +1,100 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestCtcLossBackward(TestCase): - def generate_data(self, item): - T = item[0][0] - C = item[0][1] - N = item[0][2] - S = item[0][3] - S_min = item[0][4] - dtype = item[1] - reduction_str = item[2] - - log_probs = np.random.uniform(-10, 10, (T, N, C)).astype(dtype) - targets = torch.randint(1, C, (N, S), dtype = torch.long) - input_lengths = torch.full((N,), T, dtype=torch.long) - target_lengths = torch.randint(S_min, S, (N,), dtype=torch.long) - - # modify from numpy.ndarray to torch.tensor - log_probs = torch.from_numpy(log_probs) - - ctc_loss = torch.nn.CTCLoss(zero_infinity=True, reduction=reduction_str) - - return ctc_loss, log_probs, targets, input_lengths, target_lengths - - def cpu_op_exec(self, ctc_loss, log_probs, targets, input_lengths, target_lengths): - if log_probs.dtype == torch.float16: - log_probs = log_probs.to(torch.float32) - - log_probs.requires_grad_(True) - log_probs.retain_grad() - - neg_log_likelihood = ctc_loss(log_probs.log_softmax(2), targets, input_lengths, target_lengths) - neg_log_likelihood.backward() - grad = log_probs.grad - - grad = grad.numpy() - - return grad - - def npu_op_exec(self, ctc_loss, log_probs, targets, input_lengths, target_lengths): - log_probs = copy.deepcopy(log_probs).npu() - targets = targets.npu() - log_probs.requires_grad_(True) - log_probs.retain_grad() - - neg_log_likelihood = ctc_loss(log_probs.log_softmax(2), targets, input_lengths.npu(), target_lengths.npu()) - neg_log_likelihood.backward() - grad = log_probs.grad - - if grad.dtype == torch.float16: - grad = grad.to(torch.float32) - - grad = grad.cpu().numpy() - - return grad - - def test_ctc_loss_backward(self, device): - sizes_list = [[50, 20, 16, 30, 10], [26, 37, 2560, 18, 10]] - para_reduction = ["sum", "mean"] - dtype = [np.float32] # Insufficient accuracy when use fp16 data - shape_format = [ - [i, j, k] for i in sizes_list for j in dtype for k in para_reduction - ] - - for item in shape_format: - ctc_loss, log_probs, targets, input_lengths, target_lengths = self.generate_data(item) - - grad_cpu = self.cpu_op_exec(ctc_loss, log_probs, targets, input_lengths, target_lengths) - grad_npu = self.npu_op_exec(ctc_loss, log_probs, targets, input_lengths, target_lengths) - - self.assertRtolEqual(grad_cpu, grad_npu, 1e-3) - - - -instantiate_device_type_tests(TestCtcLossBackward, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestCtcLossBackward(TestCase): + def generate_data(self, item): + T = item[0][0] + C = item[0][1] + N = item[0][2] + S = item[0][3] + S_min = item[0][4] + dtype = item[1] + reduction_str = item[2] + + log_probs = np.random.uniform(-10, 10, (T, N, C)).astype(dtype) + targets = torch.randint(1, C, (N, S), dtype = torch.long) + input_lengths = torch.full((N,), T, dtype=torch.long) + target_lengths = torch.randint(S_min, S, (N,), dtype=torch.long) + + # modify from numpy.ndarray to torch.tensor + log_probs = torch.from_numpy(log_probs) + + ctc_loss = torch.nn.CTCLoss(zero_infinity=True, reduction=reduction_str) + + return ctc_loss, log_probs, targets, input_lengths, target_lengths + + def cpu_op_exec(self, ctc_loss, log_probs, targets, input_lengths, target_lengths): + if log_probs.dtype == torch.float16: + log_probs = log_probs.to(torch.float32) + + log_probs.requires_grad_(True) + log_probs.retain_grad() + + neg_log_likelihood = ctc_loss(log_probs.log_softmax(2), targets, input_lengths, target_lengths) + neg_log_likelihood.backward() + grad = log_probs.grad + + grad = grad.numpy() + + return grad + + def npu_op_exec(self, ctc_loss, log_probs, targets, input_lengths, target_lengths): + log_probs = copy.deepcopy(log_probs).npu() + targets = targets.npu() + log_probs.requires_grad_(True) + log_probs.retain_grad() + + neg_log_likelihood = ctc_loss(log_probs.log_softmax(2), targets, input_lengths.npu(), target_lengths.npu()) + neg_log_likelihood.backward() + grad = log_probs.grad + + if grad.dtype == torch.float16: + grad = grad.to(torch.float32) + + grad = grad.cpu().numpy() + + return grad + + def test_ctc_loss_backward(self, device): + sizes_list = [[50, 20, 16, 30, 10], [26, 37, 2560, 18, 10]] + para_reduction = ["sum", "mean"] + dtype = [np.float32] # Insufficient accuracy when use fp16 data + shape_format = [ + [i, j, k] for i in sizes_list for j in dtype for k in para_reduction + ] + + for item in shape_format: + ctc_loss, log_probs, targets, input_lengths, target_lengths = self.generate_data(item) + + grad_cpu = self.cpu_op_exec(ctc_loss, log_probs, targets, input_lengths, target_lengths) + grad_npu = self.npu_op_exec(ctc_loss, log_probs, targets, input_lengths, target_lengths) + + self.assertRtolEqual(grad_cpu, grad_npu, 1e-3) + + + +instantiate_device_type_tests(TestCtcLossBackward, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_cudnn_is_acceptable.py b/test/test_npu/test_network_ops/test_cudnn_is_acceptable.py index 3bbc4b872843dd4e1e7eeab9e38fd9b656683872..d32be4ae6d50a8e8df3c5bd3498ef2aca1b55d58 100644 --- a/test/test_npu/test_network_ops/test_cudnn_is_acceptable.py +++ b/test/test_npu/test_network_ops/test_cudnn_is_acceptable.py @@ -1,38 +1,38 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestCudnnIsAcceptable(TestCase): - def test_cudnn_is_acceptable_common_shape_format(self, device): - shape_format = [ - [[np.float16, 0, 1]], - [[np.float16, 0, 5]], - [[np.float32, 4, 3]], - [[np.float32, 29, 4]] - ] - for shape in shape_format: - cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) - cpu_output = np.array([torch.cudnn_is_acceptable(cpu_input)]).astype(np.float32) - npu_output = np.array([torch.cudnn_is_acceptable(npu_input)]).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestCudnnIsAcceptable, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestCudnnIsAcceptable(TestCase): + def test_cudnn_is_acceptable_common_shape_format(self, device): + shape_format = [ + [[np.float16, 0, 1]], + [[np.float16, 0, 5]], + [[np.float32, 4, 3]], + [[np.float32, 29, 4]] + ] + for shape in shape_format: + cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) + cpu_output = np.array([torch.cudnn_is_acceptable(cpu_input)]).astype(np.float32) + npu_output = np.array([torch.cudnn_is_acceptable(npu_input)]).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestCudnnIsAcceptable, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_div.py b/test/test_npu/test_network_ops/test_div.py old mode 100644 new mode 100755 index 0050359a335d0cccd282743d84a905e8cf975b26..d691dc286b08a1dcfb7a921fc81911198e629ec0 --- a/test/test_npu/test_network_ops/test_div.py +++ b/test/test_npu/test_network_ops/test_div.py @@ -1,120 +1,120 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -import unittest -from util_test import create_common_tensor, test_2args_broadcast, create_dtype_tensor, UT_FAST_MODE -from common_device_type import dtypes, instantiate_device_type_tests - -class TestDiv(TestCase): - def get_outputs(self, cpu_args, npu_args, dtype): - # cpu not support fp16 div - cpu_args = [i.float() if dtype==torch.half else i for i in cpu_args] - cpu_output = torch.div(cpu_args[0], cpu_args[1]).to(dtype).numpy() - npu_output = torch.div(npu_args[0], npu_args[1]).to("cpu").numpy() - return cpu_output, npu_output - - def get_outputs_chk(self, cpu_args, npu_args, dtype): - # cpu not support fp16 div - cpu_out = torch.randn(6).to(dtype) - npu_out = torch.randn(6).to("npu").to(dtype) - cpu_args = [i.float() if dtype==torch.half else i for i in cpu_args] - torch.div(cpu_args[0], cpu_args[1], out = cpu_out) - torch.div(npu_args[0], npu_args[1], out = npu_out) - cpu_output = cpu_out.to(dtype).numpy() - npu_output = npu_out.to("cpu").numpy() - return cpu_output, npu_output - - def test_div_broadcast(self, device): - for item in test_2args_broadcast(torch.div): - self.assertRtolEqual(item[0], item[1]) - - # div not support bool - @dtypes(torch.float, torch.half, torch.int) - def test_div_dtype(self, device, dtype): - cpu_input1, npu_input1 = create_dtype_tensor((2,3,4,5), dtype) - # divisor can not be zero - cpu_input2, npu_input2 = create_dtype_tensor((2,3,4,5), dtype, no_zero=True) - cpu_output, npu_output = self.get_outputs([cpu_input1, cpu_input2], [npu_input1, npu_input2], dtype) - - # div 在int结果为负数时采用截断而不是向下取整的方式取整,所以选用numpy比较 - if dtype == torch.int: - cpu_output = np.floor_divide(cpu_input1.numpy(), cpu_input2.numpy()) - - self.assertRtolEqual(cpu_output, npu_output) - - @unittest.skipIf(UT_FAST_MODE, "Run UT in fast mode") - def test_div_shape_format_fp16(self, device): - format_list = [0, 3, 29] - shape_list = [1, (64, 10), (32, 3, 3), (256, 2048, 7, 7)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 1, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output, npu_output = self.get_outputs([cpu_input1, cpu_input2], [npu_input1, npu_input2], torch.half) - self.assertRtolEqual(cpu_output, npu_output) - - @unittest.skipIf(UT_FAST_MODE, "Run UT in fast mode") - def test_div_shape_format_fp32(self, device): - format_list = [0, 3, 29] - shape_list = [1, (64, 10), (32, 3, 3), (256, 2048, 7, 7), (2, 0, 2)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 1, 100) - cpu_output, npu_output = self.get_outputs([cpu_input1, cpu_input2], [npu_input1, npu_input2], torch.float) - self.assertRtolEqual(cpu_output, npu_output) - - def test_div_mix_dtype_1(self, device): - npu_input1, npu_input2 = create_common_tensor([np.int32, 0, (2, 3)], 1, 100) - npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - cpu_output, npu_output = self.get_outputs([npu_input1, npu_input3], [npu_input2, npu_input4], torch.float) - self.assertRtolEqual(cpu_output, npu_output) - - def test_div_mix_dtype_2(self, device): - npu_input1, npu_input2 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - npu_input3 = torch.tensor(3).int() - cpu_output, npu_output = self.get_outputs([npu_input1, npu_input3], [npu_input2, npu_input3], torch.float) - self.assertRtolEqual(cpu_output, npu_output) - - def test_div_scalar_dtype(self, device): - cpu_input1, npu_input1 = create_common_tensor([np.int32, 0, (2, 3)], 1, 100) - cpu_output = cpu_input1 / 0.5 - npu_output = npu_input1 / 0.5 - self.assertRtolEqual(cpu_output, npu_output.cpu()) - - @unittest.skipIf(UT_FAST_MODE, "Run UT in fast mode") - def test_div_shape_format_fp32(self, device): - format_list = [0, 3, 29] - shape_list = [1, (64, 10), (32, 3, 3), (256, 2048, 7, 7)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 1, 100) - cpu_output, npu_output = self.get_outputs_chk([cpu_input1, cpu_input2], [npu_input1, npu_input2], torch.float) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestDiv, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +import unittest +from util_test import create_common_tensor, test_2args_broadcast, create_dtype_tensor, UT_FAST_MODE +from common_device_type import dtypes, instantiate_device_type_tests + +class TestDiv(TestCase): + def get_outputs(self, cpu_args, npu_args, dtype): + # cpu not support fp16 div + cpu_args = [i.float() if dtype==torch.half else i for i in cpu_args] + cpu_output = torch.div(cpu_args[0], cpu_args[1]).to(dtype).numpy() + npu_output = torch.div(npu_args[0], npu_args[1]).to("cpu").numpy() + return cpu_output, npu_output + + def get_outputs_chk(self, cpu_args, npu_args, dtype): + # cpu not support fp16 div + cpu_out = torch.randn(6).to(dtype) + npu_out = torch.randn(6).to("npu").to(dtype) + cpu_args = [i.float() if dtype==torch.half else i for i in cpu_args] + torch.div(cpu_args[0], cpu_args[1], out = cpu_out) + torch.div(npu_args[0], npu_args[1], out = npu_out) + cpu_output = cpu_out.to(dtype).numpy() + npu_output = npu_out.to("cpu").numpy() + return cpu_output, npu_output + + def test_div_broadcast(self, device): + for item in test_2args_broadcast(torch.div): + self.assertRtolEqual(item[0], item[1]) + + # div not support bool + @dtypes(torch.float, torch.half, torch.int) + def test_div_dtype(self, device, dtype): + cpu_input1, npu_input1 = create_dtype_tensor((2,3,4,5), dtype) + # divisor can not be zero + cpu_input2, npu_input2 = create_dtype_tensor((2,3,4,5), dtype, no_zero=True) + cpu_output, npu_output = self.get_outputs([cpu_input1, cpu_input2], [npu_input1, npu_input2], dtype) + + # div 在int结果为负数时采用截断而不是向下取整的方式取整,所以选用numpy比较 + if dtype == torch.int: + cpu_output = np.floor_divide(cpu_input1.numpy(), cpu_input2.numpy()) + + self.assertRtolEqual(cpu_output, npu_output) + + @unittest.skipIf(UT_FAST_MODE, "Run UT in fast mode") + def test_div_shape_format_fp16(self, device): + format_list = [0, 3, 29] + shape_list = [1, (64, 10), (32, 3, 3), (256, 2048, 7, 7)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 1, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output, npu_output = self.get_outputs([cpu_input1, cpu_input2], [npu_input1, npu_input2], torch.half) + self.assertRtolEqual(cpu_output, npu_output) + + @unittest.skipIf(UT_FAST_MODE, "Run UT in fast mode") + def test_div_shape_format_fp32(self, device): + format_list = [0, 3, 29] + shape_list = [1, (64, 10), (32, 3, 3), (256, 2048, 7, 7), (2, 0, 2)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 1, 100) + cpu_output, npu_output = self.get_outputs([cpu_input1, cpu_input2], [npu_input1, npu_input2], torch.float) + self.assertRtolEqual(cpu_output, npu_output) + + def test_div_mix_dtype_1(self, device): + npu_input1, npu_input2 = create_common_tensor([np.int32, 0, (2, 3)], 1, 100) + npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + cpu_output, npu_output = self.get_outputs([npu_input1, npu_input3], [npu_input2, npu_input4], torch.float) + self.assertRtolEqual(cpu_output, npu_output) + + def test_div_mix_dtype_2(self, device): + npu_input1, npu_input2 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + npu_input3 = torch.tensor(3).int() + cpu_output, npu_output = self.get_outputs([npu_input1, npu_input3], [npu_input2, npu_input3], torch.float) + self.assertRtolEqual(cpu_output, npu_output) + + def test_div_scalar_dtype(self, device): + cpu_input1, npu_input1 = create_common_tensor([np.int32, 0, (2, 3)], 1, 100) + cpu_output = cpu_input1 / 0.5 + npu_output = npu_input1 / 0.5 + self.assertRtolEqual(cpu_output, npu_output.cpu()) + + @unittest.skipIf(UT_FAST_MODE, "Run UT in fast mode") + def test_div_shape_format_fp32(self, device): + format_list = [0, 3, 29] + shape_list = [1, (64, 10), (32, 3, 3), (256, 2048, 7, 7)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 1, 100) + cpu_output, npu_output = self.get_outputs_chk([cpu_input1, cpu_input2], [npu_input1, npu_input2], torch.float) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestDiv, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_dropout.py b/test/test_npu/test_network_ops/test_dropout.py old mode 100644 new mode 100755 index 500302a6edbc0f78c26797058326cae2f1dd7b5b..7275d1549f1d9a9f90a0542254518da4d96b8dd0 --- a/test/test_npu/test_network_ops/test_dropout.py +++ b/test/test_npu/test_network_ops/test_dropout.py @@ -1,70 +1,70 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestDropOutDoMask(TestCase): - def cpu_op_exec(self, input): - out = torch.nn.Dropout(0.5)(input) - out = out.numpy() - return out - - def npu_op_exec(self, input): - out = torch.nn.Dropout(0.5)(input) - out = out.to("cpu") - out = out.numpy() - return out - - def dropout_list_exec(self, list): - epsilon = 1e-3 - for item in list: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - cpu_output = cpu_output.astype(npu_output.dtype) - # 该算子随机结果的比较方式 - for a, b in zip(cpu_output.flatten(), npu_output.flatten()): - if abs(a) > 0 and abs(b) > 0 and abs(a - b) > epsilon: - print(f'input = {item}, ERROR!') - break - else: - print(f'input = {item}, Successfully!') - - def test_op_shape_format_fp16(self, device): - format_list = [0, 3, 29] - shape_list = [1, (256, 1280), (32, 3, 3), (256, 2048, 7, 7)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - self.dropout_list_exec(shape_format) - - def test_op_shape_format_fp32(self, device): - format_list = [0, 3, 29] - shape_list = [1, (256, 1280), (32, 3, 3), (256, 2048, 7, 7)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - self.dropout_list_exec(shape_format) - -instantiate_device_type_tests(TestDropOutDoMask, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestDropOutDoMask(TestCase): + def cpu_op_exec(self, input): + out = torch.nn.Dropout(0.5)(input) + out = out.numpy() + return out + + def npu_op_exec(self, input): + out = torch.nn.Dropout(0.5)(input) + out = out.to("cpu") + out = out.numpy() + return out + + def dropout_list_exec(self, list): + epsilon = 1e-3 + for item in list: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + cpu_output = cpu_output.astype(npu_output.dtype) + # 该算子随机结果的比较方式 + for a, b in zip(cpu_output.flatten(), npu_output.flatten()): + if abs(a) > 0 and abs(b) > 0 and abs(a - b) > epsilon: + print(f'input = {item}, ERROR!') + break + else: + print(f'input = {item}, Successfully!') + + def test_op_shape_format_fp16(self, device): + format_list = [0, 3, 29] + shape_list = [1, (256, 1280), (32, 3, 3), (256, 2048, 7, 7)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + self.dropout_list_exec(shape_format) + + def test_op_shape_format_fp32(self, device): + format_list = [0, 3, 29] + shape_list = [1, (256, 1280), (32, 3, 3), (256, 2048, 7, 7)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + self.dropout_list_exec(shape_format) + +instantiate_device_type_tests(TestDropOutDoMask, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_dropoutv2.py b/test/test_npu/test_network_ops/test_dropoutv2.py index 43953b5e61bc984dd452ced5c7c874565041cd67..66d93ca1f48dceb7d0626054bd0679a095167d6a 100644 --- a/test/test_npu/test_network_ops/test_dropoutv2.py +++ b/test/test_npu/test_network_ops/test_dropoutv2.py @@ -1,43 +1,43 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestDropOutV2(TestCase): - def _gen_seeds(self, shape): - return np.random.uniform(1, 10, size=shape).astype(np.float32) - - def npu_op_exec(self, input1, seed, prob): - output, mask, seed = torch.npu_dropoutV2(input1, seed, p = prob) - output = output.to("cpu") - output = output.numpy() - mask = mask.to("cpu") - mask = mask.numpy() - return output, mask - - def test_dropoutV2(self, device): - input = torch.tensor([1.,2.,3.,4.]).npu() - seed_shape = (int(32 * 1024 * 12),) - seed = self._gen_seeds(seed_shape) - seed = torch.from_numpy(seed).to("npu") - prob = 0.3 - output, mask = self.npu_op_exec(input, seed, prob) #result is random,only check api can exec success! - -instantiate_device_type_tests(TestDropOutV2, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestDropOutV2(TestCase): + def _gen_seeds(self, shape): + return np.random.uniform(1, 10, size=shape).astype(np.float32) + + def npu_op_exec(self, input1, seed, prob): + output, mask, seed = torch.npu_dropoutV2(input1, seed, p = prob) + output = output.to("cpu") + output = output.numpy() + mask = mask.to("cpu") + mask = mask.numpy() + return output, mask + + def test_dropoutV2(self, device): + input = torch.tensor([1.,2.,3.,4.]).npu() + seed_shape = (int(32 * 1024 * 12),) + seed = self._gen_seeds(seed_shape) + seed = torch.from_numpy(seed).to("npu") + prob = 0.3 + output, mask = self.npu_op_exec(input, seed, prob) #result is random,only check api can exec success! + +instantiate_device_type_tests(TestDropOutV2, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_dropoutv2backward.py b/test/test_npu/test_network_ops/test_dropoutv2backward.py index c0de31fecb7d5b02b6e09ff5ceaa758cabf3bd57..62361c7cd46e8fa96a7dcd8d1904ce4ef1e23516 100644 --- a/test/test_npu/test_network_ops/test_dropoutv2backward.py +++ b/test/test_npu/test_network_ops/test_dropoutv2backward.py @@ -1,50 +1,50 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestDropOutV2(TestCase): - def _gen_seeds(self, shape): - return np.random.uniform(1, 10, size=shape).astype(np.float32) - - def npu_op_exec(self, input1, seed, prob): - input1.requires_grad = True - output, mask, seed = torch.npu_dropoutV2(input1, seed, p = prob) - output.backward(torch.ones_like(output)) - output = output.to("cpu") - output = output.detach().numpy() - mask = mask.to("cpu") - mask = mask.numpy() - - output_grad = input1.grad - output_grad = output_grad.to("cpu") - output_grad = output_grad.detach().numpy() - - return output_grad, output, mask - - def test_dropoutV2backward(self, device): - input = torch.tensor([1.,2.,3.,4.]).npu() - seed_shape = (int(32 * 1024 * 12),) - seed = self._gen_seeds(seed_shape) - seed = torch.from_numpy(seed).to("npu") - prob = 0.3 - output_grad, output, mask = self.npu_op_exec(input, seed, prob) #result is random,only check api can exec success! - -instantiate_device_type_tests(TestDropOutV2, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestDropOutV2(TestCase): + def _gen_seeds(self, shape): + return np.random.uniform(1, 10, size=shape).astype(np.float32) + + def npu_op_exec(self, input1, seed, prob): + input1.requires_grad = True + output, mask, seed = torch.npu_dropoutV2(input1, seed, p = prob) + output.backward(torch.ones_like(output)) + output = output.to("cpu") + output = output.detach().numpy() + mask = mask.to("cpu") + mask = mask.numpy() + + output_grad = input1.grad + output_grad = output_grad.to("cpu") + output_grad = output_grad.detach().numpy() + + return output_grad, output, mask + + def test_dropoutV2backward(self, device): + input = torch.tensor([1.,2.,3.,4.]).npu() + seed_shape = (int(32 * 1024 * 12),) + seed = self._gen_seeds(seed_shape) + seed = torch.from_numpy(seed).to("npu") + prob = 0.3 + output_grad, output, mask = self.npu_op_exec(input, seed, prob) #result is random,only check api can exec success! + +instantiate_device_type_tests(TestDropOutV2, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_embedding_backward.py b/test/test_npu/test_network_ops/test_embedding_backward.py old mode 100644 new mode 100755 index 4c88fe91554c6539914fe9d85290f45dcc560423..0603e9ff96b753c5e12d5d47121605f58676d3ad --- a/test/test_npu/test_network_ops/test_embedding_backward.py +++ b/test/test_npu/test_network_ops/test_embedding_backward.py @@ -1,66 +1,66 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import torch.nn.functional as F - - -class TestEmbeddingBackward(TestCase): - def cpu_op_exec(self, weight, indices): - weight.requires_grad_(True) - out = F.embedding(indices, weight, scale_grad_by_freq=True, padding_idx=37) - out.backward(torch.ones_like(out)) - grad_cpu = weight.grad - return out.detach().numpy(), grad_cpu.detach().numpy() - - def npu_op_exec(self, weight, indices): - weight.requires_grad_(True) - out = F.embedding(indices, weight, scale_grad_by_freq=True, padding_idx=37) - out.backward(torch.ones_like(out)) - out_npu = out.to("cpu") - grad_npu = weight.grad - grad_npu = grad_npu.to("cpu") - return out_npu.detach().numpy(), grad_npu.detach().numpy() - - def test_embedding_backward_shape_format_fp32(self, device): - format_list = [0] - shape_list1 = [[40, 32], [40, 1024], [40000, 1024], [33712, 1024]] - shape_list2 = [[40], [40], [3125], [64, 7]] - shape_format1 = [ - [np.float32, i, j] for i in format_list for j in shape_list1 - ] - shape_format2 = [ - [np.int64, i, j] for i in format_list for j in shape_list2 - ] - shape_format = [ - [i, j] for i in shape_format1 for j in shape_format2 - ] - for item in shape_format: - weight_cpu, weight_npu = create_common_tensor(item[0], 1, 1) - indices_cpu, indices_npu = create_common_tensor(item[1], 0, min(item[0][2][0:-1])) - - cpu_out, cpu_grad = self.cpu_op_exec(weight_cpu, indices_cpu) - npu_out, npu_grad = self.npu_op_exec(weight_npu, indices_npu) - - self.assertRtolEqual(cpu_out, npu_out) - self.assertRtolEqual(cpu_grad, npu_grad) - - -instantiate_device_type_tests(TestEmbeddingBackward, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +import torch.nn.functional as F + + +class TestEmbeddingBackward(TestCase): + def cpu_op_exec(self, weight, indices): + weight.requires_grad_(True) + out = F.embedding(indices, weight, scale_grad_by_freq=True, padding_idx=37) + out.backward(torch.ones_like(out)) + grad_cpu = weight.grad + return out.detach().numpy(), grad_cpu.detach().numpy() + + def npu_op_exec(self, weight, indices): + weight.requires_grad_(True) + out = F.embedding(indices, weight, scale_grad_by_freq=True, padding_idx=37) + out.backward(torch.ones_like(out)) + out_npu = out.to("cpu") + grad_npu = weight.grad + grad_npu = grad_npu.to("cpu") + return out_npu.detach().numpy(), grad_npu.detach().numpy() + + def test_embedding_backward_shape_format_fp32(self, device): + format_list = [0] + shape_list1 = [[40, 32], [40, 1024], [40000, 1024], [33712, 1024]] + shape_list2 = [[40], [40], [3125], [64, 7]] + shape_format1 = [ + [np.float32, i, j] for i in format_list for j in shape_list1 + ] + shape_format2 = [ + [np.int64, i, j] for i in format_list for j in shape_list2 + ] + shape_format = [ + [i, j] for i in shape_format1 for j in shape_format2 + ] + for item in shape_format: + weight_cpu, weight_npu = create_common_tensor(item[0], 1, 1) + indices_cpu, indices_npu = create_common_tensor(item[1], 0, min(item[0][2][0:-1])) + + cpu_out, cpu_grad = self.cpu_op_exec(weight_cpu, indices_cpu) + npu_out, npu_grad = self.npu_op_exec(weight_npu, indices_npu) + + self.assertRtolEqual(cpu_out, npu_out) + self.assertRtolEqual(cpu_grad, npu_grad) + + +instantiate_device_type_tests(TestEmbeddingBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() + diff --git a/test/test_npu/test_network_ops/test_embedding_bag.py b/test/test_npu/test_network_ops/test_embedding_bag.py index e6ca989223ae2b90a4402d93ef1ce24fc8454612..4ee9f0b676ebfd5f7f242d9f4f93ca799da9dfa6 100644 --- a/test/test_npu/test_network_ops/test_embedding_bag.py +++ b/test/test_npu/test_network_ops/test_embedding_bag.py @@ -1,44 +1,44 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import torch.nn.functional as F - -class TestEmbeddingBag(TestCase): - def test_embedding_bag_1d(self, device): - cpu_weight = torch.rand(10, 3) - cpu_indices = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9]) - cpu_offsets = torch.tensor([0, 4]) - npu_weight = cpu_weight.npu() - npu_indices = cpu_indices.npu() - npu_offsets = cpu_offsets.npu() - cpu_output = F.embedding_bag(cpu_weight, cpu_indices, cpu_offsets).detach().numpy() - npu_output = F.embedding_bag(npu_weight, npu_indices, npu_offsets).cpu().detach().numpy() - self.assertRtolEqual(cpu_output, npu_output) - - def test_embedding_bag_2d(self, device): - cpu_weight = torch.rand(10, 3) - cpu_indices = torch.tensor([[1, 2, 4, 5, 4, 3, 2, 9], [1, 2, 4, 5, 4, 3, 2, 9]]) - npu_weight = cpu_weight.npu() - npu_indices = cpu_indices.npu() - cpu_output = F.embedding_bag(cpu_weight, cpu_indices).detach().numpy() - npu_output = F.embedding_bag(npu_weight, npu_indices).cpu().detach().numpy() - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestEmbeddingBag, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +import torch.nn.functional as F + +class TestEmbeddingBag(TestCase): + def test_embedding_bag_1d(self, device): + cpu_weight = torch.rand(10, 3) + cpu_indices = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9]) + cpu_offsets = torch.tensor([0, 4]) + npu_weight = cpu_weight.npu() + npu_indices = cpu_indices.npu() + npu_offsets = cpu_offsets.npu() + cpu_output = F.embedding_bag(cpu_weight, cpu_indices, cpu_offsets).detach().numpy() + npu_output = F.embedding_bag(npu_weight, npu_indices, npu_offsets).cpu().detach().numpy() + self.assertRtolEqual(cpu_output, npu_output) + + def test_embedding_bag_2d(self, device): + cpu_weight = torch.rand(10, 3) + cpu_indices = torch.tensor([[1, 2, 4, 5, 4, 3, 2, 9], [1, 2, 4, 5, 4, 3, 2, 9]]) + npu_weight = cpu_weight.npu() + npu_indices = cpu_indices.npu() + cpu_output = F.embedding_bag(cpu_weight, cpu_indices).detach().numpy() + npu_output = F.embedding_bag(npu_weight, npu_indices).cpu().detach().numpy() + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestEmbeddingBag, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_embedding_renorm.py b/test/test_npu/test_network_ops/test_embedding_renorm.py similarity index 97% rename from test/test_npu/test_embedding_renorm.py rename to test/test_npu/test_network_ops/test_embedding_renorm.py index 51f06efe73e646ebd64fb4c482adc83d12fe406a..3f791824546f14e0e2504e90e4ae38f5d8be9a94 100644 --- a/test/test_npu/test_embedding_renorm.py +++ b/test/test_npu/test_network_ops/test_embedding_renorm.py @@ -1,117 +1,115 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestEmbeddingRenorm(TestCase): - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.LongTensor(np.random.uniform(0,shape[0], int(shape[0]/2,)).astype(np.int32)) - #npu_input2=torch.LongTensor([[0,1,1,0,1],[0,1,1,0,1],[1,0,1,1,2]]) - return npu_input1, npu_input2 - - def cpu_op_exec(self, input1, input2, max_norm, norm_type): - stype = input1.dtype - if stype == torch.float16: - input1 = input1.float() - output = torch.embedding_renorm_(input1, input2, max_norm=max_norm, norm_type=norm_type) - if stype == torch.float16: - output = output.half() - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2, max_norm,norm_type): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.embedding_renorm_(input1, input2, max_norm=max_norm, norm_type=norm_type) - output = output.to("cpu") - output = output.numpy() - return output - - def test_embedding_renorm_float16_2(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (5, 3), np.float16) - cpu_input1 = copy.deepcopy(npu_input1) - cpu_input2 = copy.deepcopy(npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.1, 2) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.1, 2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_embedding_renorm_float16_0(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (10, 4),np.float16) - cpu_input1 = copy.deepcopy(npu_input1) - cpu_input2 = copy.deepcopy(npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.2, 0) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.2, 0) - self.assertRtolEqual(cpu_output, npu_output) - - def test_embedding_renorm_float16_1(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3), np.float16) - cpu_input1 = copy.deepcopy(npu_input1) - cpu_input2 = copy.deepcopy(npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.5, 1) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.5, 1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_embedding_renorm_float16_10(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (4, 6), np.float16) - cpu_input1 = copy.deepcopy(npu_input1) - cpu_input2 = copy.deepcopy(npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 1.0, 10) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 1.0, 10) - self.assertRtolEqual(cpu_output, npu_output) - - def test_embedding_renorm_float32_2(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (5, 3), np.float32) - cpu_input1 = copy.deepcopy(npu_input1) - cpu_input2 = copy.deepcopy(npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.1, 2) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.1, 2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_embedding_renorm_float32_0(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (10, 4), np.float32) - cpu_input1 = copy.deepcopy(npu_input1) - cpu_input2 = copy.deepcopy(npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.2, 0) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.2, 0) - self.assertRtolEqual(cpu_output, npu_output) - - def test_embedding_renorm_float32_1(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3), np.float32) - cpu_input1 = copy.deepcopy(npu_input1) - cpu_input2 = copy.deepcopy(npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.5, 1) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.5, 1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_embedding_renorm_float32_10(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (4,6), np.float32) - cpu_input1 = copy.deepcopy(npu_input1) - cpu_input2 = copy.deepcopy(npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 1.0, 10) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 1.0, 10) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestEmbeddingRenorm, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestEmbeddingRenorm(TestCase): + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.LongTensor(np.random.uniform(0,shape[0], int(shape[0]/2,)).astype(np.int32)) + + return npu_input1, npu_input2 + + def cpu_op_exec(self, input1, input2, max_norm, norm_type): + stype = input1.dtype + if stype == torch.float16: + input1 = input1.float() + output = torch.embedding_renorm_(input1, input2, max_norm=max_norm, norm_type=norm_type) + if stype == torch.float16: + output = output.half() + return output + + def npu_op_exec(self, input1, input2, max_norm,norm_type): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.embedding_renorm_(input1, input2, max_norm=max_norm, norm_type=norm_type) + output = output.to("cpu") + return output + + def test_embedding_renorm_float16_2(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (5, 3), np.float16) + cpu_input1 = copy.deepcopy(npu_input1) + cpu_input2 = copy.deepcopy(npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.1, 2) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.1, 2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_embedding_renorm_float16_0(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (10, 4),np.float16) + cpu_input1 = copy.deepcopy(npu_input1) + cpu_input2 = copy.deepcopy(npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.2, 0) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.2, 0) + self.assertRtolEqual(cpu_output, npu_output) + + def test_embedding_renorm_float16_1(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3), np.float16) + cpu_input1 = copy.deepcopy(npu_input1) + cpu_input2 = copy.deepcopy(npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.5, 1) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.5, 1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_embedding_renorm_float16_10(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (4, 6), np.float16) + cpu_input1 = copy.deepcopy(npu_input1) + cpu_input2 = copy.deepcopy(npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 1.0, 10) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 1.0, 10) + self.assertRtolEqual(cpu_output, npu_output) + + def test_embedding_renorm_float32_2(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (5, 3), np.float32) + cpu_input1 = copy.deepcopy(npu_input1) + cpu_input2 = copy.deepcopy(npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.1, 2) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.1, 2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_embedding_renorm_float32_0(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (10, 4), np.float32) + cpu_input1 = copy.deepcopy(npu_input1) + cpu_input2 = copy.deepcopy(npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.2, 0) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.2, 0) + self.assertRtolEqual(cpu_output, npu_output) + + def test_embedding_renorm_float32_1(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (3, 3), np.float32) + cpu_input1 = copy.deepcopy(npu_input1) + cpu_input2 = copy.deepcopy(npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.5, 1) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.5, 1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_embedding_renorm_float32_10(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (4,6), np.float32) + cpu_input1 = copy.deepcopy(npu_input1) + cpu_input2 = copy.deepcopy(npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 1.0, 10) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 1.0, 10) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestEmbeddingRenorm, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_embeddingdensebackward.py b/test/test_npu/test_network_ops/test_embeddingdensebackward.py index c86e4dbfd23fa1cc5c3da0eaab5bbff35c1c30e3..347e242b6212fe3cfbea1e3ea06188b964015480 100644 --- a/test/test_npu/test_network_ops/test_embeddingdensebackward.py +++ b/test/test_npu/test_network_ops/test_embeddingdensebackward.py @@ -1,66 +1,66 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import torch.nn.functional as F - - -class TestEmbeddingDenseBackward(TestCase): - def cpu_op_exec(self, weight, indices): - weight.requires_grad_(True) - out = F.embedding(indices, weight, scale_grad_by_freq=True, padding_idx=37) - out.backward(torch.ones_like(out)) - grad_cpu = weight.grad - return out.detach().numpy(), grad_cpu.detach().numpy() - - def npu_op_exec(self, weight, indices): - weight.requires_grad_(True) - out = F.embedding(indices, weight, scale_grad_by_freq=True, padding_idx=37) - out.backward(torch.ones_like(out)) - out_npu = out.to("cpu") - grad_npu = weight.grad - grad_npu = grad_npu.to("cpu") - return out_npu.detach().numpy(), grad_npu.detach().numpy() - - def test_embedding_dense_backward_shape_format_fp32(self, device): - format_list = [0] - shape_list1 = [[40, 32], [40, 1024], [40000, 1024], [33712, 1024]] - shape_list2 = [[40], [40], [3125], [64, 7]] - shape_format1 = [ - [np.float32, i, j] for i in format_list for j in shape_list1 - ] - shape_format2 = [ - [np.int64, i, j] for i in format_list for j in shape_list2 - ] - shape_format = [ - [i, j] for i in shape_format1 for j in shape_format2 - ] - for item in shape_format: - weight_cpu, weight_npu = create_common_tensor(item[0], 1, 1) - indices_cpu, indices_npu = create_common_tensor(item[1], 0, min(item[0][2][0:-1])) - - cpu_out, cpu_grad = self.cpu_op_exec(weight_cpu, indices_cpu) - npu_out, npu_grad = self.npu_op_exec(weight_npu, indices_npu) - - self.assertRtolEqual(cpu_out, npu_out) - self.assertRtolEqual(cpu_grad, npu_grad) - - -instantiate_device_type_tests(TestEmbeddingDenseBackward, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +import torch.nn.functional as F + + +class TestEmbeddingDenseBackward(TestCase): + def cpu_op_exec(self, weight, indices): + weight.requires_grad_(True) + out = F.embedding(indices, weight, scale_grad_by_freq=True, padding_idx=37) + out.backward(torch.ones_like(out)) + grad_cpu = weight.grad + return out.detach().numpy(), grad_cpu.detach().numpy() + + def npu_op_exec(self, weight, indices): + weight.requires_grad_(True) + out = F.embedding(indices, weight, scale_grad_by_freq=True, padding_idx=37) + out.backward(torch.ones_like(out)) + out_npu = out.to("cpu") + grad_npu = weight.grad + grad_npu = grad_npu.to("cpu") + return out_npu.detach().numpy(), grad_npu.detach().numpy() + + def test_embedding_dense_backward_shape_format_fp32(self, device): + format_list = [0] + shape_list1 = [[40, 32], [40, 1024], [40000, 1024], [33712, 1024]] + shape_list2 = [[40], [40], [3125], [64, 7]] + shape_format1 = [ + [np.float32, i, j] for i in format_list for j in shape_list1 + ] + shape_format2 = [ + [np.int64, i, j] for i in format_list for j in shape_list2 + ] + shape_format = [ + [i, j] for i in shape_format1 for j in shape_format2 + ] + for item in shape_format: + weight_cpu, weight_npu = create_common_tensor(item[0], 1, 1) + indices_cpu, indices_npu = create_common_tensor(item[1], 0, min(item[0][2][0:-1])) + + cpu_out, cpu_grad = self.cpu_op_exec(weight_cpu, indices_cpu) + npu_out, npu_grad = self.npu_op_exec(weight_npu, indices_npu) + + self.assertRtolEqual(cpu_out, npu_out) + self.assertRtolEqual(cpu_grad, npu_grad) + + +instantiate_device_type_tests(TestEmbeddingDenseBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() + diff --git a/test/test_npu/test_network_ops/test_empty_strided.py b/test/test_npu/test_network_ops/test_empty_strided.py index e0fdf0278643528449128282133c92761afcbb69..090b9cc6c19ffe5c949b7a418d5286852f5921fd 100644 --- a/test/test_npu/test_network_ops/test_empty_strided.py +++ b/test/test_npu/test_network_ops/test_empty_strided.py @@ -1,44 +1,44 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestEmptyStrided(TestCase): - def test_empty_strided(self, device): - for shape in [(2, 3, 4), (0, 2, 0)]: - # some of these cases are pretty strange, just verifying that if as_strided - # allows them then empty_strided can as well. - for strides in [(12, 4, 1), (2, 4, 6), (0, 0, 0)]: - empty_strided = torch.empty_strided(shape, strides, device=device) - # as_strided checks the storage size is big enough to support such a strided tensor; - # instead of repeating this calculation, we just use empty_strided which does the same - # calculation when setting the storage size. - as_strided = torch.empty(empty_strided.storage().size(), - device=device).as_strided(shape, strides) - - self.assertEqual(empty_strided.shape, as_strided.shape) - self.assertEqual(empty_strided.stride(), as_strided.stride()) - -instantiate_device_type_tests(TestEmptyStrided, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestEmptyStrided(TestCase): + def test_empty_strided(self, device): + for shape in [(2, 3, 4), (0, 2, 0)]: + # some of these cases are pretty strange, just verifying that if as_strided + # allows them then empty_strided can as well. + for strides in [(12, 4, 1), (2, 4, 6), (0, 0, 0)]: + empty_strided = torch.empty_strided(shape, strides, device=device) + # as_strided checks the storage size is big enough to support such a strided tensor; + # instead of repeating this calculation, we just use empty_strided which does the same + # calculation when setting the storage size. + as_strided = torch.empty(empty_strided.storage().size(), + device=device).as_strided(shape, strides) + + self.assertEqual(empty_strided.shape, as_strided.shape) + self.assertEqual(empty_strided.stride(), as_strided.stride()) + +instantiate_device_type_tests(TestEmptyStrided, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_exp.py b/test/test_npu/test_network_ops/test_exp.py old mode 100644 new mode 100755 index d7450774d5cc73c14ea4490dc82ee61a4a68a3e7..94a800743f67e679f17429d189d5745061fb5844 --- a/test/test_npu/test_network_ops/test_exp.py +++ b/test/test_npu/test_network_ops/test_exp.py @@ -1,109 +1,109 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestExp(TestCase): - def cpu_op_exec(self, input): - output = torch.exp(input) - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = torch.exp(input) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_out_exec(self, input, output): - torch.exp(input, out = output) - output = output.to("cpu").numpy() - return output - - def test_exp_shape_format_fp16(self, device): - format_list = [0, 3] - shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, -1, 1) - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_exp_shape_format_fp32(self, device): - format_list = [0, 3] - shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, -1, 1) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - # cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_exp_out_float32_shape_format(self, device): - shape_format = [ - [[np.float32, 0, [1024, 32, 7, 7]], [np.float32, 0, [1024, 32, 7, 7]]], - [[np.float32, 0, [1024, 32, 7]], [np.float32, 0, [1024, 32]]], - [[np.float32, 0, [1024, 32]], [np.float32, 0, [1024, 32]]], - [[np.float32, 0, [1024]], [np.float32, 0, [1024, 1]]], - [[np.float32, 3, [1024, 32, 7, 7]], [np.float32, 3, [1024, 32, 7, 7]]], - [[np.float32, 3, [1024, 32, 7]], [np.float32, 3, [1024, 32]]], - [[np.float32, 3, [1024, 32]], [np.float32, 3, [1024, 20]]], - [[np.float32, 3, [1024]], [np.float32, 3, [1024]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], -1, 1) - cpu_output, npu_output = create_common_tensor(item[1], -1, 1) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_out_exec(npu_input, npu_output) - self.assertRtolEqual(cpu_output, npu_output) - - def test_exp_out_float16_shape_format(self, device): - shape_format = [ - [[np.float16, 0, [1024, 32, 7, 7]], [np.float16, 0, [1024, 32, 7, 7]]], - [[np.float16, 0, [1024, 32, 7]], [np.float16, 0, [1024, 32]]], - [[np.float16, 0, [1024, 32]], [np.float16, 0, [1024, 32]]], - [[np.float16, 0, [1024]], [np.float16, 0, [1024, 1]]], - [[np.float16, 3, [1024, 32, 7, 7]], [np.float16, 3, [1024, 32, 7, 7]]], - [[np.float16, 3, [1024, 32, 7]], [np.float16, 3, [1024, 32]]], - [[np.float16, 3, [1024, 32]], [np.float16, 3, [1024, 20]]], - [[np.float16, 3, [1024]], [np.float16, 3, [1024]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], -1, 1) - cpu_output, npu_output = create_common_tensor(item[1], -1, 1) - if item[0][0] == np.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = cpu_output.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_out_exec(npu_input, npu_output) - if item[0][0] == np.float16: - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestExp, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestExp(TestCase): + def cpu_op_exec(self, input): + output = torch.exp(input) + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = torch.exp(input) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_out_exec(self, input, output): + torch.exp(input, out = output) + output = output.to("cpu").numpy() + return output + + def test_exp_shape_format_fp16(self, device): + format_list = [0, 3] + shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, -1, 1) + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_exp_shape_format_fp32(self, device): + format_list = [0, 3] + shape_list = [[5], [2, 4], [2, 2, 4], [2, 3, 3, 4]] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, -1, 1) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + # cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_exp_out_float32_shape_format(self, device): + shape_format = [ + [[np.float32, 0, [1024, 32, 7, 7]], [np.float32, 0, [1024, 32, 7, 7]]], + [[np.float32, 0, [1024, 32, 7]], [np.float32, 0, [1024, 32]]], + [[np.float32, 0, [1024, 32]], [np.float32, 0, [1024, 32]]], + [[np.float32, 0, [1024]], [np.float32, 0, [1024, 1]]], + [[np.float32, 3, [1024, 32, 7, 7]], [np.float32, 3, [1024, 32, 7, 7]]], + [[np.float32, 3, [1024, 32, 7]], [np.float32, 3, [1024, 32]]], + [[np.float32, 3, [1024, 32]], [np.float32, 3, [1024, 20]]], + [[np.float32, 3, [1024]], [np.float32, 3, [1024]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], -1, 1) + cpu_output, npu_output = create_common_tensor(item[1], -1, 1) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_out_exec(npu_input, npu_output) + self.assertRtolEqual(cpu_output, npu_output) + + def test_exp_out_float16_shape_format(self, device): + shape_format = [ + [[np.float16, 0, [1024, 32, 7, 7]], [np.float16, 0, [1024, 32, 7, 7]]], + [[np.float16, 0, [1024, 32, 7]], [np.float16, 0, [1024, 32]]], + [[np.float16, 0, [1024, 32]], [np.float16, 0, [1024, 32]]], + [[np.float16, 0, [1024]], [np.float16, 0, [1024, 1]]], + [[np.float16, 3, [1024, 32, 7, 7]], [np.float16, 3, [1024, 32, 7, 7]]], + [[np.float16, 3, [1024, 32, 7]], [np.float16, 3, [1024, 32]]], + [[np.float16, 3, [1024, 32]], [np.float16, 3, [1024, 20]]], + [[np.float16, 3, [1024]], [np.float16, 3, [1024]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], -1, 1) + cpu_output, npu_output = create_common_tensor(item[1], -1, 1) + if item[0][0] == np.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = cpu_output.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_out_exec(npu_input, npu_output) + if item[0][0] == np.float16: + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestExp, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_expand.py b/test/test_npu/test_network_ops/test_expand.py index 9fbc7f9435afa59d283c8759fc5b3ef8d9708e98..7143ecc0564877a87bbe00710b3161d919004ab3 100644 --- a/test/test_npu/test_network_ops/test_expand.py +++ b/test/test_npu/test_network_ops/test_expand.py @@ -1,53 +1,53 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestExpand(TestCase): - def cpu_op_exec(self, input1, size): - output = input1.expand(size) - output = output.numpy() - return output - - def npu_op_exec(self,input1, size): - output = input1.expand(size) - output = output.cpu().numpy() - return output - - def test_expand(self, device): - shape_format = [ - [[np.float32, 0, [1, 3]], (3, 3)], - [[np.float16, 0, [5, 1]], (-1, 7)], - [[np.int32, 0, [1, 1]], (3, 3)], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestExpand, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestExpand(TestCase): + def cpu_op_exec(self, input1, size): + output = input1.expand(size) + output = output.numpy() + return output + + def npu_op_exec(self,input1, size): + output = input1.expand(size) + output = output.cpu().numpy() + return output + + def test_expand(self, device): + shape_format = [ + [[np.float32, 0, [1, 3]], (3, 3)], + [[np.float16, 0, [5, 1]], (-1, 7)], + [[np.int32, 0, [1, 1]], (3, 3)], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_output = self.cpu_op_exec(cpu_input1, item[1]) + npu_output = self.npu_op_exec(npu_input1, item[1]) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestExpand, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_fastgelu.py b/test/test_npu/test_network_ops/test_fastgelu.py index ba7f02b10db6a2e1b6f21c045537eb1242984147..98aa6a90906331487f60b2e0182ea28b89732e32 100644 --- a/test/test_npu/test_network_ops/test_fastgelu.py +++ b/test/test_npu/test_network_ops/test_fastgelu.py @@ -1,36 +1,36 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestFastGelu(TestCase): - def npu_op_exec(self, input1): - output = torch.fast_gelu(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_fastgelu(self, device): - input = torch.tensor([1.,2.,3.,4.]).npu() - exoutput = torch.tensor([0.8458, 1.9357, 2.9819, 3.9956]) - output = self.npu_op_exec(input) - self.assertRtolEqual(exoutput.numpy(), output) - -instantiate_device_type_tests(TestFastGelu, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestFastGelu(TestCase): + def npu_op_exec(self, input1): + output = torch.fast_gelu(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def test_fastgelu(self, device): + input = torch.tensor([1.,2.,3.,4.]).npu() + exoutput = torch.tensor([0.8458, 1.9357, 2.9819, 3.9956]) + output = self.npu_op_exec(input) + self.assertRtolEqual(exoutput.numpy(), output) + +instantiate_device_type_tests(TestFastGelu, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_fastgelubackward.py b/test/test_npu/test_network_ops/test_fastgelubackward.py index f6164eb4a01bd961c9a2f022200e6782ff9664e1..c23fb4ecffab54bed3048d4120b818efa220920b 100644 --- a/test/test_npu/test_network_ops/test_fastgelubackward.py +++ b/test/test_npu/test_network_ops/test_fastgelubackward.py @@ -1,42 +1,42 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestFastGelu(TestCase): - def npu_op_exec(self, input1): - input1.requires_grad = True - output = torch.fast_gelu(input1) - output.backward(torch.ones_like(output)) - output_grad = input1.grad - output_grad = output_grad.to("cpu") - output_grad = output_grad.detach().numpy() - output = output.cpu().detach().numpy() - return output_grad, output - - def test_fastgelu(self, device): - input = torch.tensor([1.,2.,3.,4.]).npu() - exoutputgrad = torch.tensor([1.0677795, 1.0738151, 1.0245483, 1.0064018]) - exoutput = torch.tensor([0.8458, 1.9357, 2.9819, 3.9956]) - outputgrad, output = self.npu_op_exec(input) - self.assertRtolEqual(exoutputgrad.numpy(), outputgrad) - self.assertRtolEqual(exoutput.numpy(), output) - -instantiate_device_type_tests(TestFastGelu, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestFastGelu(TestCase): + def npu_op_exec(self, input1): + input1.requires_grad = True + output = torch.fast_gelu(input1) + output.backward(torch.ones_like(output)) + output_grad = input1.grad + output_grad = output_grad.to("cpu") + output_grad = output_grad.detach().numpy() + output = output.cpu().detach().numpy() + return output_grad, output + + def test_fastgelu(self, device): + input = torch.tensor([1.,2.,3.,4.]).npu() + exoutputgrad = torch.tensor([1.0677795, 1.0738151, 1.0245483, 1.0064018]) + exoutput = torch.tensor([0.8458, 1.9357, 2.9819, 3.9956]) + outputgrad, output = self.npu_op_exec(input) + self.assertRtolEqual(exoutputgrad.numpy(), outputgrad) + self.assertRtolEqual(exoutput.numpy(), output) + +instantiate_device_type_tests(TestFastGelu, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_fill_.py b/test/test_npu/test_network_ops/test_fill_.py old mode 100644 new mode 100755 index a9fcb1ef0396c231d88a747e9bf9f0767b13174a..1c3f6630231b3213d1bc0979b9ffb589eaf799fa --- a/test/test_npu/test_network_ops/test_fill_.py +++ b/test/test_npu/test_network_ops/test_fill_.py @@ -1,66 +1,66 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np - -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestFill_(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.fill_(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.fill_(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def test_fills_shape_format_fp16(self, device): - format_list = [0, 3] - shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024]] - value_list = [0.8, 1.25, torch.tensor(0.8), torch.tensor(1.25)] - shape_format = [ - [[np.float16, i, j], v] for i in format_list for j in shape_list for v in value_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_fill_shape_format_fp32(self, device): - format_list = [0, 3] - shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024]] - value_list = [0.8, 1.25, torch.tensor(0.8), torch.tensor(1.25)] - shape_format = [ - [[np.float32, i, j], v] for i in format_list for j in shape_list for v in value_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestFill_, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np + +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestFill_(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.fill_(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.fill_(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def test_fills_shape_format_fp16(self, device): + format_list = [0, 3] + shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024]] + value_list = [0.8, 1.25, torch.tensor(0.8), torch.tensor(1.25)] + shape_format = [ + [[np.float16, i, j], v] for i in format_list for j in shape_list for v in value_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1, item[1]) + npu_output = self.npu_op_exec(npu_input1, item[1]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_fill_shape_format_fp32(self, device): + format_list = [0, 3] + shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024]] + value_list = [0.8, 1.25, torch.tensor(0.8), torch.tensor(1.25)] + shape_format = [ + [[np.float32, i, j], v] for i in format_list for j in shape_list for v in value_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1, item[1]) + npu_output = self.npu_op_exec(npu_input1, item[1]) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestFill_, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_fill_diagonal.py b/test/test_npu/test_network_ops/test_fill_diagonal.py index 7fb9759f1fd34a1989029fefdb7f0b2d404a7628..f4113422e8df0fe99400ee03cb9665c2cea0500b 100644 --- a/test/test_npu/test_network_ops/test_fill_diagonal.py +++ b/test/test_npu/test_network_ops/test_fill_diagonal.py @@ -1,83 +1,83 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestFillDiagonal(TestCase): - def npu_op_exec(self, input): - input = input.npu() - input.fill_diagonal_(1) - output = input.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec(self, input): - input.fill_diagonal_(1) - output = input.numpy() - return output - - def npu_op_wrap_exec(self, input): - input = input.npu() - input.fill_diagonal_(1, wrap=True) - output = input.to("cpu") - output = output.numpy() - return output - - def cpu_op_wrap_exec(self, input): - input.fill_diagonal_(1, wrap=True) - output = input.numpy() - return output - - def test_fill_diagonal_shape_format_fp32(self, device): - format_list = [0, 3] - shape_list = ([7, 3], [3, 3, 3]) - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input.clone() - npu_input1 = npu_input.clone() - cpu_output1 = self.cpu_op_exec(cpu_input) - npu_output1 = self.npu_op_exec(npu_input) - cpu_output2 = self.cpu_op_wrap_exec(cpu_input1) - npu_output2 = self.npu_op_wrap_exec(npu_input1) - self.assertRtolEqual(cpu_output1, npu_output1) - self.assertRtolEqual(cpu_output2, npu_output2) - - def test_fill_diagonal_shape_format_fp16(self, device): - format_list = [0, 3] - shape_list = ([7, 3], [3, 3, 3]) - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input.clone() - npu_input1 = npu_input.clone() - cpu_output1 = self.cpu_op_exec(cpu_input) - npu_output1 = self.npu_op_exec(npu_input) - cpu_output2 = self.cpu_op_wrap_exec(cpu_input1) - npu_output2 = self.npu_op_wrap_exec(npu_input1) - self.assertRtolEqual(cpu_output1, npu_output1) - self.assertRtolEqual(cpu_output2, npu_output2) - - -instantiate_device_type_tests(TestFillDiagonal, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestFillDiagonal(TestCase): + def npu_op_exec(self, input): + input = input.npu() + input.fill_diagonal_(1) + output = input.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec(self, input): + input.fill_diagonal_(1) + output = input.numpy() + return output + + def npu_op_wrap_exec(self, input): + input = input.npu() + input.fill_diagonal_(1, wrap=True) + output = input.to("cpu") + output = output.numpy() + return output + + def cpu_op_wrap_exec(self, input): + input.fill_diagonal_(1, wrap=True) + output = input.numpy() + return output + + def test_fill_diagonal_shape_format_fp32(self, device): + format_list = [0, 3] + shape_list = ([7, 3], [3, 3, 3]) + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input.clone() + npu_input1 = npu_input.clone() + cpu_output1 = self.cpu_op_exec(cpu_input) + npu_output1 = self.npu_op_exec(npu_input) + cpu_output2 = self.cpu_op_wrap_exec(cpu_input1) + npu_output2 = self.npu_op_wrap_exec(npu_input1) + self.assertRtolEqual(cpu_output1, npu_output1) + self.assertRtolEqual(cpu_output2, npu_output2) + + def test_fill_diagonal_shape_format_fp16(self, device): + format_list = [0, 3] + shape_list = ([7, 3], [3, 3, 3]) + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input.clone() + npu_input1 = npu_input.clone() + cpu_output1 = self.cpu_op_exec(cpu_input) + npu_output1 = self.npu_op_exec(npu_input) + cpu_output2 = self.cpu_op_wrap_exec(cpu_input1) + npu_output2 = self.npu_op_wrap_exec(npu_input1) + self.assertRtolEqual(cpu_output1, npu_output1) + self.assertRtolEqual(cpu_output2, npu_output2) + + +instantiate_device_type_tests(TestFillDiagonal, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_floatstatus.py b/test/test_npu/test_network_ops/test_floatstatus.py index bc3b73a53023a756500ca085a58595bd341c7938..bf7342e8feb261ea195da1d12962c04d17385f10 100644 --- a/test/test_npu/test_network_ops/test_floatstatus.py +++ b/test/test_npu/test_network_ops/test_floatstatus.py @@ -1,36 +1,36 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestFloatStatus(TestCase): - def npu_op_exec(self, input1): - output = torch.npu_alloc_float_status(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_floatstatus(self, device): - input = torch.randn([1,2,3]).npu() - exoutput = torch.tensor([0., 0., 0., 0., 0., 0., 0., 0.]) - output = self.npu_op_exec(input) - self.assertRtolEqual(exoutput.numpy(), output) - -instantiate_device_type_tests(TestFloatStatus, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestFloatStatus(TestCase): + def npu_op_exec(self, input1): + output = torch.npu_alloc_float_status(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def test_floatstatus(self, device): + input = torch.randn([1,2,3]).npu() + exoutput = torch.tensor([0., 0., 0., 0., 0., 0., 0., 0.]) + output = self.npu_op_exec(input) + self.assertRtolEqual(exoutput.numpy(), output) + +instantiate_device_type_tests(TestFloatStatus, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_floor.py b/test/test_npu/test_network_ops/test_floor.py old mode 100644 new mode 100755 index fb2f2985aa7dc97f4c64b7825b50e2f1cf1150ed..99214bb38a962e9c35a71052f19aed60c9758069 --- a/test/test_npu/test_network_ops/test_floor.py +++ b/test/test_npu/test_network_ops/test_floor.py @@ -1,157 +1,157 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestFloor(TestCase): - def cpu_op_exec(self, input): - output = torch.floor(input) - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = torch.floor(input) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inter_exec(self, input): - torch.floor_(input) - output = input.numpy() - return output - - def npu_op_inter_exec(self, input): - torch.floor_(input) - output = input.to("cpu") - output = output.numpy() - return output - - def cpu_op_out_exec(self, input, output): - torch.floor(input, out = output) - output = output.numpy() - return output - - def npu_op_out_exec(self, input, output): - torch.floor(input, out = output) - output = output.to("cpu") - output = output.numpy() - return output - - def test_floor_float32_shape_format(self, device): - format_list = [0, 3] - shape_list = [[256, 1, 1, 1], [1024, 32, 7, 7], [1024, 32, 7], [1024, 32], [1024]] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 100) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - def test_floor_inter_float32_shape_format(self, device): - format_list = [0, 3] - shape_list = [[256, 1, 1, 1], [1024, 32, 7, 7], [1024, 32, 7], [1024, 32], [1024]] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 100) - cpu_output = self.cpu_op_inter_exec(cpu_input) - npu_output = self.npu_op_inter_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - def test_floor_out_float32_shape_format(self, device): - shape_format = [ - [[np.float32, 0, [1024, 32, 7, 7]], [np.float32, 0, [1024, 32, 7, 7]]], - [[np.float32, 0, [1024, 32, 7]], [np.float32, 0, [1024, 32]]], - [[np.float32, 0, [1024, 32]], [np.float32, 0, [1024, 32]]], - [[np.float32, 0, [1024]], [np.float32, 0, [1024, 1]]], - [[np.float32, 3, [1024, 32, 7, 7]], [np.float32, 3, [1024, 32, 7, 7]]], - [[np.float32, 3, [1024, 32, 7]], [np.float32, 3, [1024, 32]]], - [[np.float32, 3, [1024, 32]], [np.float32, 3, [1024, 20]]], - [[np.float32, 3, [1024]], [np.float32, 3, [1024]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 1, 100) - cpu_output, npu_output = create_common_tensor(item[1], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_out_exec(npu_input, npu_output) - self.assertRtolEqual(cpu_output, npu_output) - - def test_floor_float16_shape_format(self, device): - format_list = [0, 3] - shape_list = [[256, 1, 1, 1], [1024, 32, 7, 7], [1024, 32, 7], [1024, 32], [1024]] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 100) - if item[0] == np.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - if item[0] == np.float16: - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_floor_inter_float16_shape_format(self, device): - format_list = [0, 3] - shape_list = [[256, 1, 1, 1], [1024, 32, 7, 7], [1024, 32, 7], [1024, 32], [1024]] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 100) - if item[0] == np.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_inter_exec(cpu_input) - npu_output = self.npu_op_inter_exec(npu_input) - if item[0] == np.float16: - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_floor_out_float16_shape_format(self, device): - shape_format = [ - [[np.float16, 0, [1024, 32, 7, 7]], [np.float16, 0, [1024, 32, 7, 7]]], - [[np.float16, 0, [1024, 32, 7]], [np.float16, 0, [1024, 32]]], - [[np.float16, 0, [1024, 32]], [np.float16, 0, [1024, 32]]], - [[np.float16, 0, [1024]], [np.float16, 0, [1024, 1]]], - [[np.float16, 3, [1024, 32, 7, 7]], [np.float16, 3, [1024, 32, 7, 7]]], - [[np.float16, 3, [1024, 32, 7]], [np.float16, 3, [1024, 32]]], - [[np.float16, 3, [1024, 32]], [np.float16, 3, [1024, 20]]], - [[np.float16, 3, [1024]], [np.float16, 3, [1024]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 1, 100) - cpu_output, npu_output = create_common_tensor(item[1], 1, 100) - if item[0][0] == np.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = cpu_output.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_out_exec(npu_input, npu_output) - if item[0][0] == np.float16: - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestFloor, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestFloor(TestCase): + def cpu_op_exec(self, input): + output = torch.floor(input) + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = torch.floor(input) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inter_exec(self, input): + torch.floor_(input) + output = input.numpy() + return output + + def npu_op_inter_exec(self, input): + torch.floor_(input) + output = input.to("cpu") + output = output.numpy() + return output + + def cpu_op_out_exec(self, input, output): + torch.floor(input, out = output) + output = output.numpy() + return output + + def npu_op_out_exec(self, input, output): + torch.floor(input, out = output) + output = output.to("cpu") + output = output.numpy() + return output + + def test_floor_float32_shape_format(self, device): + format_list = [0, 3] + shape_list = [[256, 1, 1, 1], [1024, 32, 7, 7], [1024, 32, 7], [1024, 32], [1024]] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 100) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + def test_floor_inter_float32_shape_format(self, device): + format_list = [0, 3] + shape_list = [[256, 1, 1, 1], [1024, 32, 7, 7], [1024, 32, 7], [1024, 32], [1024]] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 100) + cpu_output = self.cpu_op_inter_exec(cpu_input) + npu_output = self.npu_op_inter_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + def test_floor_out_float32_shape_format(self, device): + shape_format = [ + [[np.float32, 0, [1024, 32, 7, 7]], [np.float32, 0, [1024, 32, 7, 7]]], + [[np.float32, 0, [1024, 32, 7]], [np.float32, 0, [1024, 32]]], + [[np.float32, 0, [1024, 32]], [np.float32, 0, [1024, 32]]], + [[np.float32, 0, [1024]], [np.float32, 0, [1024, 1]]], + [[np.float32, 3, [1024, 32, 7, 7]], [np.float32, 3, [1024, 32, 7, 7]]], + [[np.float32, 3, [1024, 32, 7]], [np.float32, 3, [1024, 32]]], + [[np.float32, 3, [1024, 32]], [np.float32, 3, [1024, 20]]], + [[np.float32, 3, [1024]], [np.float32, 3, [1024]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 1, 100) + cpu_output, npu_output = create_common_tensor(item[1], 1, 100) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_out_exec(npu_input, npu_output) + self.assertRtolEqual(cpu_output, npu_output) + + def test_floor_float16_shape_format(self, device): + format_list = [0, 3] + shape_list = [[256, 1, 1, 1], [1024, 32, 7, 7], [1024, 32, 7], [1024, 32], [1024]] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 100) + if item[0] == np.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + if item[0] == np.float16: + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_floor_inter_float16_shape_format(self, device): + format_list = [0, 3] + shape_list = [[256, 1, 1, 1], [1024, 32, 7, 7], [1024, 32, 7], [1024, 32], [1024]] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 100) + if item[0] == np.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_inter_exec(cpu_input) + npu_output = self.npu_op_inter_exec(npu_input) + if item[0] == np.float16: + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_floor_out_float16_shape_format(self, device): + shape_format = [ + [[np.float16, 0, [1024, 32, 7, 7]], [np.float16, 0, [1024, 32, 7, 7]]], + [[np.float16, 0, [1024, 32, 7]], [np.float16, 0, [1024, 32]]], + [[np.float16, 0, [1024, 32]], [np.float16, 0, [1024, 32]]], + [[np.float16, 0, [1024]], [np.float16, 0, [1024, 1]]], + [[np.float16, 3, [1024, 32, 7, 7]], [np.float16, 3, [1024, 32, 7, 7]]], + [[np.float16, 3, [1024, 32, 7]], [np.float16, 3, [1024, 32]]], + [[np.float16, 3, [1024, 32]], [np.float16, 3, [1024, 20]]], + [[np.float16, 3, [1024]], [np.float16, 3, [1024]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 1, 100) + cpu_output, npu_output = create_common_tensor(item[1], 1, 100) + if item[0][0] == np.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = cpu_output.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_out_exec(npu_input, npu_output) + if item[0][0] == np.float16: + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestFloor, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_fmod.py b/test/test_npu/test_network_ops/test_fmod.py old mode 100644 new mode 100755 index ebe4bc79a9699aa047cfbd5f6629718248cbb0cc..8da4dfb6f84f2c6f6fb00fa326a061c08385e458 --- a/test/test_npu/test_network_ops/test_fmod.py +++ b/test/test_npu/test_network_ops/test_fmod.py @@ -1,119 +1,119 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestFmod(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.fmod(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.fmod(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, input3): - torch.fmod(input1, input2, out=input3) - output = input3.to("cpu") - output = output.numpy() - return output - - def case_exec_tensor(self, shape): - for item in shape: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) - npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - self.assertEqual(cpu_output, npu_output) - self.assertEqual(npu_output_out, npu_output) - - def case_exec_scalar(self, shape): - for item in shape: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - npu_output_out = self.npu_op_exec_out(npu_input1, item[1], npu_input3) - self.assertEqual(cpu_output, npu_output) - self.assertEqual(npu_output_out, npu_output) - - def case_exec_tensor_fp16(self, shape): - for item in shape: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) - npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - self.assertEqual(npu_output_out, npu_output) - - def case_exec_scalar_fp32(self, shape): - for item in shape: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - npu_output_out = self.npu_op_exec_out(npu_input1, item[1], npu_input3) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - self.assertEqual(npu_output_out, npu_output) - - def test_fmod_shape_format_fp32(self, device): - format_list = [0, 3] - shape_list = [[5, 6], [3, 4, 5]] - shape_format_tensor = [[[np.float32, i, j], [np.float32, i, j]] - for i in format_list for j in shape_list] - shape_format_scalar_tensor = [ - [[np.float32, i, j], 5] for i in format_list for j in shape_list - ] - self.case_exec_tensor(shape_format_tensor) - self.case_exec_scalar(shape_format_scalar_tensor) - - def test_fmod_shape_format_fp16(self, device): - format_list = [0, 3] - shape_list = [[5, 6], [3, 4, 5]] - shape_format_tensor = [[[np.float16, i, j], [np.float16, i, j]] - for i in format_list for j in shape_list] - shape_format_scalar_tensor = [ - [[np.float16, i, j], 5] for i in format_list for j in shape_list - ] - self.case_exec_tensor_fp16(shape_format_tensor) - self.case_exec_scalar_fp32(shape_format_scalar_tensor) - - def test_fmod_mix_dtype(self, device): - npu_input1, npu_input2 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - npu_input3, npu_input4 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) - cpu_output = self.cpu_op_exec(npu_input1, npu_input3) - npu_output = self.npu_op_exec(npu_input2, npu_input4) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestFmod, globals(), except_for="cpu") - -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestFmod(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.fmod(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.fmod(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, input3): + torch.fmod(input1, input2, out=input3) + output = input3.to("cpu") + output = output.numpy() + return output + + def case_exec_tensor(self, shape): + for item in shape: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) + npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + self.assertEqual(cpu_output, npu_output) + self.assertEqual(npu_output_out, npu_output) + + def case_exec_scalar(self, shape): + for item in shape: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) + cpu_output = self.cpu_op_exec(cpu_input1, item[1]) + npu_output = self.npu_op_exec(npu_input1, item[1]) + npu_output_out = self.npu_op_exec_out(npu_input1, item[1], npu_input3) + self.assertEqual(cpu_output, npu_output) + self.assertEqual(npu_output_out, npu_output) + + def case_exec_tensor_fp16(self, shape): + for item in shape: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) + npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + self.assertEqual(npu_output_out, npu_output) + + def case_exec_scalar_fp32(self, shape): + for item in shape: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, item[1]) + npu_output = self.npu_op_exec(npu_input1, item[1]) + npu_output_out = self.npu_op_exec_out(npu_input1, item[1], npu_input3) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + self.assertEqual(npu_output_out, npu_output) + + def test_fmod_shape_format_fp32(self, device): + format_list = [0, 3] + shape_list = [[5, 6], [3, 4, 5]] + shape_format_tensor = [[[np.float32, i, j], [np.float32, i, j]] + for i in format_list for j in shape_list] + shape_format_scalar_tensor = [ + [[np.float32, i, j], 5] for i in format_list for j in shape_list + ] + self.case_exec_tensor(shape_format_tensor) + self.case_exec_scalar(shape_format_scalar_tensor) + + def test_fmod_shape_format_fp16(self, device): + format_list = [0, 3] + shape_list = [[5, 6], [3, 4, 5]] + shape_format_tensor = [[[np.float16, i, j], [np.float16, i, j]] + for i in format_list for j in shape_list] + shape_format_scalar_tensor = [ + [[np.float16, i, j], 5] for i in format_list for j in shape_list + ] + self.case_exec_tensor_fp16(shape_format_tensor) + self.case_exec_scalar_fp32(shape_format_scalar_tensor) + + def test_fmod_mix_dtype(self, device): + npu_input1, npu_input2 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + npu_input3, npu_input4 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) + cpu_output = self.cpu_op_exec(npu_input1, npu_input3) + npu_output = self.npu_op_exec(npu_input2, npu_input4) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestFmod, globals(), except_for="cpu") + +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_format_div.py b/test/test_npu/test_network_ops/test_format_div.py index bec9df2c4afc543fdc5c4c0514b13b31c73c356f..f3cb28c3eaa867dd8f26e99d5cdd1b95293965dd 100644 --- a/test/test_npu/test_network_ops/test_format_div.py +++ b/test/test_npu/test_network_ops/test_format_div.py @@ -1,53 +1,53 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests, formats -from util_test import create_common_tensor, create_dtype_tensor - - -class TestDiv(TestCase): - - def cpu_op_exec(self, input1, input2): - output = torch.div(input1, input2) - return output.numpy() - - def npu_op_exec(self, input1, input2): - output = torch.div(input1, input2) - output = output.to("cpu") - return output.numpy() - - @formats(0, 3) - def test_div_shape_format(self, device, npu_format): - shape_list = [6] - shape_format = [ - [np.float16, npu_format, j] for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 1, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestDiv, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests, formats +from util_test import create_common_tensor, create_dtype_tensor + + +class TestDiv(TestCase): + + def cpu_op_exec(self, input1, input2): + output = torch.div(input1, input2) + return output.numpy() + + def npu_op_exec(self, input1, input2): + output = torch.div(input1, input2) + output = output.to("cpu") + return output.numpy() + + @formats(0, 3) + def test_div_shape_format(self, device, npu_format): + shape_list = [6] + shape_format = [ + [np.float16, npu_format, j] for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 1, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestDiv, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_full.py b/test/test_npu/test_network_ops/test_full.py old mode 100644 new mode 100755 index 0d9f7829fa001d7941678a1bb956f906862efbdb..723443f0d471f2d5025611f6d0044f869726d3ac --- a/test/test_npu/test_network_ops/test_full.py +++ b/test/test_npu/test_network_ops/test_full.py @@ -1,55 +1,55 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestFull(TestCase): - def test_full_shape_format_fp16(self, device): - format_list = [0, 3] - dtype_list = [torch.float32, torch.float16, torch.int32] - shape_list = [[5, 8], [2, 4, 1, 1], [16]] - shape_format = [[[np.float16, i, j], k] - for i in format_list for j in shape_list for k in dtype_list] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output = torch.full(cpu_input.size(), 6, dtype=item[1]) - cpu_output = cpu_output.numpy() - npu_output = torch.full(npu_input.size(), 6, dtype=item[1]) - npu_output = npu_output.to("cpu") - npu_output = npu_output.numpy() - self.assertRtolEqual(cpu_output, npu_output) - - def test_full_shape_format_fp32(self, device): - format_list = [0, 3] - dtype_list = [torch.float32, torch.float16, torch.int32] - shape_list = [[5, 8], [2, 4, 1, 1], [16]] - shape_format = [[[np.float32, i, j], k] - for i in format_list for j in shape_list for k in dtype_list] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output = torch.full(cpu_input.size(), 6, dtype=item[1]) - cpu_output = cpu_output.numpy() - npu_output = torch.full(npu_input.size(), 6, dtype=item[1]) - npu_output = npu_output.to("cpu") - npu_output = npu_output.numpy() - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestFull, globals(), except_for="cpu") -if __name__ == '__main__': - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestFull(TestCase): + def test_full_shape_format_fp16(self, device): + format_list = [0, 3] + dtype_list = [torch.float32, torch.float16, torch.int32] + shape_list = [[5, 8], [2, 4, 1, 1], [16]] + shape_format = [[[np.float16, i, j], k] + for i in format_list for j in shape_list for k in dtype_list] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output = torch.full(cpu_input.size(), 6, dtype=item[1]) + cpu_output = cpu_output.numpy() + npu_output = torch.full(npu_input.size(), 6, dtype=item[1]) + npu_output = npu_output.to("cpu") + npu_output = npu_output.numpy() + self.assertRtolEqual(cpu_output, npu_output) + + def test_full_shape_format_fp32(self, device): + format_list = [0, 3] + dtype_list = [torch.float32, torch.float16, torch.int32] + shape_list = [[5, 8], [2, 4, 1, 1], [16]] + shape_format = [[[np.float32, i, j], k] + for i in format_list for j in shape_list for k in dtype_list] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output = torch.full(cpu_input.size(), 6, dtype=item[1]) + cpu_output = cpu_output.numpy() + npu_output = torch.full(npu_input.size(), 6, dtype=item[1]) + npu_output = npu_output.to("cpu") + npu_output = npu_output.numpy() + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestFull, globals(), except_for="cpu") +if __name__ == '__main__': + run_tests() diff --git a/test/test_npu/test_network_ops/test_gather.py b/test/test_npu/test_network_ops/test_gather.py index 041d68ff1de7bd9e12c256034ccdb2bbcfedb37a..891f07268e3b405394fba7c171eaa2bcf9e350ee 100644 --- a/test/test_npu/test_network_ops/test_gather.py +++ b/test/test_npu/test_network_ops/test_gather.py @@ -1,76 +1,76 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestIndex(TestCase): - def cpu_op_exec(self, input1, dim, index): - output = torch.index_select(input1, dim, index) - output = output.numpy() - return output - - def npu_op_exec(self, input1, dim, index): - index = index.to("npu") - output = torch.index_select(input1, dim, index) - output = output.to("cpu") - output = output.numpy() - return output - - def test_index_shape_format_fp32(self, device): - format_list = [0, 3, 29] - shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] - shape_format = [ - [[np.float32, i, j], [np.int64, 0, [2]]] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - dim = np.random.randint(0, len(item[0][2])) - index1 = np.random.uniform(0, item[0][2][dim], item[1][2]).astype(np.int64) - index = torch.from_numpy(index1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, dim, index) - npu_output = self.npu_op_exec(npu_input1, dim, index) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_index_shape_format_fp16(self, device): - format_list = [0, 3, 29] - shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] - shape_format = [ - [[np.float16, i, j], [np.int64, 0, [2]]] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - dim = np.random.randint(0, len(item[0][2])) - index1 = np.random.uniform(0, item[0][2][dim], item[1][2]).astype(np.int64) - index = torch.from_numpy(index1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, dim, index) - npu_output = self.npu_op_exec(npu_input1, dim, index) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestIndex, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestIndex(TestCase): + def cpu_op_exec(self, input1, dim, index): + output = torch.index_select(input1, dim, index) + output = output.numpy() + return output + + def npu_op_exec(self, input1, dim, index): + index = index.to("npu") + output = torch.index_select(input1, dim, index) + output = output.to("cpu") + output = output.numpy() + return output + + def test_index_shape_format_fp32(self, device): + format_list = [0, 3, 29] + shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] + shape_format = [ + [[np.float32, i, j], [np.int64, 0, [2]]] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + dim = np.random.randint(0, len(item[0][2])) + index1 = np.random.uniform(0, item[0][2][dim], item[1][2]).astype(np.int64) + index = torch.from_numpy(index1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, dim, index) + npu_output = self.npu_op_exec(npu_input1, dim, index) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_index_shape_format_fp16(self, device): + format_list = [0, 3, 29] + shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] + shape_format = [ + [[np.float16, i, j], [np.int64, 0, [2]]] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + dim = np.random.randint(0, len(item[0][2])) + index1 = np.random.uniform(0, item[0][2][dim], item[1][2]).astype(np.int64) + index = torch.from_numpy(index1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, dim, index) + npu_output = self.npu_op_exec(npu_input1, dim, index) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestIndex, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_ge.py b/test/test_npu/test_network_ops/test_ge.py old mode 100644 new mode 100755 index b098b29274480835733577f25eebd03476d3e9b1..836387473b4b1a6c796b2927b8b31bf2e45ea6a9 --- a/test/test_npu/test_network_ops/test_ge.py +++ b/test/test_npu/test_network_ops/test_ge.py @@ -1,301 +1,301 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestGe(TestCase): - def generate_scalar(self, min, max): - scalar = np.random.uniform(min, max) - return scalar - - def cpu_op_exec(self, input1, input2): - output = torch.ge(input1, input2) - output = output.numpy() - return output - - def cpu_op_exec_out(self, input1, input2, input3): - torch.ge(input1, input2, out = input3) - output = input3.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.ge(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, input3): - torch.ge(input1, input2, out = input3) - output = input3.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec(self, input1, input2): - output = input1.ge_(input2) - output = input1 - output = output.numpy() - return output - - def npu_op_inplace_exec(self, input1, input2): - output = input1.ge_(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_scalar(self, input, scalar): - output = torch.ge(input, scalar) - output = output.numpy() - return output - - def cpu_op_exec_scalar_out(self, input1, scalar, input2): - torch.ge(input1, scalar, out = input2) - output = input2.numpy() - return output - - def npu_op_exec_scalar(self, input, scalar): - output = torch.ge(input, scalar) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar_out(self, input1, scalar, input2): - torch.ge(input1, scalar, out = input2) - output = input2.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec_scalar(self, input, scalar): - output = input.ge_(scalar) - output = output.numpy() - return output - - def npu_op_inplace_exec_scalar(self, input, scalar): - output = input.ge_(scalar) - output = output.to("cpu") - output = output.numpy() - return output - - def ge_tensor_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) - cpu_input3 = torch.randn(item[1][2])<0 - npu_input3 = cpu_input3.npu() - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - if cpu_input3.dtype == torch.float16: - cpu_input3 = cpu_input3.to(torch.float32) - cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_ge_tensor_out(self, device): - shape_format = [ - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.ge_tensor_out_result(shape_format) - - def ge_scalar_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2 = torch.randn(item[1][2])<0 - npu_input2 = cpu_input2.npu() - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - scalar = self.generate_scalar(0, 100) - cpu_output_out = self.cpu_op_exec_scalar_out(cpu_input1, scalar, cpu_input2) - npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_ge_scalar_out(self, device): - shape_format = [ - [[np.float16, 0, [4, 4, 128, 128]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [12, 10, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [16, 3, 1111, 1212]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [16, 16, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [20, 10, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 0, [1313, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [16, 22, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.ge_scalar_out_result(shape_format) - - def test_ge_bool(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - scalar_list = [True, False] - shape_format = [ - [[np.int32, i, j], k] for i in format_list for j in shape_list - for k in scalar_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) - cpu_output1 = self.cpu_op_exec_scalar(cpu_input1 > 50, item[1]) - npu_output1 = self.npu_op_exec_scalar(npu_input1 > 50, item[1]) - cpu_output2 = self.cpu_op_exec(cpu_input1 > 50, cpu_input2 > 50) - npu_output2 = self.npu_op_exec(npu_input1 > 50, npu_input2 > 50) - self.assertEqual(cpu_output1, npu_output1) - self.assertEqual(cpu_output2, npu_output2) - - def test_ge_scalar_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_exec_scalar(npu_input, scalar) - self.assertEqual(cpu_output, npu_output) - - def test_ge_scalar_float16(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_exec_scalar(npu_input, scalar) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_ge_scalar_int32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.int32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_exec_scalar(npu_input, scalar) - self.assertEqual(cpu_output, npu_output) - - def test_ge_tensor_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float32, i, j], [np.float32, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_ge_tensor_float16(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float16, i, j], [np.float16, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_ge_inplace_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float32, i, j], [np.float32, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_ge_inplace_float16(self, device): - format_list = [0, 3] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float16, i, j], [np.float16, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_ge_inplace_scalar_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) - self.assertEqual(cpu_output, npu_output) - - def test_ge_inplace_scalar_float16(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_ge_mix_dtype(self, device): - npu_input1, npu_input2 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) - npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - cpu_output = self.cpu_op_exec(npu_input1, npu_input3) - npu_output = self.npu_op_exec(npu_input2, npu_input4) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestGe, globals(), except_for="cpu") -if __name__ == '__main__': - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestGe(TestCase): + def generate_scalar(self, min, max): + scalar = np.random.uniform(min, max) + return scalar + + def cpu_op_exec(self, input1, input2): + output = torch.ge(input1, input2) + output = output.numpy() + return output + + def cpu_op_exec_out(self, input1, input2, input3): + torch.ge(input1, input2, out = input3) + output = input3.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.ge(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, input3): + torch.ge(input1, input2, out = input3) + output = input3.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec(self, input1, input2): + output = input1.ge_(input2) + output = input1 + output = output.numpy() + return output + + def npu_op_inplace_exec(self, input1, input2): + output = input1.ge_(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_scalar(self, input, scalar): + output = torch.ge(input, scalar) + output = output.numpy() + return output + + def cpu_op_exec_scalar_out(self, input1, scalar, input2): + torch.ge(input1, scalar, out = input2) + output = input2.numpy() + return output + + def npu_op_exec_scalar(self, input, scalar): + output = torch.ge(input, scalar) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar_out(self, input1, scalar, input2): + torch.ge(input1, scalar, out = input2) + output = input2.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec_scalar(self, input, scalar): + output = input.ge_(scalar) + output = output.numpy() + return output + + def npu_op_inplace_exec_scalar(self, input, scalar): + output = input.ge_(scalar) + output = output.to("cpu") + output = output.numpy() + return output + + def ge_tensor_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) + cpu_input3 = torch.randn(item[1][2])<0 + npu_input3 = cpu_input3.npu() + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + if cpu_input3.dtype == torch.float16: + cpu_input3 = cpu_input3.to(torch.float32) + cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_ge_tensor_out(self, device): + shape_format = [ + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.ge_tensor_out_result(shape_format) + + def ge_scalar_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2 = torch.randn(item[1][2])<0 + npu_input2 = cpu_input2.npu() + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + scalar = self.generate_scalar(0, 100) + cpu_output_out = self.cpu_op_exec_scalar_out(cpu_input1, scalar, cpu_input2) + npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_ge_scalar_out(self, device): + shape_format = [ + [[np.float16, 0, [4, 4, 128, 128]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [12, 10, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [16, 3, 1111, 1212]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [16, 16, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [20, 10, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 0, [1313, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [16, 22, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.ge_scalar_out_result(shape_format) + + def test_ge_bool(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + scalar_list = [True, False] + shape_format = [ + [[np.int32, i, j], k] for i in format_list for j in shape_list + for k in scalar_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) + cpu_output1 = self.cpu_op_exec_scalar(cpu_input1 > 50, item[1]) + npu_output1 = self.npu_op_exec_scalar(npu_input1 > 50, item[1]) + cpu_output2 = self.cpu_op_exec(cpu_input1 > 50, cpu_input2 > 50) + npu_output2 = self.npu_op_exec(npu_input1 > 50, npu_input2 > 50) + self.assertEqual(cpu_output1, npu_output1) + self.assertEqual(cpu_output2, npu_output2) + + def test_ge_scalar_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_exec_scalar(npu_input, scalar) + self.assertEqual(cpu_output, npu_output) + + def test_ge_scalar_float16(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_exec_scalar(npu_input, scalar) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_ge_scalar_int32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.int32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_exec_scalar(npu_input, scalar) + self.assertEqual(cpu_output, npu_output) + + def test_ge_tensor_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float32, i, j], [np.float32, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_ge_tensor_float16(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float16, i, j], [np.float16, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_ge_inplace_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float32, i, j], [np.float32, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_ge_inplace_float16(self, device): + format_list = [0, 3] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float16, i, j], [np.float16, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_ge_inplace_scalar_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) + self.assertEqual(cpu_output, npu_output) + + def test_ge_inplace_scalar_float16(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_ge_mix_dtype(self, device): + npu_input1, npu_input2 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) + npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + cpu_output = self.cpu_op_exec(npu_input1, npu_input3) + npu_output = self.npu_op_exec(npu_input2, npu_input4) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestGe, globals(), except_for="cpu") +if __name__ == '__main__': + run_tests() diff --git a/test/test_npu/test_gelu_backward.py b/test/test_npu/test_network_ops/test_gelu_backward.py similarity index 77% rename from test/test_npu/test_gelu_backward.py rename to test/test_npu/test_network_ops/test_gelu_backward.py index a21092c621a4f257e02569101c0fbf0c6f242ab9..439e57e28c7d5e1640670a4765461fe7ec3ef256 100644 --- a/test/test_npu/test_gelu_backward.py +++ b/test/test_npu/test_network_ops/test_gelu_backward.py @@ -33,7 +33,7 @@ class TestGeluBackward(TestCase): z = output.sum() z.backward() res = input1.grad - return res.detach() + return res.detach().numpy() def npu_op_exec(self, input1): input1 = input1.to("npu") @@ -42,44 +42,37 @@ class TestGeluBackward(TestCase): z = output.sum() z.backward() res = input1.grad.to("cpu") - return res.detach() + return res.detach().numpy() def test_gelu_backward_float32_1(self, device): - input1= self.generate_single_data(0, 100, (4,3,1,1), np.float32) + input1= self.generate_single_data(0, 100, (4, 3, 1, 1), np.float32) cpu_input1 = copy.deepcopy(input1) cpu_output = self.cpu_op_exec(cpu_input1) npu_output = self.npu_op_exec(input1) self.assertRtolEqual(cpu_output, npu_output) def test_gelu_backward_float32_2(self, device): - input1= self.generate_single_data(0, 100, (4,3,10), np.float32) + input1= self.generate_single_data(0, 100, (15, 3, 1), np.float32) cpu_input1 = copy.deepcopy(input1) cpu_output = self.cpu_op_exec(cpu_input1) npu_output = self.npu_op_exec(input1) self.assertRtolEqual(cpu_output, npu_output) def test_gelu_backward_float32_3(self, device): - input1= self.generate_single_data(0, 100, (400,30,10), np.float32) - cpu_input1 = copy.deepcopy(input1) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_gelu_backward_float32_4(self, device): - input1= self.generate_single_data(-30, 0, (4,4), np.float32) + input1= self.generate_single_data(0, 100, (4, 4), np.float32) cpu_input1 = copy.deepcopy(input1) cpu_output = self.cpu_op_exec(cpu_input1) npu_output = self.npu_op_exec(input1) self.assertRtolEqual(cpu_output, npu_output) def test_gelu_backward_float16(self, device): - input1 = self.generate_single_data(0, 100, (5, 10, 100) , np.float16) - input1 = input1.to(torch.float32) - cpu_input1 = copy.deepcopy(input1) + input1 = self.generate_single_data(0, 100, (5, 10, 100), np.float16) + cpu_input1 = input1.to(torch.float32) cpu_output = self.cpu_op_exec(cpu_input1) + cpu_output = cpu_output.astype(np.float16) npu_output = self.npu_op_exec(input1) self.assertRtolEqual(cpu_output, npu_output) instantiate_device_type_tests(TestGeluBackward, globals(), except_for="cpu") if __name__ == "__main__": - run_tests() \ No newline at end of file + run_tests() diff --git a/test/test_npu/test_network_ops/test_ger.py b/test/test_npu/test_network_ops/test_ger.py index 20d41f7b286647af116cfc497317f6c1f73aac2e..3799258025a57b92cc9e72ad653415bbcea5ecc2 100644 --- a/test/test_npu/test_network_ops/test_ger.py +++ b/test/test_npu/test_network_ops/test_ger.py @@ -1,102 +1,102 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import copy -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestGer(TestCase): - - def cpu_op_exec(self,input1, input2): - output = torch.ger(input1, input2) - output = output.numpy() - - return output - - def npu_op_exec(self,input1, input2): - output = torch.ger(input1, input2) - output = output.to("cpu").numpy() - - return output - - def npu_op_exec_out(self,input1, input2, output): - torch.ger(input1, input2, out=output) - output = output.to("cpu").numpy() - - return output - - def ger_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def ger_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) - cpu_input3, npu_input3 = create_common_tensor(item[2], -100, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - if cpu_input3.dtype == torch.float16: - cpu_input3 = cpu_input3.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - cpu_output = cpu_output.astype(npu_output_out.dtype) - self.assertRtolEqual(cpu_output, npu_output_out) - - def test_ger_result(self, device): - shape_format = [ - [[np.float16, 0, [128]], [np.float16, 0, [256]]], - [[np.float16, 0, [128]], [np.float16, 0, [58]]], - [[np.float16, 0, [128]], [np.float16, 0, [3]]], - [[np.float16, 0, [128]], [np.float16, 0, [116]]], - [[np.float32, 0, [256]], [np.float32, 0, [128]]], - [[np.float32, 0, [256]], [np.float32, 0, [3]]], - [[np.float32, 0, [2]], [np.float32, 0, [3]]], - [[np.float32, 0, [128]], [np.float32, 0, [232]]], - ] - self.ger_result(shape_format) - - def test_ger_out_result(self, device): - shape_format = [ - [[np.float16, 0, [128]], [np.float16, 0, [256]], [np.float16, 0, [256, 116]]], - [[np.float16, 0, [128]], [np.float16, 0, [58]], [np.float16, 0, [58, 58, 1, 1]]], - [[np.float16, 0, [128]], [np.float16, 0, [3]], [np.float16, 0, [3, 3]]], - [[np.float16, 0, [128]], [np.float16, 0, [116]], [np.float16, 0, [128, 116]]], - [[np.float32, 0, [256]], [np.float32, 0, [128]], [np.float32, 0, [128, 128, 3, 3]]], - [[np.float32, 0, [256]], [np.float32, 0, [3]], [np.float32, 0, [256, 3]]], - [[np.float32, 0, [2]], [np.float32, 0, [3]], [np.float32, 0, [3, 1, 3, 3]]], - [[np.float32, 0, [128]], [np.float32, 0, [232]], [np.float32, 0, [232, 232]]], - ] - self.ger_out_result(shape_format) - - -instantiate_device_type_tests(TestGer, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import copy +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestGer(TestCase): + + def cpu_op_exec(self,input1, input2): + output = torch.ger(input1, input2) + output = output.numpy() + + return output + + def npu_op_exec(self,input1, input2): + output = torch.ger(input1, input2) + output = output.to("cpu").numpy() + + return output + + def npu_op_exec_out(self,input1, input2, output): + torch.ger(input1, input2, out=output) + output = output.to("cpu").numpy() + + return output + + def ger_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def ger_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) + cpu_input3, npu_input3 = create_common_tensor(item[2], -100, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + if cpu_input3.dtype == torch.float16: + cpu_input3 = cpu_input3.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + cpu_output = cpu_output.astype(npu_output_out.dtype) + self.assertRtolEqual(cpu_output, npu_output_out) + + def test_ger_result(self, device): + shape_format = [ + [[np.float16, 0, [128]], [np.float16, 0, [256]]], + [[np.float16, 0, [128]], [np.float16, 0, [58]]], + [[np.float16, 0, [128]], [np.float16, 0, [3]]], + [[np.float16, 0, [128]], [np.float16, 0, [116]]], + [[np.float32, 0, [256]], [np.float32, 0, [128]]], + [[np.float32, 0, [256]], [np.float32, 0, [3]]], + [[np.float32, 0, [2]], [np.float32, 0, [3]]], + [[np.float32, 0, [128]], [np.float32, 0, [232]]], + ] + self.ger_result(shape_format) + + def test_ger_out_result(self, device): + shape_format = [ + [[np.float16, 0, [128]], [np.float16, 0, [256]], [np.float16, 0, [256, 116]]], + [[np.float16, 0, [128]], [np.float16, 0, [58]], [np.float16, 0, [58, 58, 1, 1]]], + [[np.float16, 0, [128]], [np.float16, 0, [3]], [np.float16, 0, [3, 3]]], + [[np.float16, 0, [128]], [np.float16, 0, [116]], [np.float16, 0, [128, 116]]], + [[np.float32, 0, [256]], [np.float32, 0, [128]], [np.float32, 0, [128, 128, 3, 3]]], + [[np.float32, 0, [256]], [np.float32, 0, [3]], [np.float32, 0, [256, 3]]], + [[np.float32, 0, [2]], [np.float32, 0, [3]], [np.float32, 0, [3, 1, 3, 3]]], + [[np.float32, 0, [128]], [np.float32, 0, [232]], [np.float32, 0, [232, 232]]], + ] + self.ger_out_result(shape_format) + + +instantiate_device_type_tests(TestGer, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_grid_assign_positive.py b/test/test_npu/test_network_ops/test_grid_assign_positive.py index 3e3c717523b993ee9fa6edf7ac9221db96d2784b..166c4921a94ce91261f8a0f072a61e886b042c68 100644 --- a/test/test_npu/test_network_ops/test_grid_assign_positive.py +++ b/test/test_npu/test_network_ops/test_grid_assign_positive.py @@ -1,52 +1,52 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestGridAssignPositive(TestCase): - def npu_op_exec(self, *args): - out = torch.npu_grid_assign_positive(*args) - out = out.to("cpu") - return out.detach().numpy() - - def test_grid_assign_positive(self, device): - assigned_gt_inds = torch.rand((4,), dtype=torch.float32).to("npu") - overlaps = torch.rand((2,4), dtype=torch.float32).to("npu") - box_responsible_flags = torch.tensor([1,1,1,0], dtype=torch.uint8).to("npu") - max_overlap = torch.rand((4,), dtype=torch.float32).to("npu") - argmax_overlap = torch.tensor([1,0,1,0], dtype=torch.int32).to("npu") - gt_max_overlaps = torch.rand((2,), dtype=torch.float32).to("npu") - gt_argmax_overlaps = torch.tensor([1,0],dtype=torch.int32).to("npu") - inputs = [assigned_gt_inds,overlaps,box_responsible_flags,max_overlap, - argmax_overlap,gt_max_overlaps,gt_argmax_overlaps] - num_gts = 128 - pos_iou_thr = .5 - min_pos_iou = .0 - gt_max_assign_all = True - attrs = [num_gts, pos_iou_thr, min_pos_iou, gt_max_assign_all] - - params = inputs + attrs - expect_cpu = torch.tensor([2., 1., 0.25984418, 0.36664134], dtype=torch.float32) - npu_output = self.npu_op_exec(*params) - self.assertRtolEqual(expect_cpu.numpy(), npu_output) - -instantiate_device_type_tests(TestGridAssignPositive, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestGridAssignPositive(TestCase): + def npu_op_exec(self, *args): + out = torch.npu_grid_assign_positive(*args) + out = out.to("cpu") + return out.detach().numpy() + + def test_grid_assign_positive(self, device): + assigned_gt_inds = torch.rand((4,), dtype=torch.float32).to("npu") + overlaps = torch.rand((2,4), dtype=torch.float32).to("npu") + box_responsible_flags = torch.tensor([1,1,1,0], dtype=torch.uint8).to("npu") + max_overlap = torch.rand((4,), dtype=torch.float32).to("npu") + argmax_overlap = torch.tensor([1,0,1,0], dtype=torch.int32).to("npu") + gt_max_overlaps = torch.rand((2,), dtype=torch.float32).to("npu") + gt_argmax_overlaps = torch.tensor([1,0],dtype=torch.int32).to("npu") + inputs = [assigned_gt_inds,overlaps,box_responsible_flags,max_overlap, + argmax_overlap,gt_max_overlaps,gt_argmax_overlaps] + num_gts = 128 + pos_iou_thr = .5 + min_pos_iou = .0 + gt_max_assign_all = True + attrs = [num_gts, pos_iou_thr, min_pos_iou, gt_max_assign_all] + + params = inputs + attrs + expect_cpu = torch.tensor([2., 1., 0.25984418, 0.36664134], dtype=torch.float32) + npu_output = self.npu_op_exec(*params) + self.assertRtolEqual(expect_cpu.numpy(), npu_output) + +instantiate_device_type_tests(TestGridAssignPositive, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_grid_sampler.py b/test/test_npu/test_network_ops/test_grid_sampler.py index 12030a5371d9fe995a1d286cade21fc9b31ec903..dbfb6a746e9aa4aaf1ad4450c1b40c6acf7ed66f 100644 --- a/test/test_npu/test_network_ops/test_grid_sampler.py +++ b/test/test_npu/test_network_ops/test_grid_sampler.py @@ -1,74 +1,74 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestGridSampler(TestCase): - def test_grid_sampler_fp32(self, device): - format_list = [0] - shape_list = [[100, 1, 28, 28], [100, 64, 32, 28]] - shape_format = [ - [np.float32, j, k] for j in format_list for k in shape_list - ] - sample_format = [np.float32, 0, [100, 1, 1, 2]] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_sample, npu_sample = create_common_tensor(sample_format, -1, 1) - cpu_output = self.cpu_op_exec(cpu_input, cpu_sample) - npu_output = self.npu_op_exec(npu_input, npu_sample) - self.assertRtolEqual(cpu_output, npu_output) - - def test_grid_sampler_fp16(self, device): - format_list = [0] - shape_list = [[1, 1, 3, 3], [1, 2, 3, 4]] - shape_format = [ - [np.float16, j, k] for j in format_list for k in shape_list - ] - sample_format = [np.float32, 0, [1, 2, 2, 2]] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 10) - cpu_sample, npu_sample = create_common_tensor(sample_format, -1, 1) - cpu_output = self.cpu_op_fp16_exec(cpu_input, cpu_sample) - npu_output = self.npu_op_exec(npu_input, npu_sample) - self.assertRtolEqual(cpu_output, npu_output) - - def cpu_op_exec(self, input, sample): - output = torch.grid_sampler(input, sample, 0, 0, True) - output = output.numpy() - return output - - def npu_op_exec(self, input, sample): - output = torch.grid_sampler(input, sample, 0, 0, True) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_fp16_exec(self, input, sample): - input = input.to(torch.float32) - sample = sample.to(torch.float32) - output = torch.grid_sampler(input, sample, 0, 0, True) - output = output.numpy() - output = output.astype(np.float16) - return output - -instantiate_device_type_tests(TestGridSampler, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestGridSampler(TestCase): + def test_grid_sampler_fp32(self, device): + format_list = [0] + shape_list = [[100, 1, 28, 28], [100, 64, 32, 28]] + shape_format = [ + [np.float32, j, k] for j in format_list for k in shape_list + ] + sample_format = [np.float32, 0, [100, 1, 1, 2]] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_sample, npu_sample = create_common_tensor(sample_format, -1, 1) + cpu_output = self.cpu_op_exec(cpu_input, cpu_sample) + npu_output = self.npu_op_exec(npu_input, npu_sample) + self.assertRtolEqual(cpu_output, npu_output) + + def test_grid_sampler_fp16(self, device): + format_list = [0] + shape_list = [[1, 1, 3, 3], [1, 2, 3, 4]] + shape_format = [ + [np.float16, j, k] for j in format_list for k in shape_list + ] + sample_format = [np.float32, 0, [1, 2, 2, 2]] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 10) + cpu_sample, npu_sample = create_common_tensor(sample_format, -1, 1) + cpu_output = self.cpu_op_fp16_exec(cpu_input, cpu_sample) + npu_output = self.npu_op_exec(npu_input, npu_sample) + self.assertRtolEqual(cpu_output, npu_output) + + def cpu_op_exec(self, input, sample): + output = torch.grid_sampler(input, sample, 0, 0, True) + output = output.numpy() + return output + + def npu_op_exec(self, input, sample): + output = torch.grid_sampler(input, sample, 0, 0, True) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_fp16_exec(self, input, sample): + input = input.to(torch.float32) + sample = sample.to(torch.float32) + output = torch.grid_sampler(input, sample, 0, 0, True) + output = output.numpy() + output = output.astype(np.float16) + return output + +instantiate_device_type_tests(TestGridSampler, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_gru.py b/test/test_npu/test_network_ops/test_gru.py index f21975b333b636882c62d11445af21bd3b6efc0a..17fd7b8351c597609a34cccf2ef8b026867204e5 100644 --- a/test/test_npu/test_network_ops/test_gru.py +++ b/test/test_npu/test_network_ops/test_gru.py @@ -1,63 +1,63 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestGru(TestCase): - def test_gru(self, device): - # shape_format:[[dtype, (num_step, batch_size, input_size)], input_size, hidden_size] - shape_format = [ - [[np.float16, (16, 32, 64)], [np.float16, (1, 32, 32)], 64, 32], - [[np.float16, (2, 32, 64)], [np.float16, (1, 32, 32)], 64, 32], - [[np.float32, (5, 32, 64)], [np.float32, (1, 32, 32)], 64, 32], - [[np.float32, (10, 33, 128)], [np.float32, (1, 33, 64)], 128, 64], - ] - - for item in shape_format: - cpu_gru = torch.nn.GRU(input_size=item[2], hidden_size=item[3], num_layers=1, bidirectional=False) - npu_gru = copy.deepcopy(cpu_gru).npu() - - input1 = np.random.uniform(0, 1, item[0][1]).astype(item[0][0]) - if item[0][0] == np.float16: - cpu_input1 = torch.from_numpy(input1.astype(np.float32)) # cpu only support fp32 - else: - cpu_input1 = torch.from_numpy(input1) - npu_input1 = torch.from_numpy(input1).npu() - - h0 = np.random.uniform(0, 1, item[1][1]).astype(item[1][0]) - if item[1][0] == np.float16: - cpu_h0 = torch.from_numpy(h0.astype(np.float32)) # cpu only support fp32 - else: - cpu_h0 = torch.from_numpy(h0) - npu_h0 = torch.from_numpy(h0).npu() - - cpu_output_y, cpu_output_h = cpu_gru(cpu_input1, cpu_h0) - npu_output_y, npu_output_h = npu_gru(npu_input1, npu_h0) - - self.assertRtolEqual(cpu_output_y.detach().numpy(), npu_output_y.cpu().detach().numpy().astype(np.float32), prec=1.e-1) - self.assertRtolEqual(cpu_output_h.detach().numpy(), npu_output_h.cpu().detach().numpy().astype(np.float32), prec=1.e-1) - - -instantiate_device_type_tests(TestGru, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestGru(TestCase): + def test_gru(self, device): + # shape_format:[[dtype, (num_step, batch_size, input_size)], input_size, hidden_size] + shape_format = [ + [[np.float16, (16, 32, 64)], [np.float16, (1, 32, 32)], 64, 32], + [[np.float16, (2, 32, 64)], [np.float16, (1, 32, 32)], 64, 32], + [[np.float32, (5, 32, 64)], [np.float32, (1, 32, 32)], 64, 32], + [[np.float32, (10, 33, 128)], [np.float32, (1, 33, 64)], 128, 64], + ] + + for item in shape_format: + cpu_gru = torch.nn.GRU(input_size=item[2], hidden_size=item[3], num_layers=1, bidirectional=False) + npu_gru = copy.deepcopy(cpu_gru).npu() + + input1 = np.random.uniform(0, 1, item[0][1]).astype(item[0][0]) + if item[0][0] == np.float16: + cpu_input1 = torch.from_numpy(input1.astype(np.float32)) # cpu only support fp32 + else: + cpu_input1 = torch.from_numpy(input1) + npu_input1 = torch.from_numpy(input1).npu() + + h0 = np.random.uniform(0, 1, item[1][1]).astype(item[1][0]) + if item[1][0] == np.float16: + cpu_h0 = torch.from_numpy(h0.astype(np.float32)) # cpu only support fp32 + else: + cpu_h0 = torch.from_numpy(h0) + npu_h0 = torch.from_numpy(h0).npu() + + cpu_output_y, cpu_output_h = cpu_gru(cpu_input1, cpu_h0) + npu_output_y, npu_output_h = npu_gru(npu_input1, npu_h0) + + self.assertRtolEqual(cpu_output_y.detach().numpy(), npu_output_y.cpu().detach().numpy().astype(np.float32), prec=1.e-1) + self.assertRtolEqual(cpu_output_h.detach().numpy(), npu_output_h.cpu().detach().numpy().astype(np.float32), prec=1.e-1) + + +instantiate_device_type_tests(TestGru, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_gru_backward.py b/test/test_npu/test_network_ops/test_gru_backward.py index a8dd2ea87ba2992f9564ab8caeb69f0cef1132b7..313bf7f6dd6fd5b55289b47739182348af5cca72 100644 --- a/test/test_npu/test_network_ops/test_gru_backward.py +++ b/test/test_npu/test_network_ops/test_gru_backward.py @@ -1,85 +1,85 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestGruBackward(TestCase): - def test_gru_backward(self, device): - # shape_format:[[dtype, (num_step, batch_size, input_size)], input_size, hidden_size] - shape_format = [ - [[np.float16, (16, 32, 64)], 64, 32], - [[np.float16, (5, 32, 64)], 64, 32], - [[np.float32, (5, 32, 64)], 64, 32], - [[np.float32, (5, 32, 64)], 64, 64], - ] - - for item in shape_format: - cpu_gru = torch.nn.GRU(input_size=item[1], hidden_size=item[2], num_layers=1, bidirectional=False) - cpu_gru.weight_ih_l0.requires_grad_(True) - cpu_gru.weight_hh_l0.requires_grad_(True) - cpu_gru.bias_ih_l0.requires_grad_(True) - cpu_gru.bias_hh_l0.requires_grad_(True) - npu_gru = copy.deepcopy(cpu_gru).npu() - - input1 = np.random.uniform(0, 1, item[0][1]).astype(item[0][0]) - cpu_input1 = torch.from_numpy(input1.astype(np.float32)) - cpu_input1.requires_grad_(True) - npu_input1 = torch.from_numpy(input1).npu() - npu_input1.requires_grad_(True) - - cpu_output_y, cpu_output_h = cpu_gru(cpu_input1) - npu_output_y, npu_output_h = npu_gru(npu_input1) - - self.assertRtolEqual(cpu_output_y.detach().numpy(), npu_output_y.cpu().detach().numpy().astype(np.float32), prec=1.e-1) - self.assertRtolEqual(cpu_output_h.detach().numpy(), npu_output_h.cpu().detach().numpy().astype(np.float32), prec=1.e-1) - - cpu_input1.retain_grad() - cpu_output_y.backward(torch.ones(cpu_output_y.size(), dtype=torch.float)) - cpu_dx = cpu_input1.grad - cpu_dw_ih = cpu_gru.weight_ih_l0.grad - cpu_dw_hh = cpu_gru.weight_hh_l0.grad - cpu_db_ih = cpu_gru.bias_ih_l0.grad - cpu_db_hh = cpu_gru.bias_hh_l0.grad - - npu_input1.retain_grad() - npu_output_y.backward(torch.ones(npu_output_y.size(), dtype=torch.float).npu()) - npu_dx = npu_input1.grad - npu_dw_ih = npu_gru.weight_ih_l0.grad - npu_dw_hh = npu_gru.weight_hh_l0.grad - npu_db_ih = npu_gru.bias_ih_l0.grad - npu_db_hh = npu_gru.bias_hh_l0.grad - - self.assertRtolEqual(cpu_dx.numpy(), npu_dx.cpu().numpy().astype(np.float32), prec=1.e-1) - self.assertRtolEqual(cpu_dw_ih.numpy(), npu_dw_ih.cpu().numpy().astype(np.float32), prec=1.e-1) - self.assertRtolEqual(cpu_dw_hh.numpy(), npu_dw_hh.cpu().numpy().astype(np.float32), prec=1.e-1) - # TODO(ascend): Insufficient precision - #精度未满足 self.assertRtolEqual(cpu_db_ih.numpy(), npu_db_ih.cpu().numpy().astype(np.float32), prec=1.e-1) - self.assertRtolEqual(cpu_db_ih.numpy(), npu_db_ih.cpu().numpy().astype(np.float32), prec=1.e1) - # TODO(ascend): Insufficient precision - #精度未满足 self.assertRtolEqual(cpu_db_hh.numpy(), npu_db_hh.cpu().numpy().astype(np.float32), prec=1.e-1) - self.assertRtolEqual(cpu_db_hh.numpy(), npu_db_hh.cpu().numpy().astype(np.float32), prec=1.e1) - - -instantiate_device_type_tests(TestGruBackward, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestGruBackward(TestCase): + def test_gru_backward(self, device): + # shape_format:[[dtype, (num_step, batch_size, input_size)], input_size, hidden_size] + shape_format = [ + [[np.float16, (16, 32, 64)], 64, 32], + [[np.float16, (5, 32, 64)], 64, 32], + [[np.float32, (5, 32, 64)], 64, 32], + [[np.float32, (5, 32, 64)], 64, 64], + ] + + for item in shape_format: + cpu_gru = torch.nn.GRU(input_size=item[1], hidden_size=item[2], num_layers=1, bidirectional=False) + cpu_gru.weight_ih_l0.requires_grad_(True) + cpu_gru.weight_hh_l0.requires_grad_(True) + cpu_gru.bias_ih_l0.requires_grad_(True) + cpu_gru.bias_hh_l0.requires_grad_(True) + npu_gru = copy.deepcopy(cpu_gru).npu() + + input1 = np.random.uniform(0, 1, item[0][1]).astype(item[0][0]) + cpu_input1 = torch.from_numpy(input1.astype(np.float32)) + cpu_input1.requires_grad_(True) + npu_input1 = torch.from_numpy(input1).npu() + npu_input1.requires_grad_(True) + + cpu_output_y, cpu_output_h = cpu_gru(cpu_input1) + npu_output_y, npu_output_h = npu_gru(npu_input1) + + self.assertRtolEqual(cpu_output_y.detach().numpy(), npu_output_y.cpu().detach().numpy().astype(np.float32), prec=1.e-1) + self.assertRtolEqual(cpu_output_h.detach().numpy(), npu_output_h.cpu().detach().numpy().astype(np.float32), prec=1.e-1) + + cpu_input1.retain_grad() + cpu_output_y.backward(torch.ones(cpu_output_y.size(), dtype=torch.float)) + cpu_dx = cpu_input1.grad + cpu_dw_ih = cpu_gru.weight_ih_l0.grad + cpu_dw_hh = cpu_gru.weight_hh_l0.grad + cpu_db_ih = cpu_gru.bias_ih_l0.grad + cpu_db_hh = cpu_gru.bias_hh_l0.grad + + npu_input1.retain_grad() + npu_output_y.backward(torch.ones(npu_output_y.size(), dtype=torch.float).npu()) + npu_dx = npu_input1.grad + npu_dw_ih = npu_gru.weight_ih_l0.grad + npu_dw_hh = npu_gru.weight_hh_l0.grad + npu_db_ih = npu_gru.bias_ih_l0.grad + npu_db_hh = npu_gru.bias_hh_l0.grad + + self.assertRtolEqual(cpu_dx.numpy(), npu_dx.cpu().numpy().astype(np.float32), prec=1.e-1) + self.assertRtolEqual(cpu_dw_ih.numpy(), npu_dw_ih.cpu().numpy().astype(np.float32), prec=1.e-1) + self.assertRtolEqual(cpu_dw_hh.numpy(), npu_dw_hh.cpu().numpy().astype(np.float32), prec=1.e-1) + # TODO(ascend): Insufficient precision + #精度未满足 self.assertRtolEqual(cpu_db_ih.numpy(), npu_db_ih.cpu().numpy().astype(np.float32), prec=1.e-1) + self.assertRtolEqual(cpu_db_ih.numpy(), npu_db_ih.cpu().numpy().astype(np.float32), prec=1.e1) + # TODO(ascend): Insufficient precision + #精度未满足 self.assertRtolEqual(cpu_db_hh.numpy(), npu_db_hh.cpu().numpy().astype(np.float32), prec=1.e-1) + self.assertRtolEqual(cpu_db_hh.numpy(), npu_db_hh.cpu().numpy().astype(np.float32), prec=1.e1) + + +instantiate_device_type_tests(TestGruBackward, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_gt.py b/test/test_npu/test_network_ops/test_gt.py old mode 100644 new mode 100755 index d3ec28991001811d22a6eda7da3cb86b7ee4aa02..5dc5e2f8d58d27a324afc6beb7d92adab430c3e5 --- a/test/test_npu/test_network_ops/test_gt.py +++ b/test/test_npu/test_network_ops/test_gt.py @@ -1,333 +1,333 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestGt(TestCase): - def generate_scalar(self, min, max): - scalar = np.random.uniform(min, max) - return scalar - - def cpu_op_exec(self, input1, input2): - output = torch.gt(input1, input2) - output = output.numpy() - return output - - def cpu_op_exec_out(self, input1, input2, input3): - torch.gt(input1, input2, out = input3) - output = input3.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.gt(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec(self, input1, input2): - output = input1.gt_(input2) - output = input1 - output = output.numpy() - return output - - def npu_op_inplace_exec(self, input1, input2): - output = input1.gt_(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, output): - torch.gt(input1, input2, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_scalar(self, input, scalar): - output = torch.gt(input, scalar) - output = output.numpy() - return output - - def cpu_op_exec_scalar_out(self, input1, scalar, input2): - torch.gt(input1, scalar, out = input2) - output = input2.numpy() - return output - - def npu_op_exec_scalar(self, input, scalar): - output = torch.gt(input, scalar) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec_scalar(self, input, scalar): - output = input.gt_(scalar) - output = output.numpy() - return output - - def npu_op_inplace_exec_scalar(self, input, scalar): - input = input.to("npu") - output = input.gt_(scalar) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar_out(self, input, scalar, output): - torch.gt(input, scalar, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_stride_exec(self, input1, input2): - input1 = input1.as_strided([2, 2], [1, 2], 1) - input2 = input2.as_strided([2, 2], [1, 2], 1) - output = input1.gt_(input2) - output = output.numpy() - return output - - def npu_op_inplace_stride_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - input1 = input1.as_strided([2, 2], [1, 2], 1) - input2 = input2.as_strided([2, 2], [1, 2], 1) - output = input1.gt_(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_stride_scalar_exec(self, input1, input2): - input1 = input1.as_strided([2, 2], [1, 2], 1) - output = input1.gt_(input2) - output = output.numpy() - return output - - def npu_op_inplace_stride_scalar_exec(self, input1, input2): - input1 = input1.to("npu") - input1 = input1.as_strided([2, 2], [1, 2], 1) - output = input1.gt_(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def gt_tensor_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) - cpu_input3 = torch.randn(item[1][2])<0 - npu_input3 = cpu_input3.npu() - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - if cpu_input3.dtype == torch.float16: - cpu_input3 = cpu_input3.to(torch.float32) - cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_gt_bool(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - scalar_list = [True, False] - shape_format = [ - [[np.int32, i, j], k] for i in format_list for j in shape_list - for k in scalar_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) - cpu_output1 = self.cpu_op_exec_scalar(cpu_input1 > 50, item[1]) - npu_output1 = self.npu_op_exec_scalar(npu_input1 > 50, item[1]) - cpu_output2 = self.cpu_op_exec(cpu_input1 > 50, cpu_input2 > 50) - npu_output2 = self.npu_op_exec(npu_input1 > 50, npu_input2 > 50) - self.assertEqual(cpu_output1, npu_output1) - self.assertEqual(cpu_output2, npu_output2) - - def test_gt_tensor_out(self, device): - shape_format = [ - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.gt_tensor_out_result(shape_format) - - def gt_scalar_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2 = torch.randn(item[1][2])<0 - npu_input2 = cpu_input2.npu() - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - scalar = self.generate_scalar(0, 100) - cpu_output_out = self.cpu_op_exec_scalar_out(cpu_input1, scalar, cpu_input2) - npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_gt_scalar_out(self, device): - shape_format = [ - [[np.float16, 0, [12, 4, 12, 121]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [12, 10, 14, 111]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 2, [16, 3, 11, 121, 21]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [16, 16, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [20, 10, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 2, [1313, 3, 3, 3, 121]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [16, 22, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.gt_scalar_out_result(shape_format) - - def test_gt_scalar_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_exec_scalar(npu_input, scalar) - self.assertEqual(cpu_output, npu_output) - - def test_gt_scalar_float16(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_exec_scalar(npu_input, scalar) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_gt_scalar_int32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.int32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_exec_scalar(npu_input, scalar) - self.assertEqual(cpu_output, npu_output) - - def test_gt_tensor_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float32, i, j], [np.float32, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_gt_tensor_float16(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float16, i, j], [np.float16, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_gt_inplace_float32(self, device): - format_list = [0, 3] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float32, i, j], [np.float32, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_gt_inplace_float16(self, device): - format_list = [0, 3] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float16, i, j], [np.float16, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_gt_inplace_scalar_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) - self.assertEqual(cpu_output, npu_output) - - def test_gt_inplace_scalar_float16(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_gt_mix_dtype(self, device): - npu_input1, npu_input2 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) - npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - cpu_output = self.cpu_op_exec(npu_input1, npu_input3) - npu_output = self.npu_op_exec(npu_input2, npu_input4) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestGt, globals(), except_for="cpu") -if __name__ == '__main__': - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestGt(TestCase): + def generate_scalar(self, min, max): + scalar = np.random.uniform(min, max) + return scalar + + def cpu_op_exec(self, input1, input2): + output = torch.gt(input1, input2) + output = output.numpy() + return output + + def cpu_op_exec_out(self, input1, input2, input3): + torch.gt(input1, input2, out = input3) + output = input3.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.gt(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec(self, input1, input2): + output = input1.gt_(input2) + output = input1 + output = output.numpy() + return output + + def npu_op_inplace_exec(self, input1, input2): + output = input1.gt_(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, output): + torch.gt(input1, input2, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_scalar(self, input, scalar): + output = torch.gt(input, scalar) + output = output.numpy() + return output + + def cpu_op_exec_scalar_out(self, input1, scalar, input2): + torch.gt(input1, scalar, out = input2) + output = input2.numpy() + return output + + def npu_op_exec_scalar(self, input, scalar): + output = torch.gt(input, scalar) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec_scalar(self, input, scalar): + output = input.gt_(scalar) + output = output.numpy() + return output + + def npu_op_inplace_exec_scalar(self, input, scalar): + input = input.to("npu") + output = input.gt_(scalar) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar_out(self, input, scalar, output): + torch.gt(input, scalar, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_stride_exec(self, input1, input2): + input1 = input1.as_strided([2, 2], [1, 2], 1) + input2 = input2.as_strided([2, 2], [1, 2], 1) + output = input1.gt_(input2) + output = output.numpy() + return output + + def npu_op_inplace_stride_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + input1 = input1.as_strided([2, 2], [1, 2], 1) + input2 = input2.as_strided([2, 2], [1, 2], 1) + output = input1.gt_(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_stride_scalar_exec(self, input1, input2): + input1 = input1.as_strided([2, 2], [1, 2], 1) + output = input1.gt_(input2) + output = output.numpy() + return output + + def npu_op_inplace_stride_scalar_exec(self, input1, input2): + input1 = input1.to("npu") + input1 = input1.as_strided([2, 2], [1, 2], 1) + output = input1.gt_(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def gt_tensor_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) + cpu_input3 = torch.randn(item[1][2])<0 + npu_input3 = cpu_input3.npu() + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + if cpu_input3.dtype == torch.float16: + cpu_input3 = cpu_input3.to(torch.float32) + cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_gt_bool(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + scalar_list = [True, False] + shape_format = [ + [[np.int32, i, j], k] for i in format_list for j in shape_list + for k in scalar_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) + cpu_output1 = self.cpu_op_exec_scalar(cpu_input1 > 50, item[1]) + npu_output1 = self.npu_op_exec_scalar(npu_input1 > 50, item[1]) + cpu_output2 = self.cpu_op_exec(cpu_input1 > 50, cpu_input2 > 50) + npu_output2 = self.npu_op_exec(npu_input1 > 50, npu_input2 > 50) + self.assertEqual(cpu_output1, npu_output1) + self.assertEqual(cpu_output2, npu_output2) + + def test_gt_tensor_out(self, device): + shape_format = [ + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.gt_tensor_out_result(shape_format) + + def gt_scalar_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2 = torch.randn(item[1][2])<0 + npu_input2 = cpu_input2.npu() + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + scalar = self.generate_scalar(0, 100) + cpu_output_out = self.cpu_op_exec_scalar_out(cpu_input1, scalar, cpu_input2) + npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_gt_scalar_out(self, device): + shape_format = [ + [[np.float16, 0, [12, 4, 12, 121]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [12, 10, 14, 111]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 2, [16, 3, 11, 121, 21]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [16, 16, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [20, 10, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 2, [1313, 3, 3, 3, 121]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [16, 22, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.gt_scalar_out_result(shape_format) + + def test_gt_scalar_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_exec_scalar(npu_input, scalar) + self.assertEqual(cpu_output, npu_output) + + def test_gt_scalar_float16(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_exec_scalar(npu_input, scalar) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_gt_scalar_int32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.int32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_exec_scalar(npu_input, scalar) + self.assertEqual(cpu_output, npu_output) + + def test_gt_tensor_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float32, i, j], [np.float32, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_gt_tensor_float16(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float16, i, j], [np.float16, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_gt_inplace_float32(self, device): + format_list = [0, 3] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float32, i, j], [np.float32, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_gt_inplace_float16(self, device): + format_list = [0, 3] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float16, i, j], [np.float16, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_gt_inplace_scalar_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) + self.assertEqual(cpu_output, npu_output) + + def test_gt_inplace_scalar_float16(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_gt_mix_dtype(self, device): + npu_input1, npu_input2 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) + npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + cpu_output = self.cpu_op_exec(npu_input1, npu_input3) + npu_output = self.npu_op_exec(npu_input2, npu_input4) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestGt, globals(), except_for="cpu") +if __name__ == '__main__': + run_tests() diff --git a/test/test_npu/test_network_ops/test_hardtanh.py b/test/test_npu/test_network_ops/test_hardtanh.py old mode 100644 new mode 100755 index a20eb8bd935c932f254aa4adacd17347f16b88d0..742620836b9d21e6a8fd9db0765e79a55486b47c --- a/test/test_npu/test_network_ops/test_hardtanh.py +++ b/test/test_npu/test_network_ops/test_hardtanh.py @@ -1,120 +1,120 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestHardtanh(TestCase): - def cpu_op_backward_exec(self, input, min, max): - w = torch.ones_like(input) - input.requires_grad_(True) - output = torch.nn.functional.hardtanh(input, min, max) - output.backward(w) - output = output.detach().numpy() - res = input.grad - res = res.numpy() - return output, res - - def npu_op_backward_exec(self, input, min, max): - w = torch.ones_like(input) - w = w.to("npu") - input.requires_grad_(True) - output = torch.nn.functional.hardtanh(input, min, max) - output.backward(w) - output = output.to("cpu").detach().numpy() - res = input.grad - res = res.to("cpu").numpy() - return output, res - - def hardtanh_result(self, shape_format): - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 2) - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - - cpu_output_forward, cpu_output_backward = self.cpu_op_backward_exec(cpu_input, 0, 1) - npu_output_forward, npu_output_backward = self.npu_op_backward_exec(npu_input, 0, 1) - cpu_output_forward = cpu_output_forward.astype(npu_output_forward.dtype) - cpu_output_backward = cpu_output_backward.astype(npu_output_backward.dtype) - - self.assertRtolEqual(cpu_output_forward, npu_output_forward) - self.assertRtolEqual(cpu_output_backward, npu_output_backward) - - # 1d do not support format 29 - def test_hardtanh_shape_format_fp16_1d(self, device): - format_list = [0, 3, 4] - shape_format = [ - [np.float16, i, [18]] for i in format_list - ] - self.hardtanh_result(shape_format) - - def test_hardtanh_shape_format_fp32_1d(self, device): - format_list = [0, 3, 4] - shape_format = [ - [np.float32, i, [18]] for i in format_list - ] - self.hardtanh_result(shape_format) - - def test_hardtanh_shape_format_fp16_2d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [ - [np.float16, i, [256, 1000]] for i in format_list - ] - self.hardtanh_result(shape_format) - - def test_hardtanh_shape_format_fp32_2d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [ - [np.float32, i, [256, 1000]] for i in format_list - ] - self.hardtanh_result(shape_format) - - def test_hardtanh_shape_format_fp16_3d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [ - [np.float16, i, [32, 328, 368]] for i in format_list - ] - self.hardtanh_result(shape_format) - - def test_hardtanh_shape_format_fp32_3d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [ - [np.float32, i, [32, 328, 368]] for i in format_list - ] - self.hardtanh_result(shape_format) - - def test_hardtanh_shape_format_fp16_4d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [ - [np.float16, i, [256, 576, 7, 7]] for i in format_list - ] - self.hardtanh_result(shape_format) - - def test_hardtanh_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [ - [np.float32, i, [256, 576, 7, 7]] for i in format_list - ] - self.hardtanh_result(shape_format) - - -instantiate_device_type_tests(TestHardtanh, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestHardtanh(TestCase): + def cpu_op_backward_exec(self, input, min, max): + w = torch.ones_like(input) + input.requires_grad_(True) + output = torch.nn.functional.hardtanh(input, min, max) + output.backward(w) + output = output.detach().numpy() + res = input.grad + res = res.numpy() + return output, res + + def npu_op_backward_exec(self, input, min, max): + w = torch.ones_like(input) + w = w.to("npu") + input.requires_grad_(True) + output = torch.nn.functional.hardtanh(input, min, max) + output.backward(w) + output = output.to("cpu").detach().numpy() + res = input.grad + res = res.to("cpu").numpy() + return output, res + + def hardtanh_result(self, shape_format): + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 2) + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + + cpu_output_forward, cpu_output_backward = self.cpu_op_backward_exec(cpu_input, 0, 1) + npu_output_forward, npu_output_backward = self.npu_op_backward_exec(npu_input, 0, 1) + cpu_output_forward = cpu_output_forward.astype(npu_output_forward.dtype) + cpu_output_backward = cpu_output_backward.astype(npu_output_backward.dtype) + + self.assertRtolEqual(cpu_output_forward, npu_output_forward) + self.assertRtolEqual(cpu_output_backward, npu_output_backward) + + # 1d do not support format 29 + def test_hardtanh_shape_format_fp16_1d(self, device): + format_list = [0, 3, 4] + shape_format = [ + [np.float16, i, [18]] for i in format_list + ] + self.hardtanh_result(shape_format) + + def test_hardtanh_shape_format_fp32_1d(self, device): + format_list = [0, 3, 4] + shape_format = [ + [np.float32, i, [18]] for i in format_list + ] + self.hardtanh_result(shape_format) + + def test_hardtanh_shape_format_fp16_2d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [ + [np.float16, i, [256, 1000]] for i in format_list + ] + self.hardtanh_result(shape_format) + + def test_hardtanh_shape_format_fp32_2d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [ + [np.float32, i, [256, 1000]] for i in format_list + ] + self.hardtanh_result(shape_format) + + def test_hardtanh_shape_format_fp16_3d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [ + [np.float16, i, [32, 328, 368]] for i in format_list + ] + self.hardtanh_result(shape_format) + + def test_hardtanh_shape_format_fp32_3d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [ + [np.float32, i, [32, 328, 368]] for i in format_list + ] + self.hardtanh_result(shape_format) + + def test_hardtanh_shape_format_fp16_4d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [ + [np.float16, i, [256, 576, 7, 7]] for i in format_list + ] + self.hardtanh_result(shape_format) + + def test_hardtanh_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [ + [np.float32, i, [256, 576, 7, 7]] for i in format_list + ] + self.hardtanh_result(shape_format) + + +instantiate_device_type_tests(TestHardtanh, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_ifmr.py b/test/test_npu/test_network_ops/test_ifmr.py index b78cae42a69f917b21374cb532cd759e38f9b36c..24e3381172b36ebbb75218bab9d102314ff49b32 100644 --- a/test/test_npu/test_network_ops/test_ifmr.py +++ b/test/test_npu/test_network_ops/test_ifmr.py @@ -1,139 +1,139 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from functools import reduce - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestIFMR(TestCase): - def cpu_op_exec(self, - input_data, - with_offset, - bins_num=128, - min_percentile=0.999999, - max_percentile=0.999999, - search_range=[0.7, 1.3], - search_step=0.01): - pre_mode = np.float32 - input_data = input_data.numpy().astype(pre_mode) - data_min = input_data.min() - data_max = input_data.max() - data_num = reduce(lambda x, y: x * y, input_data.shape) - data_num = np.array(data_num, pre_mode) - - bins, threshold = np.histogram(input_data, bins_num) - cumsum = np.cumsum(bins).astype(np.int32) - - bins_num = np.array(bins_num, pre_mode) - cdf = cumsum.astype(pre_mode) / data_num - max_index = np.where(cdf >= np.array(max_percentile, pre_mode), 0, - 1).sum().astype(pre_mode) - min_index = np.where(cdf >= np.array(1 - min_percentile, pre_mode), 0, - 1).sum().astype(pre_mode) - max_init = max_index / bins_num * (data_max - data_min) + data_min - min_init = min_index / bins_num * (data_max - data_min) + data_min - - step = np.arange(search_range[0], - search_range[1], - search_step, - dtype=pre_mode) - if with_offset: - if max_init < 0: - max_init = np.array(0, pre_mode) - if min_init > 0: - min_init = np.array(0, pre_mode) - min_list = min_init * np.ones(step.shape, dtype=pre_mode) - else: - max_init = np.max([np.abs(max_init), np.abs(min_init)]) - max_list = max_init * step - - if with_offset: - scale = (max_list - min_list) / 255 - scale = np.where(scale < 1.192092896e-07, 1, scale) - offset = np.round(min_list / scale) - offset = -(offset + 128) - else: - scale = max_list / 127 - offset = np.round(scale * 0) - - loss_list = np.zeros(step.shape, dtype=pre_mode) - for i in range(step.size): - quant_data = np.round(input_data / scale[i]) + offset[i] - np.clip(quant_data, -128, 127, out=quant_data) - quant_data = (quant_data - offset[i]) * scale[i] - loss_list[i] = np.sum(np.square(quant_data - input_data)) - index = np.argmin(loss_list) - return scale[index], offset[index] - - def npu_op_exec(self, input_data, with_offset): - min_value = torch.min(input_data) - max_value = torch.max(input_data) - min_value = torch.reshape(min_value, (1, )) - max_value = torch.reshape(max_value, (1, )) - hist = torch.histc(input_data.to('cpu'), - bins=128, - min=min_value[0].to('cpu'), - max=max_value[0].to('cpu')) - cdf = torch.cumsum(hist, dim=0).int() - - cdf = cdf.to('npu') - scale, offset = torch.npu_ifmr(input_data, - min_value, - max_value, - cdf, - min_percentile=0.999999, - max_percentile=0.999999, - search_start=0.7, - search_end=1.3, - search_step=0.01, - with_offset=with_offset) - - return scale, offset - - def test_ifrm_with_offset(self, device): - format_list = [0, 3] - shape_list = [(2, 2, 3, 4), (5, 5)] - shape_format = [[np.float32, i, j] for i in format_list - for j in shape_list] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, -1, 1) - scale_cpu, offset_cpu = self.cpu_op_exec(cpu_input, - with_offset=True) - scale_npu, offset_npu = self.npu_op_exec(npu_input, - with_offset=True) - self.assertTrue((scale_cpu - scale_npu[0]) / scale_cpu < 0.0001) - self.assertEqual(offset_cpu, offset_npu[0]) - - def test_ifrm_without_offset(self, device): - format_list = [0, 3] - shape_list = [(2, 2, 3, 4), (5, 5)] - shape_format = [[np.float32, i, j] for i in format_list - for j in shape_list] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, -1, 1) - scale_cpu, offset_cpu = self.cpu_op_exec(cpu_input, - with_offset=False) - scale_npu, offset_npu = self.npu_op_exec(npu_input, - with_offset=False) - self.assertTrue((scale_cpu - scale_npu[0]) / scale_cpu < 0.0001) - self.assertEqual(offset_cpu, offset_npu[0]) - - -instantiate_device_type_tests(TestIFMR, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import reduce + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestIFMR(TestCase): + def cpu_op_exec(self, + input_data, + with_offset, + bins_num=128, + min_percentile=0.999999, + max_percentile=0.999999, + search_range=[0.7, 1.3], + search_step=0.01): + pre_mode = np.float32 + input_data = input_data.numpy().astype(pre_mode) + data_min = input_data.min() + data_max = input_data.max() + data_num = reduce(lambda x, y: x * y, input_data.shape) + data_num = np.array(data_num, pre_mode) + + bins, threshold = np.histogram(input_data, bins_num) + cumsum = np.cumsum(bins).astype(np.int32) + + bins_num = np.array(bins_num, pre_mode) + cdf = cumsum.astype(pre_mode) / data_num + max_index = np.where(cdf >= np.array(max_percentile, pre_mode), 0, + 1).sum().astype(pre_mode) + min_index = np.where(cdf >= np.array(1 - min_percentile, pre_mode), 0, + 1).sum().astype(pre_mode) + max_init = max_index / bins_num * (data_max - data_min) + data_min + min_init = min_index / bins_num * (data_max - data_min) + data_min + + step = np.arange(search_range[0], + search_range[1], + search_step, + dtype=pre_mode) + if with_offset: + if max_init < 0: + max_init = np.array(0, pre_mode) + if min_init > 0: + min_init = np.array(0, pre_mode) + min_list = min_init * np.ones(step.shape, dtype=pre_mode) + else: + max_init = np.max([np.abs(max_init), np.abs(min_init)]) + max_list = max_init * step + + if with_offset: + scale = (max_list - min_list) / 255 + scale = np.where(scale < 1.192092896e-07, 1, scale) + offset = np.round(min_list / scale) + offset = -(offset + 128) + else: + scale = max_list / 127 + offset = np.round(scale * 0) + + loss_list = np.zeros(step.shape, dtype=pre_mode) + for i in range(step.size): + quant_data = np.round(input_data / scale[i]) + offset[i] + np.clip(quant_data, -128, 127, out=quant_data) + quant_data = (quant_data - offset[i]) * scale[i] + loss_list[i] = np.sum(np.square(quant_data - input_data)) + index = np.argmin(loss_list) + return scale[index], offset[index] + + def npu_op_exec(self, input_data, with_offset): + min_value = torch.min(input_data) + max_value = torch.max(input_data) + min_value = torch.reshape(min_value, (1, )) + max_value = torch.reshape(max_value, (1, )) + hist = torch.histc(input_data.to('cpu'), + bins=128, + min=min_value[0].to('cpu'), + max=max_value[0].to('cpu')) + cdf = torch.cumsum(hist, dim=0).int() + + cdf = cdf.to('npu') + scale, offset = torch.npu_ifmr(input_data, + min_value, + max_value, + cdf, + min_percentile=0.999999, + max_percentile=0.999999, + search_start=0.7, + search_end=1.3, + search_step=0.01, + with_offset=with_offset) + + return scale, offset + + def test_ifrm_with_offset(self, device): + format_list = [0, 3] + shape_list = [(2, 2, 3, 4), (5, 5)] + shape_format = [[np.float32, i, j] for i in format_list + for j in shape_list] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, -1, 1) + scale_cpu, offset_cpu = self.cpu_op_exec(cpu_input, + with_offset=True) + scale_npu, offset_npu = self.npu_op_exec(npu_input, + with_offset=True) + self.assertTrue((scale_cpu - scale_npu[0]) / scale_cpu < 0.0001) + self.assertEqual(offset_cpu, offset_npu[0]) + + def test_ifrm_without_offset(self, device): + format_list = [0, 3] + shape_list = [(2, 2, 3, 4), (5, 5)] + shape_format = [[np.float32, i, j] for i in format_list + for j in shape_list] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, -1, 1) + scale_cpu, offset_cpu = self.cpu_op_exec(cpu_input, + with_offset=False) + scale_npu, offset_npu = self.npu_op_exec(npu_input, + with_offset=False) + self.assertTrue((scale_cpu - scale_npu[0]) / scale_cpu < 0.0001) + self.assertEqual(offset_cpu, offset_npu[0]) + + +instantiate_device_type_tests(TestIFMR, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_im2col_backward.py b/test/test_npu/test_network_ops/test_im2col_backward.py index 63b309c5bb06a26488b9462cfb340df451cc22b1..54fa10e998542df4563d274029546230ee31c6c5 100644 --- a/test/test_npu/test_network_ops/test_im2col_backward.py +++ b/test/test_npu/test_network_ops/test_im2col_backward.py @@ -1,34 +1,34 @@ -import torch -import numpy as np -import sys -import copy -import os -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestIm2colBackward(TestCase): - def test_im2col_backward_fp16(self, device): - fold_cpu = torch.nn.Fold(output_size=(18, 18), kernel_size=(3, 3)) - input_cpu = torch.rand(1, 16 * 3 * 3, 256).half() - fold_npu = fold_cpu.npu() - input_npu = input_cpu.npu() - output_cpu = fold_cpu(input_cpu) - output_npu = fold_npu(input_npu) - - self.assertRtolEqual(output_cpu.numpy(), output_npu.cpu().numpy()) - - def test_im2col_backward_fp32(self, device): - fold_cpu = torch.nn.Fold(output_size=(18, 18), kernel_size=(3, 3)) - input_cpu = torch.rand(1, 16 * 3 * 3, 256) - fold_npu = fold_cpu.npu() - input_npu = input_cpu.npu() - output_cpu = fold_cpu(input_cpu).numpy() - output_npu = fold_npu(input_npu).cpu().numpy() - - self.assertRtolEqual(output_cpu, output_npu) - - -instantiate_device_type_tests(TestIm2colBackward, globals(), except_for='cpu') -if __name__ == '__main__': +import torch +import numpy as np +import sys +import copy +import os +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestIm2colBackward(TestCase): + def test_im2col_backward_fp16(self, device): + fold_cpu = torch.nn.Fold(output_size=(18, 18), kernel_size=(3, 3)) + input_cpu = torch.rand(1, 16 * 3 * 3, 256).half() + fold_npu = fold_cpu.npu() + input_npu = input_cpu.npu() + output_cpu = fold_cpu(input_cpu) + output_npu = fold_npu(input_npu) + + self.assertRtolEqual(output_cpu.numpy(), output_npu.cpu().numpy()) + + def test_im2col_backward_fp32(self, device): + fold_cpu = torch.nn.Fold(output_size=(18, 18), kernel_size=(3, 3)) + input_cpu = torch.rand(1, 16 * 3 * 3, 256) + fold_npu = fold_cpu.npu() + input_npu = input_cpu.npu() + output_cpu = fold_cpu(input_cpu).numpy() + output_npu = fold_npu(input_npu).cpu().numpy() + + self.assertRtolEqual(output_cpu, output_npu) + + +instantiate_device_type_tests(TestIm2colBackward, globals(), except_for='cpu') +if __name__ == '__main__': run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_index_add.py b/test/test_npu/test_network_ops/test_index_add.py index fcd1749abbacf6a4818e10b1cd3c309f1d9d83d8..7701f21498eac07b30cd898e65316a23f64cbf22 100644 --- a/test/test_npu/test_network_ops/test_index_add.py +++ b/test/test_npu/test_network_ops/test_index_add.py @@ -1,175 +1,175 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestIndexAdd(TestCase): - - def cpu_op_exec(self, var, index, source, dim): - output = var.index_add(dim=dim, index=index.long(), source=source) - output = output.numpy() - return output - - def npu_op_exec(self, var, index, source, dim): - output = torch.index_add(var, dim, index.int(), source) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inter_exec(self, var, index, source, dim): - output = var.index_add_(dim=dim, index=index.long(), source=source) - output = output.numpy() - return output - - def npu_op_inter_exec(self, var, index, source, dim): - output = var.index_add_(dim, index.int(), source) - output = output.to("cpu") - output = output.numpy() - return output - - def test_index_add_float32(self, device): - shape_format = [ - [[np.float32, -1, (5, 3)], [np.int32, -1, (3, )], [np.float32, -1, (3, 3)], 0], - [[np.float32, -1, (6, 4)], [np.int32, -1, (5, )], [np.float32, -1, (5, 4)], 0], - [[np.float32, -1, (3, 2)], [np.int32, -1, (2, )], [np.float32, -1, (2, 2)], 0], - [[np.float32, -1, (8, 6)], [np.int32, -1, (4, )], [np.float32, -1, (4, 6)], 0], - [[np.float32, -1, (3, 5)], [np.int32, -1, (2, )], [np.float32, -1, (3, 2)], 1], - [[np.float32, 4, (4, 6)], [np.int32, -1, (5, )], [np.float32, 4, (4, 5)], 1], - [[np.float32, 3, (2, 3)], [np.int32, -1, (2, )], [np.float32, 3, (2, 2)], 1], - [[np.float32, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.float32, -1, (16, 7, 5, 9, 11)], 4], - [[np.float32, 3, (1600, 200)], [np.int32, -1, (200, )], [np.float32, 3, (1600, 200)], 1], - ] - for item in shape_format: - cpu_var, npu_var = create_common_tensor(item[0], -10, 10) - cpu_index, npu_index = create_common_tensor(item[1], 0, 2) - cpu_source, npu_source = create_common_tensor(item[2], -10, 10) - - - cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) - self.assertRtolEqual(cpu_output, npu_output) - cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_index_add_int32(self, device): - shape_format = [ - [[np.int32, -1, (5, 3)], [np.int32, -1, (3, )], [np.int32, -1, (3, 3)], 0], - [[np.int32, -1, (6, 4)], [np.int32, -1, (5, )], [np.int32, -1, (5, 4)], 0], - [[np.int32, -1, (3, 2)], [np.int32, -1, (2, )], [np.int32, -1, (2, 2)], 0], - [[np.int32, -1, (3, 5)], [np.int32, -1, (2, )], [np.int32, -1, (3, 2)], 1], - [[np.int32, -1, (4, 6)], [np.int32, -1, (5, )], [np.int32, -1, (4, 5)], 1], - [[np.int32, -1, (2, 3)], [np.int32, -1, (2, )], [np.int32, -1, (2, 2)], 1], - [[np.int32, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.int32, -1, (16, 7, 5, 9, 11)], 4], - [[np.int32, -1, (1600, 200)], [np.int32, -1, (200, )], [np.int32, -1, (1600, 200)], 1], - ] - for item in shape_format: - cpu_var, npu_var = create_common_tensor(item[0], -10, 10) - cpu_index, npu_index = create_common_tensor(item[1], 0, 2) - cpu_source, npu_source = create_common_tensor(item[2], -10, 10) - - cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) - self.assertRtolEqual(cpu_output, npu_output) - cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_index_add_int8(self, device): - shape_format = [ - [[np.int8, -1, (5, 3)], [np.int32, -1, (3, )], [np.int8, -1, (3, 3)], 0], - [[np.int8, -1, (6, 4)], [np.int32, -1, (5, )], [np.int8, -1, (5, 4)], 0], - [[np.int8, -1, (3, 2)], [np.int32, -1, (2, )], [np.int8, -1, (2, 2)], 0], - [[np.int8, -1, (3, 5)], [np.int32, -1, (2, )], [np.int8, -1, (3, 2)], 1], - [[np.int8, -1, (4, 6)], [np.int32, -1, (5, )], [np.int8, -1, (4, 5)], 1], - [[np.int8, -1, (2, 3)], [np.int32, -1, (2, )], [np.int8, -1, (2, 2)], 1], - [[np.int8, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.int8, -1, (16, 7, 5, 9, 11)], 4], - [[np.int8, -1, (1600, 200)], [np.int32, -1, (200, )], [np.int8, -1, (1600, 200)], 1], - ] - for item in shape_format: - cpu_var, npu_var = create_common_tensor(item[0], -10, 10) - cpu_index = torch.arange(0, item[1][2][0]) - npu_index = cpu_index.npu() - cpu_source, npu_source = create_common_tensor(item[2], -10, 10) - - cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) - self.assertRtolEqual(cpu_output, npu_output) - cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_index_add_uint8(self, device): - shape_format = [ - [[np.uint8, -1, (5, 3)], [np.int32, -1, (3, )], [np.uint8, -1, (3, 3)], 0], - [[np.uint8, -1, (6, 4)], [np.int32, -1, (5, )], [np.uint8, -1, (5, 4)], 0], - [[np.uint8, -1, (3, 2)], [np.int32, -1, (2, )], [np.uint8, -1, (2, 2)], 0], - [[np.uint8, -1, (3, 5)], [np.int32, -1, (2, )], [np.uint8, -1, (3, 2)], 1], - [[np.uint8, -1, (4, 6)], [np.int32, -1, (5, )], [np.uint8, -1, (4, 5)], 1], - [[np.uint8, -1, (2, 3)], [np.int32, -1, (2, )], [np.uint8, -1, (2, 2)], 1], - [[np.uint8, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.uint8, -1, (16, 7, 5, 9, 11)], 4], - [[np.uint8, -1, (1600, 200)], [np.int32, -1, (200, )], [np.uint8, -1, (1600, 200)], 1], - ] - for item in shape_format: - cpu_var, npu_var = create_common_tensor(item[0], 0, 10) - cpu_index = torch.arange(0, item[1][2][0]) - npu_index = cpu_index.npu() - cpu_source, npu_source = create_common_tensor(item[2], 0, 10) - - - cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) - self.assertRtolEqual(cpu_output, npu_output) - cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_index_add_fp16(self, device): - shape_format = [ - [[np.float16, -1, (5, 3)], [np.int32, -1, (3, )], [np.float16, -1, (3, 3)], 0], - [[np.float16, -1, (3, 2)], [np.int32, -1, (2, )], [np.float16, -1, (2, 2)], 0], - [[np.float16, -1, (3, 5)], [np.int32, -1, (2, )], [np.float16, -1, (3, 2)], 1], - [[np.float16, -1, (2, 6)], [np.int32, -1, (4, )], [np.float16, -1, (2, 4)], 1], - [[np.float16, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.float16, -1, (16, 7, 5, 9, 11)], 4], - [[np.float16, -1, (1600, 200)], [np.int32, -1, (200, )], [np.float16, -1, (1600, 200)], 1], - ] - for item in shape_format: - cpu_var, npu_var = create_common_tensor(item[0], 0, 10) - cpu_index = torch.arange(0, item[1][2][0]) - npu_index = cpu_index.npu() - cpu_source, npu_source = create_common_tensor(item[2], 0, 10) - - cpu_var = cpu_var.to(torch.float32) - cpu_source = cpu_source.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) - npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestIndexAdd, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestIndexAdd(TestCase): + + def cpu_op_exec(self, var, index, source, dim): + output = var.index_add(dim=dim, index=index.long(), source=source) + output = output.numpy() + return output + + def npu_op_exec(self, var, index, source, dim): + output = torch.index_add(var, dim, index.int(), source) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inter_exec(self, var, index, source, dim): + output = var.index_add_(dim=dim, index=index.long(), source=source) + output = output.numpy() + return output + + def npu_op_inter_exec(self, var, index, source, dim): + output = var.index_add_(dim, index.int(), source) + output = output.to("cpu") + output = output.numpy() + return output + + def test_index_add_float32(self, device): + shape_format = [ + [[np.float32, -1, (5, 3)], [np.int32, -1, (3, )], [np.float32, -1, (3, 3)], 0], + [[np.float32, -1, (6, 4)], [np.int32, -1, (5, )], [np.float32, -1, (5, 4)], 0], + [[np.float32, -1, (3, 2)], [np.int32, -1, (2, )], [np.float32, -1, (2, 2)], 0], + [[np.float32, -1, (8, 6)], [np.int32, -1, (4, )], [np.float32, -1, (4, 6)], 0], + [[np.float32, -1, (3, 5)], [np.int32, -1, (2, )], [np.float32, -1, (3, 2)], 1], + [[np.float32, 4, (4, 6)], [np.int32, -1, (5, )], [np.float32, 4, (4, 5)], 1], + [[np.float32, 3, (2, 3)], [np.int32, -1, (2, )], [np.float32, 3, (2, 2)], 1], + [[np.float32, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.float32, -1, (16, 7, 5, 9, 11)], 4], + [[np.float32, 3, (1600, 200)], [np.int32, -1, (200, )], [np.float32, 3, (1600, 200)], 1], + ] + for item in shape_format: + cpu_var, npu_var = create_common_tensor(item[0], -10, 10) + cpu_index, npu_index = create_common_tensor(item[1], 0, 2) + cpu_source, npu_source = create_common_tensor(item[2], -10, 10) + + + cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) + self.assertRtolEqual(cpu_output, npu_output) + cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_index_add_int32(self, device): + shape_format = [ + [[np.int32, -1, (5, 3)], [np.int32, -1, (3, )], [np.int32, -1, (3, 3)], 0], + [[np.int32, -1, (6, 4)], [np.int32, -1, (5, )], [np.int32, -1, (5, 4)], 0], + [[np.int32, -1, (3, 2)], [np.int32, -1, (2, )], [np.int32, -1, (2, 2)], 0], + [[np.int32, -1, (3, 5)], [np.int32, -1, (2, )], [np.int32, -1, (3, 2)], 1], + [[np.int32, -1, (4, 6)], [np.int32, -1, (5, )], [np.int32, -1, (4, 5)], 1], + [[np.int32, -1, (2, 3)], [np.int32, -1, (2, )], [np.int32, -1, (2, 2)], 1], + [[np.int32, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.int32, -1, (16, 7, 5, 9, 11)], 4], + [[np.int32, -1, (1600, 200)], [np.int32, -1, (200, )], [np.int32, -1, (1600, 200)], 1], + ] + for item in shape_format: + cpu_var, npu_var = create_common_tensor(item[0], -10, 10) + cpu_index, npu_index = create_common_tensor(item[1], 0, 2) + cpu_source, npu_source = create_common_tensor(item[2], -10, 10) + + cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) + self.assertRtolEqual(cpu_output, npu_output) + cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_index_add_int8(self, device): + shape_format = [ + [[np.int8, -1, (5, 3)], [np.int32, -1, (3, )], [np.int8, -1, (3, 3)], 0], + [[np.int8, -1, (6, 4)], [np.int32, -1, (5, )], [np.int8, -1, (5, 4)], 0], + [[np.int8, -1, (3, 2)], [np.int32, -1, (2, )], [np.int8, -1, (2, 2)], 0], + [[np.int8, -1, (3, 5)], [np.int32, -1, (2, )], [np.int8, -1, (3, 2)], 1], + [[np.int8, -1, (4, 6)], [np.int32, -1, (5, )], [np.int8, -1, (4, 5)], 1], + [[np.int8, -1, (2, 3)], [np.int32, -1, (2, )], [np.int8, -1, (2, 2)], 1], + [[np.int8, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.int8, -1, (16, 7, 5, 9, 11)], 4], + [[np.int8, -1, (1600, 200)], [np.int32, -1, (200, )], [np.int8, -1, (1600, 200)], 1], + ] + for item in shape_format: + cpu_var, npu_var = create_common_tensor(item[0], -10, 10) + cpu_index = torch.arange(0, item[1][2][0]) + npu_index = cpu_index.npu() + cpu_source, npu_source = create_common_tensor(item[2], -10, 10) + + cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) + self.assertRtolEqual(cpu_output, npu_output) + cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_index_add_uint8(self, device): + shape_format = [ + [[np.uint8, -1, (5, 3)], [np.int32, -1, (3, )], [np.uint8, -1, (3, 3)], 0], + [[np.uint8, -1, (6, 4)], [np.int32, -1, (5, )], [np.uint8, -1, (5, 4)], 0], + [[np.uint8, -1, (3, 2)], [np.int32, -1, (2, )], [np.uint8, -1, (2, 2)], 0], + [[np.uint8, -1, (3, 5)], [np.int32, -1, (2, )], [np.uint8, -1, (3, 2)], 1], + [[np.uint8, -1, (4, 6)], [np.int32, -1, (5, )], [np.uint8, -1, (4, 5)], 1], + [[np.uint8, -1, (2, 3)], [np.int32, -1, (2, )], [np.uint8, -1, (2, 2)], 1], + [[np.uint8, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.uint8, -1, (16, 7, 5, 9, 11)], 4], + [[np.uint8, -1, (1600, 200)], [np.int32, -1, (200, )], [np.uint8, -1, (1600, 200)], 1], + ] + for item in shape_format: + cpu_var, npu_var = create_common_tensor(item[0], 0, 10) + cpu_index = torch.arange(0, item[1][2][0]) + npu_index = cpu_index.npu() + cpu_source, npu_source = create_common_tensor(item[2], 0, 10) + + + cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) + self.assertRtolEqual(cpu_output, npu_output) + cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_index_add_fp16(self, device): + shape_format = [ + [[np.float16, -1, (5, 3)], [np.int32, -1, (3, )], [np.float16, -1, (3, 3)], 0], + [[np.float16, -1, (3, 2)], [np.int32, -1, (2, )], [np.float16, -1, (2, 2)], 0], + [[np.float16, -1, (3, 5)], [np.int32, -1, (2, )], [np.float16, -1, (3, 2)], 1], + [[np.float16, -1, (2, 6)], [np.int32, -1, (4, )], [np.float16, -1, (2, 4)], 1], + [[np.float16, -1, (16, 7, 5, 9, 11)], [np.int32, -1, (11, )], [np.float16, -1, (16, 7, 5, 9, 11)], 4], + [[np.float16, -1, (1600, 200)], [np.int32, -1, (200, )], [np.float16, -1, (1600, 200)], 1], + ] + for item in shape_format: + cpu_var, npu_var = create_common_tensor(item[0], 0, 10) + cpu_index = torch.arange(0, item[1][2][0]) + npu_index = cpu_index.npu() + cpu_source, npu_source = create_common_tensor(item[2], 0, 10) + + cpu_var = cpu_var.to(torch.float32) + cpu_source = cpu_source.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_exec(npu_var, npu_index, npu_source, item[3]) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + cpu_output = self.cpu_op_inter_exec(cpu_var, cpu_index, cpu_source, item[3]) + npu_output = self.npu_op_inter_exec(npu_var, npu_index, npu_source, item[3]) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestIndexAdd, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_index_put.py b/test/test_npu/test_network_ops/test_index_put.py old mode 100644 new mode 100755 index 2ab3b47b940c269bc9dda2b2719f193b140e2cb1..60cc47d362372f8b2ea2de9d0d003e04949c7b9b --- a/test/test_npu/test_network_ops/test_index_put.py +++ b/test/test_npu/test_network_ops/test_index_put.py @@ -1,134 +1,144 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestIndexPut(TestCase): - def cpu_op_exec(self, input, indices, value): - output = input.index_put(indices, value) - output = output.numpy() - return output - - def npu_op_exec(self, input, indices, value): - output = input.index_put(indices, value) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inp_exec(self, input, indices, value): - input.index_put_(indices, value) - output = input.numpy() - return output - - def npu_op_inp_exec(self, input, indices, value): - input.index_put_(indices, value) - input = input.to("cpu") - output = input.numpy() - return output - - def case_exec(self, shape): - cpu_indices = [] - npu_indices = [] - for item in shape: - cpu_input, npu_input = create_common_tensor(item[0], 1, 100) - for i in range(1, 3): - cpu_indices1, npu_indices1 = create_common_tensor( - item[1], 1, 5) - cpu_indices.append(cpu_indices1) - npu_indices.append(npu_indices1) - cpu_value, npu_value = create_common_tensor(item[2], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input, cpu_indices, cpu_value) - npu_output = self.npu_op_exec(npu_input, npu_indices, npu_value) - self.assertEqual(cpu_output, npu_output) - - def case_exec_fp16(self, shape): - cpu_indices = [] - npu_indices = [] - for item in shape: - cpu_input, npu_input = create_common_tensor(item[0], 1, 100) - cpu_input = cpu_input.to(torch.float32) - for i in range(1, 3): - cpu_indices1, npu_indices1 = create_common_tensor( - item[1], 1, 5) - cpu_indices.append(cpu_indices1) - npu_indices.append(npu_indices1) - cpu_value, npu_value = create_common_tensor(item[2], 1, 100) - cpu_value = cpu_value.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input, cpu_indices, cpu_value) - npu_output = self.npu_op_exec(npu_input, npu_indices, npu_value) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def case_inp_exec(self, shape): - cpu_indices = [] - npu_indices = [] - for item in shape: - cpu_input, npu_input = create_common_tensor(item[0], 1, 100) - for i in range(1, 3): - cpu_indices1, npu_indices1 = create_common_tensor( - item[1], 1, 5) - cpu_indices.append(cpu_indices1) - npu_indices.append(npu_indices1) - cpu_value, npu_value = create_common_tensor(item[2], 1, 100) - cpu_output = self.cpu_op_inp_exec( - cpu_input, cpu_indices, cpu_value) - npu_output = self.npu_op_inp_exec( - npu_input, npu_indices, npu_value) - self.assertEqual(cpu_output, npu_output) - - def case_inp_exec_fp16(self, shape): - cpu_indices = [] - npu_indices = [] - for item in shape: - cpu_input, npu_input = create_common_tensor(item[0], 1, 100) - cpu_input = cpu_input.to(torch.float32) - for i in range(1, 3): - cpu_indices1, npu_indices1 = create_common_tensor( - item[1], 1, 5) - cpu_indices.append(cpu_indices1) - npu_indices.append(npu_indices1) - cpu_value, npu_value = create_common_tensor(item[2], 1, 100) - cpu_value = cpu_value.to(torch.float32) - cpu_output = self.cpu_op_inp_exec( - cpu_input, cpu_indices, cpu_value) - npu_output = self.npu_op_inp_exec( - npu_input, npu_indices, npu_value) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_index_put_shape_format_fp32(self, device): - format_list = [0] - shape_list = [(5, 6)] - shape_format = [[[np.float32, i, j], [np.int64, 0, [1, 2]], [ - np.float32, 0, [1, 2]]] for i in format_list for j in shape_list] - self.case_exec(shape_format) - self.case_inp_exec(shape_format) - - def test_index_put_shape_format_fp16(self, device): - format_list = [0] - shape_list = [(5, 6)] - shape_format = [[[np.float16, i, j], [np.int64, 0, [1, 2]], [ - np.float16, 0, [1, 2]]] for i in format_list for j in shape_list] - self.case_exec_fp16(shape_format) - self.case_inp_exec_fp16(shape_format) - - -instantiate_device_type_tests(TestIndexPut, globals(), except_for="cpu") - -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestIndexPut(TestCase): + def cpu_op_exec(self, input, indices, value): + output = input.index_put(indices, value) + output = output.numpy() + return output + + def npu_op_exec(self, input, indices, value): + output = input.index_put(indices, value) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inp_exec(self, input, indices, value): + input.index_put_(indices, value) + output = input.numpy() + return output + + def npu_op_inp_exec(self, input, indices, value): + input.index_put_(indices, value) + input = input.to("cpu") + output = input.numpy() + return output + + def case_exec(self, shape): + cpu_indices = [] + npu_indices = [] + for item in shape: + cpu_input, npu_input = create_common_tensor(item[0], 1, 100) + for i in range(1, 3): + cpu_indices1, npu_indices1 = create_common_tensor( + item[1], 1, 5) + cpu_indices.append(cpu_indices1) + npu_indices.append(npu_indices1) + cpu_value, npu_value = create_common_tensor(item[2], 1, 100) + cpu_output = self.cpu_op_exec(cpu_input, cpu_indices, cpu_value) + npu_output = self.npu_op_exec(npu_input, npu_indices, npu_value) + self.assertEqual(cpu_output, npu_output) + + def case_exec_fp16(self, shape): + cpu_indices = [] + npu_indices = [] + for item in shape: + cpu_input, npu_input = create_common_tensor(item[0], 1, 100) + cpu_input = cpu_input.to(torch.float32) + for i in range(1, 3): + cpu_indices1, npu_indices1 = create_common_tensor( + item[1], 1, 5) + cpu_indices.append(cpu_indices1) + npu_indices.append(npu_indices1) + cpu_value, npu_value = create_common_tensor(item[2], 1, 100) + cpu_value = cpu_value.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input, cpu_indices, cpu_value) + npu_output = self.npu_op_exec(npu_input, npu_indices, npu_value) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def case_inp_exec(self, shape): + cpu_indices = [] + npu_indices = [] + for item in shape: + cpu_input, npu_input = create_common_tensor(item[0], 1, 100) + for i in range(1, 3): + cpu_indices1, npu_indices1 = create_common_tensor( + item[1], 1, 5) + cpu_indices.append(cpu_indices1) + npu_indices.append(npu_indices1) + cpu_value, npu_value = create_common_tensor(item[2], 1, 100) + cpu_output = self.cpu_op_inp_exec( + cpu_input, cpu_indices, cpu_value) + npu_output = self.npu_op_inp_exec( + npu_input, npu_indices, npu_value) + self.assertEqual(cpu_output, npu_output) + + def case_inp_exec_fp16(self, shape): + cpu_indices = [] + npu_indices = [] + for item in shape: + cpu_input, npu_input = create_common_tensor(item[0], 1, 100) + cpu_input = cpu_input.to(torch.float32) + for i in range(1, 3): + cpu_indices1, npu_indices1 = create_common_tensor( + item[1], 1, 5) + cpu_indices.append(cpu_indices1) + npu_indices.append(npu_indices1) + cpu_value, npu_value = create_common_tensor(item[2], 1, 100) + cpu_value = cpu_value.to(torch.float32) + cpu_output = self.cpu_op_inp_exec( + cpu_input, cpu_indices, cpu_value) + npu_output = self.npu_op_inp_exec( + npu_input, npu_indices, npu_value) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_index_put_shape_format_fp32(self, device): + format_list = [0] + shape_list = [(5, 6)] + shape_format = [[[np.float32, i, j], [np.int64, 0, [1, 2]], [ + np.float32, 0, [1, 2]]] for i in format_list for j in shape_list] + self.case_exec(shape_format) + self.case_inp_exec(shape_format) + + def test_index_put_shape_format_fp16(self, device): + format_list = [0] + shape_list = [(5, 6)] + shape_format = [[[np.float16, i, j], [np.int64, 0, [1, 2]], [ + np.float16, 0, [1, 2]]] for i in format_list for j in shape_list] + self.case_exec_fp16(shape_format) + self.case_inp_exec_fp16(shape_format) + + def test_index_put_null(self, device): + cpu_input1 = torch.rand(2, 2) + cpu_input2 = torch.rand(2, 2) + cpu_mask_index = torch.tensor([[False, False], [False, False]]) + npu_mask_index = cpu_mask_index.to("npu") + npu_input1 = cpu_input1.to("npu") + npu_input2 = cpu_input2.to("npu") + cpu_input1[cpu_mask_index] = cpu_input2.detach()[cpu_mask_index] + npu_input1[npu_mask_index] = npu_input2.detach()[npu_mask_index] + self.assertEqual(cpu_input1, npu_input1.to("cpu")) + +instantiate_device_type_tests(TestIndexPut, globals(), except_for="cpu") + +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_inverse.py b/test/test_npu/test_network_ops/test_inverse.py index cbf0680ce7e6fd43ef0a1caf451bca002e0aec5b..e2b533d78a6031366557012c16698bfa1f089fba 100644 --- a/test/test_npu/test_network_ops/test_inverse.py +++ b/test/test_npu/test_network_ops/test_inverse.py @@ -1,52 +1,52 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestInverse(TestCase): - def cpu_op_exec(self, input): - output = torch.inverse(input) - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = torch.inverse(input) - output = output.to("cpu") - output = output.numpy() - return output - - def test_inverse_shape_format(self, device): - #aicpu暂不支持5HD format,待支持后增加其他格式测试 - shape_format = [ - [np.float32, 0, (4,4)], - [np.float32, 0, (0,3,29,29)], - [np.float32, 0, (1,2,4,4)] - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestInverse, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestInverse(TestCase): + def cpu_op_exec(self, input): + output = torch.inverse(input) + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = torch.inverse(input) + output = output.to("cpu") + output = output.numpy() + return output + + def test_inverse_shape_format(self, device): + #aicpu暂不支持5HD format,待支持后增加其他格式测试 + shape_format = [ + [np.float32, 0, (4,4)], + [np.float32, 0, (0,3,29,29)], + [np.float32, 0, (1,2,4,4)] + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestInverse, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_is_floating_point.py b/test/test_npu/test_network_ops/test_is_floating_point.py index eca2d1fd7a4e43c7eec5e8be3ae3480df2941443..1740cf9cf3f908171f5ac72ce15628921f3219e7 100644 --- a/test/test_npu/test_network_ops/test_is_floating_point.py +++ b/test/test_npu/test_network_ops/test_is_floating_point.py @@ -1,47 +1,47 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestIsFloatingPiont(TestCase): - def test_is_floating_point(self, device): - shape_format = [ - [0.36, torch.float32], - [1, torch.int32], - [1, torch.float32], - [1, torch.float16], - [1, torch.int8], - ] - - for item in shape_format: - inputs = torch.tensor([item[0]], dtype=item[1]) - cpu_out = inputs.is_floating_point() - cpu_out = np.array(cpu_out, dtype=np.int32) - inputs = inputs.to("npu") - npu_out = inputs.is_floating_point() - npu_out = np.array(npu_out, dtype=np.int32) - self.assertRtolEqual(cpu_out, npu_out) - - -instantiate_device_type_tests(TestIsFloatingPiont, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestIsFloatingPiont(TestCase): + def test_is_floating_point(self, device): + shape_format = [ + [0.36, torch.float32], + [1, torch.int32], + [1, torch.float32], + [1, torch.float16], + [1, torch.int8], + ] + + for item in shape_format: + inputs = torch.tensor([item[0]], dtype=item[1]) + cpu_out = inputs.is_floating_point() + cpu_out = np.array(cpu_out, dtype=np.int32) + inputs = inputs.to("npu") + npu_out = inputs.is_floating_point() + npu_out = np.array(npu_out, dtype=np.int32) + self.assertRtolEqual(cpu_out, npu_out) + + +instantiate_device_type_tests(TestIsFloatingPiont, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_is_nonzero.py b/test/test_npu/test_network_ops/test_is_nonzero.py index 68f46ebd25561bbe65bb3a80782474a97793b987..252565dcd5df6eeadc1e1575316b7a39d03709cc 100644 --- a/test/test_npu/test_network_ops/test_is_nonzero.py +++ b/test/test_npu/test_network_ops/test_is_nonzero.py @@ -1,44 +1,44 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestIsNonzero(TestCase): - def cpu_op_exec(self, input1): - output = torch.is_nonzero(input1) - return output - - def npu_op_exec(self, input1): - output = torch.is_nonzero(input1) - return output - - def test_isnonzero_shape_format(self, device): - dtype_list = [np.float16, np.float32, np.int32, np.bool_] - format_list = [0] - shape_list = [[1], [1, 1, 1], [1, 1, 1, 1]] - shape_format = [ - [i, j, k] for i in dtype_list for j in format_list for k in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - cpu_output == npu_output - -instantiate_device_type_tests(TestIsNonzero, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestIsNonzero(TestCase): + def cpu_op_exec(self, input1): + output = torch.is_nonzero(input1) + return output + + def npu_op_exec(self, input1): + output = torch.is_nonzero(input1) + return output + + def test_isnonzero_shape_format(self, device): + dtype_list = [np.float16, np.float32, np.int32, np.bool_] + format_list = [0] + shape_list = [[1], [1, 1, 1], [1, 1, 1, 1]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + cpu_output == npu_output + +instantiate_device_type_tests(TestIsNonzero, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_isfinite.py b/test/test_npu/test_network_ops/test_isfinite.py index 8c1e6ecd2363ba79ca8ae2f5fe046e3c6dc38341..19d775fc0e1fcb076069d8c4c0ff2b7fe9f98389 100644 --- a/test/test_npu/test_network_ops/test_isfinite.py +++ b/test/test_npu/test_network_ops/test_isfinite.py @@ -1,57 +1,57 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestIsfinite(TestCase): - def test_isfinite(self, device): - x = torch.Tensor([1, 2, -10]).to("npu") - self.assertEqual(torch.isfinite(x).to("cpu"), torch.BoolTensor([True, True, True])) - - - def cpu_op_exec(self, input): - output = torch.isfinite(input) - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = torch.isfinite(input) - output = output.to("cpu") - output = output.numpy() - return output - - def test_isfinite_shape_format(self, device): - shape_format = [ - [np.int16, 0, (1, 2, 2, 5)], - [np.int32, 0, (1, 4, 3)], - [np.int64, 0, (2, 3)], - [np.float32, 0, (8, 4, 3, 9)], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestIsfinite, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestIsfinite(TestCase): + def test_isfinite(self, device): + x = torch.Tensor([1, 2, -10]).to("npu") + self.assertEqual(torch.isfinite(x).to("cpu"), torch.BoolTensor([True, True, True])) + + + def cpu_op_exec(self, input): + output = torch.isfinite(input) + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = torch.isfinite(input) + output = output.to("cpu") + output = output.numpy() + return output + + def test_isfinite_shape_format(self, device): + shape_format = [ + [np.int16, 0, (1, 2, 2, 5)], + [np.int32, 0, (1, 4, 3)], + [np.int64, 0, (2, 3)], + [np.float32, 0, (8, 4, 3, 9)], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestIsfinite, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_item.py b/test/test_npu/test_network_ops/test_item.py index 313304bdc1202e701d6f2e95ddb624fcb3c1d30d..3348e06a6e78c44efdb1ca077026bb2d3778c6bb 100644 --- a/test/test_npu/test_network_ops/test_item.py +++ b/test/test_npu/test_network_ops/test_item.py @@ -1,38 +1,38 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestItem(TestCase): - def test_item_common_shape_format(self, device): - shape_format = [ - [[np.float16, 0, 1]], - [[np.float16, 0, 5]], - [[np.float32, 4, 3]], - [[np.float32, 29, 4]] - ] - for shape in shape_format: - cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) - cpu_output = np.array([cpu_input[0].item()]).astype(np.float32) - npu_output = np.array([npu_input[0].item()]).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestItem, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestItem(TestCase): + def test_item_common_shape_format(self, device): + shape_format = [ + [[np.float16, 0, 1]], + [[np.float16, 0, 5]], + [[np.float32, 4, 3]], + [[np.float32, 29, 4]] + ] + for shape in shape_format: + cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) + cpu_output = np.array([cpu_input[0].item()]).astype(np.float32) + npu_output = np.array([npu_input[0].item()]).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestItem, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_le.py b/test/test_npu/test_network_ops/test_le.py old mode 100644 new mode 100755 index 8d7fcc8753fec6fe21e4b02e9f5a9539b595a511..093f55fdf3d6a481c9158f71c4b1dca8e1d5f491 --- a/test/test_npu/test_network_ops/test_le.py +++ b/test/test_npu/test_network_ops/test_le.py @@ -1,318 +1,318 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLe(TestCase): - def generate_scalar(self, min, max): - scalar = np.random.uniform(min, max) - return scalar - - def cpu_op_exec(self, input1, input2): - output = torch.le(input1, input2) - output = output.numpy() - return output - - def cpu_op_exec_out(self, input1, input2, input3): - torch.le(input1, input2, out = input3) - output = input3.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.le(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec(self, input1, input2): - output = input1.le_(input2) - output = input1 - output = output.numpy() - return output - - def npu_op_inplace_exec(self, input1, input2): - output = input1.le_(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, output): - torch.le(input1, input2, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_scalar(self, input, scalar): - output = torch.le(input, scalar) - output = output.numpy() - return output - - def cpu_op_exec_scalar_out(self, input1, scalar, input2): - torch.le(input1, scalar, out = input2) - output = input2.numpy() - return output - - def npu_op_exec_scalar(self, input, scalar): - output = torch.le(input, scalar) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec_scalar(self, input, scalar): - output = input.le_(scalar) - output = output.numpy() - return output - - def npu_op_inplace_exec_scalar(self, input, scalar): - input = input.to("npu") - output = input.le_(scalar) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar_out(self, input, scalar, output): - torch.le(input, scalar, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_stride_exec(self, input1, input2): - input1 = input1.as_strided([2, 2], [1, 2], 1) - input2 = input2.as_strided([2, 2], [1, 2], 1) - output = input1.le_(input2) - output = output.numpy() - return output - - def npu_op_inplace_stride_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - input1 = input1.as_strided([2, 2], [1, 2], 1) - input2 = input2.as_strided([2, 2], [1, 2], 1) - output = input1.le_(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_stride_scalar_exec(self, input1, input2): - input1 = input1.as_strided([2, 2], [1, 2], 1) - output = input1.le_(input2) - output = output.numpy() - return output - - def npu_op_inplace_stride_scalar_exec(self, input1, input2): - input1 = input1.to("npu") - input1 = input1.as_strided([2, 2], [1, 2], 1) - output = input1.le_(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def le_tensor_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) - cpu_input3 = torch.randn(item[1][2])<0 - npu_input3 = cpu_input3.npu() - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - if cpu_input3.dtype == torch.float16: - cpu_input3 = cpu_input3.to(torch.float32) - cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_le_tensor_out(self, device): - shape_format = [ - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.le_tensor_out_result(shape_format) - - def le_scalar_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2 = torch.randn(item[1][2])<0 - npu_input2 = cpu_input2.npu() - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - scalar = self.generate_scalar(0, 100) - cpu_output_out = self.cpu_op_exec_scalar_out(cpu_input1, scalar, cpu_input2) - npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_le_scalar_out(self, device): - shape_format = [ - [[np.float16, 0, [12, 4, 12, 121]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [12, 10, 14, 111]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 2, [16, 3, 11, 121, 21]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [16, 16, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [20, 10, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 2, [1313, 3, 3, 3, 121]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [16, 22, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.le_scalar_out_result(shape_format) - - def test_le_scalar_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_exec_scalar(npu_input, scalar) - self.assertEqual(cpu_output, npu_output) - - def test_le_scalar_int32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.int32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_exec_scalar(npu_input, scalar) - self.assertEqual(cpu_output, npu_output) - - def test_gt_scalar_float16(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_exec_scalar(npu_input, scalar) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_le_tensor_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float32, i, j], [np.float32, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_le_tensor_float16(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float16, i, j], [np.float16, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_le_inplace_float32(self, device): - format_list = [0, 3] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float32, i, j], [np.float32, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_le_inplace_float16(self, device): - format_list = [0, 3] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [[[np.float16, i, j], [np.float16, i, j]] - for i in format_list for j in shape_list] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_le_inplace_scalar_float32(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - scalar = self.generate_scalar(0, 100) - scalar1 = copy.deepcopy(scalar) - ncpu_input = copy.deepcopy(cpu_input) - cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar1) - self.assertEqual(cpu_output, npu_output) - - def test_le_inplace_scalar_float16(self, device): - format_list = [0] - shape_list = [(5, 3), (2, 3, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - scalar = self.generate_scalar(0, 100) - cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) - npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_le_mix_dtype(self, device): - npu_input1, npu_input2 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) - npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - cpu_output = self.cpu_op_exec(npu_input1, npu_input3) - npu_output = self.npu_op_exec(npu_input2, npu_input4) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestLe, globals(), except_for="cpu") -if __name__ == '__main__': - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLe(TestCase): + def generate_scalar(self, min, max): + scalar = np.random.uniform(min, max) + return scalar + + def cpu_op_exec(self, input1, input2): + output = torch.le(input1, input2) + output = output.numpy() + return output + + def cpu_op_exec_out(self, input1, input2, input3): + torch.le(input1, input2, out = input3) + output = input3.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.le(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec(self, input1, input2): + output = input1.le_(input2) + output = input1 + output = output.numpy() + return output + + def npu_op_inplace_exec(self, input1, input2): + output = input1.le_(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, output): + torch.le(input1, input2, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_scalar(self, input, scalar): + output = torch.le(input, scalar) + output = output.numpy() + return output + + def cpu_op_exec_scalar_out(self, input1, scalar, input2): + torch.le(input1, scalar, out = input2) + output = input2.numpy() + return output + + def npu_op_exec_scalar(self, input, scalar): + output = torch.le(input, scalar) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec_scalar(self, input, scalar): + output = input.le_(scalar) + output = output.numpy() + return output + + def npu_op_inplace_exec_scalar(self, input, scalar): + input = input.to("npu") + output = input.le_(scalar) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar_out(self, input, scalar, output): + torch.le(input, scalar, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_stride_exec(self, input1, input2): + input1 = input1.as_strided([2, 2], [1, 2], 1) + input2 = input2.as_strided([2, 2], [1, 2], 1) + output = input1.le_(input2) + output = output.numpy() + return output + + def npu_op_inplace_stride_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + input1 = input1.as_strided([2, 2], [1, 2], 1) + input2 = input2.as_strided([2, 2], [1, 2], 1) + output = input1.le_(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_stride_scalar_exec(self, input1, input2): + input1 = input1.as_strided([2, 2], [1, 2], 1) + output = input1.le_(input2) + output = output.numpy() + return output + + def npu_op_inplace_stride_scalar_exec(self, input1, input2): + input1 = input1.to("npu") + input1 = input1.as_strided([2, 2], [1, 2], 1) + output = input1.le_(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def le_tensor_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) + cpu_input3 = torch.randn(item[1][2])<0 + npu_input3 = cpu_input3.npu() + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + if cpu_input3.dtype == torch.float16: + cpu_input3 = cpu_input3.to(torch.float32) + cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_le_tensor_out(self, device): + shape_format = [ + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.le_tensor_out_result(shape_format) + + def le_scalar_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2 = torch.randn(item[1][2])<0 + npu_input2 = cpu_input2.npu() + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + scalar = self.generate_scalar(0, 100) + cpu_output_out = self.cpu_op_exec_scalar_out(cpu_input1, scalar, cpu_input2) + npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_le_scalar_out(self, device): + shape_format = [ + [[np.float16, 0, [12, 4, 12, 121]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [12, 10, 14, 111]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 2, [16, 3, 11, 121, 21]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [16, 16, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [20, 10, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 2, [1313, 3, 3, 3, 121]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [16, 22, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.le_scalar_out_result(shape_format) + + def test_le_scalar_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_exec_scalar(npu_input, scalar) + self.assertEqual(cpu_output, npu_output) + + def test_le_scalar_int32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.int32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_exec_scalar(npu_input, scalar) + self.assertEqual(cpu_output, npu_output) + + def test_gt_scalar_float16(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_exec_scalar(npu_input, scalar) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_le_tensor_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float32, i, j], [np.float32, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_le_tensor_float16(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float16, i, j], [np.float16, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_le_inplace_float32(self, device): + format_list = [0, 3] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float32, i, j], [np.float32, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_le_inplace_float16(self, device): + format_list = [0, 3] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [[[np.float16, i, j], [np.float16, i, j]] + for i in format_list for j in shape_list] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_inplace_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_le_inplace_scalar_float32(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + scalar = self.generate_scalar(0, 100) + scalar1 = copy.deepcopy(scalar) + ncpu_input = copy.deepcopy(cpu_input) + cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar1) + self.assertEqual(cpu_output, npu_output) + + def test_le_inplace_scalar_float16(self, device): + format_list = [0] + shape_list = [(5, 3), (2, 3, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + scalar = self.generate_scalar(0, 100) + cpu_output = self.cpu_op_inplace_exec_scalar(cpu_input, scalar) + npu_output = self.npu_op_inplace_exec_scalar(npu_input, scalar) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_le_mix_dtype(self, device): + npu_input1, npu_input2 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) + npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + cpu_output = self.cpu_op_exec(npu_input1, npu_input3) + npu_output = self.npu_op_exec(npu_input2, npu_input4) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestLe, globals(), except_for="cpu") +if __name__ == '__main__': + run_tests() diff --git a/test/test_npu/test_network_ops/test_leaky_relu_backward.py b/test/test_npu/test_network_ops/test_leaky_relu_backward.py old mode 100644 new mode 100755 index 9f5a6aca6498a972494ca08d072f91bbc430f115..e690f37809a884db76b77f1544aa1a876788f7fd --- a/test/test_npu/test_network_ops/test_leaky_relu_backward.py +++ b/test/test_npu/test_network_ops/test_leaky_relu_backward.py @@ -1,82 +1,82 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLeakyReluBackward(TestCase): - def cpu_op_backward_exec(self, input1): - w = torch.ones_like(input1) - input1.requires_grad_(True) - output = torch.nn.functional.leaky_relu(input1) - output.backward(w) - res = input1.grad - res = res.numpy() - return res, output - - def npu_op_backward_exec(self, input1): - w = torch.ones_like(input1) - w = w.to("npu") - input1 = input1.to("npu") - input1.requires_grad_(True) - output = torch.nn.functional.leaky_relu(input1) - output.backward(w) - output = output.to("cpu") - res = input1.grad - res = input1.grad.to("cpu") - res = res.numpy() - return res, output - - def test_leaky_relu_backward_format_fp32(self, device): - format_list = [0, 3] - shape_list = [(5, 3)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - # print(item) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 2) - cpu_output = self.cpu_op_backward_exec(cpu_input1) - npu_output = self.npu_op_backward_exec(npu_input1) - self.assertEqual(cpu_output, npu_output) - - def test_leaky_relu_backward_format_fp16(self, device): - format_list = [0, 3] - shape_list = [(5, 3)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - # print(item) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 2) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output1, cpu_output2 = self.cpu_op_backward_exec(cpu_input1) - npu_output1, npu_output2 = self.npu_op_backward_exec(npu_input1) - cpu_output1 = cpu_output1.astype(np.float16) - self.assertEqual(cpu_output1, npu_output1) - self.assertEqual(cpu_output2, npu_output2) - - -instantiate_device_type_tests( - TestLeakyReluBackward, - globals(), - except_for="cpu") - -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLeakyReluBackward(TestCase): + def cpu_op_backward_exec(self, input1): + w = torch.ones_like(input1) + input1.requires_grad_(True) + output = torch.nn.functional.leaky_relu(input1) + output.backward(w) + res = input1.grad + res = res.numpy() + return res, output + + def npu_op_backward_exec(self, input1): + w = torch.ones_like(input1) + w = w.to("npu") + input1 = input1.to("npu") + input1.requires_grad_(True) + output = torch.nn.functional.leaky_relu(input1) + output.backward(w) + output = output.to("cpu") + res = input1.grad + res = input1.grad.to("cpu") + res = res.numpy() + return res, output + + def test_leaky_relu_backward_format_fp32(self, device): + format_list = [0, 3] + shape_list = [(5, 3)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + # print(item) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 2) + cpu_output = self.cpu_op_backward_exec(cpu_input1) + npu_output = self.npu_op_backward_exec(npu_input1) + self.assertEqual(cpu_output, npu_output) + + def test_leaky_relu_backward_format_fp16(self, device): + format_list = [0, 3] + shape_list = [(5, 3)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + # print(item) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 2) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output1, cpu_output2 = self.cpu_op_backward_exec(cpu_input1) + npu_output1, npu_output2 = self.npu_op_backward_exec(npu_input1) + cpu_output1 = cpu_output1.astype(np.float16) + self.assertEqual(cpu_output1, npu_output1) + self.assertEqual(cpu_output2, npu_output2) + + +instantiate_device_type_tests( + TestLeakyReluBackward, + globals(), + except_for="cpu") + +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_lerp.py b/test/test_npu/test_network_ops/test_lerp.py index eeb0b7250506026e724a90a5578ecc805cab80b1..497893f832af5d57245472c103b2789437adaf5c 100644 --- a/test/test_npu/test_network_ops/test_lerp.py +++ b/test/test_npu/test_network_ops/test_lerp.py @@ -1,225 +1,225 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import random -import math - -class TestLerp(TestCase): -# pylint: disable=unused-variable,unused-argument - - def cpu_op_exec(self, input1, input2, input3): - output = torch.lerp(input1,input2,input3) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2, input3): - output = torch.lerp(input1, input2, input3) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_out_exec(self, input1, input2, input3): - output = torch.ones_like(input1) - torch.lerp(input1,input2,input3, out = output) - output = output.numpy() - return output - - def npu_op_out_exec(self, input1, input2, input3): - output = torch.ones_like(input1) - torch.lerp(input1, input2, input3, out = output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_scalar_out_exec(self, input1, input2, input3): - output = torch.ones_like(input1) - torch.lerp(input1,input2,input3, out = output) - output = output.numpy() - return output - - def npu_op_scalar_out_exec(self, input1, input2, input3): - output = torch.ones_like(input1) - torch.lerp(input1, input2, input3, out = output) - output = output.to("cpu") - output = output.numpy() - return output - - - def test_lerp_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (4, 2, 2, 3)]], - [[np.float32, -1, (2, 2, 3, 4)]], - [[np.float32, -1, (3, 3, 3)]], - [[np.float32, -1, (4, 4, 4)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 1, 100) - cpu_input3, npu_input3 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3) - npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_lerp_float16_shape_format(self, device): - def cpu_op_exec_fp16(input1, input2, input3): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - input3 = input3.to(torch.float32) - output = torch.lerp(input1,input2,input3) - output = output.numpy() - output = output.astype(np.float16) - return output - - shape_format = [ - [[np.float16, -1, (100, 4, 5, 5)]], - [[np.float16, -1, (100, 5, 5, 4)]], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 10, 100) - cpu_input3, npu_input3 = create_common_tensor(item[0], 10, 100) - cpu_output = cpu_op_exec_fp16(cpu_input1, cpu_input2, cpu_input3) - npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output, prec=0.003, prec16=0.003) - - - def test_lerp_out_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (4, 2, 2, 3)]], - [[np.float32, -1, (2, 2, 3, 4)]], - [[np.float32, -1, (3, 3, 3)]], - [[np.float32, -1, (4, 4, 4)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 1, 100) - cpu_input3, npu_input3 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_out_exec(cpu_input1, cpu_input2, cpu_input3) - npu_output = self.npu_op_out_exec(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_lerp_out_float16_shape_format(self, device): - def cpu_op_out_exec_fp16(input1, input2, input3): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - input3 = input3.to(torch.float32) - output = torch.ones_like(input1) - torch.lerp(input1,input2,input3, out = output) - output = output.numpy() - output = output.astype(np.float16) - return output - - shape_format = [ - [[np.float16, -1, (100, 4, 5, 5)]], - [[np.float16, -1, (100, 5, 5, 4)]], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 10, 100) - cpu_input3, npu_input3 = create_common_tensor(item[0], 10, 100) - cpu_output = cpu_op_out_exec_fp16(cpu_input1, cpu_input2, cpu_input3) - npu_output = self.npu_op_out_exec(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output, prec=0.003, prec16=0.003) - - def test_lerp_scalar_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (4, 2, 2, 3)], 1.0], - [[np.float32, -1, (2, 2, 3, 4)], 2.0], - [[np.float32, -1, (3, 3, 3)], 1.2], - [[np.float32, -1, (4, 4, 4)], 1.2] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 1, 100) - cpu_input3 = item[1] - npu_input3 = item[1] - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3) - npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_lerp_scalar_float16_shape_format(self, device): - def cpu_op_scalar_exec_fp16(input1, input2, input3): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - output = torch.lerp(input1,input2,input3) - output = output.numpy() - output = output.astype(np.float16) - return output - - shape_format = [ - [[np.float16, -1, (100, 4, 5, 5)], 1.2], - [[np.float16, -1, (100, 5, 5, 4)], 1.2], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 10, 100) - cpu_input3 = item[1] - npu_input3 = item[1] - cpu_output = cpu_op_scalar_exec_fp16(cpu_input1, cpu_input2, cpu_input3) - npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output, prec16=0.02) - - - def test_lerp_scalar_out_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (4, 2, 2, 3)], 1.2], - [[np.float32, -1, (2, 2, 3, 4)],1.2], - [[np.float32, -1, (3, 3, 3)], 1.0], - [[np.float32, -1, (4, 4, 4)], 2.0] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 1, 100) - cpu_input3, npu_input3 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3) - npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_lerp_scalar_out_float16_shape_format(self, device): - def cpu_op_scalar_out_exec_fp16(input1, input2, input3): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - output = torch.ones_like(input1) - torch.lerp(input1,input2,input3, out = output) - output = output.numpy() - output = output.astype(np.float16) - return output - - shape_format = [ - [[np.float16, -1, (100, 4, 5, 5)], 1.2], - [[np.float16, -1, (100, 5, 5, 4)], 1.2], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 10, 100) - cpu_input3 = item[1] - npu_input3 = item[1] - cpu_output = cpu_op_scalar_out_exec_fp16(cpu_input1, cpu_input2, cpu_input3) - npu_output = self.npu_op_scalar_out_exec(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output, prec16=0.02) - -instantiate_device_type_tests(TestLerp, globals(), except_for='cpu') -if __name__ == '__main__': - run_tests() +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +import random +import math + +class TestLerp(TestCase): +# pylint: disable=unused-variable,unused-argument + + def cpu_op_exec(self, input1, input2, input3): + output = torch.lerp(input1,input2,input3) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2, input3): + output = torch.lerp(input1, input2, input3) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_out_exec(self, input1, input2, input3): + output = torch.ones_like(input1) + torch.lerp(input1,input2,input3, out = output) + output = output.numpy() + return output + + def npu_op_out_exec(self, input1, input2, input3): + output = torch.ones_like(input1) + torch.lerp(input1, input2, input3, out = output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_scalar_out_exec(self, input1, input2, input3): + output = torch.ones_like(input1) + torch.lerp(input1,input2,input3, out = output) + output = output.numpy() + return output + + def npu_op_scalar_out_exec(self, input1, input2, input3): + output = torch.ones_like(input1) + torch.lerp(input1, input2, input3, out = output) + output = output.to("cpu") + output = output.numpy() + return output + + + def test_lerp_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (4, 2, 2, 3)]], + [[np.float32, -1, (2, 2, 3, 4)]], + [[np.float32, -1, (3, 3, 3)]], + [[np.float32, -1, (4, 4, 4)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 1, 100) + cpu_input3, npu_input3 = create_common_tensor(item[0], 1, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3) + npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_lerp_float16_shape_format(self, device): + def cpu_op_exec_fp16(input1, input2, input3): + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + input3 = input3.to(torch.float32) + output = torch.lerp(input1,input2,input3) + output = output.numpy() + output = output.astype(np.float16) + return output + + shape_format = [ + [[np.float16, -1, (100, 4, 5, 5)]], + [[np.float16, -1, (100, 5, 5, 4)]], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 10, 100) + cpu_input3, npu_input3 = create_common_tensor(item[0], 10, 100) + cpu_output = cpu_op_exec_fp16(cpu_input1, cpu_input2, cpu_input3) + npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output, prec=0.003, prec16=0.003) + + + def test_lerp_out_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (4, 2, 2, 3)]], + [[np.float32, -1, (2, 2, 3, 4)]], + [[np.float32, -1, (3, 3, 3)]], + [[np.float32, -1, (4, 4, 4)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 1, 100) + cpu_input3, npu_input3 = create_common_tensor(item[0], 1, 100) + cpu_output = self.cpu_op_out_exec(cpu_input1, cpu_input2, cpu_input3) + npu_output = self.npu_op_out_exec(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_lerp_out_float16_shape_format(self, device): + def cpu_op_out_exec_fp16(input1, input2, input3): + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + input3 = input3.to(torch.float32) + output = torch.ones_like(input1) + torch.lerp(input1,input2,input3, out = output) + output = output.numpy() + output = output.astype(np.float16) + return output + + shape_format = [ + [[np.float16, -1, (100, 4, 5, 5)]], + [[np.float16, -1, (100, 5, 5, 4)]], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 10, 100) + cpu_input3, npu_input3 = create_common_tensor(item[0], 10, 100) + cpu_output = cpu_op_out_exec_fp16(cpu_input1, cpu_input2, cpu_input3) + npu_output = self.npu_op_out_exec(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output, prec=0.003, prec16=0.003) + + def test_lerp_scalar_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (4, 2, 2, 3)], 1.0], + [[np.float32, -1, (2, 2, 3, 4)], 2.0], + [[np.float32, -1, (3, 3, 3)], 1.2], + [[np.float32, -1, (4, 4, 4)], 1.2] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 1, 100) + cpu_input3 = item[1] + npu_input3 = item[1] + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3) + npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_lerp_scalar_float16_shape_format(self, device): + def cpu_op_scalar_exec_fp16(input1, input2, input3): + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + output = torch.lerp(input1,input2,input3) + output = output.numpy() + output = output.astype(np.float16) + return output + + shape_format = [ + [[np.float16, -1, (100, 4, 5, 5)], 1.2], + [[np.float16, -1, (100, 5, 5, 4)], 1.2], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 10, 100) + cpu_input3 = item[1] + npu_input3 = item[1] + cpu_output = cpu_op_scalar_exec_fp16(cpu_input1, cpu_input2, cpu_input3) + npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output, prec16=0.02) + + + def test_lerp_scalar_out_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (4, 2, 2, 3)], 1.2], + [[np.float32, -1, (2, 2, 3, 4)],1.2], + [[np.float32, -1, (3, 3, 3)], 1.0], + [[np.float32, -1, (4, 4, 4)], 2.0] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 1, 100) + cpu_input3, npu_input3 = create_common_tensor(item[0], 1, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3) + npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_lerp_scalar_out_float16_shape_format(self, device): + def cpu_op_scalar_out_exec_fp16(input1, input2, input3): + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + output = torch.ones_like(input1) + torch.lerp(input1,input2,input3, out = output) + output = output.numpy() + output = output.astype(np.float16) + return output + + shape_format = [ + [[np.float16, -1, (100, 4, 5, 5)], 1.2], + [[np.float16, -1, (100, 5, 5, 4)], 1.2], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 10, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 10, 100) + cpu_input3 = item[1] + npu_input3 = item[1] + cpu_output = cpu_op_scalar_out_exec_fp16(cpu_input1, cpu_input2, cpu_input3) + npu_output = self.npu_op_scalar_out_exec(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output, prec16=0.02) + +instantiate_device_type_tests(TestLerp, globals(), except_for='cpu') +if __name__ == '__main__': + run_tests() diff --git a/test/test_npu/test_network_ops/test_log.py b/test/test_npu/test_network_ops/test_log.py old mode 100644 new mode 100755 index ab6beb1e8c25584df191921ef8affca628dc7a55..8dc68518f2e7ead7103de6f4d347211d98f7a6b9 --- a/test/test_npu/test_network_ops/test_log.py +++ b/test/test_npu/test_network_ops/test_log.py @@ -1,193 +1,193 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLog(TestCase): - def cpu_op_exec(self, input1): - output = torch.log(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - input1 = input1.to("npu") - output = torch.log(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - torch.log(input1, out=input2) - output = input2.to("cpu") - output = output.numpy() - return output - - def cpu_inp_op_exec(self, input1): - output = torch.log_(input1) - output = output.numpy() - return output - - def npu_inp_op_exec(self, input1): - input1 = input1.to("npu") - output = torch.log_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def cpu_inp_uncon_op_exec(self, input1): - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.log_(input1) - output = output.numpy() - return output - - def npu_inp_uncon_op_exec(self, input1): - input1 = input1.to("npu") - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.log_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - -# TestCase - def test_log_shape_format_fp32(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log_shape_format_fp16(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log_inp_shape_format_fp32(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_inp_op_exec(cpu_input1) - npu_output = self.npu_inp_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log_inp_shape_format_fp16(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_inp_op_exec(cpu_input1) - npu_output = self.npu_inp_op_exec(npu_input1) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log_inp_uncon_shape_format_fp32(self, device): - format_list = [3] - shape_list = [(8, 6)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) - npu_output = self.npu_inp_uncon_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log_inp_uncon_shape_format_fp16(self, device): - format_list = [3] - shape_list = [(8, 6)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) - npu_output = self.npu_inp_uncon_op_exec(npu_input1) - cpu_output = cpu_output.astype(np.float16) - print("cpu:", cpu_output, "npu:", npu_output) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log_out_float32_shape_format(self, device): - shape_format = [ - [[np.float32, 0, [1024, 32, 7, 7]], [np.float32, 0, [1024, 32, 7, 7]]], - [[np.float32, 0, [1024, 32, 7]], [np.float32, 0, [1024, 32]]], - [[np.float32, 0, [1024, 32]], [np.float32, 0, [1024, 32]]], - [[np.float32, 0, [1024]], [np.float32, 0, [1024, 1]]], - [[np.float32, 3, [1024, 32, 7, 7]], [np.float32, 3, [1024, 32, 7, 7]]], - [[np.float32, 3, [1024, 32, 7]], [np.float32, 3, [1024, 32]]], - [[np.float32, 3, [1024, 32]], [np.float32, 3, [1024, 20]]], - [[np.float32, 3, [1024]], [np.float32, 3, [1024]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output, npu_output = create_common_tensor(item[1], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec_out(npu_input, npu_output) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log_out_float16_shape_format(self, device): - shape_format = [ - [[np.float16, 0, [1024, 32, 7, 7]], [np.float16, 0, [1024, 32, 7, 7]]], - [[np.float16, 0, [1024, 32, 7]], [np.float16, 0, [1024, 32]]], - [[np.float16, 0, [1024, 32]], [np.float16, 0, [1024, 32]]], - [[np.float16, 0, [1024]], [np.float16, 0, [1024, 1]]], - [[np.float16, 3, [1024, 32, 7, 7]], [np.float16, 3, [1024, 32, 7, 7]]], - [[np.float16, 3, [1024, 32, 7]], [np.float16, 3, [1024, 32]]], - [[np.float16, 3, [1024, 32]], [np.float16, 3, [1024, 20]]], - [[np.float16, 3, [1024]], [np.float16, 3, [1024]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output, npu_output = create_common_tensor(item[1], 0, 100) - if item[0][0] == np.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = cpu_output.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec_out(npu_input, npu_output) - if item[0][0] == np.float16: - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestLog, globals(), except_for="cpu") -if __name__ == '__main__': - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLog(TestCase): + def cpu_op_exec(self, input1): + output = torch.log(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + input1 = input1.to("npu") + output = torch.log(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + torch.log(input1, out=input2) + output = input2.to("cpu") + output = output.numpy() + return output + + def cpu_inp_op_exec(self, input1): + output = torch.log_(input1) + output = output.numpy() + return output + + def npu_inp_op_exec(self, input1): + input1 = input1.to("npu") + output = torch.log_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def cpu_inp_uncon_op_exec(self, input1): + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.log_(input1) + output = output.numpy() + return output + + def npu_inp_uncon_op_exec(self, input1): + input1 = input1.to("npu") + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.log_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + +# TestCase + def test_log_shape_format_fp32(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log_shape_format_fp16(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log_inp_shape_format_fp32(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_inp_op_exec(cpu_input1) + npu_output = self.npu_inp_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log_inp_shape_format_fp16(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_inp_op_exec(cpu_input1) + npu_output = self.npu_inp_op_exec(npu_input1) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log_inp_uncon_shape_format_fp32(self, device): + format_list = [3] + shape_list = [(8, 6)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) + npu_output = self.npu_inp_uncon_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log_inp_uncon_shape_format_fp16(self, device): + format_list = [3] + shape_list = [(8, 6)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) + npu_output = self.npu_inp_uncon_op_exec(npu_input1) + cpu_output = cpu_output.astype(np.float16) + print("cpu:", cpu_output, "npu:", npu_output) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log_out_float32_shape_format(self, device): + shape_format = [ + [[np.float32, 0, [1024, 32, 7, 7]], [np.float32, 0, [1024, 32, 7, 7]]], + [[np.float32, 0, [1024, 32, 7]], [np.float32, 0, [1024, 32]]], + [[np.float32, 0, [1024, 32]], [np.float32, 0, [1024, 32]]], + [[np.float32, 0, [1024]], [np.float32, 0, [1024, 1]]], + [[np.float32, 3, [1024, 32, 7, 7]], [np.float32, 3, [1024, 32, 7, 7]]], + [[np.float32, 3, [1024, 32, 7]], [np.float32, 3, [1024, 32]]], + [[np.float32, 3, [1024, 32]], [np.float32, 3, [1024, 20]]], + [[np.float32, 3, [1024]], [np.float32, 3, [1024]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output, npu_output = create_common_tensor(item[1], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec_out(npu_input, npu_output) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log_out_float16_shape_format(self, device): + shape_format = [ + [[np.float16, 0, [1024, 32, 7, 7]], [np.float16, 0, [1024, 32, 7, 7]]], + [[np.float16, 0, [1024, 32, 7]], [np.float16, 0, [1024, 32]]], + [[np.float16, 0, [1024, 32]], [np.float16, 0, [1024, 32]]], + [[np.float16, 0, [1024]], [np.float16, 0, [1024, 1]]], + [[np.float16, 3, [1024, 32, 7, 7]], [np.float16, 3, [1024, 32, 7, 7]]], + [[np.float16, 3, [1024, 32, 7]], [np.float16, 3, [1024, 32]]], + [[np.float16, 3, [1024, 32]], [np.float16, 3, [1024, 20]]], + [[np.float16, 3, [1024]], [np.float16, 3, [1024]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output, npu_output = create_common_tensor(item[1], 0, 100) + if item[0][0] == np.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = cpu_output.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec_out(npu_input, npu_output) + if item[0][0] == np.float16: + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestLog, globals(), except_for="cpu") +if __name__ == '__main__': + run_tests() diff --git a/test/test_npu/test_network_ops/test_log10.py b/test/test_npu/test_network_ops/test_log10.py index c3cc4226ae48d2e5aeaecbdb5e3ad7ddfd28804c..6b99d68c56a2659220df8749a63555d1ea5564c2 100644 --- a/test/test_npu/test_network_ops/test_log10.py +++ b/test/test_npu/test_network_ops/test_log10.py @@ -1,183 +1,183 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLog10(TestCase): - def cpu_op_exec(self, input1): - output = torch.log10(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = torch.log10(input1) - output = output.to("cpu").numpy() - return output - - def npu_op_exec_out(self, input1, input2): - torch.log10(input1, out=input2) - output = input2.to("cpu").numpy() - return output - - def cpu_inp_op_exec(self, input1): - output = torch.log10_(input1) - output = output.numpy() - return output - - def npu_inp_op_exec(self, input1): - torch.log10_(input1) - output = input1.to("cpu").numpy() - return output - - def cpu_inp_uncon_op_exec(self, input1): - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.log10_(input1) - output = output.numpy() - return output - - def npu_inp_uncon_op_exec(self, input1): - input1 = input1.as_strided([2, 2], [1, 2], 2) - torch.log10_(input1) - output = input1.to("cpu").numpy() - return output - - def test_log10_shape_format_fp32(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log10_shape_format_fp16(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log10_inp_shape_format_fp32(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_inp_op_exec(cpu_input1) - npu_output = self.npu_inp_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log10_inp_shape_format_fp16(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_inp_op_exec(cpu_input1) - npu_output = self.npu_inp_op_exec(npu_input1) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log10_inp_uncon_shape_format_fp32(self, device): - format_list = [3] - shape_list = [(8, 6)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) - npu_output = self.npu_inp_uncon_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log10_inp_uncon_shape_format_fp16(self, device): - format_list = [3] - shape_list = [(8, 6)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) - npu_output = self.npu_inp_uncon_op_exec(npu_input1) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log10_out_float32_shape_format(self, device): - shape_format = [ - [[np.float32, 0, [1024, 32, 7, 7]], [np.float32, 0, [1024, 32, 7, 7]]], - [[np.float32, 0, [1024, 32, 7]], [np.float32, 0, [1024, 32]]], - [[np.float32, 0, [1024, 32]], [np.float32, 0, [1024, 32]]], - [[np.float32, 0, [1024]], [np.float32, 0, [1024, 1]]], - [[np.float32, 3, [1024, 32, 7, 7]], [np.float32, 3, [1024, 32, 7, 7]]], - [[np.float32, 3, [1024, 32, 7]], [np.float32, 3, [1024, 32]]], - [[np.float32, 3, [1024, 32]], [np.float32, 3, [1024, 20]]], - [[np.float32, 3, [1024]], [np.float32, 3, [1024]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output, npu_output = create_common_tensor(item[1], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec_out(npu_input, npu_output) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log10_out_float16_shape_format(self, device): - shape_format = [ - [[np.float16, 0, [1024, 32, 7, 7]], [np.float16, 0, [1024, 32, 7, 7]]], - [[np.float16, 0, [1024, 32, 7]], [np.float16, 0, [1024, 32]]], - [[np.float16, 0, [1024, 32]], [np.float16, 0, [1024, 32]]], - [[np.float16, 0, [1024]], [np.float16, 0, [1024, 1]]], - [[np.float16, 3, [1024, 32, 7, 7]], [np.float16, 3, [1024, 32, 7, 7]]], - [[np.float16, 3, [1024, 32, 7]], [np.float16, 3, [1024, 32]]], - [[np.float16, 3, [1024, 32]], [np.float16, 3, [1024, 20]]], - [[np.float16, 3, [1024]], [np.float16, 3, [1024]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output, npu_output = create_common_tensor(item[1], 0, 100) - if item[0][0] == np.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = cpu_output.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec_out(npu_input, npu_output) - if item[0][0] == np.float16: - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestLog10, globals(), except_for="cpu") -if __name__ == '__main__': - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLog10(TestCase): + def cpu_op_exec(self, input1): + output = torch.log10(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = torch.log10(input1) + output = output.to("cpu").numpy() + return output + + def npu_op_exec_out(self, input1, input2): + torch.log10(input1, out=input2) + output = input2.to("cpu").numpy() + return output + + def cpu_inp_op_exec(self, input1): + output = torch.log10_(input1) + output = output.numpy() + return output + + def npu_inp_op_exec(self, input1): + torch.log10_(input1) + output = input1.to("cpu").numpy() + return output + + def cpu_inp_uncon_op_exec(self, input1): + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.log10_(input1) + output = output.numpy() + return output + + def npu_inp_uncon_op_exec(self, input1): + input1 = input1.as_strided([2, 2], [1, 2], 2) + torch.log10_(input1) + output = input1.to("cpu").numpy() + return output + + def test_log10_shape_format_fp32(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log10_shape_format_fp16(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log10_inp_shape_format_fp32(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_inp_op_exec(cpu_input1) + npu_output = self.npu_inp_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log10_inp_shape_format_fp16(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_inp_op_exec(cpu_input1) + npu_output = self.npu_inp_op_exec(npu_input1) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log10_inp_uncon_shape_format_fp32(self, device): + format_list = [3] + shape_list = [(8, 6)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) + npu_output = self.npu_inp_uncon_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log10_inp_uncon_shape_format_fp16(self, device): + format_list = [3] + shape_list = [(8, 6)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) + npu_output = self.npu_inp_uncon_op_exec(npu_input1) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log10_out_float32_shape_format(self, device): + shape_format = [ + [[np.float32, 0, [1024, 32, 7, 7]], [np.float32, 0, [1024, 32, 7, 7]]], + [[np.float32, 0, [1024, 32, 7]], [np.float32, 0, [1024, 32]]], + [[np.float32, 0, [1024, 32]], [np.float32, 0, [1024, 32]]], + [[np.float32, 0, [1024]], [np.float32, 0, [1024, 1]]], + [[np.float32, 3, [1024, 32, 7, 7]], [np.float32, 3, [1024, 32, 7, 7]]], + [[np.float32, 3, [1024, 32, 7]], [np.float32, 3, [1024, 32]]], + [[np.float32, 3, [1024, 32]], [np.float32, 3, [1024, 20]]], + [[np.float32, 3, [1024]], [np.float32, 3, [1024]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output, npu_output = create_common_tensor(item[1], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec_out(npu_input, npu_output) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log10_out_float16_shape_format(self, device): + shape_format = [ + [[np.float16, 0, [1024, 32, 7, 7]], [np.float16, 0, [1024, 32, 7, 7]]], + [[np.float16, 0, [1024, 32, 7]], [np.float16, 0, [1024, 32]]], + [[np.float16, 0, [1024, 32]], [np.float16, 0, [1024, 32]]], + [[np.float16, 0, [1024]], [np.float16, 0, [1024, 1]]], + [[np.float16, 3, [1024, 32, 7, 7]], [np.float16, 3, [1024, 32, 7, 7]]], + [[np.float16, 3, [1024, 32, 7]], [np.float16, 3, [1024, 32]]], + [[np.float16, 3, [1024, 32]], [np.float16, 3, [1024, 20]]], + [[np.float16, 3, [1024]], [np.float16, 3, [1024]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output, npu_output = create_common_tensor(item[1], 0, 100) + if item[0][0] == np.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = cpu_output.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec_out(npu_input, npu_output) + if item[0][0] == np.float16: + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestLog10, globals(), except_for="cpu") +if __name__ == '__main__': + run_tests() diff --git a/test/test_npu/test_network_ops/test_log2.py b/test/test_npu/test_network_ops/test_log2.py old mode 100644 new mode 100755 index 164f81a004eaef80b42b14b36f4154ecf4e47b2f..925dde63b543112b751d4ccc50b72de65fee396e --- a/test/test_npu/test_network_ops/test_log2.py +++ b/test/test_npu/test_network_ops/test_log2.py @@ -1,152 +1,152 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLog2(TestCase): - def cpu_op_exec(self, input1): - output = torch.log2(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - input1 = input1.to("npu") - output = torch.log2(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1): - input1 = input1.to("npu") - output = input1.to("npu") - torch.log2(input1, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_inp_op_exec(self, input1): - output = torch.log2_(input1) - output = output.numpy() - return output - - def npu_inp_op_exec(self, input1): - input1 = input1.to("npu") - output = torch.log2_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def cpu_inp_uncon_op_exec(self, input1): - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.log2_(input1) - output = output.numpy() - return output - - def npu_inp_uncon_op_exec(self, input1): - input1 = input1.to("npu") - input1 = input1.as_strided([2, 2], [1, 2], 2) - output = torch.log2_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def test_log2_shape_format_fp32(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - # print(item) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log2_shape_format_fp16(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log2_inp_shape_format_fp32(self, device): - format_list = [3] - shape_list = [(5, 3)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_inp_op_exec(cpu_input1) - npu_output = self.npu_inp_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log2_inp_shape_format_fp16(self, device): - format_list = [3] - shape_list = [(4, 4)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_inp_op_exec(cpu_input1) - npu_output = self.npu_inp_op_exec(npu_input1) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log2_inp_uncon_shape_format_fp32(self, device): - format_list = [3] - shape_list = [(8, 6)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) - npu_output = self.npu_inp_uncon_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_log_inp_uncon_shape_format_fp16(self, device): - format_list = [3] - shape_list = [(8, 6)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) - npu_output = self.npu_inp_uncon_op_exec(npu_input1) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestLog2, globals(), except_for="cpu") -if __name__ == '__main__': - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLog2(TestCase): + def cpu_op_exec(self, input1): + output = torch.log2(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + input1 = input1.to("npu") + output = torch.log2(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1): + input1 = input1.to("npu") + output = input1.to("npu") + torch.log2(input1, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_inp_op_exec(self, input1): + output = torch.log2_(input1) + output = output.numpy() + return output + + def npu_inp_op_exec(self, input1): + input1 = input1.to("npu") + output = torch.log2_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def cpu_inp_uncon_op_exec(self, input1): + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.log2_(input1) + output = output.numpy() + return output + + def npu_inp_uncon_op_exec(self, input1): + input1 = input1.to("npu") + input1 = input1.as_strided([2, 2], [1, 2], 2) + output = torch.log2_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def test_log2_shape_format_fp32(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + # print(item) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log2_shape_format_fp16(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log2_inp_shape_format_fp32(self, device): + format_list = [3] + shape_list = [(5, 3)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_inp_op_exec(cpu_input1) + npu_output = self.npu_inp_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log2_inp_shape_format_fp16(self, device): + format_list = [3] + shape_list = [(4, 4)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_inp_op_exec(cpu_input1) + npu_output = self.npu_inp_op_exec(npu_input1) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log2_inp_uncon_shape_format_fp32(self, device): + format_list = [3] + shape_list = [(8, 6)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) + npu_output = self.npu_inp_uncon_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_log_inp_uncon_shape_format_fp16(self, device): + format_list = [3] + shape_list = [(8, 6)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_inp_uncon_op_exec(cpu_input1) + npu_output = self.npu_inp_uncon_op_exec(npu_input1) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestLog2, globals(), except_for="cpu") +if __name__ == '__main__': + run_tests() diff --git a/test/test_npu/test_network_ops/test_log_softmax.py b/test/test_npu/test_network_ops/test_log_softmax.py old mode 100644 new mode 100755 index 12c0569864fecb0b2c4e3916d040ca1e5ced3e5b..8d5a10668f1e5e50e6e5e71bcac7562b2c592d76 --- a/test/test_npu/test_network_ops/test_log_softmax.py +++ b/test/test_npu/test_network_ops/test_log_softmax.py @@ -1,109 +1,109 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLogSoftmax(TestCase): - def cpu_op_exec(self, input1, dim): - output = torch.nn.functional.log_softmax(input1, dim) - output = output.numpy() - return output - - def npu_op_exec_new(self, input1, dim): - output = torch.nn.functional.log_softmax(input1, dim) - output = output.to("cpu") - output = output.numpy() - return output - - def logsoftmax_result(self, shape_format): - for item in shape_format: - dim = np.random.randint(0, len(item[2])) - print(item, " dim=", dim) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 10) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1, 0) - npu_output = self.npu_op_exec_new(npu_input1, 0) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logsoftmax_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [1024]] for i in format_list - ] - self.logsoftmax_result(shape_format) - - def test_logsoftmax_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [1024]] for i in format_list - ] - self.logsoftmax_result(shape_format) - - def test_logsoftmax_shape_format_fp16_2d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [256, 1000]] for i in format_list - ] - self.logsoftmax_result(shape_format) - - def test_logsoftmax_shape_format_fp32_2d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [256, 1000]] for i in format_list - ] - self.logsoftmax_result(shape_format) - - def test_logsoftmax_shape_format_fp16_3d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [32, 48, 64]] for i in format_list - ] - self.logsoftmax_result(shape_format) - - def test_logsoftmax_shape_format_fp32_3d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [32, 48, 1024]] for i in format_list - ] - self.logsoftmax_result(shape_format) - - def test_logsoftmax_shape_format_fp16_4d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [32, 24, 18, 1000]] for i in format_list - ] - self.logsoftmax_result(shape_format) - - def test_logsoftmax_shape_format_fp32_4d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [32, 24, 18, 1000]] for i in format_list - ] - self.logsoftmax_result(shape_format) - - -instantiate_device_type_tests(TestLogSoftmax, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLogSoftmax(TestCase): + def cpu_op_exec(self, input1, dim): + output = torch.nn.functional.log_softmax(input1, dim) + output = output.numpy() + return output + + def npu_op_exec_new(self, input1, dim): + output = torch.nn.functional.log_softmax(input1, dim) + output = output.to("cpu") + output = output.numpy() + return output + + def logsoftmax_result(self, shape_format): + for item in shape_format: + dim = np.random.randint(0, len(item[2])) + print(item, " dim=", dim) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 10) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1, 0) + npu_output = self.npu_op_exec_new(npu_input1, 0) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logsoftmax_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [1024]] for i in format_list + ] + self.logsoftmax_result(shape_format) + + def test_logsoftmax_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [1024]] for i in format_list + ] + self.logsoftmax_result(shape_format) + + def test_logsoftmax_shape_format_fp16_2d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [256, 1000]] for i in format_list + ] + self.logsoftmax_result(shape_format) + + def test_logsoftmax_shape_format_fp32_2d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [256, 1000]] for i in format_list + ] + self.logsoftmax_result(shape_format) + + def test_logsoftmax_shape_format_fp16_3d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [32, 48, 64]] for i in format_list + ] + self.logsoftmax_result(shape_format) + + def test_logsoftmax_shape_format_fp32_3d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [32, 48, 1024]] for i in format_list + ] + self.logsoftmax_result(shape_format) + + def test_logsoftmax_shape_format_fp16_4d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [32, 24, 18, 1000]] for i in format_list + ] + self.logsoftmax_result(shape_format) + + def test_logsoftmax_shape_format_fp32_4d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [32, 24, 18, 1000]] for i in format_list + ] + self.logsoftmax_result(shape_format) + + +instantiate_device_type_tests(TestLogSoftmax, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_log_softmax_backward.py b/test/test_npu/test_network_ops/test_log_softmax_backward.py old mode 100644 new mode 100755 index 688b7b01b8211e1f81a06445c48ca90db27e34e8..cbc0907ba3af4ebf21beb1f246c7d04974de3bab --- a/test/test_npu/test_network_ops/test_log_softmax_backward.py +++ b/test/test_npu/test_network_ops/test_log_softmax_backward.py @@ -1,109 +1,109 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestLogSoftmaxBackward(TestCase): - def cpu_op_exec(self, input1, input2, n): - output = torch._log_softmax_backward_data(input1, input2, n, input1) - output = output.numpy() - return output - - def npu_op_exec_new(self, input1, input2, n): - output = torch._log_softmax_backward_data(input1, input2, n, input1) - output = output.to("cpu") - output = output.numpy() - return output - - def logsoftmax_backward_result(self, shape_format, min_lmt, max_lmt): - for item in shape_format: - dim = np.random.randint(0, len(item[2])) - print(item," dim=", dim) - cpu_input1, npu_input1 = create_common_tensor(item, min_lmt, max_lmt) - cpu_input2, npu_input2 = create_common_tensor(item, min_lmt, max_lmt) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, dim) - npu_output = self.npu_op_exec_new(npu_input1, npu_input2, dim) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logsoftmax_backward_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [18]] for i in format_list - ] - self.logsoftmax_backward_result(shape_format, 0, 2) - - def test_logsoftmax_backward_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [18]] for i in format_list - ] - self.logsoftmax_backward_result(shape_format, 0, 50) - - def test_logsoftmax_backward_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [256, 1000]] for i in format_list - ] - self.logsoftmax_backward_result(shape_format, 0, 2) - - def test_logsoftmax_backward_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [256, 1000]] for i in format_list - ] - self.logsoftmax_backward_result(shape_format, 0, 50) - - def test_logsoftmax_backward_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [32, 48, 64]] for i in format_list - ] - self.logsoftmax_backward_result(shape_format, 0, 2) - - def test_logsoftmax_backward_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [32, 48, 64]] for i in format_list - ] - self.logsoftmax_backward_result(shape_format, 0, 50) - - def test_logsoftmax_backward_shape_format_fp16_4d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [32, 24, 18, 18]] for i in format_list - ] - self.logsoftmax_backward_result(shape_format, 0, 2) - - def test_logsoftmax_backward_shape_format_fp32_4d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [32, 24, 18, 18]] for i in format_list - ] - self.logsoftmax_backward_result(shape_format, 0, 50) - - -instantiate_device_type_tests(TestLogSoftmaxBackward, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestLogSoftmaxBackward(TestCase): + def cpu_op_exec(self, input1, input2, n): + output = torch._log_softmax_backward_data(input1, input2, n, input1) + output = output.numpy() + return output + + def npu_op_exec_new(self, input1, input2, n): + output = torch._log_softmax_backward_data(input1, input2, n, input1) + output = output.to("cpu") + output = output.numpy() + return output + + def logsoftmax_backward_result(self, shape_format, min_lmt, max_lmt): + for item in shape_format: + dim = np.random.randint(0, len(item[2])) + print(item," dim=", dim) + cpu_input1, npu_input1 = create_common_tensor(item, min_lmt, max_lmt) + cpu_input2, npu_input2 = create_common_tensor(item, min_lmt, max_lmt) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, dim) + npu_output = self.npu_op_exec_new(npu_input1, npu_input2, dim) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logsoftmax_backward_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [18]] for i in format_list + ] + self.logsoftmax_backward_result(shape_format, 0, 2) + + def test_logsoftmax_backward_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [18]] for i in format_list + ] + self.logsoftmax_backward_result(shape_format, 0, 50) + + def test_logsoftmax_backward_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [256, 1000]] for i in format_list + ] + self.logsoftmax_backward_result(shape_format, 0, 2) + + def test_logsoftmax_backward_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [256, 1000]] for i in format_list + ] + self.logsoftmax_backward_result(shape_format, 0, 50) + + def test_logsoftmax_backward_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [32, 48, 64]] for i in format_list + ] + self.logsoftmax_backward_result(shape_format, 0, 2) + + def test_logsoftmax_backward_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [32, 48, 64]] for i in format_list + ] + self.logsoftmax_backward_result(shape_format, 0, 50) + + def test_logsoftmax_backward_shape_format_fp16_4d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [32, 24, 18, 18]] for i in format_list + ] + self.logsoftmax_backward_result(shape_format, 0, 2) + + def test_logsoftmax_backward_shape_format_fp32_4d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [32, 24, 18, 18]] for i in format_list + ] + self.logsoftmax_backward_result(shape_format, 0, 50) + + +instantiate_device_type_tests(TestLogSoftmaxBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_logical_and.py b/test/test_npu/test_network_ops/test_logical_and.py index ee82bac600c23868615541195812cf6d27ef7a86..a796a129ae8e697d788412b0d8c22d4d59703d73 100644 --- a/test/test_npu/test_network_ops/test_logical_and.py +++ b/test/test_npu/test_network_ops/test_logical_and.py @@ -1,129 +1,129 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLogicalAnd(TestCase): - - def generate_single_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - #modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - - return npu_input1, npu_input2 - - def generate_three_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input3 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - #modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - npu_input3 = torch.from_numpy(input3) - - return npu_input1, npu_input2, npu_input3 - - def cpu_op_exec(self, input1, input2): - output = torch.logical_and(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.logical_and(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_out(self, input1, input2, input3): - torch.logical_and(input1, input2, out=input3) - output = input3.numpy() - return output - - def npu_op_exec_out(self, input1, input2, input3): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = input3.to("npu") - torch.logical_and(input1, input2, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_(self, input1, input2): - output = torch.Tensor.logical_and_(input1, input2) - output = output.numpy() - return output - - def npu_op_exec_(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.Tensor.logical_and_(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def logical_and_out_result(self, shape_format): - for item in shape_format: - cpu_input1 = torch.randn(item[0])<0 - cpu_input2 = torch.randn(item[0])<0 - cpu_input3 = torch.randn(item[1])<0 - cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) - npu_output_out = self.npu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) - - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_logical_and_out(self, device): - shape_format = [ - [[128, 116, 14, 14], [256, 116, 1, 1, 28]], - [[128, 3, 224, 224], [3, 3, 3]], - [[128, 116, 14, 14], [128, 116, 14, 14]], - [[256, 128, 7, 7], [128, 256, 3, 3, 28]], - [[2, 3, 3, 3], [3, 1, 3]], - [[128, 232, 7, 7], [128, 232, 7, 7]], - ] - self.logical_and_out_result(shape_format) - - def test_logical_and_bool(self, device): - npu_input1, npu_input2 = self.generate_data(0, 2, (2, 5), np.bool) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_logical_and_inplace_bool(self, device): - npu_input1, npu_input2 = self.generate_data(0, 2, (2, 5), np.bool) - cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) - npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestLogicalAnd, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLogicalAnd(TestCase): + + def generate_single_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + #modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + + return npu_input1, npu_input2 + + def generate_three_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input3 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + #modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + npu_input3 = torch.from_numpy(input3) + + return npu_input1, npu_input2, npu_input3 + + def cpu_op_exec(self, input1, input2): + output = torch.logical_and(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.logical_and(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_out(self, input1, input2, input3): + torch.logical_and(input1, input2, out=input3) + output = input3.numpy() + return output + + def npu_op_exec_out(self, input1, input2, input3): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = input3.to("npu") + torch.logical_and(input1, input2, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_(self, input1, input2): + output = torch.Tensor.logical_and_(input1, input2) + output = output.numpy() + return output + + def npu_op_exec_(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.Tensor.logical_and_(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def logical_and_out_result(self, shape_format): + for item in shape_format: + cpu_input1 = torch.randn(item[0])<0 + cpu_input2 = torch.randn(item[0])<0 + cpu_input3 = torch.randn(item[1])<0 + cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) + npu_output_out = self.npu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) + + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_logical_and_out(self, device): + shape_format = [ + [[128, 116, 14, 14], [256, 116, 1, 1, 28]], + [[128, 3, 224, 224], [3, 3, 3]], + [[128, 116, 14, 14], [128, 116, 14, 14]], + [[256, 128, 7, 7], [128, 256, 3, 3, 28]], + [[2, 3, 3, 3], [3, 1, 3]], + [[128, 232, 7, 7], [128, 232, 7, 7]], + ] + self.logical_and_out_result(shape_format) + + def test_logical_and_bool(self, device): + npu_input1, npu_input2 = self.generate_data(0, 2, (2, 5), np.bool) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_logical_and_inplace_bool(self, device): + npu_input1, npu_input2 = self.generate_data(0, 2, (2, 5), np.bool) + cpu_output = self.cpu_op_exec_(npu_input1, npu_input2).astype(np.float32) + npu_output = self.npu_op_exec_(npu_input1, npu_input2).astype(np.float32) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestLogicalAnd, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_logspace.py b/test/test_npu/test_network_ops/test_logspace.py index d3fb5afc10a18dfd9fce1efdfe0a0a55e755ed85..4dacbb08ec01af1e68963c59017188770d3438e5 100644 --- a/test/test_npu/test_network_ops/test_logspace.py +++ b/test/test_npu/test_network_ops/test_logspace.py @@ -1,92 +1,92 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestLogSpace(TestCase): - - def cpu_op_exec(self, start, end, steps, base): - output = torch.logspace(start=start, end=end, steps=steps, base=base) - output = output.numpy() - return output - - def npu_op_exec(self, start, end, steps, base): - output = torch.logspace(start=start, end=end, steps=steps, base=base, device="npu") - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, start, end, steps, base, dtype): - output = torch.randn(steps) - torch.logspace(start=start, end=end, steps=steps, base=base, dtype=dtype, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def test_logspace_common_shape_format(self, device): - shape_format = [ - [0.0, 1.0, 10, 0.2, torch.float32], - [2.0, 3.0, 10, 0.05, torch.float32], - [10.0, 10.5, 11, 0.2, torch.float32], - [10.0, 10.5, 110, 0.2, torch.float32], - [0.0, 0.1, 20, 1.2, torch.float32], - [0.5, 1.0, 50, 8.0, torch.float32], - [1.0, 2.0, 2, -0.5, torch.float32], - [0.0, 0.0, 1, 0.0, torch.float32], - [1.0, 1.0, 1, 0.0, torch.float32], - [1.0, 1.0, 0, 0.0, torch.float32], - [1.0, 2.0, 9, 0.0, torch.float32] - ] - - for item in shape_format: - cpu_output = self.cpu_op_exec(item[0], item[1], item[2], item[3]) - npu_output = self.npu_op_exec(item[0], item[1], item[2], item[3]) - self.assertRtolEqual(cpu_output, npu_output) - npu_out_output = self.npu_op_exec_out(item[0], item[1], item[2], item[3], item[4]) - self.assertRtolEqual(cpu_output, npu_out_output) - def test_logspace_float16_shape_format(self, device): - def cpu_op_exec_fp16(start, end, steps, base, dtype): - output = torch.logspace(start=start, end=end, steps=steps, base=base, dtype=torch.float32) - output = output.numpy() - output = output.astype(np.float16) - return output - - def npu_op_exec(start, end, steps, base, dtype): - output = torch.logspace( start=start, end=end, steps=steps, base=base, dtype=dtype, device="npu" ) - output = output.to("cpu") - output = output.numpy() - return output - - shape_format = [ - [-2.0, 2.0, 32, 32, torch.float16], - [0.0, 1.0, 10, 0.2, torch.float16], - [2.0, 3.0, 10, 0.05, torch.float16], - [0.0, 0.1, 20, 1.2, torch.float16], - [0.5, 1.0, 50, 8.0, torch.float16], - [1.0, 2.0, 2, -0.5, torch.float16], - [0.0, 0.0, 1, 0.0, torch.float16] - ] - - for item in shape_format: - cpu_output = cpu_op_exec_fp16(item[0], item[1], item[2], item[3], item[4]) - npu_output = npu_op_exec(item[0], item[1], item[2], item[3], item[4]) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestLogSpace, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestLogSpace(TestCase): + + def cpu_op_exec(self, start, end, steps, base): + output = torch.logspace(start=start, end=end, steps=steps, base=base) + output = output.numpy() + return output + + def npu_op_exec(self, start, end, steps, base): + output = torch.logspace(start=start, end=end, steps=steps, base=base, device="npu") + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, start, end, steps, base, dtype): + output = torch.randn(steps) + torch.logspace(start=start, end=end, steps=steps, base=base, dtype=dtype, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def test_logspace_common_shape_format(self, device): + shape_format = [ + [0.0, 1.0, 10, 0.2, torch.float32], + [2.0, 3.0, 10, 0.05, torch.float32], + [10.0, 10.5, 11, 0.2, torch.float32], + [10.0, 10.5, 110, 0.2, torch.float32], + [0.0, 0.1, 20, 1.2, torch.float32], + [0.5, 1.0, 50, 8.0, torch.float32], + [1.0, 2.0, 2, -0.5, torch.float32], + [0.0, 0.0, 1, 0.0, torch.float32], + [1.0, 1.0, 1, 0.0, torch.float32], + [1.0, 1.0, 0, 0.0, torch.float32], + [1.0, 2.0, 9, 0.0, torch.float32] + ] + + for item in shape_format: + cpu_output = self.cpu_op_exec(item[0], item[1], item[2], item[3]) + npu_output = self.npu_op_exec(item[0], item[1], item[2], item[3]) + self.assertRtolEqual(cpu_output, npu_output) + npu_out_output = self.npu_op_exec_out(item[0], item[1], item[2], item[3], item[4]) + self.assertRtolEqual(cpu_output, npu_out_output) + def test_logspace_float16_shape_format(self, device): + def cpu_op_exec_fp16(start, end, steps, base, dtype): + output = torch.logspace(start=start, end=end, steps=steps, base=base, dtype=torch.float32) + output = output.numpy() + output = output.astype(np.float16) + return output + + def npu_op_exec(start, end, steps, base, dtype): + output = torch.logspace( start=start, end=end, steps=steps, base=base, dtype=dtype, device="npu" ) + output = output.to("cpu") + output = output.numpy() + return output + + shape_format = [ + [-2.0, 2.0, 32, 32, torch.float16], + [0.0, 1.0, 10, 0.2, torch.float16], + [2.0, 3.0, 10, 0.05, torch.float16], + [0.0, 0.1, 20, 1.2, torch.float16], + [0.5, 1.0, 50, 8.0, torch.float16], + [1.0, 2.0, 2, -0.5, torch.float16], + [0.0, 0.0, 1, 0.0, torch.float16] + ] + + for item in shape_format: + cpu_output = cpu_op_exec_fp16(item[0], item[1], item[2], item[3], item[4]) + npu_output = npu_op_exec(item[0], item[1], item[2], item[3], item[4]) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestLogSpace, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_lt.py b/test/test_npu/test_network_ops/test_lt.py old mode 100644 new mode 100755 index bea84c4534abf1df3ff81c7c28a6f81feb562c79..f9bec34f489995f2a8578135e83d009df919dc51 --- a/test/test_npu/test_network_ops/test_lt.py +++ b/test/test_npu/test_network_ops/test_lt.py @@ -1,256 +1,256 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestLt(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.lt(input1, input2) - output = output.numpy().astype(np.int32) - return output - - def cpu_op_exec_out(self, input1, input2, input3): - torch.lt(input1, input2, out = input3) - output = input3.numpy().astype(np.int32) - return output - - def npu_op_exec(self, input1, input2): - output = torch.lt(input1, input2) - output = output.to("cpu") - output = output.numpy().astype(np.int32) - return output - - def cpu_op_inplace_exec(self, input1, input2): - output = input1.lt_(input2) - output = input1.numpy().astype(np.int32) - return output - - def npu_op_inplace_exec(self, input1, input2): - output = input1.lt_(input2) - output = output.to("cpu") - output = output.numpy().astype(np.int32) - return output - - def npu_op_exec_out(self, input1, input2, out): - torch.lt(input1, input2, out=out) - output = out.to("cpu") - output = output.numpy().astype(np.int32) - return output - - def cpu_op_exec_scalar(self, input, scalar): - output = torch.lt(input, scalar) - output = output.numpy().astype(np.int32) - return output - - def cpu_op_exec_scalar_out(self, input1, scalar, input2): - torch.lt(input1, scalar, out = input2) - output = input2.numpy().astype(np.int32) - return output - - def npu_op_exec_scalar(self, input, scalar): - output = torch.lt(input, scalar) - output = output.to("cpu") - output = output.numpy().astype(np.int32) - return output - - def npu_op_exec_scalar_out(self, input, scalar, out): - torch.lt(input, scalar, out=out) - output = out.to("cpu") - output = output.numpy().astype(np.int32) - return output - - def lt_tensor_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) - cpu_input3 = torch.randn(item[1][2])<0 - npu_input3 = cpu_input3.npu() - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - if cpu_input3.dtype == torch.float16: - cpu_input3 = cpu_input3.to(torch.float32) - cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_lt_tensor_out(self, device): - shape_format = [ - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.lt_tensor_out_result(shape_format) - - def lt_scalar_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2 = torch.randn(item[1][2])<0 - npu_input2 = cpu_input2.npu() - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - scalar = np.random.uniform(0, 100) - cpu_output_out = self.cpu_op_exec_scalar_out(cpu_input1, scalar, cpu_input2) - npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) - cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) - self.assertRtolEqual(cpu_output_out, npu_output_out) - - def test_lt_scalar_out(self, device): - shape_format = [ - [[np.float16, 0, [4, 4, 128, 128]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [12, 10, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [16, 3, 1111, 1212]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [16, 16, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [20, 10, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 0, [1313, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [16, 22, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.lt_scalar_out_result(shape_format) - - def lt_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output_inp = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output_inp = self.npu_op_inplace_exec(npu_input1, npu_input2) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def lt_scalar_result(self, shape_format): - for item in shape_format: - scalar = np.random.uniform(0, 100) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_scalar = self.cpu_op_exec_scalar(cpu_input1, scalar) - npu_output_scalar = self.npu_op_exec_scalar(npu_input1, scalar) - - cpu_output_scalar = cpu_output_scalar.astype(npu_output_scalar.dtype) - - self.assertRtolEqual(cpu_output_scalar, npu_output_scalar) - - def test_lt_shape_format_fp16_1d(self, device): - format_list = [-1, 0] - shape_format = [[np.float16, i, 5] for i in format_list] - self.lt_result(shape_format) - - def test_lt_shape_format_fp32_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[np.float32, i, 5] for i in format_list] - self.lt_result(shape_format) - - def test_lt_shape_format_fp16_2d(self, device): - format_list = [-1, 0] - shape_format = [[np.float16, i, [5, 3]] for i in format_list] - self.lt_result(shape_format) - - def test_lt_shape_format_fp32_2d(self, device): - format_list = [-1, 0] - shape_format = [[np.float32, i, [5, 3]] for i in format_list] - self.lt_result(shape_format) - - def test_lt_shape_format_fp16_3d(self, device): - format_list = [-1, 0] - shape_format = [[np.float16, i, [16, 640, 640]] for i in format_list] - self.lt_result(shape_format) - - def test_lt_shape_format_fp32_3d(self, device): - format_list = [-1, 0, 3] - shape_format = [[np.float32, i, [16, 640, 640]] for i in format_list] - self.lt_result(shape_format) - - def test_lt_shape_format_fp16_4d(self, device): - format_list = [-1, 3] - shape_format = [[np.float16, i, [32, 3, 3, 3]] for i in format_list] - self.lt_result(shape_format) - - def test_lt_shape_format_fp32_4d(self, device): - format_list = [-1, 3] - shape_format = [[np.float32, i, [32, 3, 3, 3]] for i in format_list] - self.lt_result(shape_format) - - # scalar----------------------------------------------------------------------- - def test_lt_scalar_shape_format_fp16_1d(self, device): - format_list = [-1, 0] - shape_format = [[np.float16, i, 18] for i in format_list] - self.lt_scalar_result(shape_format) - - def test_lt_scalar_shape_format_fp32_1d(self, device): - format_list = [-1, 0] - shape_format = [[np.float32, i, [18]] for i in format_list] - self.lt_scalar_result(shape_format) - - def test_lt_scalar_shape_format_fp16_2d(self, device): - format_list = [-1, 0] - shape_format = [[np.float16, i, [5, 8]] for i in format_list] - self.lt_scalar_result(shape_format) - - def test_lt_scalar_shape_format_fp32_2d(self, device): - format_list = [-1, 0] - shape_format = [[np.float32, i, [5, 8]] for i in format_list] - self.lt_scalar_result(shape_format) - - def test_lt_scalar_shape_format_fp16_3d(self, device): - format_list = [-1, 0] - shape_format = [[np.float16, i, [4, 16, 32]] for i in format_list] - self.lt_scalar_result(shape_format) - - def test_lt_scalar_shape_format_fp32_3d(self, device): - format_list = [-1, 0] - shape_format = [[np.float32, i, [4, 16, 32]] for i in format_list] - self.lt_scalar_result(shape_format) - - def test_lt_scalar_shape_format_fp16_4d(self, device): - format_list = [-1, 0] - shape_format = [[np.float16, i, [32, 3, 3, 3]] for i in format_list] - self.lt_scalar_result(shape_format) - - def test_lt_scalar_shape_format_fp32_4d(self, device): - format_list = [-1, 0] - shape_format = [[np.float32, i, [32, 3, 3, 3]] for i in format_list] - self.lt_scalar_result(shape_format) - - def test_lt_mix_dtype(self, device): - npu_input1, npu_input2 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) - npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - cpu_output = self.cpu_op_exec(npu_input1, npu_input3) - npu_output = self.npu_op_exec(npu_input2, npu_input4) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestLt, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestLt(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.lt(input1, input2) + output = output.numpy().astype(np.int32) + return output + + def cpu_op_exec_out(self, input1, input2, input3): + torch.lt(input1, input2, out = input3) + output = input3.numpy().astype(np.int32) + return output + + def npu_op_exec(self, input1, input2): + output = torch.lt(input1, input2) + output = output.to("cpu") + output = output.numpy().astype(np.int32) + return output + + def cpu_op_inplace_exec(self, input1, input2): + output = input1.lt_(input2) + output = input1.numpy().astype(np.int32) + return output + + def npu_op_inplace_exec(self, input1, input2): + output = input1.lt_(input2) + output = output.to("cpu") + output = output.numpy().astype(np.int32) + return output + + def npu_op_exec_out(self, input1, input2, out): + torch.lt(input1, input2, out=out) + output = out.to("cpu") + output = output.numpy().astype(np.int32) + return output + + def cpu_op_exec_scalar(self, input, scalar): + output = torch.lt(input, scalar) + output = output.numpy().astype(np.int32) + return output + + def cpu_op_exec_scalar_out(self, input1, scalar, input2): + torch.lt(input1, scalar, out = input2) + output = input2.numpy().astype(np.int32) + return output + + def npu_op_exec_scalar(self, input, scalar): + output = torch.lt(input, scalar) + output = output.to("cpu") + output = output.numpy().astype(np.int32) + return output + + def npu_op_exec_scalar_out(self, input, scalar, out): + torch.lt(input, scalar, out=out) + output = out.to("cpu") + output = output.numpy().astype(np.int32) + return output + + def lt_tensor_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) + cpu_input3 = torch.randn(item[1][2])<0 + npu_input3 = cpu_input3.npu() + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + if cpu_input3.dtype == torch.float16: + cpu_input3 = cpu_input3.to(torch.float32) + cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, cpu_input3) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_lt_tensor_out(self, device): + shape_format = [ + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.lt_tensor_out_result(shape_format) + + def lt_scalar_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2 = torch.randn(item[1][2])<0 + npu_input2 = cpu_input2.npu() + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + scalar = np.random.uniform(0, 100) + cpu_output_out = self.cpu_op_exec_scalar_out(cpu_input1, scalar, cpu_input2) + npu_output_out = self.npu_op_exec_scalar_out(npu_input1, scalar, npu_input2) + cpu_output_out = cpu_output_out.astype(npu_output_out.dtype) + self.assertRtolEqual(cpu_output_out, npu_output_out) + + def test_lt_scalar_out(self, device): + shape_format = [ + [[np.float16, 0, [4, 4, 128, 128]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [12, 10, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [16, 3, 1111, 1212]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [16, 16, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [20, 10, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 0, [1313, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [16, 22, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.lt_scalar_out_result(shape_format) + + def lt_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output_inp = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output_inp = self.npu_op_inplace_exec(npu_input1, npu_input2) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def lt_scalar_result(self, shape_format): + for item in shape_format: + scalar = np.random.uniform(0, 100) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_scalar = self.cpu_op_exec_scalar(cpu_input1, scalar) + npu_output_scalar = self.npu_op_exec_scalar(npu_input1, scalar) + + cpu_output_scalar = cpu_output_scalar.astype(npu_output_scalar.dtype) + + self.assertRtolEqual(cpu_output_scalar, npu_output_scalar) + + def test_lt_shape_format_fp16_1d(self, device): + format_list = [-1, 0] + shape_format = [[np.float16, i, 5] for i in format_list] + self.lt_result(shape_format) + + def test_lt_shape_format_fp32_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[np.float32, i, 5] for i in format_list] + self.lt_result(shape_format) + + def test_lt_shape_format_fp16_2d(self, device): + format_list = [-1, 0] + shape_format = [[np.float16, i, [5, 3]] for i in format_list] + self.lt_result(shape_format) + + def test_lt_shape_format_fp32_2d(self, device): + format_list = [-1, 0] + shape_format = [[np.float32, i, [5, 3]] for i in format_list] + self.lt_result(shape_format) + + def test_lt_shape_format_fp16_3d(self, device): + format_list = [-1, 0] + shape_format = [[np.float16, i, [16, 640, 640]] for i in format_list] + self.lt_result(shape_format) + + def test_lt_shape_format_fp32_3d(self, device): + format_list = [-1, 0, 3] + shape_format = [[np.float32, i, [16, 640, 640]] for i in format_list] + self.lt_result(shape_format) + + def test_lt_shape_format_fp16_4d(self, device): + format_list = [-1, 3] + shape_format = [[np.float16, i, [32, 3, 3, 3]] for i in format_list] + self.lt_result(shape_format) + + def test_lt_shape_format_fp32_4d(self, device): + format_list = [-1, 3] + shape_format = [[np.float32, i, [32, 3, 3, 3]] for i in format_list] + self.lt_result(shape_format) + + # scalar----------------------------------------------------------------------- + def test_lt_scalar_shape_format_fp16_1d(self, device): + format_list = [-1, 0] + shape_format = [[np.float16, i, 18] for i in format_list] + self.lt_scalar_result(shape_format) + + def test_lt_scalar_shape_format_fp32_1d(self, device): + format_list = [-1, 0] + shape_format = [[np.float32, i, [18]] for i in format_list] + self.lt_scalar_result(shape_format) + + def test_lt_scalar_shape_format_fp16_2d(self, device): + format_list = [-1, 0] + shape_format = [[np.float16, i, [5, 8]] for i in format_list] + self.lt_scalar_result(shape_format) + + def test_lt_scalar_shape_format_fp32_2d(self, device): + format_list = [-1, 0] + shape_format = [[np.float32, i, [5, 8]] for i in format_list] + self.lt_scalar_result(shape_format) + + def test_lt_scalar_shape_format_fp16_3d(self, device): + format_list = [-1, 0] + shape_format = [[np.float16, i, [4, 16, 32]] for i in format_list] + self.lt_scalar_result(shape_format) + + def test_lt_scalar_shape_format_fp32_3d(self, device): + format_list = [-1, 0] + shape_format = [[np.float32, i, [4, 16, 32]] for i in format_list] + self.lt_scalar_result(shape_format) + + def test_lt_scalar_shape_format_fp16_4d(self, device): + format_list = [-1, 0] + shape_format = [[np.float16, i, [32, 3, 3, 3]] for i in format_list] + self.lt_scalar_result(shape_format) + + def test_lt_scalar_shape_format_fp32_4d(self, device): + format_list = [-1, 0] + shape_format = [[np.float32, i, [32, 3, 3, 3]] for i in format_list] + self.lt_scalar_result(shape_format) + + def test_lt_mix_dtype(self, device): + npu_input1, npu_input2 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100) + npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + cpu_output = self.cpu_op_exec(npu_input1, npu_input3) + npu_output = self.npu_op_exec(npu_input2, npu_input4) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestLt, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_masked_scatter.py b/test/test_npu/test_network_ops/test_masked_scatter.py index c48f27cd61d30e6a8a31d2dbdd703f56cfc05ce5..fcfdc8ed014dbcaba0c6899397f3b668febe1a01 100644 --- a/test/test_npu/test_network_ops/test_masked_scatter.py +++ b/test/test_npu/test_network_ops/test_masked_scatter.py @@ -1,87 +1,87 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestMaskedScatter(TestCase): - def cpu_op_exec(self, input, maskbool, source): - cpu_output = torch.masked_scatter(input, maskbool, source) - return cpu_output.numpy() - - def npu_op_exec(self, input, maskbool, source): - input = input.to("npu") - maskbool = maskbool.to("npu") - source = source.to("npu") - npu_output = torch.masked_scatter(input, maskbool, source) - npu_output = npu_output.to("cpu") - return npu_output.numpy() - - def cpu_inp_op_exec(self, input, maskbool, source): - cpu_output = input.masked_scatter_(maskbool, source) - return cpu_output.numpy() - - def npu_inp_op_exec(self, input, maskbool, source): - maskbool = maskbool.to("npu") - npu_output = input.masked_scatter_(maskbool, source) - npu_output = npu_output.to("cpu") - return npu_output.numpy() - - def test_masked_scatter_float(self, device): - dtype_list = [np.float32] - format_list = [0, 3] - shape_list = [[4, 5],[3, 4, 5], [2, 3, 4, 5]] - shape_format = [ - [i, j, k] for i in dtype_list for j in format_list for k in shape_list - ] - mask = torch.randn(4, 1) - maskbool = mask.ge(0.5) - - for item in shape_format: - print(item) - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_source, npu_source = create_common_tensor(item, 0, 100) - cpu_output2 = self.cpu_inp_op_exec(cpu_input, maskbool, cpu_source) - npu_output2 = self.npu_inp_op_exec(npu_input, maskbool, npu_source) - self.assertRtolEqual(cpu_output2, npu_output2) - - def test_masked_scatter_int(self, device): - dtype_list = [np.int32, np.int64] - format_list = [0] - shape_list = [[4, 5],[3, 4, 5], [2, 3, 4, 5]] - shape_format = [ - [i, j, k] for i in dtype_list for j in format_list for k in shape_list - ] - mask = torch.randn(4, 1) - maskbool = mask.ge(0.5) - - for item in shape_format: - print(item) - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_source, npu_source = create_common_tensor(item, 0, 100) - cpu_output2 = self.cpu_inp_op_exec(cpu_input, maskbool, cpu_source) - npu_output2 = self.npu_inp_op_exec(npu_input, maskbool, npu_source) - self.assertRtolEqual(cpu_output2, npu_output2) - -instantiate_device_type_tests(TestMaskedScatter, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestMaskedScatter(TestCase): + def cpu_op_exec(self, input, maskbool, source): + cpu_output = torch.masked_scatter(input, maskbool, source) + return cpu_output.numpy() + + def npu_op_exec(self, input, maskbool, source): + input = input.to("npu") + maskbool = maskbool.to("npu") + source = source.to("npu") + npu_output = torch.masked_scatter(input, maskbool, source) + npu_output = npu_output.to("cpu") + return npu_output.numpy() + + def cpu_inp_op_exec(self, input, maskbool, source): + cpu_output = input.masked_scatter_(maskbool, source) + return cpu_output.numpy() + + def npu_inp_op_exec(self, input, maskbool, source): + maskbool = maskbool.to("npu") + npu_output = input.masked_scatter_(maskbool, source) + npu_output = npu_output.to("cpu") + return npu_output.numpy() + + def test_masked_scatter_float(self, device): + dtype_list = [np.float32] + format_list = [0, 3] + shape_list = [[4, 5],[3, 4, 5], [2, 3, 4, 5]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + mask = torch.randn(4, 1) + maskbool = mask.ge(0.5) + + for item in shape_format: + print(item) + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_source, npu_source = create_common_tensor(item, 0, 100) + cpu_output2 = self.cpu_inp_op_exec(cpu_input, maskbool, cpu_source) + npu_output2 = self.npu_inp_op_exec(npu_input, maskbool, npu_source) + self.assertRtolEqual(cpu_output2, npu_output2) + + def test_masked_scatter_int(self, device): + dtype_list = [np.int32, np.int64] + format_list = [0] + shape_list = [[4, 5],[3, 4, 5], [2, 3, 4, 5]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + mask = torch.randn(4, 1) + maskbool = mask.ge(0.5) + + for item in shape_format: + print(item) + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_source, npu_source = create_common_tensor(item, 0, 100) + cpu_output2 = self.cpu_inp_op_exec(cpu_input, maskbool, cpu_source) + npu_output2 = self.npu_inp_op_exec(npu_input, maskbool, npu_source) + self.assertRtolEqual(cpu_output2, npu_output2) + +instantiate_device_type_tests(TestMaskedScatter, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_matmul.py b/test/test_npu/test_network_ops/test_matmul.py old mode 100644 new mode 100755 index 74755b14579a3a3332f6a82718a0ffc7489c29a7..7cfb770ea2080e86cace00aa54ff6e6bcf34372d --- a/test/test_npu/test_network_ops/test_matmul.py +++ b/test/test_npu/test_network_ops/test_matmul.py @@ -1,166 +1,166 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import numpy as np -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestMatMul(TestCase): - def op_exec_cpu(self, mat1, mat2): - input1 = mat1 - input2 = mat2 - input1.requires_grad = True - input2.requires_grad = True - - cpu_output = torch.matmul(input1, input2) - tmp = torch.ones_like(cpu_output) - cpu_output.backward(tmp) - - return cpu_output.detach().numpy(), input1.grad.numpy(), input2.grad.numpy() - - def op_exec_npu(self, mat1, mat2): - input1 = mat1 - input2 = mat2 - input1.requires_grad = True - input2.requires_grad = True - - npu_output = torch.matmul(input1, input2) - tmp = torch.ones_like(npu_output) - npu_output.backward(tmp) - npu_output = npu_output.cpu() - return npu_output.detach().cpu().numpy(), input1.grad.cpu().numpy(), input2.grad.cpu().numpy() - - def matmul_backward_result(self, shape_format): - for item in shape_format: - mat1_cpu, mat1_npu = create_common_tensor(item[0], -10, 10) - if mat1_cpu.dtype == torch.float16: - mat1_cpu = mat1_cpu.to(torch.float32) - mat2_cpu, mat2_npu = create_common_tensor(item[1], -10, 10) - if mat2_cpu.dtype == torch.float16: - mat2_cpu = mat2_cpu.to(torch.float32) - cpu_output, cpu_mat1_grad, cpu_mat2_grad = self.op_exec_cpu(mat1_cpu, mat2_cpu) - npu_output, npu_mat1_grad, npu_mat2_grad = self.op_exec_npu(mat1_npu, mat2_npu) - - - self.assertRtolEqual(cpu_output.astype(npu_output.dtype), npu_output) - self.assertRtolEqual(cpu_mat1_grad.astype(npu_mat1_grad.dtype), npu_mat1_grad) - self.assertRtolEqual(cpu_mat2_grad.astype(npu_mat2_grad.dtype), npu_mat2_grad) - - def test_matmul_backward_shape_format_fp16_case1(self, device): - shape_format = [ - # mat1 1dim, mat2 1dim - [[np.float16, 2, [5]], [np.float16, 2, [5]]], - [[np.float16, 2, [2560]], [np.float16, 2, [2560]]], - ] - self.matmul_backward_result(shape_format) - - # 暂不支持 - # def test_matmul_backward_shape_format_fp16_case2(self, device): - # shape_format = [ # mat1 2dim, mat2 1dim - # [[np.float16, 2, [3,5]], [np.float16, 2, [5]]], - # [[np.float16, 2, [2560,4680]], [np.float16, 2, [4680]]], - # [[np.float16, 2, [100,200]], [np.float16, 2, [200]]], - # [[np.float16, 2, [4,4]], [np.float16, 2, [4]]], - - # ] - # self.matmul_backward_result(shape_format) - - def test_matmul_backward_shape_format_fp16_case3(self, device): - shape_format = [ - # mat1 1dim, mat2 2dim - [[np.float16, 2, [5]], [np.float16, 2, [5,6]]], - [[np.float16, 2, [2560]], [np.float16, 2, [2560,4680]]], - [[np.float16, 2, [5]], [np.float16, 2, [5,5]]], - - ] - self.matmul_backward_result(shape_format) - - def test_matmul_backward_shape_format_fp16_case4(self, device): - shape_format = [ - # mat1 1dim, mat2 2dim - [[np.float16, 2, [5,7]], [np.float16, 2, [7,10]]], - [[np.float16, 2, [3750,2560]], [np.float16, 2, [2560,4680]]], - [[np.float16, 2, [5,10]], [np.float16, 2, [10,20]]], - ] - self.matmul_backward_result(shape_format) - - def test_matmul_backward_shape_format_fp16_case5(self, device): - shape_format = [ - # mat1 1dim, mat2 2dim - [[np.float16, 2, [5,7,10]], [np.float16, 2, [10]]], - [[np.float16, 2, [168,3750,256]], [np.float16, 2, [256]]], - [[np.float16, 2, [4,5,10]], [np.float16, 2, [10]]], - #[[np.float16, 2, [5,10,20,30]], [np.float16, 2, [30]]], # 该shape无法通过 - #[[np.float16, 2, [20,30,40,50,60]], [np.float16, 2, [60]]], batch 三维 精度不行 - [[np.float16, 2, [3,4,5,6,7,16]], [np.float16, 2, [16]]], - ] - self.matmul_backward_result(shape_format) - - def test_matmul_backward_shape_format_fp16_case6(self, device): - shape_format = [ - # mat1 >2dim, mat2 2dim - [[np.float16, 2, [5,7,10]], [np.float16, 2, [10,16]]], - #[[np.float16, 2, [5,10,20,30]], [np.float16, 2, [30,25]]], # 该shape无法过 - [[np.float16, 2, [2,5,7,8,19,80]], [np.float16, 2, [80,32]]], - ] - self.matmul_backward_result(shape_format) - - def test_matmul_backward_shape_format_fp16_case7(self, device): - shape_format = [ - # mat1 1dim, mat2 >2dim - [[np.float16, 2, [7]], [np.float16, 2, [5,7,10]]], - [[np.float16, 2, [5,]], [np.float16, 2, [4,5,10]]], - # [[np.float16, 2, [20]], [np.float16, 2, [5,10,20,30]]], # 该shape无法过 - [[np.float16, 2, [7]], [np.float16, 2, [3,4,5,6,7,16]]], - ] - self.matmul_backward_result(shape_format) - - def test_matmul_backward_shape_format_fp16_case8(self, device): - shape_format = [ - # mat1 2dim, mat2 >2dim - [[np.float16, 2, [5,7]], [np.float16, 2, [5,7,10]]], - [[np.float16, 2, [12,5]], [np.float16, 2, [4,5,10]]], - # [[np.float16, 2, [44,20]], [np.float16, 2, [5,10,20,30]]], # 该shape无法过 - # [[np.float16, 2, [75,50]], [np.float16, 2, [2,3,40,50,60]]], # 该shape无法过 - [[np.float16, 2, [188,7]], [np.float16, 2, [3,4,5,6,7,16]]], - ] - self.matmul_backward_result(shape_format) - - def test_matmul_backward_shape_format_fp16_case9(self, device): - shape_format = [ - [[np.float16, 2, [5,7,10]], [np.float16, 2, [5,10,15]]], - [[np.float16, 2, [168,3750,256]], [np.float16, 2, [168,256,43]]], - # TODO(ascend): Insufficient precision - # 在两个输入shape不一致的情况下,会通过expand将两个tensor shape对齐。反向时expand的反向会调用sum(dim),在fp16下与CPU比较不过。 - # 但是结果与CUDA比对通过。所以只放开两个tensor batch部分一致的用例 - # [[np.float16, 2, [1,6,7,65]], [np.float16, 2, [5,6,65,17]]],#该shape无法过 - # [[np.float16, 2, [4,5,10,15]], [np.float16, 2, [5,15,20]]], - # [[np.float16, 2, [5,10,20,30]], [np.float16, 2, [1,30,40]]], - # [[np.float16, 2, [20,30,40,50,60]], [np.float16, 2, [40,60,6]]], - # [[np.float16, 2, [6,7,16]], [np.float16, 2, [4,5,6,16,17]]], - # [[np.float16, 2, [5,6,7,33]], [np.float16, 2, [12,23,5,6,33,17]]], - # [[np.float16, 2, [3,4,6,7,44]], [np.float16, 2, [2,3,4,6,44,17]]], - # [[np.float16, 2, [42,2,3,41]], [np.float16, 2, [1,2,42,2,41,17]]], - ] - self.matmul_backward_result(shape_format) - - -instantiate_device_type_tests(TestMatMul, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import numpy as np +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestMatMul(TestCase): + def op_exec_cpu(self, mat1, mat2): + input1 = mat1 + input2 = mat2 + input1.requires_grad = True + input2.requires_grad = True + + cpu_output = torch.matmul(input1, input2) + tmp = torch.ones_like(cpu_output) + cpu_output.backward(tmp) + + return cpu_output.detach().numpy(), input1.grad.numpy(), input2.grad.numpy() + + def op_exec_npu(self, mat1, mat2): + input1 = mat1 + input2 = mat2 + input1.requires_grad = True + input2.requires_grad = True + + npu_output = torch.matmul(input1, input2) + tmp = torch.ones_like(npu_output) + npu_output.backward(tmp) + npu_output = npu_output.cpu() + return npu_output.detach().cpu().numpy(), input1.grad.cpu().numpy(), input2.grad.cpu().numpy() + + def matmul_backward_result(self, shape_format): + for item in shape_format: + mat1_cpu, mat1_npu = create_common_tensor(item[0], -10, 10) + if mat1_cpu.dtype == torch.float16: + mat1_cpu = mat1_cpu.to(torch.float32) + mat2_cpu, mat2_npu = create_common_tensor(item[1], -10, 10) + if mat2_cpu.dtype == torch.float16: + mat2_cpu = mat2_cpu.to(torch.float32) + cpu_output, cpu_mat1_grad, cpu_mat2_grad = self.op_exec_cpu(mat1_cpu, mat2_cpu) + npu_output, npu_mat1_grad, npu_mat2_grad = self.op_exec_npu(mat1_npu, mat2_npu) + + + self.assertRtolEqual(cpu_output.astype(npu_output.dtype), npu_output) + self.assertRtolEqual(cpu_mat1_grad.astype(npu_mat1_grad.dtype), npu_mat1_grad) + self.assertRtolEqual(cpu_mat2_grad.astype(npu_mat2_grad.dtype), npu_mat2_grad) + + def test_matmul_backward_shape_format_fp16_case1(self, device): + shape_format = [ + # mat1 1dim, mat2 1dim + [[np.float16, 2, [5]], [np.float16, 2, [5]]], + [[np.float16, 2, [2560]], [np.float16, 2, [2560]]], + ] + self.matmul_backward_result(shape_format) + + # 暂不支持 + # def test_matmul_backward_shape_format_fp16_case2(self, device): + # shape_format = [ # mat1 2dim, mat2 1dim + # [[np.float16, 2, [3,5]], [np.float16, 2, [5]]], + # [[np.float16, 2, [2560,4680]], [np.float16, 2, [4680]]], + # [[np.float16, 2, [100,200]], [np.float16, 2, [200]]], + # [[np.float16, 2, [4,4]], [np.float16, 2, [4]]], + + # ] + # self.matmul_backward_result(shape_format) + + def test_matmul_backward_shape_format_fp16_case3(self, device): + shape_format = [ + # mat1 1dim, mat2 2dim + [[np.float16, 2, [5]], [np.float16, 2, [5,6]]], + [[np.float16, 2, [2560]], [np.float16, 2, [2560,4680]]], + [[np.float16, 2, [5]], [np.float16, 2, [5,5]]], + + ] + self.matmul_backward_result(shape_format) + + def test_matmul_backward_shape_format_fp16_case4(self, device): + shape_format = [ + # mat1 1dim, mat2 2dim + [[np.float16, 2, [5,7]], [np.float16, 2, [7,10]]], + [[np.float16, 2, [3750,2560]], [np.float16, 2, [2560,4680]]], + [[np.float16, 2, [5,10]], [np.float16, 2, [10,20]]], + ] + self.matmul_backward_result(shape_format) + + def test_matmul_backward_shape_format_fp16_case5(self, device): + shape_format = [ + # mat1 1dim, mat2 2dim + [[np.float16, 2, [5,7,10]], [np.float16, 2, [10]]], + [[np.float16, 2, [168,3750,256]], [np.float16, 2, [256]]], + [[np.float16, 2, [4,5,10]], [np.float16, 2, [10]]], + #[[np.float16, 2, [5,10,20,30]], [np.float16, 2, [30]]], # 该shape无法通过 + #[[np.float16, 2, [20,30,40,50,60]], [np.float16, 2, [60]]], batch 三维 精度不行 + [[np.float16, 2, [3,4,5,6,7,16]], [np.float16, 2, [16]]], + ] + self.matmul_backward_result(shape_format) + + def test_matmul_backward_shape_format_fp16_case6(self, device): + shape_format = [ + # mat1 >2dim, mat2 2dim + [[np.float16, 2, [5,7,10]], [np.float16, 2, [10,16]]], + #[[np.float16, 2, [5,10,20,30]], [np.float16, 2, [30,25]]], # 该shape无法过 + [[np.float16, 2, [2,5,7,8,19,80]], [np.float16, 2, [80,32]]], + ] + self.matmul_backward_result(shape_format) + + def test_matmul_backward_shape_format_fp16_case7(self, device): + shape_format = [ + # mat1 1dim, mat2 >2dim + [[np.float16, 2, [7]], [np.float16, 2, [5,7,10]]], + [[np.float16, 2, [5,]], [np.float16, 2, [4,5,10]]], + # [[np.float16, 2, [20]], [np.float16, 2, [5,10,20,30]]], # 该shape无法过 + [[np.float16, 2, [7]], [np.float16, 2, [3,4,5,6,7,16]]], + ] + self.matmul_backward_result(shape_format) + + def test_matmul_backward_shape_format_fp16_case8(self, device): + shape_format = [ + # mat1 2dim, mat2 >2dim + [[np.float16, 2, [5,7]], [np.float16, 2, [5,7,10]]], + [[np.float16, 2, [12,5]], [np.float16, 2, [4,5,10]]], + # [[np.float16, 2, [44,20]], [np.float16, 2, [5,10,20,30]]], # 该shape无法过 + # [[np.float16, 2, [75,50]], [np.float16, 2, [2,3,40,50,60]]], # 该shape无法过 + [[np.float16, 2, [188,7]], [np.float16, 2, [3,4,5,6,7,16]]], + ] + self.matmul_backward_result(shape_format) + + def test_matmul_backward_shape_format_fp16_case9(self, device): + shape_format = [ + [[np.float16, 2, [5,7,10]], [np.float16, 2, [5,10,15]]], + [[np.float16, 2, [168,3750,256]], [np.float16, 2, [168,256,43]]], + # TODO(ascend): Insufficient precision + # 在两个输入shape不一致的情况下,会通过expand将两个tensor shape对齐。反向时expand的反向会调用sum(dim),在fp16下与CPU比较不过。 + # 但是结果与CUDA比对通过。所以只放开两个tensor batch部分一致的用例 + # [[np.float16, 2, [1,6,7,65]], [np.float16, 2, [5,6,65,17]]],#该shape无法过 + # [[np.float16, 2, [4,5,10,15]], [np.float16, 2, [5,15,20]]], + # [[np.float16, 2, [5,10,20,30]], [np.float16, 2, [1,30,40]]], + # [[np.float16, 2, [20,30,40,50,60]], [np.float16, 2, [40,60,6]]], + # [[np.float16, 2, [6,7,16]], [np.float16, 2, [4,5,6,16,17]]], + # [[np.float16, 2, [5,6,7,33]], [np.float16, 2, [12,23,5,6,33,17]]], + # [[np.float16, 2, [3,4,6,7,44]], [np.float16, 2, [2,3,4,6,44,17]]], + # [[np.float16, 2, [42,2,3,41]], [np.float16, 2, [1,2,42,2,41,17]]], + ] + self.matmul_backward_result(shape_format) + + +instantiate_device_type_tests(TestMatMul, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_max.py b/test/test_npu/test_network_ops/test_max.py old mode 100644 new mode 100755 index af0171f837e2ad468d95d81bfcfcb213f1b5c292..64f19c4fb090a4cb8071c04a4ff6d0b361a1a4ed --- a/test/test_npu/test_network_ops/test_max.py +++ b/test/test_npu/test_network_ops/test_max.py @@ -1,570 +1,570 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestMax(TestCase): - def cpu_op_exec(self, input1): - ''' - 调用算子 torch.max(input) → Tensor - ''' - output = torch.max(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - ''' - 调用适配算子函数 Tensor max_npu(const Tensor& self) - ''' - output = torch.max(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_other_exec(self, input1, input2): - ''' - 调用算子 torch.max(input, other, out=None) → Tensor - ''' - output = torch.max(input1, input2) - output = output.numpy() - return output - - def npu_op_other_exec(self, input1, input2): - ''' - 适配算子函数 Tensor max_npu(const Tensor& self, const Tensor& other) - ''' - output = torch.max(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, out): - torch.max(input1, input2, out=out) - output = out.to("cpu") - output = output.numpy() - return output - - def cpu_op_dim_exec(self, input1, dim, keepdim): - ''' - 调用算子 torch.max(input, dim, keepdim=False, out=None) -> (Tensor, LongTensor) - ''' - output1, output2 = torch.max(input1, dim, keepdim) - output1 = output1.numpy() - # 这里需要将索引从64位转32位 便于拿去与npu的对比 - output2 = output2.int().numpy() - return output1, output2 - - def npu_op_dim_exec(self, input1, dim, keepdim): - ''' - 适配算子函数 tuple max_npu(const Tensor& self, int64_t dim, bool keepdim) - ''' - output1, output2 = torch.max(input1, dim, keepdim) - output1 = output1.to("cpu") - output2 = output2.to("cpu") - output1 = output1.numpy() - output2 = output2.numpy() - return output1, output2 - - def cpu_max_values_exec(self, input): - output = input.max() - output = output.numpy() - return output - - def npu_max_values_exec(self, input): - output = input.max() - output = output.to("cpu") - output = output.numpy() - return output - - def _cpu_op_dim_exec(self, input1, dim, keepdim): - output1, output2 = torch._max(input1, dim, keepdim) - output1 = output1.numpy() - output2 = output2.numpy() - return output1, output2 - - def _npu_op_dim_exec(self, input1, dim, keepdim): - output1, output2 = torch._max(input1, dim, keepdim) - output1 = output1.to("cpu") - output2 = output2.to("cpu") - output1 = output1.numpy() - output2 = output2.numpy() - return output1, output2 - - def cpu_op_dim_exec_out(self, input1, dim, keepdim): - out = torch.tensor(0).to(input1.dtype) - indices = torch.tensor(0).to(torch.long) - torch.max(input1, dim=dim, keepdim=keepdim, out=(out,indices)) - out = out.numpy() - indices = indices.numpy() - return out,indices - - def npu_op_dim_exec_out(self, input1, dim, keepdim): - out = torch.tensor(0).to(input1.dtype).npu() - indices = torch.tensor(0).to(torch.long).npu() - torch.max(input1, dim=dim, keepdim=keepdim, out=(out,indices)) - out = out.to("cpu").numpy() - indices = indices.to("cpu").numpy() - return out,indices - - def max_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def max_result_dim(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) - npu_output_dim, npu_output_indices = self.npu_op_dim_exec(npu_input1, item[1], item[2]) - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - def _max_result_dim(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_dim, cpu_output_indices = self._cpu_op_dim_exec(cpu_input1, item[1], item[2]) - npu_output_dim, npu_output_indices = self._npu_op_dim_exec(npu_input1, item[1], item[2]) - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - def max_result_other(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 10) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output_other = self.cpu_op_other_exec(cpu_input1, cpu_input2) - npu_output_other = self.npu_op_other_exec(npu_input1, npu_input2) - - cpu_output_other = cpu_output_other.astype(npu_output_other.dtype) - self.assertRtolEqual(cpu_output_other, npu_output_other) - - def max_out_result_other(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) - cpu_input3, npu_input3 = create_common_tensor(item[0], -100, 100) - cpu_input4, npu_input4 = create_common_tensor(item[1], -100, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_other_exec(cpu_input1, cpu_input2) - npu_output_out1 = self.npu_op_other_exec(npu_input1, npu_input2) - npu_output_out2 = self.npu_op_exec_out(npu_input1, npu_input2, npu_input4) - cpu_output = cpu_output.astype(npu_output_out1.dtype) - - self.assertRtolEqual(cpu_output, npu_output_out1) - self.assertRtolEqual(cpu_output, npu_output_out2) - - cpu_out_dim, cpu_out_indices = self.cpu_op_dim_exec_out(cpu_input1, dim=0, keepdim=True) - npu_out_dim, npu_out_indices = self.npu_op_dim_exec_out(npu_input1, dim=0, keepdim=True) - npu_output_dim, npu_output_indices = self.npu_op_dim_exec(npu_input1, dim=0, keepdim=True) - cpu_out_dim = cpu_out_dim.astype(npu_out_dim.dtype) - if npu_out_dim.dtype != np.float16: - self.assertRtolEqual(npu_out_dim, cpu_out_dim) - self.assertRtolEqual(npu_out_indices, cpu_out_indices) - else: - self.assertRtolEqual(npu_out_dim, npu_output_dim) - self.assertRtolEqual(npu_out_indices, npu_output_indices) - - # Npu and cpu have different logic to find the maximum value index. - # The existence of two maximum values will cause the second output to be different. - def max_name_result_other(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input1.names = item[0][3] - npu_input1.names = item[0][3] - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) - npu_output_dim, npu_output_indices = self.npu_op_dim_exec(cpu_input1, item[1], item[2]) - - if npu_output_dim.dtype != np.float16: - self.assertRtolEqual(npu_output_dim, cpu_output_dim) - #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) - else: - self.assertRtolEqual( npu_output_dim, cpu_output_dim.astype(np.float16)) - #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) - - # Npu and cpu have different logic to find the maximum value index. - # The existence of two maximum values will cause the second output to be different. - def max_name_out_result_other(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input1.names = item[0][3] - npu_input1.names = item[0][3] - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec_out(cpu_input1, item[1], item[2]) - npu_output_dim, npu_output_indices = self.npu_op_dim_exec_out(npu_input1, item[1], item[2]) - - if npu_output_dim.dtype != np.float16: - self.assertRtolEqual(npu_output_dim, cpu_output_dim) - #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) - else: - self.assertRtolEqual( npu_output_dim, cpu_output_dim.astype(np.float16)) - #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) - - def max_values_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_max_values_exec(cpu_input1) - npu_output = self.npu_max_values_exec(npu_input1) - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - - def test_max_out_result(self, device): - shape_format = [ - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [128, 58, 28, 28]], [np.float16, 0, [58, 58, 1, 1]]], - [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [3, 3, 7, 7]]], - [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.max_out_result_other(shape_format) - - def test_max_shape_format_fp16_1d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list - ] - self.max_result(shape_format) - - def test_max_shape_format_fp32_1d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.max_result(shape_format) - - def test_max_shape_format_fp16_2d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.max_result(shape_format) - - def test_max_shape_format_fp32_2d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.max_result(shape_format) - - def test_max_shape_format_fp16_3d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.max_result(shape_format) - - def test_max_shape_format_fp32_3d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.max_result(shape_format) - - def test_max_shape_format_fp16_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self.max_result(shape_format) - - def test_max_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in - keepdim_list - ] - self.max_result(shape_format) - - # ---------------------------------------dim - def test_max_dim_shape_format_fp16_1d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list - ] - self.max_result_dim(shape_format) - - def test_max_dim_shape_format_fp32_1d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.max_result_dim(shape_format) - - def test_max_dim_shape_format_fp16_2d(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.max_result_dim(shape_format) - - def test_max_dim_shape_format_fp32_2d(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.max_result_dim(shape_format) - - def test_max_dim_shape_format_fp16_3d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.max_result_dim(shape_format) - - def test_max_dim_shape_format_fp32_3d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.max_result_dim(shape_format) - - def test_max_dim_shape_format_fp16_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self.max_result_dim(shape_format) - - def test_max_dim_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self.max_result_dim(shape_format) - - def test_max_dim_shape_format_fp16_1d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list - ] - self._max_result_dim(shape_format) - - def test_max_dim_shape_format_fp32_1d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self._max_result_dim(shape_format) - - #One-dimensional NZ to ND result is wrong, CCB has given a conclusion - def test_max_dim_shape_format_fp16_2d_(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self._max_result_dim(shape_format) - - #One-dimensional NZ to ND result is wrong, CCB has given a conclusion - def test_max_dim_shape_format_fp32_2d_(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self._max_result_dim(shape_format) - - def test_max_dim_shape_format_fp16_3d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self._max_result_dim(shape_format) - - def test_max_dim_shape_format_fp32_3d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self._max_result_dim(shape_format) - - def test_max_dim_shape_format_fp16_4d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self._max_result_dim(shape_format) - - def test_max_dim_shape_format_fp32_4d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self._max_result_dim(shape_format) - - # -----------------------------other - - def test_max_other_shape_format_fp16_1d(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list - ] - self.max_result_other(shape_format) - - def test_max_other_shape_format_fp32_1d(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.max_result_other(shape_format) - - def test_max_other_shape_format_fp16_2d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.max_result_other(shape_format) - - def test_max_other_shape_format_fp32_2d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.max_result_other(shape_format) - - def test_max_other_shape_format_fp16_3d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.max_result_other(shape_format) - - def test_max_other_shape_format_fp32_3d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.max_result_other(shape_format) - - def test_max_other_shape_format_fp16_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self.max_result_other(shape_format) - - def test_max_other_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in - keepdim_list - ] - self.max_result_other(shape_format) - - def test_max_dimname_shape_format(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], - np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j - in - keepdim_list - ] - self.max_name_result_other(shape_format) - - def test_max_dimname_shape_format_fp16(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], - np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j - in - keepdim_list - ] - self.max_name_result_other(shape_format) - - def test_max_dimname_out_shape_format(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], - np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j - in - keepdim_list - ] - self.max_name_out_result_other(shape_format) - - def test_max_dimname_out_shape_format_fp16(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], - np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j - in - keepdim_list - ] - self.max_name_out_result_other(shape_format) - - def test_max_values_shape_format(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.max_values_result(shape_format) - -instantiate_device_type_tests(TestMax, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestMax(TestCase): + def cpu_op_exec(self, input1): + ''' + 调用算子 torch.max(input) → Tensor + ''' + output = torch.max(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + ''' + 调用适配算子函数 Tensor max_npu(const Tensor& self) + ''' + output = torch.max(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_other_exec(self, input1, input2): + ''' + 调用算子 torch.max(input, other, out=None) → Tensor + ''' + output = torch.max(input1, input2) + output = output.numpy() + return output + + def npu_op_other_exec(self, input1, input2): + ''' + 适配算子函数 Tensor max_npu(const Tensor& self, const Tensor& other) + ''' + output = torch.max(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, out): + torch.max(input1, input2, out=out) + output = out.to("cpu") + output = output.numpy() + return output + + def cpu_op_dim_exec(self, input1, dim, keepdim): + ''' + 调用算子 torch.max(input, dim, keepdim=False, out=None) -> (Tensor, LongTensor) + ''' + output1, output2 = torch.max(input1, dim, keepdim) + output1 = output1.numpy() + # 这里需要将索引从64位转32位 便于拿去与npu的对比 + output2 = output2.int().numpy() + return output1, output2 + + def npu_op_dim_exec(self, input1, dim, keepdim): + ''' + 适配算子函数 tuple max_npu(const Tensor& self, int64_t dim, bool keepdim) + ''' + output1, output2 = torch.max(input1, dim, keepdim) + output1 = output1.to("cpu") + output2 = output2.to("cpu") + output1 = output1.numpy() + output2 = output2.numpy() + return output1, output2 + + def cpu_max_values_exec(self, input): + output = input.max() + output = output.numpy() + return output + + def npu_max_values_exec(self, input): + output = input.max() + output = output.to("cpu") + output = output.numpy() + return output + + def _cpu_op_dim_exec(self, input1, dim, keepdim): + output1, output2 = torch._max(input1, dim, keepdim) + output1 = output1.numpy() + output2 = output2.numpy() + return output1, output2 + + def _npu_op_dim_exec(self, input1, dim, keepdim): + output1, output2 = torch._max(input1, dim, keepdim) + output1 = output1.to("cpu") + output2 = output2.to("cpu") + output1 = output1.numpy() + output2 = output2.numpy() + return output1, output2 + + def cpu_op_dim_exec_out(self, input1, dim, keepdim): + out = torch.tensor(0).to(input1.dtype) + indices = torch.tensor(0).to(torch.long) + torch.max(input1, dim=dim, keepdim=keepdim, out=(out,indices)) + out = out.numpy() + indices = indices.numpy() + return out,indices + + def npu_op_dim_exec_out(self, input1, dim, keepdim): + out = torch.tensor(0).to(input1.dtype).npu() + indices = torch.tensor(0).to(torch.long).npu() + torch.max(input1, dim=dim, keepdim=keepdim, out=(out,indices)) + out = out.to("cpu").numpy() + indices = indices.to("cpu").numpy() + return out,indices + + def max_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def max_result_dim(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) + npu_output_dim, npu_output_indices = self.npu_op_dim_exec(npu_input1, item[1], item[2]) + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + def _max_result_dim(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_dim, cpu_output_indices = self._cpu_op_dim_exec(cpu_input1, item[1], item[2]) + npu_output_dim, npu_output_indices = self._npu_op_dim_exec(npu_input1, item[1], item[2]) + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + def max_result_other(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 10) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output_other = self.cpu_op_other_exec(cpu_input1, cpu_input2) + npu_output_other = self.npu_op_other_exec(npu_input1, npu_input2) + + cpu_output_other = cpu_output_other.astype(npu_output_other.dtype) + self.assertRtolEqual(cpu_output_other, npu_output_other) + + def max_out_result_other(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) + cpu_input3, npu_input3 = create_common_tensor(item[0], -100, 100) + cpu_input4, npu_input4 = create_common_tensor(item[1], -100, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_other_exec(cpu_input1, cpu_input2) + npu_output_out1 = self.npu_op_other_exec(npu_input1, npu_input2) + npu_output_out2 = self.npu_op_exec_out(npu_input1, npu_input2, npu_input4) + cpu_output = cpu_output.astype(npu_output_out1.dtype) + + self.assertRtolEqual(cpu_output, npu_output_out1) + self.assertRtolEqual(cpu_output, npu_output_out2) + + cpu_out_dim, cpu_out_indices = self.cpu_op_dim_exec_out(cpu_input1, dim=0, keepdim=True) + npu_out_dim, npu_out_indices = self.npu_op_dim_exec_out(npu_input1, dim=0, keepdim=True) + npu_output_dim, npu_output_indices = self.npu_op_dim_exec(npu_input1, dim=0, keepdim=True) + cpu_out_dim = cpu_out_dim.astype(npu_out_dim.dtype) + if npu_out_dim.dtype != np.float16: + self.assertRtolEqual(npu_out_dim, cpu_out_dim) + self.assertRtolEqual(npu_out_indices, cpu_out_indices) + else: + self.assertRtolEqual(npu_out_dim, npu_output_dim) + self.assertRtolEqual(npu_out_indices, npu_output_indices) + + # Npu and cpu have different logic to find the maximum value index. + # The existence of two maximum values will cause the second output to be different. + def max_name_result_other(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input1.names = item[0][3] + npu_input1.names = item[0][3] + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) + npu_output_dim, npu_output_indices = self.npu_op_dim_exec(cpu_input1, item[1], item[2]) + + if npu_output_dim.dtype != np.float16: + self.assertRtolEqual(npu_output_dim, cpu_output_dim) + #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) + else: + self.assertRtolEqual( npu_output_dim, cpu_output_dim.astype(np.float16)) + #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) + + # Npu and cpu have different logic to find the maximum value index. + # The existence of two maximum values will cause the second output to be different. + def max_name_out_result_other(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input1.names = item[0][3] + npu_input1.names = item[0][3] + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec_out(cpu_input1, item[1], item[2]) + npu_output_dim, npu_output_indices = self.npu_op_dim_exec_out(npu_input1, item[1], item[2]) + + if npu_output_dim.dtype != np.float16: + self.assertRtolEqual(npu_output_dim, cpu_output_dim) + #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) + else: + self.assertRtolEqual( npu_output_dim, cpu_output_dim.astype(np.float16)) + #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) + + def max_values_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_max_values_exec(cpu_input1) + npu_output = self.npu_max_values_exec(npu_input1) + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + + def test_max_out_result(self, device): + shape_format = [ + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [128, 58, 28, 28]], [np.float16, 0, [58, 58, 1, 1]]], + [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [3, 3, 7, 7]]], + [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.max_out_result_other(shape_format) + + def test_max_shape_format_fp16_1d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list + ] + self.max_result(shape_format) + + def test_max_shape_format_fp32_1d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.max_result(shape_format) + + def test_max_shape_format_fp16_2d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.max_result(shape_format) + + def test_max_shape_format_fp32_2d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.max_result(shape_format) + + def test_max_shape_format_fp16_3d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.max_result(shape_format) + + def test_max_shape_format_fp32_3d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.max_result(shape_format) + + def test_max_shape_format_fp16_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self.max_result(shape_format) + + def test_max_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in + keepdim_list + ] + self.max_result(shape_format) + + # ---------------------------------------dim + def test_max_dim_shape_format_fp16_1d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list + ] + self.max_result_dim(shape_format) + + def test_max_dim_shape_format_fp32_1d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.max_result_dim(shape_format) + + def test_max_dim_shape_format_fp16_2d(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.max_result_dim(shape_format) + + def test_max_dim_shape_format_fp32_2d(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.max_result_dim(shape_format) + + def test_max_dim_shape_format_fp16_3d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.max_result_dim(shape_format) + + def test_max_dim_shape_format_fp32_3d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.max_result_dim(shape_format) + + def test_max_dim_shape_format_fp16_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self.max_result_dim(shape_format) + + def test_max_dim_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self.max_result_dim(shape_format) + + def test_max_dim_shape_format_fp16_1d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list + ] + self._max_result_dim(shape_format) + + def test_max_dim_shape_format_fp32_1d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self._max_result_dim(shape_format) + + #One-dimensional NZ to ND result is wrong, CCB has given a conclusion + def test_max_dim_shape_format_fp16_2d_(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self._max_result_dim(shape_format) + + #One-dimensional NZ to ND result is wrong, CCB has given a conclusion + def test_max_dim_shape_format_fp32_2d_(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self._max_result_dim(shape_format) + + def test_max_dim_shape_format_fp16_3d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self._max_result_dim(shape_format) + + def test_max_dim_shape_format_fp32_3d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self._max_result_dim(shape_format) + + def test_max_dim_shape_format_fp16_4d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self._max_result_dim(shape_format) + + def test_max_dim_shape_format_fp32_4d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self._max_result_dim(shape_format) + + # -----------------------------other + + def test_max_other_shape_format_fp16_1d(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list + ] + self.max_result_other(shape_format) + + def test_max_other_shape_format_fp32_1d(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.max_result_other(shape_format) + + def test_max_other_shape_format_fp16_2d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.max_result_other(shape_format) + + def test_max_other_shape_format_fp32_2d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.max_result_other(shape_format) + + def test_max_other_shape_format_fp16_3d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.max_result_other(shape_format) + + def test_max_other_shape_format_fp32_3d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.max_result_other(shape_format) + + def test_max_other_shape_format_fp16_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self.max_result_other(shape_format) + + def test_max_other_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in + keepdim_list + ] + self.max_result_other(shape_format) + + def test_max_dimname_shape_format(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], + np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j + in + keepdim_list + ] + self.max_name_result_other(shape_format) + + def test_max_dimname_shape_format_fp16(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], + np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j + in + keepdim_list + ] + self.max_name_result_other(shape_format) + + def test_max_dimname_out_shape_format(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], + np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j + in + keepdim_list + ] + self.max_name_out_result_other(shape_format) + + def test_max_dimname_out_shape_format_fp16(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], + np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j + in + keepdim_list + ] + self.max_name_out_result_other(shape_format) + + def test_max_values_shape_format(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.max_values_result(shape_format) + +instantiate_device_type_tests(TestMax, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_max_backward.py b/test/test_npu/test_network_ops/test_max_backward.py index 280abdb341c8ec9deaa90941ac2f3fa399f3b5e3..ea5ff2c62ff1753637e694075d4bc8b9fe53f4b9 100644 --- a/test/test_npu/test_network_ops/test_max_backward.py +++ b/test/test_npu/test_network_ops/test_max_backward.py @@ -1,78 +1,78 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestMaxBackward(TestCase): - def cpu_op_exec(self, input): - input.requires_grad = True - output = input.max(0, False) - output[0].backward(torch.ones_like(output[0])) - output_grad = input.grad - output_grad = output_grad.detach().numpy() - output1 = output[0].detach().numpy() - output2 = output[1].detach().numpy() - return output_grad, output1, output2 - - def npu_op_exec(self, input): - input.requires_grad = True - output = input.max(0, False) - output[0].backward(torch.ones_like(output[0])) - output_grad = input.grad - output_grad = output_grad.to("cpu") - output_grad = output_grad.detach().numpy() - output1 = output[0].detach().cpu().numpy() - output2 = output[1].detach().cpu().numpy() - return output_grad, output1, output2 - - def test_avg_pool2d_backward_shape_format_fp32(self, device): - format_list = [-1] - shape_list = [(2,32,8,600,40),(2,32,16,300,40)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_output_grad, cpu_output1, cpu_output2= self.cpu_op_exec(cpu_input) - npu_output_grad, npu_output1, npu_output2 = self.npu_op_exec(npu_input) - - self.assertRtolEqual(cpu_output1, npu_output1) - self.assertRtolEqual(cpu_output_grad, npu_output_grad) - - def test_avg_pool2d_backward_shape_format_fp16(self, device): - format_list = [-1] - shape_list = [(2,32,8,600),(2,32,16,300,40)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - cpu_output_grad, cpu_output1, cpu_output2= self.cpu_op_exec(cpu_input) - npu_output_grad, npu_output1, npu_output2 = self.npu_op_exec(npu_input) - cpu_output1 = cpu_output1.astype(npu_output1.dtype) - cpu_output_grad = cpu_output_grad.astype(npu_output_grad.dtype) - self.assertRtolEqual(cpu_output_grad, npu_output_grad) - -instantiate_device_type_tests( - TestMaxBackward, - globals(), - except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestMaxBackward(TestCase): + def cpu_op_exec(self, input): + input.requires_grad = True + output = input.max(0, False) + output[0].backward(torch.ones_like(output[0])) + output_grad = input.grad + output_grad = output_grad.detach().numpy() + output1 = output[0].detach().numpy() + output2 = output[1].detach().numpy() + return output_grad, output1, output2 + + def npu_op_exec(self, input): + input.requires_grad = True + output = input.max(0, False) + output[0].backward(torch.ones_like(output[0])) + output_grad = input.grad + output_grad = output_grad.to("cpu") + output_grad = output_grad.detach().numpy() + output1 = output[0].detach().cpu().numpy() + output2 = output[1].detach().cpu().numpy() + return output_grad, output1, output2 + + def test_avg_pool2d_backward_shape_format_fp32(self, device): + format_list = [-1] + shape_list = [(2,32,8,600,40),(2,32,16,300,40)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_output_grad, cpu_output1, cpu_output2= self.cpu_op_exec(cpu_input) + npu_output_grad, npu_output1, npu_output2 = self.npu_op_exec(npu_input) + + self.assertRtolEqual(cpu_output1, npu_output1) + self.assertRtolEqual(cpu_output_grad, npu_output_grad) + + def test_avg_pool2d_backward_shape_format_fp16(self, device): + format_list = [-1] + shape_list = [(2,32,8,600),(2,32,16,300,40)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + cpu_output_grad, cpu_output1, cpu_output2= self.cpu_op_exec(cpu_input) + npu_output_grad, npu_output1, npu_output2 = self.npu_op_exec(npu_input) + cpu_output1 = cpu_output1.astype(npu_output1.dtype) + cpu_output_grad = cpu_output_grad.astype(npu_output_grad.dtype) + self.assertRtolEqual(cpu_output_grad, npu_output_grad) + +instantiate_device_type_tests( + TestMaxBackward, + globals(), + except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_min.py b/test/test_npu/test_network_ops/test_min.py old mode 100644 new mode 100755 index badc9c3a2dd71a6ca45c28c1196a24b826d2404a..d34e8c8ca908e96e9ec87910f49d058ea7dfc7da --- a/test/test_npu/test_network_ops/test_min.py +++ b/test/test_npu/test_network_ops/test_min.py @@ -1,574 +1,574 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import copy -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestMin(TestCase): - def cpu_op_exec(self, input1): - ''' - 调用算子 torch.min(input) → Tensor - ''' - output = torch.min(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - ''' - 调用适配算子函数 Tensor min_npu(const Tensor& self) - ''' - output = torch.min(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_other_exec(self, input1, input2): - ''' - 调用算子 torch.min(input, other, out=None) → Tensor - ''' - output = torch.min(input1, input2) - output = output.numpy() - return output - - def npu_op_other_exec(self, input1, input2): - ''' - 适配算子函数 Tensor min_npu(const Tensor& self, const Tensor& other) - ''' - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.min(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_other_exec_out(self, input1, input2, out): - torch.min(input1, input2, out=out) - output = out.to("cpu") - output = output.numpy() - return output - - def cpu_op_dim_exec(self, input1, dim, keepdim): - ''' - 调用算子 torch.min(input, dim, keepdim=False, out=None) -> (Tensor, LongTensor) - ''' - output1, output2 = torch.min(input1, dim, keepdim) - output1 = output1.numpy() - # 这里需要将索引从64位转32位 便于拿去与npu的对比 - output2 = output2.int().numpy() - return output1, output2 - - def npu_op_dim_exec(self, input1, dim, keepdim): - ''' - 适配算子函数 tuple min_npu(const Tensor& self, int64_t dim, bool keepdim) - ''' - input1 = input1.to("npu") - output1, output2 = torch.min(input1, dim, keepdim) - output1 = output1.to("cpu") - output2 = output2.to("cpu") - output1 = output1.numpy() - output2 = output2.numpy() - return output1, output2 - - def _cpu_op_dim_exec(self, input1, dim, keepdim): - output1, output2 = torch._min(input1, dim, keepdim) - output1 = output1.numpy() - output2 = output2.numpy() - return output1, output2 - - def _npu_op_dim_exec(self, input1, dim, keepdim): - output1, output2 = torch._min(input1, dim, keepdim) - output1 = output1.to("cpu") - output2 = output2.to("cpu") - output1 = output1.numpy() - output2 = output2.numpy() - return output1, output2 - - def cpu_op_dim_exec_out(self, input1, dim, keepdim): - out = torch.tensor(0).to(input1.dtype) - indices = torch.tensor(0).to(torch.long) - torch.min(input1, dim=dim, keepdim=keepdim, out=(out,indices)) - out = out.numpy() - indices = indices.numpy() - return out,indices - - def npu_op_dim_exec_out(self, input1, dim, keepdim): - out = torch.tensor(0).to(input1.dtype).npu() - indices = torch.tensor(0).to(torch.long).npu() - torch.min(input1, dim=dim, keepdim=keepdim, out=(out,indices)) - out = out.to("cpu").numpy() - indices = indices.to("cpu").numpy() - return out,indices - - def cpu_min_values_exec(self, input): - output = input.min() - output = output.numpy() - return output - - def npu_min_values_exec(self, input): - output = input.min() - output = output.to("cpu") - output = output.numpy() - return output - - def min_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - - def min_result_dim(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) - npu_output_dim, npu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - def _min_result_dim(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_dim, cpu_output_indices = self._cpu_op_dim_exec(cpu_input1, item[1], item[2]) - npu_output_dim, npu_output_indices = self._npu_op_dim_exec(npu_input1, item[1], item[2]) - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - def min_result_other(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 10) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output_other = self.cpu_op_other_exec(cpu_input1, cpu_input2) - npu_output_other = self.npu_op_other_exec(npu_input1, npu_input2) - cpu_output_other = cpu_output_other.astype(npu_output_other.dtype) - - self.assertRtolEqual(cpu_output_other, npu_output_other) - - # Npu and cpu have different logic to find the maximum value index. - # The existence of two maximum values will cause the second output to be different. - def min_out_result_other(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) - cpu_input3, npu_input3 = create_common_tensor(item[0], -100, 100) - cpu_input4, npu_input4 = create_common_tensor(item[1], -100, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_other_exec(cpu_input1, cpu_input2) - npu_output_out1 = self.npu_op_other_exec(npu_input1, npu_input2) - npu_output_out2 = self.npu_op_other_exec_out(npu_input1, npu_input2, npu_input4) - cpu_output = cpu_output.astype(npu_output_out1.dtype) - - self.assertRtolEqual(cpu_output, npu_output_out1) - self.assertRtolEqual(cpu_output, npu_output_out2) - - cpu_out_dim, cpu_out_indices = self.cpu_op_dim_exec_out(cpu_input1, dim=0, keepdim=True) - npu_out_dim, npu_out_indices = self.npu_op_dim_exec_out(npu_input1, dim=0, keepdim=True) - npu_output_dim, npu_output_indices = self.npu_op_dim_exec(npu_input1, dim=0, keepdim=True) - cpu_out_dim = cpu_out_dim.astype(npu_out_dim.dtype) - if cpu_out_dim.dtype != np.float16: - self.assertRtolEqual(npu_out_dim, cpu_out_dim) - #self.assertRtolEqual(npu_out_indices, cpu_out_indices) - else: - self.assertRtolEqual(npu_out_dim, npu_output_dim) - #self.assertRtolEqual(npu_out_indices, npu_output_indices) - - # Npu and cpu have different logic to find the minimum value index. - # The existence of two minimum values will cause the second output to be different. - def min_name_result_other(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input1.names = item[0][3] - npu_input1.names = item[0][3] - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) - npu_output_dim, npu_output_indices = self.npu_op_dim_exec(cpu_input1, item[1], item[2]) - - if npu_output_dim.dtype != np.float16: - self.assertRtolEqual(npu_output_dim, cpu_output_dim) - #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) - else: - self.assertRtolEqual( npu_output_dim, cpu_output_dim.astype(np.float16)) - #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) - - # Npu and cpu have different logic to find the minimum value index. - # The existence of two minimum values will cause the second output to be different. - def min_name_out_result_other(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input1.names = item[0][3] - npu_input1.names = item[0][3] - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec_out(cpu_input1, item[1], item[2]) - npu_output_dim, npu_output_indices = self.npu_op_dim_exec_out(npu_input1, item[1], item[2]) - - if npu_output_dim.dtype != np.float16: - self.assertRtolEqual(npu_output_dim, cpu_output_dim) - #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) - else: - self.assertRtolEqual( npu_output_dim, cpu_output_dim.astype(np.float16)) - #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) - - def min_values_result(self, shape_format): - for item in shape_format: - print(item) - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_min_values_exec(cpu_input1) - npu_output = self.npu_min_values_exec(npu_input1) - - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_min_out_result(self, device): - shape_format = [ - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [128, 58, 28, 28]], [np.float16, 0, [58, 58, 1, 1]]], - [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], - [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], - [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [3, 3, 7, 7]]], - [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], - [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], - ] - self.min_out_result_other(shape_format) - - def test_min_shape_format_fp16_1d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list - ] - self.min_result(shape_format) - - def test_min_shape_format_fp32_1d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.min_result(shape_format) - - def test_min_shape_format_fp16_2d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.min_result(shape_format) - - def test_min_shape_format_fp32_2d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.min_result(shape_format) - - def test_min_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.min_result(shape_format) - - def test_min_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.min_result(shape_format) - - def test_min_shape_format_fp16_4d(self, device): - format_list = [0, 4, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.min_result(shape_format) - - def test_min_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.min_result(shape_format) - - # ---------------------------------------dim - def test_min_dim_shape_format_fp16_1d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list - ] - self.min_result_dim(shape_format) - - def test_min_dim_shape_format_fp32_1d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.min_result_dim(shape_format) - - #One-dimensional NZ to ND result is wrong, CCB has given a conclusion - def test_min_dim_shape_format_fp16_2d(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.min_result_dim(shape_format) - - #One-dimensional NZ to ND result is wrong, CCB has given a conclusion - def test_min_dim_shape_format_fp32_2d(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.min_result_dim(shape_format) - - def test_min_dim_shape_format_fp16_3d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.min_result_dim(shape_format) - - def test_min_dim_shape_format_fp32_3d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.min_result_dim(shape_format) - - def test_min_dim_shape_format_fp16_4d(self, device): - format_list = [0, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self.min_result_dim(shape_format) - - def test_min_dim_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self.min_result_dim(shape_format) - - def test_min_dim_shape_format_fp16_1d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list - ] - self._min_result_dim(shape_format) - - def test_min_dim_shape_format_fp32_1d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self._min_result_dim(shape_format) - - def test_min_dim_shape_format_fp16_2d_(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self._min_result_dim(shape_format) - - def test_min_dim_shape_format_fp32_2d_(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self._min_result_dim(shape_format) - - def test_min_dim_shape_format_fp16_3d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self._min_result_dim(shape_format) - - def test_min_dim_shape_format_fp32_3d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self._min_result_dim(shape_format) - - def test_min_dim_shape_format_fp16_4d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self._min_result_dim(shape_format) - - def test_min_dim_shape_format_fp32_4d_(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self._min_result_dim(shape_format) - - # -----------------------------other - - def test_min_other_shape_format_fp16_1d(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list - ] - self.min_result_other(shape_format) - - def test_min_other_shape_format_fp32_1d(self, device): - format_list = [0, 3, 4] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in - keepdim_list - ] - self.min_result_other(shape_format) - - def test_min_other_shape_format_fp16_2d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.min_result_other(shape_format) - - def test_min_other_shape_format_fp32_2d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.min_result_other(shape_format) - - def test_min_other_shape_format_fp16_3d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.min_result_other(shape_format) - - def test_min_other_shape_format_fp32_3d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in - keepdim_list - ] - self.min_result_other(shape_format) - - def test_min_other_shape_format_fp16_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self.min_result_other(shape_format) - - def test_min_other_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j - in keepdim_list - ] - self.min_result_other(shape_format) - - def test_min_dimname_shape_format(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], - np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j - in - keepdim_list - ] - self.min_name_result_other(shape_format) - - def test_min_dimname_shape_format_fp16(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], - np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j - in - keepdim_list - ] - self.min_name_result_other(shape_format) - - def test_min_dimname_out_shape_format(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], - np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j - in - keepdim_list - ] - self.min_name_out_result_other(shape_format) - - def test_min_dimname_out_shape_format_fp16(self, device): - format_list = [0, 3, 4, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], - np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j - in - keepdim_list - ] - self.min_name_out_result_other(shape_format) - - def test_min_values_shape_format(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in - keepdim_list - ] - self.min_values_result(shape_format) - -instantiate_device_type_tests(TestMin, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import copy +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestMin(TestCase): + def cpu_op_exec(self, input1): + ''' + 调用算子 torch.min(input) → Tensor + ''' + output = torch.min(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + ''' + 调用适配算子函数 Tensor min_npu(const Tensor& self) + ''' + output = torch.min(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_other_exec(self, input1, input2): + ''' + 调用算子 torch.min(input, other, out=None) → Tensor + ''' + output = torch.min(input1, input2) + output = output.numpy() + return output + + def npu_op_other_exec(self, input1, input2): + ''' + 适配算子函数 Tensor min_npu(const Tensor& self, const Tensor& other) + ''' + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.min(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_other_exec_out(self, input1, input2, out): + torch.min(input1, input2, out=out) + output = out.to("cpu") + output = output.numpy() + return output + + def cpu_op_dim_exec(self, input1, dim, keepdim): + ''' + 调用算子 torch.min(input, dim, keepdim=False, out=None) -> (Tensor, LongTensor) + ''' + output1, output2 = torch.min(input1, dim, keepdim) + output1 = output1.numpy() + # 这里需要将索引从64位转32位 便于拿去与npu的对比 + output2 = output2.int().numpy() + return output1, output2 + + def npu_op_dim_exec(self, input1, dim, keepdim): + ''' + 适配算子函数 tuple min_npu(const Tensor& self, int64_t dim, bool keepdim) + ''' + input1 = input1.to("npu") + output1, output2 = torch.min(input1, dim, keepdim) + output1 = output1.to("cpu") + output2 = output2.to("cpu") + output1 = output1.numpy() + output2 = output2.numpy() + return output1, output2 + + def _cpu_op_dim_exec(self, input1, dim, keepdim): + output1, output2 = torch._min(input1, dim, keepdim) + output1 = output1.numpy() + output2 = output2.numpy() + return output1, output2 + + def _npu_op_dim_exec(self, input1, dim, keepdim): + output1, output2 = torch._min(input1, dim, keepdim) + output1 = output1.to("cpu") + output2 = output2.to("cpu") + output1 = output1.numpy() + output2 = output2.numpy() + return output1, output2 + + def cpu_op_dim_exec_out(self, input1, dim, keepdim): + out = torch.tensor(0).to(input1.dtype) + indices = torch.tensor(0).to(torch.long) + torch.min(input1, dim=dim, keepdim=keepdim, out=(out,indices)) + out = out.numpy() + indices = indices.numpy() + return out,indices + + def npu_op_dim_exec_out(self, input1, dim, keepdim): + out = torch.tensor(0).to(input1.dtype).npu() + indices = torch.tensor(0).to(torch.long).npu() + torch.min(input1, dim=dim, keepdim=keepdim, out=(out,indices)) + out = out.to("cpu").numpy() + indices = indices.to("cpu").numpy() + return out,indices + + def cpu_min_values_exec(self, input): + output = input.min() + output = output.numpy() + return output + + def npu_min_values_exec(self, input): + output = input.min() + output = output.to("cpu") + output = output.numpy() + return output + + def min_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + + def min_result_dim(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) + npu_output_dim, npu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + def _min_result_dim(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_dim, cpu_output_indices = self._cpu_op_dim_exec(cpu_input1, item[1], item[2]) + npu_output_dim, npu_output_indices = self._npu_op_dim_exec(npu_input1, item[1], item[2]) + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + def min_result_other(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 10) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output_other = self.cpu_op_other_exec(cpu_input1, cpu_input2) + npu_output_other = self.npu_op_other_exec(npu_input1, npu_input2) + cpu_output_other = cpu_output_other.astype(npu_output_other.dtype) + + self.assertRtolEqual(cpu_output_other, npu_output_other) + + # Npu and cpu have different logic to find the maximum value index. + # The existence of two maximum values will cause the second output to be different. + def min_out_result_other(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) + cpu_input3, npu_input3 = create_common_tensor(item[0], -100, 100) + cpu_input4, npu_input4 = create_common_tensor(item[1], -100, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_other_exec(cpu_input1, cpu_input2) + npu_output_out1 = self.npu_op_other_exec(npu_input1, npu_input2) + npu_output_out2 = self.npu_op_other_exec_out(npu_input1, npu_input2, npu_input4) + cpu_output = cpu_output.astype(npu_output_out1.dtype) + + self.assertRtolEqual(cpu_output, npu_output_out1) + self.assertRtolEqual(cpu_output, npu_output_out2) + + cpu_out_dim, cpu_out_indices = self.cpu_op_dim_exec_out(cpu_input1, dim=0, keepdim=True) + npu_out_dim, npu_out_indices = self.npu_op_dim_exec_out(npu_input1, dim=0, keepdim=True) + npu_output_dim, npu_output_indices = self.npu_op_dim_exec(npu_input1, dim=0, keepdim=True) + cpu_out_dim = cpu_out_dim.astype(npu_out_dim.dtype) + if cpu_out_dim.dtype != np.float16: + self.assertRtolEqual(npu_out_dim, cpu_out_dim) + #self.assertRtolEqual(npu_out_indices, cpu_out_indices) + else: + self.assertRtolEqual(npu_out_dim, npu_output_dim) + #self.assertRtolEqual(npu_out_indices, npu_output_indices) + + # Npu and cpu have different logic to find the minimum value index. + # The existence of two minimum values will cause the second output to be different. + def min_name_result_other(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input1.names = item[0][3] + npu_input1.names = item[0][3] + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) + npu_output_dim, npu_output_indices = self.npu_op_dim_exec(cpu_input1, item[1], item[2]) + + if npu_output_dim.dtype != np.float16: + self.assertRtolEqual(npu_output_dim, cpu_output_dim) + #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) + else: + self.assertRtolEqual( npu_output_dim, cpu_output_dim.astype(np.float16)) + #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) + + # Npu and cpu have different logic to find the minimum value index. + # The existence of two minimum values will cause the second output to be different. + def min_name_out_result_other(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input1.names = item[0][3] + npu_input1.names = item[0][3] + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_dim, cpu_output_indices = self.cpu_op_dim_exec_out(cpu_input1, item[1], item[2]) + npu_output_dim, npu_output_indices = self.npu_op_dim_exec_out(npu_input1, item[1], item[2]) + + if npu_output_dim.dtype != np.float16: + self.assertRtolEqual(npu_output_dim, cpu_output_dim) + #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) + else: + self.assertRtolEqual( npu_output_dim, cpu_output_dim.astype(np.float16)) + #self.assertRtolEqual(npu_output_indices.astype(np.int32), cpu_output_indices.astype(np.int32)) + + def min_values_result(self, shape_format): + for item in shape_format: + print(item) + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_min_values_exec(cpu_input1) + npu_output = self.npu_min_values_exec(npu_input1) + + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_min_out_result(self, device): + shape_format = [ + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [128, 58, 28, 28]], [np.float16, 0, [58, 58, 1, 1]]], + [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3]]], + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [128, 116, 14, 14]]], + [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 256, 3, 3]]], + [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [3, 3, 7, 7]]], + [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3]]], + [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [128, 232, 7, 7]]], + ] + self.min_out_result_other(shape_format) + + def test_min_shape_format_fp16_1d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list + ] + self.min_result(shape_format) + + def test_min_shape_format_fp32_1d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.min_result(shape_format) + + def test_min_shape_format_fp16_2d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.min_result(shape_format) + + def test_min_shape_format_fp32_2d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.min_result(shape_format) + + def test_min_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.min_result(shape_format) + + def test_min_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.min_result(shape_format) + + def test_min_shape_format_fp16_4d(self, device): + format_list = [0, 4, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.min_result(shape_format) + + def test_min_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.min_result(shape_format) + + # ---------------------------------------dim + def test_min_dim_shape_format_fp16_1d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list + ] + self.min_result_dim(shape_format) + + def test_min_dim_shape_format_fp32_1d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.min_result_dim(shape_format) + + #One-dimensional NZ to ND result is wrong, CCB has given a conclusion + def test_min_dim_shape_format_fp16_2d(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.min_result_dim(shape_format) + + #One-dimensional NZ to ND result is wrong, CCB has given a conclusion + def test_min_dim_shape_format_fp32_2d(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.min_result_dim(shape_format) + + def test_min_dim_shape_format_fp16_3d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.min_result_dim(shape_format) + + def test_min_dim_shape_format_fp32_3d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.min_result_dim(shape_format) + + def test_min_dim_shape_format_fp16_4d(self, device): + format_list = [0, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self.min_result_dim(shape_format) + + def test_min_dim_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self.min_result_dim(shape_format) + + def test_min_dim_shape_format_fp16_1d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list + ] + self._min_result_dim(shape_format) + + def test_min_dim_shape_format_fp32_1d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self._min_result_dim(shape_format) + + def test_min_dim_shape_format_fp16_2d_(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self._min_result_dim(shape_format) + + def test_min_dim_shape_format_fp32_2d_(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self._min_result_dim(shape_format) + + def test_min_dim_shape_format_fp16_3d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self._min_result_dim(shape_format) + + def test_min_dim_shape_format_fp32_3d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self._min_result_dim(shape_format) + + def test_min_dim_shape_format_fp16_4d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self._min_result_dim(shape_format) + + def test_min_dim_shape_format_fp32_4d_(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self._min_result_dim(shape_format) + + # -----------------------------other + + def test_min_other_shape_format_fp16_1d(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in keepdim_list + ] + self.min_result_other(shape_format) + + def test_min_other_shape_format_fp32_1d(self, device): + format_list = [0, 3, 4] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list for j in + keepdim_list + ] + self.min_result_other(shape_format) + + def test_min_other_shape_format_fp16_2d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.min_result_other(shape_format) + + def test_min_other_shape_format_fp32_2d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.min_result_other(shape_format) + + def test_min_other_shape_format_fp16_3d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.min_result_other(shape_format) + + def test_min_other_shape_format_fp32_3d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64]], np.random.randint(0, 3), j] for i in format_list for j in + keepdim_list + ] + self.min_result_other(shape_format) + + def test_min_other_shape_format_fp16_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self.min_result_other(shape_format) + + def test_min_other_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34]], np.random.randint(0, 4), j] for i in format_list for j + in keepdim_list + ] + self.min_result_other(shape_format) + + def test_min_dimname_shape_format(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], + np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j + in + keepdim_list + ] + self.min_name_result_other(shape_format) + + def test_min_dimname_shape_format_fp16(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], + np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j + in + keepdim_list + ] + self.min_name_result_other(shape_format) + + def test_min_dimname_out_shape_format(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], + np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j + in + keepdim_list + ] + self.min_name_out_result_other(shape_format) + + def test_min_dimname_out_shape_format_fp16(self, device): + format_list = [0, 3, 4, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256, 64, 34], ('N', 'C', 'H', 'W')], + np.random.choice(['N', 'C', 'H', 'W']), j] for i in format_list for j + in + keepdim_list + ] + self.min_name_out_result_other(shape_format) + + def test_min_values_shape_format(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list for j in + keepdim_list + ] + self.min_values_result(shape_format) + +instantiate_device_type_tests(TestMin, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_mish.py b/test/test_npu/test_network_ops/test_mish.py index a79f7c426fc9edc3a6a0b01268aaca4430b659f6..6bb41262a391d45a7132526d21b9be76e388e4f0 100644 --- a/test/test_npu/test_network_ops/test_mish.py +++ b/test/test_npu/test_network_ops/test_mish.py @@ -1,59 +1,59 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn.functional as F -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - -class TestMish(TestCase): - def npu_op_exec(self, input1): - output = torch.npu_mish(input1) - output = output.cpu().numpy() - return output - - def cpu_op_exec(self, input1): - output = input1 * (torch.tanh(F.softplus(input1))) - output = output.numpy() - return output - - def test_mish_fp32(self, device): - shape_format = [ - [[np.float32, -1, [10,30,10]]], - [[np.float32, -1, [20,30,20]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - def test_mish_fp16(self, device): - shape_format = [ - [[np.float16, -1, [10,30,10]]], - [[np.float16, -1, [20,30,20]]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input.float()).astype(np.float16) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestMish, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn.functional as F +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + +class TestMish(TestCase): + def npu_op_exec(self, input1): + output = torch.npu_mish(input1) + output = output.cpu().numpy() + return output + + def cpu_op_exec(self, input1): + output = input1 * (torch.tanh(F.softplus(input1))) + output = output.numpy() + return output + + def test_mish_fp32(self, device): + shape_format = [ + [[np.float32, -1, [10,30,10]]], + [[np.float32, -1, [20,30,20]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + def test_mish_fp16(self, device): + shape_format = [ + [[np.float16, -1, [10,30,10]]], + [[np.float16, -1, [20,30,20]]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input.float()).astype(np.float16) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestMish, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_mish_backward.py b/test/test_npu/test_network_ops/test_mish_backward.py index 1240cb55b3ac30f375b59ba3cca882b1a2a0fd6d..5656539db1c0b7c42fd6a09e9964e45a50febc49 100644 --- a/test/test_npu/test_network_ops/test_mish_backward.py +++ b/test/test_npu/test_network_ops/test_mish_backward.py @@ -1,55 +1,55 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn.functional as F -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - -class TestMishBackward(TestCase): - def npu_op_exec(self, input1): - input1.requires_grad = True - output = torch.npu_mish(input1) - output.backward(torch.ones_like(output)) - output_grad = input1.grad - output_grad = output_grad.to("cpu") - output_grad = output_grad.detach().numpy() - output = output.cpu().detach().numpy() - return output_grad, output - - def cpu_op_exec(self, input1): - input1.requires_grad = True - output = input1 * (torch.tanh(F.softplus(input1))) - output.backward(torch.ones_like(output)) - output_grad = input1.grad - output_grad = output_grad.to("cpu") - output_grad = output_grad.detach().numpy() - output = output.detach().numpy() - return output_grad, output - - def test_mish_fp32(self, device): - npu_input = torch.tensor([1.,2.,3.,4.,5.,6.,7.,8.,9.,10.]).npu() - cpu_input = torch.tensor([1.,2.,3.,4.,5.,6.,7.,8.,9.,10.]) - output_grad, npu_output = self.npu_op_exec(npu_input) - ep_output_grad, ep_npu_output = self.cpu_op_exec(cpu_input) - self.assertRtolEqual(ep_output_grad, output_grad) - self.assertRtolEqual(ep_npu_output, npu_output) - -instantiate_device_type_tests(TestMishBackward, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn.functional as F +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + +class TestMishBackward(TestCase): + def npu_op_exec(self, input1): + input1.requires_grad = True + output = torch.npu_mish(input1) + output.backward(torch.ones_like(output)) + output_grad = input1.grad + output_grad = output_grad.to("cpu") + output_grad = output_grad.detach().numpy() + output = output.cpu().detach().numpy() + return output_grad, output + + def cpu_op_exec(self, input1): + input1.requires_grad = True + output = input1 * (torch.tanh(F.softplus(input1))) + output.backward(torch.ones_like(output)) + output_grad = input1.grad + output_grad = output_grad.to("cpu") + output_grad = output_grad.detach().numpy() + output = output.detach().numpy() + return output_grad, output + + def test_mish_fp32(self, device): + npu_input = torch.tensor([1.,2.,3.,4.,5.,6.,7.,8.,9.,10.]).npu() + cpu_input = torch.tensor([1.,2.,3.,4.,5.,6.,7.,8.,9.,10.]) + output_grad, npu_output = self.npu_op_exec(npu_input) + ep_output_grad, ep_npu_output = self.cpu_op_exec(cpu_input) + self.assertRtolEqual(ep_output_grad, output_grad) + self.assertRtolEqual(ep_npu_output, npu_output) + +instantiate_device_type_tests(TestMishBackward, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_mm.py b/test/test_npu/test_network_ops/test_mm.py old mode 100644 new mode 100755 index 8b55ddcbc5ed6be782ec1c0750771119d694a309..46a75f7c13ebb91fe23d18a153e46c321f59b355 --- a/test/test_npu/test_network_ops/test_mm.py +++ b/test/test_npu/test_network_ops/test_mm.py @@ -1,93 +1,93 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - -class TestMatMul(TestCase): - def trans_tensor(self, mat1, mat2): - if mat1.size(1) == mat2.size(0): - return mat1, mat2 - mat = mat1.t() - if mat.size(1) == mat2.size(0): - return mat, mat2 - mat = mat2.t() - if mat1.size(1) == mat.size(0): - return mat1, mat - return mat1, mat2 - - def cpu_op_exec(self, input1, input2): - output = torch.mm(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.mm(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def mm_auto_list_exec(self, shape): - for item in shape: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_input_1, cpu_input_2 = self.trans_tensor(cpu_input1, cpu_input2) - npu_input_1, npu_input_2 = self.trans_tensor(npu_input1, npu_input2) - cpu_output = self.cpu_op_exec(cpu_input_1, cpu_input_2) - npu_output = self.npu_op_exec(npu_input_1, npu_input_2) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_muls_shape_format_fp16(self, device): - format_list = [0, 3, 29] - shape_list = [[1024, 1000], [1000, 1024], - [1024, 1024]] - shape_format1 = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - shape_format2 = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - shape_format = [ - [i, j] for i in shape_format1 for j in shape_format2 - ] - self.mm_auto_list_exec(shape_format) - - def test_matmul_shape_format_fp32(self, device): - format_list = [0, 3, 29] - shape_list = [[256, 1280], [1000, 1280], - ] - shape_format1 = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - shape_format2 = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - shape_format = [ - [i, j] for i in shape_format1 for j in shape_format2 - ] - self.mm_auto_list_exec(shape_format) - -instantiate_device_type_tests(TestMatMul, globals(), except_for="cpu") - -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + +class TestMatMul(TestCase): + def trans_tensor(self, mat1, mat2): + if mat1.size(1) == mat2.size(0): + return mat1, mat2 + mat = mat1.t() + if mat.size(1) == mat2.size(0): + return mat, mat2 + mat = mat2.t() + if mat1.size(1) == mat.size(0): + return mat1, mat + return mat1, mat2 + + def cpu_op_exec(self, input1, input2): + output = torch.mm(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.mm(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def mm_auto_list_exec(self, shape): + for item in shape: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_input_1, cpu_input_2 = self.trans_tensor(cpu_input1, cpu_input2) + npu_input_1, npu_input_2 = self.trans_tensor(npu_input1, npu_input2) + cpu_output = self.cpu_op_exec(cpu_input_1, cpu_input_2) + npu_output = self.npu_op_exec(npu_input_1, npu_input_2) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_muls_shape_format_fp16(self, device): + format_list = [0, 3, 29] + shape_list = [[1024, 1000], [1000, 1024], + [1024, 1024]] + shape_format1 = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + shape_format2 = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + shape_format = [ + [i, j] for i in shape_format1 for j in shape_format2 + ] + self.mm_auto_list_exec(shape_format) + + def test_matmul_shape_format_fp32(self, device): + format_list = [0, 3, 29] + shape_list = [[256, 1280], [1000, 1280], + ] + shape_format1 = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + shape_format2 = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + shape_format = [ + [i, j] for i in shape_format1 for j in shape_format2 + ] + self.mm_auto_list_exec(shape_format) + +instantiate_device_type_tests(TestMatMul, globals(), except_for="cpu") + +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_muls.py b/test/test_npu/test_network_ops/test_muls.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_mv.py b/test/test_npu/test_network_ops/test_mv.py index a501b40b3b5991a07a801ed2ba9b9c7b4febd400..670839b67f0f32ee0948a34f3b0811bde7e4e307 100644 --- a/test/test_npu/test_network_ops/test_mv.py +++ b/test/test_npu/test_network_ops/test_mv.py @@ -1,72 +1,72 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestMv(TestCase): - def cpu_op_exec(self, input1, input2): - cpu_output = torch.mv(input1, input2) - cpu_output = cpu_output.numpy() - return cpu_output - - def npu_op_exec(self, input1, input2): - npu_output = torch.mv(input1, input2) - npu_output = npu_output.cpu() - npu_output = npu_output.numpy() - return npu_output - - def npu_op_exec_out(self, input1, input2, output): - torch.mv(input1, input2, out=output) - output = output.cpu() - output = output.numpy() - return output - - def test_mv_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (3, 3)], [np.float32, -1, (3)]], - [[np.float32, -1, (5, 8)], [np.float32, -1, (8)]], - [[np.float32, -1, (8, 9)], [np.float32, -1, (9)]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_mv_out_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (3, 3)], [np.float32, -1, (3)], [np.float32, -1, (3)]], - [[np.float32, -1, (5, 8)], [np.float32, -1, (8)], [np.float32, -1, (5)]], - [[np.float32, -1, (8, 9)], [np.float32, -1, (9)], [np.float32, -1, (8)]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) - cpu_input3, npu_input3 = create_common_tensor(item[2], -100, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output) - - - -instantiate_device_type_tests(TestMv, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestMv(TestCase): + def cpu_op_exec(self, input1, input2): + cpu_output = torch.mv(input1, input2) + cpu_output = cpu_output.numpy() + return cpu_output + + def npu_op_exec(self, input1, input2): + npu_output = torch.mv(input1, input2) + npu_output = npu_output.cpu() + npu_output = npu_output.numpy() + return npu_output + + def npu_op_exec_out(self, input1, input2, output): + torch.mv(input1, input2, out=output) + output = output.cpu() + output = output.numpy() + return output + + def test_mv_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (3, 3)], [np.float32, -1, (3)]], + [[np.float32, -1, (5, 8)], [np.float32, -1, (8)]], + [[np.float32, -1, (8, 9)], [np.float32, -1, (9)]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_mv_out_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (3, 3)], [np.float32, -1, (3)], [np.float32, -1, (3)]], + [[np.float32, -1, (5, 8)], [np.float32, -1, (8)], [np.float32, -1, (5)]], + [[np.float32, -1, (8, 9)], [np.float32, -1, (9)], [np.float32, -1, (8)]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) + cpu_input3, npu_input3 = create_common_tensor(item[2], -100, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output) + + + +instantiate_device_type_tests(TestMv, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_neg.py b/test/test_npu/test_network_ops/test_neg.py old mode 100644 new mode 100755 index 2456670193398aaa3aace6b23615d552f25b4839..5ee20c1d7fd04ba8c3f0c0fe115d6c3c24501824 --- a/test/test_npu/test_network_ops/test_neg.py +++ b/test/test_npu/test_network_ops/test_neg.py @@ -1,139 +1,139 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestNeg(TestCase): - def cpu_op_exec(self, input1): - output = torch.neg(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = torch.neg(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2): - torch.neg(input1, out=input2) - output = input2.to("cpu") - output = output.numpy() - return output - - def cpu_inp_op_exec(self, input1): - torch.neg_(input1) - output = input1.numpy() - return output - - def npu_inp_op_exec(self, input1): - torch.neg_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def neg_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - cpu_input_inp, npu_input_inp = create_common_tensor(item[0], -100, 100) - if cpu_input_inp.dtype == torch.float16: - cpu_input_inp = cpu_input_inp.to(torch.float32) - cpu_output_inp = self.cpu_inp_op_exec(cpu_input_inp) - npu_output_inp = self.npu_inp_op_exec(npu_input_inp) - cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def neg_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) - cpu_input3, npu_input3 = create_common_tensor(item[1], -100, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output_out1 = self.npu_op_exec_out(npu_input1, npu_input2) - npu_output_out2 = self.npu_op_exec_out(npu_input1, npu_input3) - cpu_output = cpu_output.astype(npu_output_out1.dtype) - self.assertRtolEqual(cpu_output, npu_output_out1) - self.assertRtolEqual(cpu_output, npu_output_out2) - - def test_neg_out_result(self, device): - shape_format = [ - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], - [[np.float16, 0, [128, 58, 28, 28]], [np.float16, 0, [58, 58, 1, 1]]], - [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3, 3]]], - [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [116, 116, 1, 1]]], - [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 128, 3, 3]]], - [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [3, 3, 7, 7]]], - [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3, 3]]], - [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [232, 232, 1, 1]]], - ] - self.neg_out_result(shape_format) - - def test_neg_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [[[np.float16, i, [96]]] for i in format_list] - self.neg_result(shape_format) - - def test_neg_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [[[np.float32, i, [96]]] for i in format_list] - self.neg_result(shape_format) - - def test_neg_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float16, i, [448, 1]]] for i in format_list] - self.neg_result(shape_format) - - def test_neg_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float32, i, [448, 1]]] for i in format_list] - self.neg_result(shape_format) - - def test_neg_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float16, i, [64, 24, 38]]] for i in format_list] - self.neg_result(shape_format) - - def test_neg_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float32, i, [64, 24, 38]]] for i in format_list] - self.neg_result(shape_format) - - def test_neg_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float16, i, [32, 3, 3, 3]]] for i in format_list] - self.neg_result(shape_format) - - def test_neg_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float32, i, [32, 3, 3, 3]]] for i in format_list] - self.neg_result(shape_format) - - -instantiate_device_type_tests(TestNeg, globals(), except_for="cpu") - -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestNeg(TestCase): + def cpu_op_exec(self, input1): + output = torch.neg(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = torch.neg(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2): + torch.neg(input1, out=input2) + output = input2.to("cpu") + output = output.numpy() + return output + + def cpu_inp_op_exec(self, input1): + torch.neg_(input1) + output = input1.numpy() + return output + + def npu_inp_op_exec(self, input1): + torch.neg_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def neg_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + cpu_input_inp, npu_input_inp = create_common_tensor(item[0], -100, 100) + if cpu_input_inp.dtype == torch.float16: + cpu_input_inp = cpu_input_inp.to(torch.float32) + cpu_output_inp = self.cpu_inp_op_exec(cpu_input_inp) + npu_output_inp = self.npu_inp_op_exec(npu_input_inp) + cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def neg_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], -100, 100) + cpu_input3, npu_input3 = create_common_tensor(item[1], -100, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output_out1 = self.npu_op_exec_out(npu_input1, npu_input2) + npu_output_out2 = self.npu_op_exec_out(npu_input1, npu_input3) + cpu_output = cpu_output.astype(npu_output_out1.dtype) + self.assertRtolEqual(cpu_output, npu_output_out1) + self.assertRtolEqual(cpu_output, npu_output_out2) + + def test_neg_out_result(self, device): + shape_format = [ + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [256, 116, 1, 1]]], + [[np.float16, 0, [128, 58, 28, 28]], [np.float16, 0, [58, 58, 1, 1]]], + [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3, 3]]], + [[np.float16, 0, [128, 116, 14, 14]], [np.float16, 0, [116, 116, 1, 1]]], + [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 128, 3, 3]]], + [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [3, 3, 7, 7]]], + [[np.float32, 0, [2, 3, 3, 3]], [np.float32, 0, [3, 1, 3, 3]]], + [[np.float32, 0, [128, 232, 7, 7]], [np.float32, 0, [232, 232, 1, 1]]], + ] + self.neg_out_result(shape_format) + + def test_neg_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [[[np.float16, i, [96]]] for i in format_list] + self.neg_result(shape_format) + + def test_neg_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [[[np.float32, i, [96]]] for i in format_list] + self.neg_result(shape_format) + + def test_neg_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float16, i, [448, 1]]] for i in format_list] + self.neg_result(shape_format) + + def test_neg_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float32, i, [448, 1]]] for i in format_list] + self.neg_result(shape_format) + + def test_neg_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float16, i, [64, 24, 38]]] for i in format_list] + self.neg_result(shape_format) + + def test_neg_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float32, i, [64, 24, 38]]] for i in format_list] + self.neg_result(shape_format) + + def test_neg_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float16, i, [32, 3, 3, 3]]] for i in format_list] + self.neg_result(shape_format) + + def test_neg_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float32, i, [32, 3, 3, 3]]] for i in format_list] + self.neg_result(shape_format) + + +instantiate_device_type_tests(TestNeg, globals(), except_for="cpu") + +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_new_full.py b/test/test_npu/test_network_ops/test_new_full.py index ada9cbe2c43d36c739b35b73bb0920f9837449f9..2fdc8e7feb2db5dad81fe1ebd8e24003c139642f 100644 --- a/test/test_npu/test_network_ops/test_new_full.py +++ b/test/test_npu/test_network_ops/test_new_full.py @@ -1,56 +1,56 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestNewFull(TestCase): - def cpu_op_exec(self, input1, size, value): - output = input1.new_full(size, value) - output = output.numpy() - return output - - def npu_op_exec(self, input1, size, value): - output = input1.new_full(size,value) - output = output.to("cpu") - output = output.numpy() - return output - - def test_new_full_shape_format(self, device): - shape = [ - [np.float32, 0, (4, 3)], - [np.float32, 4, (2, 3, 7)], - [np.float16, 0, (2, 3, 7)], - ] - size = [(2, 2), (1, 2)] - value = [-100, 0, 100] - - shape_format = [ - [i, j, k] for i in shape for j in size for k in value - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) - self.assertEqual(cpu_output.shape, npu_output.shape) - - -instantiate_device_type_tests(TestNewFull, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestNewFull(TestCase): + def cpu_op_exec(self, input1, size, value): + output = input1.new_full(size, value) + output = output.numpy() + return output + + def npu_op_exec(self, input1, size, value): + output = input1.new_full(size,value) + output = output.to("cpu") + output = output.numpy() + return output + + def test_new_full_shape_format(self, device): + shape = [ + [np.float32, 0, (4, 3)], + [np.float32, 4, (2, 3, 7)], + [np.float16, 0, (2, 3, 7)], + ] + size = [(2, 2), (1, 2)] + value = [-100, 0, 100] + + shape_format = [ + [i, j, k] for i in shape for j in size for k in value + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) + self.assertEqual(cpu_output.shape, npu_output.shape) + + +instantiate_device_type_tests(TestNewFull, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_nllloss.py b/test/test_npu/test_network_ops/test_nllloss.py old mode 100644 new mode 100755 index 9994116093bb4b300c074d829cb93276d8c7d5ae..170ce07f2cef7c05b427a60f9667ea6d3bafa383 --- a/test/test_npu/test_network_ops/test_nllloss.py +++ b/test/test_npu/test_network_ops/test_nllloss.py @@ -1,85 +1,85 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestNllloss(TestCase): - def cpu_op_exec_new(self, input1, target, reduction, ignore_index): - if not ignore_index: - ignore_index = -100 # 默认值 - output = torch.nn.functional.nll_loss(input1, target, reduction=reduction, ignore_index=ignore_index) - output = output.numpy() - return output - - def npu_op_exec_new(self, input1, target, reduction, ignore_index): - if not ignore_index: - ignore_index = -100 # 默认值 - target = target.to(torch.int32) - target = target.to("npu") - output = torch.nn.functional.nll_loss(input1, target, reduction=reduction, ignore_index=ignore_index) - output = output.to("cpu") - output = output.numpy() - return output - - def test_nllloss_shape_format_fp32(self, device): - # 当前仅支持设置正数, 若np.sum(ignore_index == np_target) == 0,则ignore_index设置任意数值不影响 - ignore_index = 1 - for reduction in ['mean', 'none', 'sum']: - shape_format = [ - [[np.float32, 0, [256, 100]], [np.int32, 0, [256]], reduction, None], - [[np.float32, 3, [256, 100]], [np.int32, 0, [256]], reduction, ignore_index], - [[np.float32, 0, [4800, 3003]], [np.int32, 0, [4800]], reduction, ignore_index], - [[np.float32, 3, [4800, 3003]], [np.int32, 0, [4800]], reduction, ignore_index], - [[np.float32, 0, [4800, 3003]], [np.int32, 0, [4800]], reduction, None], - ] - for item in shape_format: - np_target = np.random.randint(0, item[0][2][1], (item[1][2])).astype(np.long) - target = torch.from_numpy(np_target) - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_output = self.cpu_op_exec_new(cpu_input1, target, item[2], item[3]) - npu_output = self.npu_op_exec_new(npu_input1, target, item[2], item[3]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_nllloss_shape_format_fp16(self, device): - # 当前仅支持设置正数, 若np.sum(ignore_index == np_target) == 0,则ignore_index设置任意数值不影响 - ignore_index = 1 - for reduction in ['mean', 'none', 'sum']: - shape_format = [ - [[np.float16, 0, [256, 100]], [np.int32, 0, [256]], reduction, ignore_index], - [[np.float16, 3, [256, 100]], [np.int32, 0, [256]], reduction, ignore_index], - [[np.float16, 0, [4800, 3003]], [np.int32, 0, [4800]], reduction, ignore_index], - [[np.float16, 3, [4800, 3003]], [np.int32, 0, [4800]], reduction, ignore_index], - [[np.float16, 0, [4800, 3003]], [np.int32, 0, [4800]], reduction, None], - ] - for item in shape_format: - np_target = np.random.uniform(0, item[0][2][1], (item[1][2])).astype(np.long) - target = torch.from_numpy(np_target) - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec_new(cpu_input1, target, item[2], item[3]) - npu_output = self.npu_op_exec_new(npu_input1, target, item[2], item[3]) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestNllloss, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestNllloss(TestCase): + def cpu_op_exec_new(self, input1, target, reduction, ignore_index): + if not ignore_index: + ignore_index = -100 # 默认值 + output = torch.nn.functional.nll_loss(input1, target, reduction=reduction, ignore_index=ignore_index) + output = output.numpy() + return output + + def npu_op_exec_new(self, input1, target, reduction, ignore_index): + if not ignore_index: + ignore_index = -100 # 默认值 + target = target.to(torch.int32) + target = target.to("npu") + output = torch.nn.functional.nll_loss(input1, target, reduction=reduction, ignore_index=ignore_index) + output = output.to("cpu") + output = output.numpy() + return output + + def test_nllloss_shape_format_fp32(self, device): + # 当前仅支持设置正数, 若np.sum(ignore_index == np_target) == 0,则ignore_index设置任意数值不影响 + ignore_index = 1 + for reduction in ['mean', 'none', 'sum']: + shape_format = [ + [[np.float32, 0, [256, 100]], [np.int32, 0, [256]], reduction, None], + [[np.float32, 3, [256, 100]], [np.int32, 0, [256]], reduction, ignore_index], + [[np.float32, 0, [4800, 3003]], [np.int32, 0, [4800]], reduction, ignore_index], + [[np.float32, 3, [4800, 3003]], [np.int32, 0, [4800]], reduction, ignore_index], + [[np.float32, 0, [4800, 3003]], [np.int32, 0, [4800]], reduction, None], + ] + for item in shape_format: + np_target = np.random.randint(0, item[0][2][1], (item[1][2])).astype(np.long) + target = torch.from_numpy(np_target) + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_output = self.cpu_op_exec_new(cpu_input1, target, item[2], item[3]) + npu_output = self.npu_op_exec_new(npu_input1, target, item[2], item[3]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_nllloss_shape_format_fp16(self, device): + # 当前仅支持设置正数, 若np.sum(ignore_index == np_target) == 0,则ignore_index设置任意数值不影响 + ignore_index = 1 + for reduction in ['mean', 'none', 'sum']: + shape_format = [ + [[np.float16, 0, [256, 100]], [np.int32, 0, [256]], reduction, ignore_index], + [[np.float16, 3, [256, 100]], [np.int32, 0, [256]], reduction, ignore_index], + [[np.float16, 0, [4800, 3003]], [np.int32, 0, [4800]], reduction, ignore_index], + [[np.float16, 3, [4800, 3003]], [np.int32, 0, [4800]], reduction, ignore_index], + [[np.float16, 0, [4800, 3003]], [np.int32, 0, [4800]], reduction, None], + ] + for item in shape_format: + np_target = np.random.uniform(0, item[0][2][1], (item[1][2])).astype(np.long) + target = torch.from_numpy(np_target) + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec_new(cpu_input1, target, item[2], item[3]) + npu_output = self.npu_op_exec_new(npu_input1, target, item[2], item[3]) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestNllloss, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_nms_v4.py b/test/test_npu/test_network_ops/test_nms_v4.py index 7737ced69345e038a07549c3d54ed8b5367ebac9..0714914dfb2aa87e210cc7625808bdabe4630012 100644 --- a/test/test_npu/test_network_ops/test_nms_v4.py +++ b/test/test_npu/test_network_ops/test_nms_v4.py @@ -1,41 +1,41 @@ -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestNmsV4(TestCase): - def generate_data(self, min, max, shape, dtype): - input = np.random.uniform(min, max, shape).astype(dtype) - npu_input = torch.from_numpy(input) - return npu_input - - def npu_op_exec(self, boxes, scores, max_output_size, iou_threshold, scores_threshold): - boxes = boxes.to("npu") - scores = scores.to("npu") - iou_threshold = iou_threshold.to("npu") - scores_threshold = scores_threshold.to("npu") - npu_output = torch.npu_nms_v4(boxes, scores, max_output_size, iou_threshold, scores_threshold) - #npu_output = npu_output.to("cpu") - print("===npu_output===") - print(npu_output) - return npu_output - - - def test_nms_v4_float32(self, device): - boxes = self.generate_data(0, 100, (100, 4), np.float32) - scores = self.generate_data(0, 1, (100), np.float32) - max_output_size = 20 - iou_threshold = torch.tensor(0.5) - scores_threshold = torch.tensor(0.3) - - npu_output = self.npu_op_exec(boxes, scores, max_output_size, iou_threshold, scores_threshold) - - -instantiate_device_type_tests(TestNmsV4, globals(), except_for='cpu') -if __name__ == "__main__": +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestNmsV4(TestCase): + def generate_data(self, min, max, shape, dtype): + input = np.random.uniform(min, max, shape).astype(dtype) + npu_input = torch.from_numpy(input) + return npu_input + + def npu_op_exec(self, boxes, scores, max_output_size, iou_threshold, scores_threshold): + boxes = boxes.to("npu") + scores = scores.to("npu") + iou_threshold = iou_threshold.to("npu") + scores_threshold = scores_threshold.to("npu") + npu_output = torch.npu_nms_v4(boxes, scores, max_output_size, iou_threshold, scores_threshold) + #npu_output = npu_output.to("cpu") + print("===npu_output===") + print(npu_output) + return npu_output + + + def test_nms_v4_float32(self, device): + boxes = self.generate_data(0, 100, (100, 4), np.float32) + scores = self.generate_data(0, 1, (100), np.float32) + max_output_size = 20 + iou_threshold = torch.tensor(0.5) + scores_threshold = torch.tensor(0.3) + + npu_output = self.npu_op_exec(boxes, scores, max_output_size, iou_threshold, scores_threshold) + + +instantiate_device_type_tests(TestNmsV4, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_nms_with_mask.py b/test/test_npu/test_network_ops/test_nms_with_mask.py index 39ee878cd77f69b10b2c882544839ecc3a4ef533..57b46884179a64babeca82db81d03ffd2db67b0c 100644 --- a/test/test_npu/test_network_ops/test_nms_with_mask.py +++ b/test/test_npu/test_network_ops/test_nms_with_mask.py @@ -1,52 +1,52 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestNmsWithMask(TestCase): - def npu_op_exec(self, input1, iou_threshold): - npu_output1, npu_output2, npu_output3, = torch.npu_nms_with_mask(input1, iou_threshold) - npu_output1 = npu_output1.to("cpu") - npu_output2 = npu_output2.to("cpu") - npu_output3 = npu_output3.to("cpu") - - return npu_output1, npu_output2, npu_output3 - - def test_nms_with_mask_float32(self, device): - input1 = torch.tensor([[0.0, 1.0, 2.0, 3.0, 0.6], [6.0, 7.0, 8.0, 9.0, 0.4]]).npu() - iou_threshold = 0.5 - - eq_output1 = torch.tensor([[0.0000, 1.0000, 2.0000, 3.0000, 0.6001], - [6.0000, 7.0000, 8.0000, 9.0000, 0.3999]]) - eq_output2 = torch.tensor([0, 1], dtype=torch.int32) - eq_output3 = torch.tensor([1, 1], dtype=torch.uint8) - - npu_output1, npu_output2, npu_output3 = self.npu_op_exec(input1, iou_threshold) - - self.assertRtolEqual(eq_output1, npu_output1) - self.assertRtolEqual(eq_output2, npu_output2) - self.assertRtolEqual(eq_output3, npu_output3) - - -instantiate_device_type_tests(TestNmsWithMask, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestNmsWithMask(TestCase): + def npu_op_exec(self, input1, iou_threshold): + npu_output1, npu_output2, npu_output3, = torch.npu_nms_with_mask(input1, iou_threshold) + npu_output1 = npu_output1.to("cpu") + npu_output2 = npu_output2.to("cpu") + npu_output3 = npu_output3.to("cpu") + + return npu_output1, npu_output2, npu_output3 + + def test_nms_with_mask_float32(self, device): + input1 = torch.tensor([[0.0, 1.0, 2.0, 3.0, 0.6], [6.0, 7.0, 8.0, 9.0, 0.4]]).npu() + iou_threshold = 0.5 + + eq_output1 = torch.tensor([[0.0000, 1.0000, 2.0000, 3.0000, 0.6001], + [6.0000, 7.0000, 8.0000, 9.0000, 0.3999]]) + eq_output2 = torch.tensor([0, 1], dtype=torch.int32) + eq_output3 = torch.tensor([1, 1], dtype=torch.uint8) + + npu_output1, npu_output2, npu_output3 = self.npu_op_exec(input1, iou_threshold) + + self.assertRtolEqual(eq_output1, npu_output1) + self.assertRtolEqual(eq_output2, npu_output2) + self.assertRtolEqual(eq_output3, npu_output3) + + +instantiate_device_type_tests(TestNmsWithMask, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_norm.py b/test/test_npu/test_network_ops/test_norm.py index ccbaf189c8ab7de6af8a4298313e28799053477c..9212c8e98622c047d1cb49ddd3328c5709ba30aa 100644 --- a/test/test_npu/test_network_ops/test_norm.py +++ b/test/test_npu/test_network_ops/test_norm.py @@ -1,80 +1,80 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import torch.nn.functional as F -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import time - -class TestNorm(TestCase): - def norm_output_size(self, data, dimVal, keepdimVal): - output_size = list(data.size()) - for i in dimVal: - if i < 0: - i = i + data.dim() - if i < data.dim() and keepdimVal == True: - output_size[i] = 1 - if i < data.dim() and keepdimVal == False: - output_size.pop(i) - return output_size - - def cpu_dtype_out_exec(self, data, pVal, dimVal, keepdimVal, dtypeVal): - output_size = self.norm_output_size(data, dimVal, keepdimVal) - cpu_output = torch.randn(output_size) - torch.norm(data, p=pVal, dim = dimVal, keepdim = keepdimVal, out=cpu_output, dtype = dtypeVal) - return cpu_output.numpy() - - def npu_dtype_out_exec(self, data, pVal, dimVal, keepdimVal, dtypeVal): - output_size = self.norm_output_size(data, dimVal, keepdimVal) - npu_output = torch.randn(output_size).npu() - torch.norm(data, p=pVal, dim = dimVal, keepdim = keepdimVal, out=npu_output, dtype = dtypeVal) - return npu_output.cpu().numpy() - - def dtype_out_test(self, item): - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_out = self.cpu_dtype_out_exec(cpu_input, 2, [1,2], True, torch.float) - npu_out = self.npu_dtype_out_exec(npu_input, 2, [1,2], True, torch.float) - self.assertRtolEqual(cpu_out, npu_out) - - cpu_out = self.cpu_dtype_out_exec(cpu_input, 2, [1,2], False, torch.float) - npu_out = self.npu_dtype_out_exec(npu_input, 2, [1,2], False, torch.float) - self.assertRtolEqual(cpu_out, npu_out) - - cpu_out = self.cpu_dtype_out_exec(cpu_input, 1, [1,2], False, torch.float) - npu_out = self.npu_dtype_out_exec(npu_input, 1, [1,2], False, torch.float) - self.assertRtolEqual(cpu_out, npu_out) - - cpu_out = self.cpu_dtype_out_exec(cpu_input, 3, [1,2], False, torch.float) - npu_out = self.npu_dtype_out_exec(npu_input, 3, [1,2], False, torch.float) - self.assertRtolEqual(cpu_out, npu_out) - - cpu_out = self.cpu_dtype_out_exec(cpu_input, float("-inf"), [1,2], False, torch.float) - npu_out = self.npu_dtype_out_exec(npu_input, float("-inf"), [1,2], False, torch.float) - self.assertRtolEqual(cpu_out, npu_out) - - def test_norm_shape_format(self, device): - shape_format = [ - [[np.float32, 0, (64, 64, 64, 64)]], - ] - - for item in shape_format: - # norm.dtype_out - self.dtype_out_test(item) - -instantiate_device_type_tests(TestNorm, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import torch.nn.functional as F +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +import time + +class TestNorm(TestCase): + def norm_output_size(self, data, dimVal, keepdimVal): + output_size = list(data.size()) + for i in dimVal: + if i < 0: + i = i + data.dim() + if i < data.dim() and keepdimVal == True: + output_size[i] = 1 + if i < data.dim() and keepdimVal == False: + output_size.pop(i) + return output_size + + def cpu_dtype_out_exec(self, data, pVal, dimVal, keepdimVal, dtypeVal): + output_size = self.norm_output_size(data, dimVal, keepdimVal) + cpu_output = torch.randn(output_size) + torch.norm(data, p=pVal, dim = dimVal, keepdim = keepdimVal, out=cpu_output, dtype = dtypeVal) + return cpu_output.numpy() + + def npu_dtype_out_exec(self, data, pVal, dimVal, keepdimVal, dtypeVal): + output_size = self.norm_output_size(data, dimVal, keepdimVal) + npu_output = torch.randn(output_size).npu() + torch.norm(data, p=pVal, dim = dimVal, keepdim = keepdimVal, out=npu_output, dtype = dtypeVal) + return npu_output.cpu().numpy() + + def dtype_out_test(self, item): + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_out = self.cpu_dtype_out_exec(cpu_input, 2, [1,2], True, torch.float) + npu_out = self.npu_dtype_out_exec(npu_input, 2, [1,2], True, torch.float) + self.assertRtolEqual(cpu_out, npu_out) + + cpu_out = self.cpu_dtype_out_exec(cpu_input, 2, [1,2], False, torch.float) + npu_out = self.npu_dtype_out_exec(npu_input, 2, [1,2], False, torch.float) + self.assertRtolEqual(cpu_out, npu_out) + + cpu_out = self.cpu_dtype_out_exec(cpu_input, 1, [1,2], False, torch.float) + npu_out = self.npu_dtype_out_exec(npu_input, 1, [1,2], False, torch.float) + self.assertRtolEqual(cpu_out, npu_out) + + cpu_out = self.cpu_dtype_out_exec(cpu_input, 3, [1,2], False, torch.float) + npu_out = self.npu_dtype_out_exec(npu_input, 3, [1,2], False, torch.float) + self.assertRtolEqual(cpu_out, npu_out) + + cpu_out = self.cpu_dtype_out_exec(cpu_input, float("-inf"), [1,2], False, torch.float) + npu_out = self.npu_dtype_out_exec(npu_input, float("-inf"), [1,2], False, torch.float) + self.assertRtolEqual(cpu_out, npu_out) + + def test_norm_shape_format(self, device): + shape_format = [ + [[np.float32, 0, (64, 64, 64, 64)]], + ] + + for item in shape_format: + # norm.dtype_out + self.dtype_out_test(item) + +instantiate_device_type_tests(TestNorm, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_norm_ext.py b/test/test_npu/test_network_ops/test_norm_ext.py index 8e5e51224cf3493e38b3b1e0fb40300874c678eb..0b8b13c90c0535e77816770d21d937423e09d05c 100644 --- a/test/test_npu/test_network_ops/test_norm_ext.py +++ b/test/test_npu/test_network_ops/test_norm_ext.py @@ -1,130 +1,130 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestNorm(TestCase): - def norm_output_size(self, input, dim, keepdim): - output_size = list(input.size()) - for i in dim: - if i < 0: - i = i + input.dim() - if i < input.dim() and keepdim == True: - output_size[i] = 1 - if i < input.dim() and keepdim == False: - output_size.pop(i) - return output_size - - def cpu_out_exec(self, input, p1, dim1, keepdim1, dtype1): - output_size = self.norm_output_size(input, dim1, keepdim1) - cpu_out = torch.randn(output_size) - output = torch.norm(input, p = p1, dim = dim1 , keepdim = keepdim1, out = cpu_out, dtype = dtype1) - return output - - def npu_out_exec(self, input, p1, dim1, keepdim1, dtype1): - output_size = self.norm_output_size(input, dim1, keepdim1) - npu_out = torch.randn(output_size).npu() - output1 = torch.norm(input, p = p1, dim = dim1 , keepdim = keepdim1, out = npu_out, dtype = dtype1) - output = output1.to("cpu") - return output - - def test_norm_shape_format_0(self, device): - shape_format = [ - [[np.float16, 0, (1)]], - [[np.float32, 0, (1)]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_out_exec(cpu_input, 0, [0], True, torch.float) - npu_output = self.npu_out_exec(npu_input, 0, [0], True, torch.float) - cpu_output = cpu_output.to(npu_output.dtype) - self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) - - def test_norm_shape_format_1(self, device): - shape_format = [ - [[np.float16, 0, (12, 33)]], - [[np.float32, 0, (12, 33)]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_out_exec(cpu_input, 1, [0,1], True, torch.float) - npu_output = self.npu_out_exec(npu_input, 1, [0,1], True, torch.float) - cpu_output = cpu_output.to(npu_output.dtype) - self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) - - def test_norm_shape_format_2(self, device): - shape_format = [ - # [[np.float16, 0, (12, 33)]], # result error - [[np.float32, 0, (12, 33)]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_out_exec(cpu_input, 2, [0], False, torch.float) - npu_output = self.npu_out_exec(npu_input, 2, [0], False, torch.float) - npu_output = npu_output.to(cpu_output.dtype) - self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) - - def test_norm_shape_format_3(self, device): - shape_format = [ - # [[np.float16, 0, (10, 24, 56, 2048)]], # result error - [[np.float32, 0, (10, 24, 56, 2048)]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_out_exec(cpu_input, 3, [1,2], True, torch.float) - npu_output = self.npu_out_exec(npu_input, 3, [1,2], True, torch.float) - cpu_output = cpu_output.to(npu_output.dtype) - self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) - - def test_norm_shape_format_inf(self, device): - shape_format = [ - [[np.float16, 0, (64, 64, 64, 64)]], - [[np.float32, 0, (64, 64, 64, 64)]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_out_exec(cpu_input, float("inf"), [1,2], True, torch.float) - npu_output = self.npu_out_exec(npu_input, float("inf"), [1,2], True, torch.float) - cpu_output = cpu_output.to(npu_output.dtype) - self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) - - def test_norm_shape_format_inf1(self, device): - shape_format = [ - [[np.float16, 0, (64, 64, 64, 64)]], - [[np.float32, 0, (64, 64, 64, 64)]], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - cpu_output = self.cpu_out_exec(cpu_input, float("-inf"), [1,2], False, torch.float) - npu_output = self.npu_out_exec(npu_input, float("-inf"), [1,2], False, torch.float) - cpu_output = cpu_output.to(npu_output.dtype) - self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) - -instantiate_device_type_tests(TestNorm, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNorm(TestCase): + def norm_output_size(self, input, dim, keepdim): + output_size = list(input.size()) + for i in dim: + if i < 0: + i = i + input.dim() + if i < input.dim() and keepdim == True: + output_size[i] = 1 + if i < input.dim() and keepdim == False: + output_size.pop(i) + return output_size + + def cpu_out_exec(self, input, p1, dim1, keepdim1, dtype1): + output_size = self.norm_output_size(input, dim1, keepdim1) + cpu_out = torch.randn(output_size) + output = torch.norm(input, p = p1, dim = dim1 , keepdim = keepdim1, out = cpu_out, dtype = dtype1) + return output + + def npu_out_exec(self, input, p1, dim1, keepdim1, dtype1): + output_size = self.norm_output_size(input, dim1, keepdim1) + npu_out = torch.randn(output_size).npu() + output1 = torch.norm(input, p = p1, dim = dim1 , keepdim = keepdim1, out = npu_out, dtype = dtype1) + output = output1.to("cpu") + return output + + def test_norm_shape_format_0(self, device): + shape_format = [ + [[np.float16, 0, (1)]], + [[np.float32, 0, (1)]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_out_exec(cpu_input, 0, [0], True, torch.float) + npu_output = self.npu_out_exec(npu_input, 0, [0], True, torch.float) + cpu_output = cpu_output.to(npu_output.dtype) + self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) + + def test_norm_shape_format_1(self, device): + shape_format = [ + [[np.float16, 0, (12, 33)]], + [[np.float32, 0, (12, 33)]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_out_exec(cpu_input, 1, [0,1], True, torch.float) + npu_output = self.npu_out_exec(npu_input, 1, [0,1], True, torch.float) + cpu_output = cpu_output.to(npu_output.dtype) + self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) + + def test_norm_shape_format_2(self, device): + shape_format = [ + # [[np.float16, 0, (12, 33)]], # result error + [[np.float32, 0, (12, 33)]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_out_exec(cpu_input, 2, [0], False, torch.float) + npu_output = self.npu_out_exec(npu_input, 2, [0], False, torch.float) + npu_output = npu_output.to(cpu_output.dtype) + self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) + + def test_norm_shape_format_3(self, device): + shape_format = [ + # [[np.float16, 0, (10, 24, 56, 2048)]], # result error + [[np.float32, 0, (10, 24, 56, 2048)]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_out_exec(cpu_input, 3, [1,2], True, torch.float) + npu_output = self.npu_out_exec(npu_input, 3, [1,2], True, torch.float) + cpu_output = cpu_output.to(npu_output.dtype) + self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) + + def test_norm_shape_format_inf(self, device): + shape_format = [ + [[np.float16, 0, (64, 64, 64, 64)]], + [[np.float32, 0, (64, 64, 64, 64)]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_out_exec(cpu_input, float("inf"), [1,2], True, torch.float) + npu_output = self.npu_out_exec(npu_input, float("inf"), [1,2], True, torch.float) + cpu_output = cpu_output.to(npu_output.dtype) + self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) + + def test_norm_shape_format_inf1(self, device): + shape_format = [ + [[np.float16, 0, (64, 64, 64, 64)]], + [[np.float32, 0, (64, 64, 64, 64)]], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + cpu_output = self.cpu_out_exec(cpu_input, float("-inf"), [1,2], False, torch.float) + npu_output = self.npu_out_exec(npu_input, float("-inf"), [1,2], False, torch.float) + cpu_output = cpu_output.to(npu_output.dtype) + self.assertRtolEqual(cpu_output.numpy(), npu_output.numpy()) + +instantiate_device_type_tests(TestNorm, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_not_equal.py b/test/test_npu/test_network_ops/test_not_equal.py old mode 100644 new mode 100755 index 09fe26b416f8f39b1bc3594cc62188984305284f..d64cc713d90a34d7c7b73d2fecf24214d81f01a9 --- a/test/test_npu/test_network_ops/test_not_equal.py +++ b/test/test_npu/test_network_ops/test_not_equal.py @@ -1,190 +1,190 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestNotEqual(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.ne(input1, input2) - output = output.numpy().astype(np.int32) - return output - - def npu_op_exec(self, input1, input2): - output = torch.ne(input1, input2) - output = output.to("cpu") - output = output.numpy().astype(np.int32) - return output - - def cpu_op_inplace_exec(self, input1, input2): - input1.ne_(input2) - output = input1.numpy().astype(np.int32) - return output - - def npu_op_inplace_exec(self, input1, input2): - input1.ne_(input2) - output = input1.to("cpu") - output = output.numpy().astype(np.int32) - return output - - def npu_op_exec_out(self, input1, input2, out): - torch.ne(input1, input2, out=out) - output = out.to("cpu") - output = output.numpy().astype(np.int32) - return output - - def not_equal_scalar_result(self, shape_format): - for item in shape_format: - scalar = np.random.uniform(0, 100) - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - npu_input3 = copy.deepcopy(cpu_input1).to("npu").to(torch.bool) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1, scalar) - npu_output = self.npu_op_exec(npu_input1, scalar) - npu_output_out = self.npu_op_exec_out(npu_input1, scalar, npu_input3) - - cpu_output_inp = self.cpu_op_inplace_exec(cpu_input1, scalar) - npu_output_inp = self.npu_op_inplace_exec(npu_input1, scalar) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output, npu_output_out) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def not_equal_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - npu_input3 = copy.deepcopy(cpu_input1).to("npu").to(torch.bool) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - - cpu_output_inp = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output_inp = self.npu_op_inplace_exec(npu_input1, npu_input2) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output, npu_output_out) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def test_not_equal_shape_format_fp16_1d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float16, i, [16]], [np.float16, i, [16]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_fp32_1d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float32, i, [16]], [np.float32, i, [16]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_fp16_2d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float16, i, [448, 1]], [np.float16, i, [448, 1]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_fp32_2d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float32, i, [448, 1]], [np.float32, i, [448, 1]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_fp16_3d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float16, i, [16, 640, 640]], [np.float16, i, [16, 640, 640]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_fp32_3d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float32, i, [16, 640, 640]], [np.float32, i, [16, 640, 640]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_fp16_4d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, [32, 3, 3, 3]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_fp32_4d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float32, i, [32, 3, 3, 3]], [np.float32, i, [32, 3, 3, 3]]] for i in format_list] - self.not_equal_result(shape_format) - - # scala----------------------------------------------------------------- - - def test_not_equal_scalar_shape_format_fp16_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float16, i, 18]] for i in format_list] - self.not_equal_scalar_result(shape_format) - - def test_not_equal_scalar_shape_format_fp32_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float32, i, [18]]] for i in format_list] - self.not_equal_scalar_result(shape_format) - - def test_not_equal_scalar_shape_format_fp16_2d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float16, i, [64, 7]]] for i in format_list] - self.not_equal_scalar_result(shape_format) - - def test_not_equal_scalar_shape_format_fp32_2d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float32, i, [64, 7]]] for i in format_list] - self.not_equal_scalar_result(shape_format) - - def test_not_equal_scalar_shape_format_fp32_3d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float32, i, [64, 24, 38]]] for i in format_list] - self.not_equal_scalar_result(shape_format) - - def test_not_equal_scalar_shape_format_fp16_4d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float16, i, [32, 3, 3, 3]]] for i in format_list] - self.not_equal_scalar_result(shape_format) - - def test_not_equal_scalar_shape_format_fp32_4d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float32, i, [32, 3, 3, 3]]] for i in format_list] - self.not_equal_scalar_result(shape_format) - - def test_not_equal_shape_format_int32_1d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [16]], [np.int32, i, [16]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_int32_2d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [448, 1]], [np.int32, i, [448, 1]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_int32_3d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [16, 640, 640]], [np.int32, i, [16, 640, 640]]] for i in format_list] - self.not_equal_result(shape_format) - - def test_not_equal_shape_format_int32_4d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [32, 3, 3, 3]], [np.int32, i, [32, 3, 3, 3]]] for i in format_list] - self.not_equal_result(shape_format) - - -instantiate_device_type_tests(TestNotEqual, globals(), except_for="cpu") - -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestNotEqual(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.ne(input1, input2) + output = output.numpy().astype(np.int32) + return output + + def npu_op_exec(self, input1, input2): + output = torch.ne(input1, input2) + output = output.to("cpu") + output = output.numpy().astype(np.int32) + return output + + def cpu_op_inplace_exec(self, input1, input2): + input1.ne_(input2) + output = input1.numpy().astype(np.int32) + return output + + def npu_op_inplace_exec(self, input1, input2): + input1.ne_(input2) + output = input1.to("cpu") + output = output.numpy().astype(np.int32) + return output + + def npu_op_exec_out(self, input1, input2, out): + torch.ne(input1, input2, out=out) + output = out.to("cpu") + output = output.numpy().astype(np.int32) + return output + + def not_equal_scalar_result(self, shape_format): + for item in shape_format: + scalar = np.random.uniform(0, 100) + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + npu_input3 = copy.deepcopy(cpu_input1).to("npu").to(torch.bool) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1, scalar) + npu_output = self.npu_op_exec(npu_input1, scalar) + npu_output_out = self.npu_op_exec_out(npu_input1, scalar, npu_input3) + + cpu_output_inp = self.cpu_op_inplace_exec(cpu_input1, scalar) + npu_output_inp = self.npu_op_inplace_exec(npu_input1, scalar) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output_out) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def not_equal_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + npu_input3 = copy.deepcopy(cpu_input1).to("npu").to(torch.bool) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + + cpu_output_inp = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output_inp = self.npu_op_inplace_exec(npu_input1, npu_input2) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output_out) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def test_not_equal_shape_format_fp16_1d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float16, i, [16]], [np.float16, i, [16]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_fp32_1d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float32, i, [16]], [np.float32, i, [16]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_fp16_2d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float16, i, [448, 1]], [np.float16, i, [448, 1]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_fp32_2d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float32, i, [448, 1]], [np.float32, i, [448, 1]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_fp16_3d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float16, i, [16, 640, 640]], [np.float16, i, [16, 640, 640]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_fp32_3d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float32, i, [16, 640, 640]], [np.float32, i, [16, 640, 640]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_fp16_4d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, [32, 3, 3, 3]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_fp32_4d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float32, i, [32, 3, 3, 3]], [np.float32, i, [32, 3, 3, 3]]] for i in format_list] + self.not_equal_result(shape_format) + + # scala----------------------------------------------------------------- + + def test_not_equal_scalar_shape_format_fp16_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float16, i, 18]] for i in format_list] + self.not_equal_scalar_result(shape_format) + + def test_not_equal_scalar_shape_format_fp32_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float32, i, [18]]] for i in format_list] + self.not_equal_scalar_result(shape_format) + + def test_not_equal_scalar_shape_format_fp16_2d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float16, i, [64, 7]]] for i in format_list] + self.not_equal_scalar_result(shape_format) + + def test_not_equal_scalar_shape_format_fp32_2d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float32, i, [64, 7]]] for i in format_list] + self.not_equal_scalar_result(shape_format) + + def test_not_equal_scalar_shape_format_fp32_3d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float32, i, [64, 24, 38]]] for i in format_list] + self.not_equal_scalar_result(shape_format) + + def test_not_equal_scalar_shape_format_fp16_4d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float16, i, [32, 3, 3, 3]]] for i in format_list] + self.not_equal_scalar_result(shape_format) + + def test_not_equal_scalar_shape_format_fp32_4d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float32, i, [32, 3, 3, 3]]] for i in format_list] + self.not_equal_scalar_result(shape_format) + + def test_not_equal_shape_format_int32_1d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [16]], [np.int32, i, [16]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_int32_2d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [448, 1]], [np.int32, i, [448, 1]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_int32_3d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [16, 640, 640]], [np.int32, i, [16, 640, 640]]] for i in format_list] + self.not_equal_result(shape_format) + + def test_not_equal_shape_format_int32_4d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [32, 3, 3, 3]], [np.int32, i, [32, 3, 3, 3]]] for i in format_list] + self.not_equal_result(shape_format) + + +instantiate_device_type_tests(TestNotEqual, globals(), except_for="cpu") + +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_npu_bert_apply_adam.py b/test/test_npu/test_network_ops/test_npu_bert_apply_adam.py index a3b02059ab466903385045fc88ea75eba6ba09fb..589454b227a22b67a4a8d79ac18f4626c6a485ec 100644 --- a/test/test_npu/test_network_ops/test_npu_bert_apply_adam.py +++ b/test/test_npu/test_network_ops/test_npu_bert_apply_adam.py @@ -1,55 +1,55 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestNpuBertApplyAdam(TestCase): - def test_npu_bert_apply_adam(self, device): - seed = 3 - torch.manual_seed(seed) - torch.npu.manual_seed(seed) - torch.npu.manual_seed_all(seed) - - var_in = torch.rand(321538).uniform_(-32., 21.).npu() - m_in = torch.zeros(321538).npu() - v_in = torch.zeros(321538).npu() - grad = torch.rand(321538).uniform_(-0.05, 0.03).npu() - - var_ans = torch.tensor([13.1862, -30.1250, -20.4954]) - m_ans = torch.tensor([0.0014, 0.0018, -0.0021]) - v_ans = torch.tensor([1.8999e-06, 3.2629e-06, 4.4347e-06]) - - max_grad_norm = -1. - beta1 = 0.9 - beta2 = 0.99 - weight_decay = 0. - lr = 0. - epsilon = 1e-06 - global_grad_norm = 0. - - var_out, m_out, v_out = torch.npu_bert_apply_adam( - var_in, m_in, v_in, lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay) - - self.assertRtolEqual(var_out[:3].cpu(), var_ans) - self.assertRtolEqual(m_out[:3].cpu(), m_ans) - self.assertRtolEqual(v_out[:3].cpu(), v_ans) - -instantiate_device_type_tests(TestNpuBertApplyAdam, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNpuBertApplyAdam(TestCase): + def test_npu_bert_apply_adam(self, device): + seed = 3 + torch.manual_seed(seed) + torch.npu.manual_seed(seed) + torch.npu.manual_seed_all(seed) + + var_in = torch.rand(321538).uniform_(-32., 21.).npu() + m_in = torch.zeros(321538).npu() + v_in = torch.zeros(321538).npu() + grad = torch.rand(321538).uniform_(-0.05, 0.03).npu() + + var_ans = torch.tensor([13.1862, -30.1250, -20.4954]) + m_ans = torch.tensor([0.0014, 0.0018, -0.0021]) + v_ans = torch.tensor([1.8999e-06, 3.2629e-06, 4.4347e-06]) + + max_grad_norm = -1. + beta1 = 0.9 + beta2 = 0.99 + weight_decay = 0. + lr = 0. + epsilon = 1e-06 + global_grad_norm = 0. + + var_out, m_out, v_out = torch.npu_bert_apply_adam( + var_in, m_in, v_in, lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay) + + self.assertRtolEqual(var_out[:3].cpu(), var_ans) + self.assertRtolEqual(m_out[:3].cpu(), m_ans) + self.assertRtolEqual(v_out[:3].cpu(), v_ans) + +instantiate_device_type_tests(TestNpuBertApplyAdam, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_npu_giou.py b/test/test_npu/test_network_ops/test_npu_giou.py new file mode 100644 index 0000000000000000000000000000000000000000..c6f55768d074081684971fb48a5f5469e31c9536 --- /dev/null +++ b/test/test_npu/test_network_ops/test_npu_giou.py @@ -0,0 +1,133 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import math +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNpuGiou(TestCase): + def generate_giou_data(self, n, m, dtype): + data_bboxes = np.array([]).astype(dtype) + for i in range(4): + data_bboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, n).astype(dtype) + data_bboxes = np.append(data_bboxes, data_bboxes_array) + data_bboxes = data_bboxes.reshape([4, n]) + data_gtboxes = np.array([]).astype(dtype) + for i in range(4): + data_gtboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, m).astype(dtype) + data_gtboxes = np.append(data_gtboxes, data_gtboxes_array) + data_gtboxes = data_gtboxes.reshape([4, m]) + cpu_input1 = torch.from_numpy(data_bboxes) + cpu_input2 = torch.from_numpy(data_gtboxes) + npu_input1 = cpu_input1.npu() + npu_input2 = cpu_input2.npu() + return cpu_input1, cpu_input2, npu_input1, npu_input2 + + def cpu_op_exec(self, box1, box2, trans=False, is_cross=False, mode="iou"): + box1 = box1.numpy() + box2 = box2.numpy() + dtype = box1.dtype + _, n = box1.shape + _, m = box2.shape + if trans: + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + else: + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + area1 = w1 * h1 + area2 = w2 * h2 + giou_res =np.array([], dtype=dtype) + + for i in range(n): + for j in range(m): + inter_x1 = max(b1_x1[i], b2_x1[j]) + inter_x2 = min(b1_x2[i], b2_x2[j]) + inter_y1 = max(b1_y1[i], b2_y1[j]) + inter_y2 = min(b1_y2[i], b2_y2[j]) + outer_x1 = min(b1_x1[i], b2_x1[j]) + outer_x2 = max(b1_x2[i], b2_x2[j]) + outer_y1 = min(b1_y1[i], b2_y1[j]) + outer_y2 = max(b1_y2[i], b2_y2[j]) + inter_area = max(0, (inter_x2 - inter_x1)) * max(0, (inter_y2 - inter_y1)) + outer_area = abs(outer_x2 - outer_x1) * abs(outer_y2 - outer_y1) + union_area = area1[i] + area2[j] - inter_area + 1e-16 + other_area = outer_area - union_area + giou_ij = inter_area / union_area - other_area / outer_area + if not is_cross: + if i == j: + giou_res = np.append(giou_res, giou_ij) + else: + giou_res = np.append(giou_res, giou_ij) + + if not is_cross: + res = giou_res.reshape(1, n) + else: + res = giou_res.reshape(n, m) + res = np.transpose(res) + res = np.transpose(res) + return res + + def npu_op_exec(self, box1, box2, trans=False, is_cross=False, mode=0): + output = torch.npu_giou(box1, box2, trans, is_cross, mode) + output = output.detach().cpu().numpy() + return output + + def test_npu_giou_shape_format_fp32(self, device): + self._test_npu_giou_shape_format(np.float32) + + def test_npu_giou_shape_format_fp16(self, device): + self._test_npu_giou_shape_format(np.float16) + + def _test_npu_giou_shape_format(self, dtype): + shape_list = [ + [10, 10], + [12, 10], + [100, 100] + ] + is_trans_list = [False] + mode_list = ["iou"] + # TODO(Ascend): 反向只支持 mode=="iof", is_cross==False, + # is_trans==Fasle场景,这里同步验证相同场景 + shape_format = [[j, k, m] + for j in shape_list + for k in is_trans_list + for m in mode_list] + + for item in shape_format: + mode_digit = 0 if item[-1] == "iou" else 1 + is_cross = False if item[0][0] == item[0][1] else True + cpu_input1, cpu_input2, npu_input1, npu_input2 = self.generate_giou_data(*item[0], dtype) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, item[1], is_cross, item[-1]) + npu_output = self.npu_op_exec(npu_input1, npu_input2, item[1], is_cross, mode_digit) + cpu_output = cpu_output.astype(npu_output.dtype) + if dtype == np.float16: + # TODO(Ascend): fp16 insufficient precision + self.assertRtolEqual(cpu_output, npu_output, prec16=1e-2) + else: + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestNpuGiou, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_npu_giou_backward.py b/test/test_npu/test_network_ops/test_npu_giou_backward.py new file mode 100644 index 0000000000000000000000000000000000000000..1cf564d74b5263401d311c4486f9335c82fa7a8b --- /dev/null +++ b/test/test_npu/test_network_ops/test_npu_giou_backward.py @@ -0,0 +1,86 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import math +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNpuGiouBackward(TestCase): + def generate_giou_data(self, n, m, dtype): + data_bboxes = np.array([]).astype(dtype) + for i in range(4): + data_bboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, n).astype(dtype) + data_bboxes = np.append(data_bboxes, data_bboxes_array) + data_bboxes = data_bboxes.reshape([4, n]) + data_gtboxes = np.array([]).astype(dtype) + for i in range(4): + data_gtboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, m).astype(dtype) + data_gtboxes = np.append(data_gtboxes, data_gtboxes_array) + data_gtboxes = data_gtboxes.reshape([4, m]) + cpu_input1 = torch.from_numpy(data_bboxes) + cpu_input2 = torch.from_numpy(data_gtboxes) + npu_input1 = cpu_input1.npu() + npu_input2 = cpu_input2.npu() + return cpu_input1, cpu_input2, npu_input1, npu_input2 + + def npu_op_exec(self, box1, box2, trans=False, is_cross=False, mode=0): + box1.requires_grad = True + box2.requires_grad = True + output = torch.npu_giou(box1, box2, trans, is_cross, mode) + output.backward(torch.ones_like(output)) + box1_grad = box1.grad + box2_grad = box2.grad + box1_grad = box1_grad.detach().cpu().numpy() + box2_grad = box2_grad.detach().cpu().numpy() + output = output.detach().cpu().numpy() + return output, box1_grad, box2_grad + + def test_npu_giou_backward_shape_format(self, dtype): + shape_list = [ + [1, 1] + ] + is_trans_list = [False] + mode_list = ["iou"] + # TODO(Ascend): only support mode=="iof", is_cross==False, + # is_trans==Fasle currently + shape_format = [[j, k, m] + for j in shape_list + for k in is_trans_list + for m in mode_list] + + for item in shape_format: + mode_digit = 0 if item[-1] == "iou" else 1 + is_cross = False if item[0][0] == item[0][1] else True + expected_cpu_grad1 = np.array([[0.51091206], + [-0.70909655], + [0.3726323], + [0.349545]], dtype=np.float32) + expected_cpu_grad2 = np.array([[-0.51091206], + [0.70909655], + [0.3599837], + [0.47306436]], dtype=np.float32) + _, _, npu_input1, npu_input2 = self.generate_giou_data(*item[0], np.float32) + _, npu_grad1, npu_grad2 = self.npu_op_exec(npu_input1, npu_input2, item[1], is_cross, mode_digit) + self.assertRtolEqual(expected_cpu_grad1, npu_grad1) + self.assertRtolEqual(expected_cpu_grad2, npu_grad2) + + +instantiate_device_type_tests(TestNpuGiouBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_npu_linear.py b/test/test_npu/test_network_ops/test_npu_linear.py index ea9e7c2e2f507d69f4bcf3446babe2c4141cf6c0..5ac981b4df519cc1a43816bc0c60c1ec46fdd387 100644 --- a/test/test_npu/test_network_ops/test_npu_linear.py +++ b/test/test_npu/test_network_ops/test_npu_linear.py @@ -1,62 +1,62 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestNpuLinear(TestCase): - def cpu_op_exec(self, x, weight, bias): - output = torch.nn.functional.linear(x, weight, bias) - output = output.numpy() - return output - - def npu_op_exec(self, x, weight, bias): - output = torch.npu_linear(x, weight, bias) - output = output.cpu().numpy() - return output - - def test_npu_linear_shape_format_fp32(self, device): - shape_format = [ - [[np.float32, -1, (6144, 1024)], [np.float32, -1, (256, 1024)], [np.float32, -1, (256)]], - [[np.float32, -1, (123, 456)], [np.float32, -1, (789, 456)], [np.float32, -1, (789)]], - ] - - for item in shape_format: - cpu_x, npu_x = create_common_tensor(item[0], -2, 2) - cpu_w, npu_w = create_common_tensor(item[1], -2, 2) - cpu_b, npu_b = create_common_tensor(item[2], -2, 2) - cpu_output = self.cpu_op_exec(cpu_x, cpu_w, cpu_b) - npu_output = self.npu_op_exec(npu_x, npu_w, npu_b) - self.assertRtolEqual(cpu_output, npu_output, 0.0002) - - def test_npu_linear_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, -1, (6144, 1024)], [np.float16, -1, (256, 1024)], [np.float16, -1, (256)]], - [[np.float16, -1, (123, 456)], [np.float16, -1, (789, 456)], [np.float16, -1, (789)]], - ] - - for item in shape_format: - cpu_x, npu_x = create_common_tensor(item[0], -2, 2) - cpu_w, npu_w = create_common_tensor(item[1], -2, 2) - cpu_b, npu_b = create_common_tensor(item[2], -2, 2) - cpu_output = self.cpu_op_exec(cpu_x.float(), cpu_w.float(), cpu_b.float()).astype(np.float16) - npu_output = self.npu_op_exec(npu_x, npu_w, npu_b) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestNpuLinear, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNpuLinear(TestCase): + def cpu_op_exec(self, x, weight, bias): + output = torch.nn.functional.linear(x, weight, bias) + output = output.numpy() + return output + + def npu_op_exec(self, x, weight, bias): + output = torch.npu_linear(x, weight, bias) + output = output.cpu().numpy() + return output + + def test_npu_linear_shape_format_fp32(self, device): + shape_format = [ + [[np.float32, -1, (6144, 1024)], [np.float32, -1, (256, 1024)], [np.float32, -1, (256)]], + [[np.float32, -1, (123, 456)], [np.float32, -1, (789, 456)], [np.float32, -1, (789)]], + ] + + for item in shape_format: + cpu_x, npu_x = create_common_tensor(item[0], -2, 2) + cpu_w, npu_w = create_common_tensor(item[1], -2, 2) + cpu_b, npu_b = create_common_tensor(item[2], -2, 2) + cpu_output = self.cpu_op_exec(cpu_x, cpu_w, cpu_b) + npu_output = self.npu_op_exec(npu_x, npu_w, npu_b) + self.assertRtolEqual(cpu_output, npu_output, 0.0002) + + def test_npu_linear_shape_format_fp16(self, device): + shape_format = [ + [[np.float16, -1, (6144, 1024)], [np.float16, -1, (256, 1024)], [np.float16, -1, (256)]], + [[np.float16, -1, (123, 456)], [np.float16, -1, (789, 456)], [np.float16, -1, (789)]], + ] + + for item in shape_format: + cpu_x, npu_x = create_common_tensor(item[0], -2, 2) + cpu_w, npu_w = create_common_tensor(item[1], -2, 2) + cpu_b, npu_b = create_common_tensor(item[2], -2, 2) + cpu_output = self.cpu_op_exec(cpu_x.float(), cpu_w.float(), cpu_b.float()).astype(np.float16) + npu_output = self.npu_op_exec(npu_x, npu_w, npu_b) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestNpuLinear, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() + diff --git a/test/test_npu/test_network_ops/test_npu_linear_backward.py b/test/test_npu/test_network_ops/test_npu_linear_backward.py index 66f8a47f4143ac56fa0afe457ecbe0f9ebdc9268..f17921bfca16badd64a2bedbfd2fc804ee1a87f8 100644 --- a/test/test_npu/test_network_ops/test_npu_linear_backward.py +++ b/test/test_npu/test_network_ops/test_npu_linear_backward.py @@ -1,77 +1,77 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestNpuLinearBackward(TestCase): - def cpu_op_exec(self, x, weight, bias): - x.requires_grad = True - weight.requires_grad = True - bias.requires_grad = True - output = torch.nn.functional.linear(x, weight, bias) - loss = output.sum() - loss.backward() - return output.detach().numpy(), x.grad.numpy(), weight.grad.numpy(), bias.grad.numpy() - - def npu_op_exec(self, x, weight, bias): - x.requires_grad = True - weight.requires_grad = True - bias.requires_grad = True - output = torch.npu_linear(x, weight, bias) - loss = output.sum() - loss.backward() - return output.cpu().detach().numpy(), x.grad.cpu().numpy(), weight.grad.cpu().numpy(), bias.grad.cpu().numpy() - - def test_npu_linear_backward_shape_format_fp32(self, device): - shape_format = [ - [[np.float32, -1, (6144, 1024)], [np.float32, -1, (256, 1024)], [np.float32, -1, (256)]], - [[np.float32, -1, (123, 456)], [np.float32, -1, (789, 456)], [np.float32, -1, (789)]], - ] - - for item in shape_format: - cpu_x, npu_x = create_common_tensor(item[0], -2, 2) - cpu_w, npu_w = create_common_tensor(item[1], -2, 2) - cpu_b, npu_b = create_common_tensor(item[2], -2, 2) - cpu_output, cpu_x_grad, cpu_w_grad, cpu_b_grad = self.cpu_op_exec(cpu_x, cpu_w, cpu_b) - npu_output, npu_x_grad, npu_w_grad, npu_b_grad = self.npu_op_exec(npu_x, npu_w, npu_b) - self.assertRtolEqual(cpu_output, npu_output, 0.0002) - self.assertRtolEqual(cpu_x_grad, npu_x_grad) - self.assertRtolEqual(cpu_w_grad, npu_w_grad) - self.assertRtolEqual(cpu_b_grad, npu_b_grad) - - def test_npu_linear_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, -1, (6144, 1024)], [np.float16, -1, (256, 1024)], [np.float16, -1, (256)]], - [[np.float16, -1, (123, 456)], [np.float16, -1, (789, 456)], [np.float16, -1, (789)]], - ] - - for item in shape_format: - cpu_x, npu_x = create_common_tensor(item[0], -2, 2) - cpu_w, npu_w = create_common_tensor(item[1], -2, 2) - cpu_b, npu_b = create_common_tensor(item[2], -2, 2) - cpu_output, cpu_x_grad, cpu_w_grad, cpu_b_grad = self.cpu_op_exec( - cpu_x.float(), cpu_w.float(), cpu_b.float()) - npu_output, npu_x_grad, npu_w_grad, npu_b_grad = self.npu_op_exec(npu_x, npu_w, npu_b) - self.assertRtolEqual(cpu_output.astype(np.float16), npu_output) - self.assertRtolEqual(cpu_x_grad.astype(np.float16), npu_x_grad) - self.assertRtolEqual(cpu_w_grad.astype(np.float16), npu_w_grad) - self.assertRtolEqual(cpu_b_grad.astype(np.float16), npu_b_grad) - -instantiate_device_type_tests(TestNpuLinearBackward, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNpuLinearBackward(TestCase): + def cpu_op_exec(self, x, weight, bias): + x.requires_grad = True + weight.requires_grad = True + bias.requires_grad = True + output = torch.nn.functional.linear(x, weight, bias) + loss = output.sum() + loss.backward() + return output.detach().numpy(), x.grad.numpy(), weight.grad.numpy(), bias.grad.numpy() + + def npu_op_exec(self, x, weight, bias): + x.requires_grad = True + weight.requires_grad = True + bias.requires_grad = True + output = torch.npu_linear(x, weight, bias) + loss = output.sum() + loss.backward() + return output.cpu().detach().numpy(), x.grad.cpu().numpy(), weight.grad.cpu().numpy(), bias.grad.cpu().numpy() + + def test_npu_linear_backward_shape_format_fp32(self, device): + shape_format = [ + [[np.float32, -1, (6144, 1024)], [np.float32, -1, (256, 1024)], [np.float32, -1, (256)]], + [[np.float32, -1, (123, 456)], [np.float32, -1, (789, 456)], [np.float32, -1, (789)]], + ] + + for item in shape_format: + cpu_x, npu_x = create_common_tensor(item[0], -2, 2) + cpu_w, npu_w = create_common_tensor(item[1], -2, 2) + cpu_b, npu_b = create_common_tensor(item[2], -2, 2) + cpu_output, cpu_x_grad, cpu_w_grad, cpu_b_grad = self.cpu_op_exec(cpu_x, cpu_w, cpu_b) + npu_output, npu_x_grad, npu_w_grad, npu_b_grad = self.npu_op_exec(npu_x, npu_w, npu_b) + self.assertRtolEqual(cpu_output, npu_output, 0.0002) + self.assertRtolEqual(cpu_x_grad, npu_x_grad) + self.assertRtolEqual(cpu_w_grad, npu_w_grad) + self.assertRtolEqual(cpu_b_grad, npu_b_grad) + + def test_npu_linear_shape_format_fp16(self, device): + shape_format = [ + [[np.float16, -1, (6144, 1024)], [np.float16, -1, (256, 1024)], [np.float16, -1, (256)]], + [[np.float16, -1, (123, 456)], [np.float16, -1, (789, 456)], [np.float16, -1, (789)]], + ] + + for item in shape_format: + cpu_x, npu_x = create_common_tensor(item[0], -2, 2) + cpu_w, npu_w = create_common_tensor(item[1], -2, 2) + cpu_b, npu_b = create_common_tensor(item[2], -2, 2) + cpu_output, cpu_x_grad, cpu_w_grad, cpu_b_grad = self.cpu_op_exec( + cpu_x.float(), cpu_w.float(), cpu_b.float()) + npu_output, npu_x_grad, npu_w_grad, npu_b_grad = self.npu_op_exec(npu_x, npu_w, npu_b) + self.assertRtolEqual(cpu_output.astype(np.float16), npu_output) + self.assertRtolEqual(cpu_x_grad.astype(np.float16), npu_x_grad) + self.assertRtolEqual(cpu_w_grad.astype(np.float16), npu_w_grad) + self.assertRtolEqual(cpu_b_grad.astype(np.float16), npu_b_grad) + +instantiate_device_type_tests(TestNpuLinearBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() + diff --git a/test/test_npu/test_network_ops/test_npu_pad.py b/test/test_npu/test_network_ops/test_npu_pad.py index 2580b1a65dfe8249e1bb911e54fbe873ceaf4617..7c91f8f751608e69363e610266fe5692801f327c 100644 --- a/test/test_npu/test_network_ops/test_npu_pad.py +++ b/test/test_npu/test_network_ops/test_npu_pad.py @@ -1,35 +1,35 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestNpuPad(TestCase): - def test_npu_pad(self, device): - npu_input = torch.ones(2, 2).npu() - pads = (1, 1, 1, 1) - benchmark = torch.tensor([[0., 0., 0., 0.], - [0., 1., 1., 0.], - [0., 1., 1., 0.], - [0., 0., 0., 0.]]) - npu_output = torch.npu_pad(npu_input, pads) - npu_output = npu_output.cpu().detach() - self.assertRtolEqual(benchmark, npu_output) - -instantiate_device_type_tests(TestNpuPad, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNpuPad(TestCase): + def test_npu_pad(self, device): + npu_input = torch.ones(2, 2).npu() + pads = (1, 1, 1, 1) + benchmark = torch.tensor([[0., 0., 0., 0.], + [0., 1., 1., 0.], + [0., 1., 1., 0.], + [0., 0., 0., 0.]]) + npu_output = torch.npu_pad(npu_input, pads) + npu_output = npu_output.cpu().detach() + self.assertRtolEqual(benchmark, npu_output) + +instantiate_device_type_tests(TestNpuPad, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_numpy_T.py b/test/test_npu/test_network_ops/test_numpy_T.py index f8a5e1af64d52a630b527885a848aee1e5c80376..37527db346ddfd1e50788e6985667c552bf16b6a 100644 --- a/test/test_npu/test_network_ops/test_numpy_T.py +++ b/test/test_npu/test_network_ops/test_numpy_T.py @@ -1,48 +1,48 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestNumpyT(TestCase): - def test_numpy_T_common_shape_format(self, device): - def cpu_op_exec(input): - output = input.T - output = output.numpy() - return output - - def npu_op_exec(input): - output = input.T - output = output.to("cpu") - output = output.numpy() - return output - - shape_format = [ - [[np.float16, 0, (64, 10)]], - [[np.float32, 4, (32, 1, 3, 3)]], - [[np.float32, 29, (10, 128)]] - ] - for shape in shape_format: - cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) - cpu_output = cpu_op_exec(cpu_input) - npu_output = npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestNumpyT, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNumpyT(TestCase): + def test_numpy_T_common_shape_format(self, device): + def cpu_op_exec(input): + output = input.T + output = output.numpy() + return output + + def npu_op_exec(input): + output = input.T + output = output.to("cpu") + output = output.numpy() + return output + + shape_format = [ + [[np.float16, 0, (64, 10)]], + [[np.float32, 4, (32, 1, 3, 3)]], + [[np.float32, 29, (10, 128)]] + ] + for shape in shape_format: + cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) + cpu_output = cpu_op_exec(cpu_input) + npu_output = npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestNumpyT, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_ones_like.py b/test/test_npu/test_network_ops/test_ones_like.py index 924c4d12022a51286a084802fa59bcaa71a5fede..7b0b11ed38aab5e752e7bf942a955ea50bf1e104 100644 --- a/test/test_npu/test_network_ops/test_ones_like.py +++ b/test/test_npu/test_network_ops/test_ones_like.py @@ -1,65 +1,65 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestOnesLike(TestCase): - def cpu_op_exec(self, input1): - output = torch.ones_like(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = torch.ones_like(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_op_shape_format_fp16(self, device): - format_list = [0, 3, 29] - shape_list = [1, (1000, 1280), (32, 3, 3), (32, 144, 1, 1)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - print(item) - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - cpu_output = cpu_output.astype(np.float16) - self.assertEqual(cpu_output, npu_output) - - def test_op_shape_format_fp32(self, device): - format_list = [0, 3, 29] - shape_list = [1, (1000, 1280), (32, 3, 3), (32, 144, 1, 1)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - print(item) - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestOnesLike, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestOnesLike(TestCase): + def cpu_op_exec(self, input1): + output = torch.ones_like(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = torch.ones_like(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def test_op_shape_format_fp16(self, device): + format_list = [0, 3, 29] + shape_list = [1, (1000, 1280), (32, 3, 3), (32, 144, 1, 1)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + print(item) + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + cpu_output = cpu_output.astype(np.float16) + self.assertEqual(cpu_output, npu_output) + + def test_op_shape_format_fp32(self, device): + format_list = [0, 3, 29] + shape_list = [1, (1000, 1280), (32, 3, 3), (32, 144, 1, 1)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + print(item) + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestOnesLike, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_permute.py b/test/test_npu/test_network_ops/test_permute.py index 1f5fb99d7510940b202958289895253ae8992fde..daf1a347c724567dd0028a7e7507f0a5ba740eb3 100644 --- a/test/test_npu/test_network_ops/test_permute.py +++ b/test/test_npu/test_network_ops/test_permute.py @@ -1,54 +1,54 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestPermute(TestCase): - def cpu_op_exec(self, input1, input2): - output = input1.permute(input2); - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - output = input1.permute(input2); - output = output.to("cpu") - output = output.numpy() - return output - - def test_permute(self, device): - shape_format = [ - [[2, 3, 5], (2, 0, 1), torch.float32], - [[2, 5, 6, 9], (2, 0, 3, 1), torch.float32], - [[2, 4, 6, 8, 10], (2, 3, 4, 0, 1), torch.float32], - ] - for item in shape_format: - cpu_input1 = torch.randn(item[0], dtype=item[2]) - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(cpu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestPermute, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestPermute(TestCase): + def cpu_op_exec(self, input1, input2): + output = input1.permute(input2); + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + output = input1.permute(input2); + output = output.to("cpu") + output = output.numpy() + return output + + def test_permute(self, device): + shape_format = [ + [[2, 3, 5], (2, 0, 1), torch.float32], + [[2, 5, 6, 9], (2, 0, 3, 1), torch.float32], + [[2, 4, 6, 8, 10], (2, 3, 4, 0, 1), torch.float32], + ] + for item in shape_format: + cpu_input1 = torch.randn(item[0], dtype=item[2]) + cpu_output = self.cpu_op_exec(cpu_input1, item[1]) + npu_output = self.npu_op_exec(cpu_input1, item[1]) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestPermute, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_pow.py b/test/test_npu/test_network_ops/test_pow.py old mode 100644 new mode 100755 index 31fabef610bfe9a3859d38acdf4c122d56fd32bf..1a25cadfd483f0c647a30f73f06f82ed8d7a889f --- a/test/test_npu/test_network_ops/test_pow.py +++ b/test/test_npu/test_network_ops/test_pow.py @@ -1,296 +1,296 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestPow(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.pow(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.pow(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, out): - torch.pow(input1, input2, out=out) - output = out.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec(self, input1, input2): - input1.pow_(input2) - output = input1.numpy() - return output - - def npu_op_inplace_exec(self, input1, input2): - input1.pow_(input2) - output = input1.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_tensor_scalar(self, input1, n): - output = torch.pow(input1, n) - output = output.numpy() - return output - - def npu_op_exec_tensor_scalar(self, input1, n): - # input1 = input1.to("npu") - output = torch.pow(input1, n) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_tensor_scalar_out(self, input1, n, out): - # input1 = input1.to("npu") - output = torch.pow(input1, n, out=out) - output = out.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_scalar_tensor(self, n, input1): - output = torch.pow(n, input1) - output = output.numpy() - return output - - def npu_op_exec_scalar_tensor(self, n, input1): - output = torch.pow(n, input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar_tensor_out(self, n, input1, out): - torch.pow(n, input1, out=out) - output = out.to("cpu") - output = output.numpy() - return output - - def pow_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 1) - npu_input3 = copy.deepcopy(cpu_input1).to("npu") - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - cpu_output_inp = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output_inp = self.npu_op_inplace_exec(npu_input1, npu_input2) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output, npu_output_out) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def pow_result_scalar_tensor(self, shape_format): - for item in shape_format: - scalar = np.random.randint(0, 1) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 1) - npu_input3 = copy.deepcopy(cpu_input1).to("npu") - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_scalar = self.cpu_op_exec_scalar_tensor(scalar, cpu_input1) - npu_output_scalar = self.npu_op_exec_scalar_tensor(scalar, npu_input1) - npu_output_scalar_out = self.npu_op_exec_scalar_tensor_out(scalar, npu_input1, npu_input3) - - cpu_output_scalar = cpu_output_scalar.astype(npu_output_scalar.dtype) - self.assertRtolEqual(cpu_output_scalar, npu_output_scalar) - self.assertRtolEqual(cpu_output_scalar, npu_output_scalar_out) - - def pow_result_tensor_scalar_(self, shape_format): - for item in shape_format: - scalar = np.random.randint(0, 1) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 1) - npu_input3 = copy.deepcopy(cpu_input1).to("npu") - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_tensor_scalar = self.cpu_op_exec_tensor_scalar(cpu_input1, scalar) - npu_output_tensor_scalar = self.npu_op_exec_tensor_scalar(npu_input1, scalar) - npu_output_tensor_scalar_out = self.npu_op_exec_tensor_scalar_out(npu_input1, scalar, npu_input3) - - cpu_output_tensor_scalar = cpu_output_tensor_scalar.astype(npu_output_tensor_scalar.dtype) - self.assertRtolEqual(cpu_output_tensor_scalar, npu_output_tensor_scalar) - self.assertRtolEqual(cpu_output_tensor_scalar, npu_output_tensor_scalar_out) - - # scalar_tensor------------------------------------------------------- - def test_pow_shape_format_scalar_tensor_fp16_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[np.float16, i, [18]] for i in format_list] - self.pow_result_scalar_tensor(shape_format) - - def test_pow_shape_format_scalar_tensor_fp32_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[np.float32, i, [18]] for i in format_list] - # shape_format = [np.float32, 0, [18]] - self.pow_result_scalar_tensor(shape_format) - - def test_pow_shape_format_scalar_tensor_fp16_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float16, i, [18, 64]] for i in format_list] - self.pow_result_scalar_tensor(shape_format) - - def test_pow_shape_format_scalar_tensor_fp32_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float32, i, [18, 64]] for i in format_list] - self.pow_result_scalar_tensor(shape_format) - - def test_pow_shape_format_scalar_tensor_fp16_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float16, i, [18, 64, 128]] for i in format_list] - self.pow_result_scalar_tensor(shape_format) - - def test_pow_shape_format_scalar_tensor_fp32_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float32, i, [18, 64, 128]] for i in format_list] - self.pow_result_scalar_tensor(shape_format) - - def test_pow_shape_format_scalar_tensor_fp16_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float16, i, [18, 64, 128, 256]] for i in format_list] - self.pow_result_scalar_tensor(shape_format) - - def test_pow_shape_format_scalar_tensor_fp32_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float32, i, [18, 64, 128, 256]] for i in format_list] - self.pow_result_scalar_tensor(shape_format) - - # tensor_scalar----------------------------------------------------------- - def test_pow_shape_format_tensor_scala_fp16_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[np.float16, i, [18]] for i in format_list] - self.pow_result_tensor_scalar_(shape_format) - - def test_pow_shape_format_tensor_scalar_fp32_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[np.float32, i, [18]] for i in format_list] - self.pow_result_tensor_scalar_(shape_format) - - def test_pow_shape_format_tensor_scala_fp16_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float16, i, [18, 64]] for i in format_list] - self.pow_result_tensor_scalar_(shape_format) - - def test_pow_shape_format_tensor_scalar_fp32_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float32, i, [18, 64]] for i in format_list] - self.pow_result_tensor_scalar_(shape_format) - - def test_pow_shape_format_tensor_scala_fp16_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float16, i, [18, 64, 128]] for i in format_list] - self.pow_result_tensor_scalar_(shape_format) - - def test_pow_shape_format_tensor_scalar_fp32_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float32, i, [18, 64, 128]] for i in format_list] - self.pow_result_tensor_scalar_(shape_format) - - def test_pow_shape_format_tensor_scala_fp16_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float16, i, [18, 64, 128, 256]] for i in format_list] - self.pow_result_tensor_scalar_(shape_format) - - def test_pow_shape_format_tensor_scalar_fp32_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float32, i, [18, 64, 128, 256]] for i in format_list] - self.pow_result_tensor_scalar_(shape_format) - - # tensor_tensor----------------------------------------------------------- - def test_pow_shape_format_fp16_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float16, i, [5]], [np.float16, i, []]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp32_1d(self, device): - format_list = [-1, 0, 3,] - shape_format = [[[np.float32, i, [5]], [np.float32, i, []]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp16_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [448, 1]], [np.float16, i, []]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp32_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [448, 1]], [np.float32, i, []]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp16_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [16, 640, 640]], [np.float16, i, []]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp32_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [16, 640, 640]], [np.float32, i, []]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp16_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, []]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp32_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [32, 3, 3, 3]], [np.float32, i, []]] for i in format_list] - self.pow_result(shape_format) - - #broadcast - def test_pow_shape_format_fp16_2d_broadcast(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [448, 20]], [np.float16, i, [448,1]]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp32_2d_broadcast(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [448, 20]], [np.float32, i, [448,1]]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp16_3d_broadcast(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [16, 640, 640]], [np.float16, i, [16, 640, 1]]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp32_3d_broadcast(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [16, 640, 640]], [np.float32, i, [16, 1, 1]]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp16_4d_broadcast(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, [32, 1, 1, 1]]] for i in format_list] - self.pow_result(shape_format) - - def test_pow_shape_format_fp32_4d_broadcast(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [32, 3, 3, 3]], [np.float32, i, [32, 3, 1, 1]]] for i in format_list] - self.pow_result(shape_format) - - -instantiate_device_type_tests(TestPow, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestPow(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.pow(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.pow(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, out): + torch.pow(input1, input2, out=out) + output = out.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec(self, input1, input2): + input1.pow_(input2) + output = input1.numpy() + return output + + def npu_op_inplace_exec(self, input1, input2): + input1.pow_(input2) + output = input1.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_tensor_scalar(self, input1, n): + output = torch.pow(input1, n) + output = output.numpy() + return output + + def npu_op_exec_tensor_scalar(self, input1, n): + # input1 = input1.to("npu") + output = torch.pow(input1, n) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_tensor_scalar_out(self, input1, n, out): + # input1 = input1.to("npu") + output = torch.pow(input1, n, out=out) + output = out.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_scalar_tensor(self, n, input1): + output = torch.pow(n, input1) + output = output.numpy() + return output + + def npu_op_exec_scalar_tensor(self, n, input1): + output = torch.pow(n, input1) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar_tensor_out(self, n, input1, out): + torch.pow(n, input1, out=out) + output = out.to("cpu") + output = output.numpy() + return output + + def pow_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 1) + npu_input3 = copy.deepcopy(cpu_input1).to("npu") + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + cpu_output_inp = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output_inp = self.npu_op_inplace_exec(npu_input1, npu_input2) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output_out) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def pow_result_scalar_tensor(self, shape_format): + for item in shape_format: + scalar = np.random.randint(0, 1) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 1) + npu_input3 = copy.deepcopy(cpu_input1).to("npu") + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_scalar = self.cpu_op_exec_scalar_tensor(scalar, cpu_input1) + npu_output_scalar = self.npu_op_exec_scalar_tensor(scalar, npu_input1) + npu_output_scalar_out = self.npu_op_exec_scalar_tensor_out(scalar, npu_input1, npu_input3) + + cpu_output_scalar = cpu_output_scalar.astype(npu_output_scalar.dtype) + self.assertRtolEqual(cpu_output_scalar, npu_output_scalar) + self.assertRtolEqual(cpu_output_scalar, npu_output_scalar_out) + + def pow_result_tensor_scalar_(self, shape_format): + for item in shape_format: + scalar = np.random.randint(0, 1) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 1) + npu_input3 = copy.deepcopy(cpu_input1).to("npu") + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_tensor_scalar = self.cpu_op_exec_tensor_scalar(cpu_input1, scalar) + npu_output_tensor_scalar = self.npu_op_exec_tensor_scalar(npu_input1, scalar) + npu_output_tensor_scalar_out = self.npu_op_exec_tensor_scalar_out(npu_input1, scalar, npu_input3) + + cpu_output_tensor_scalar = cpu_output_tensor_scalar.astype(npu_output_tensor_scalar.dtype) + self.assertRtolEqual(cpu_output_tensor_scalar, npu_output_tensor_scalar) + self.assertRtolEqual(cpu_output_tensor_scalar, npu_output_tensor_scalar_out) + + # scalar_tensor------------------------------------------------------- + def test_pow_shape_format_scalar_tensor_fp16_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[np.float16, i, [18]] for i in format_list] + self.pow_result_scalar_tensor(shape_format) + + def test_pow_shape_format_scalar_tensor_fp32_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[np.float32, i, [18]] for i in format_list] + # shape_format = [np.float32, 0, [18]] + self.pow_result_scalar_tensor(shape_format) + + def test_pow_shape_format_scalar_tensor_fp16_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float16, i, [18, 64]] for i in format_list] + self.pow_result_scalar_tensor(shape_format) + + def test_pow_shape_format_scalar_tensor_fp32_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float32, i, [18, 64]] for i in format_list] + self.pow_result_scalar_tensor(shape_format) + + def test_pow_shape_format_scalar_tensor_fp16_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float16, i, [18, 64, 128]] for i in format_list] + self.pow_result_scalar_tensor(shape_format) + + def test_pow_shape_format_scalar_tensor_fp32_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float32, i, [18, 64, 128]] for i in format_list] + self.pow_result_scalar_tensor(shape_format) + + def test_pow_shape_format_scalar_tensor_fp16_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float16, i, [18, 64, 128, 256]] for i in format_list] + self.pow_result_scalar_tensor(shape_format) + + def test_pow_shape_format_scalar_tensor_fp32_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float32, i, [18, 64, 128, 256]] for i in format_list] + self.pow_result_scalar_tensor(shape_format) + + # tensor_scalar----------------------------------------------------------- + def test_pow_shape_format_tensor_scala_fp16_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[np.float16, i, [18]] for i in format_list] + self.pow_result_tensor_scalar_(shape_format) + + def test_pow_shape_format_tensor_scalar_fp32_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[np.float32, i, [18]] for i in format_list] + self.pow_result_tensor_scalar_(shape_format) + + def test_pow_shape_format_tensor_scala_fp16_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float16, i, [18, 64]] for i in format_list] + self.pow_result_tensor_scalar_(shape_format) + + def test_pow_shape_format_tensor_scalar_fp32_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float32, i, [18, 64]] for i in format_list] + self.pow_result_tensor_scalar_(shape_format) + + def test_pow_shape_format_tensor_scala_fp16_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float16, i, [18, 64, 128]] for i in format_list] + self.pow_result_tensor_scalar_(shape_format) + + def test_pow_shape_format_tensor_scalar_fp32_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float32, i, [18, 64, 128]] for i in format_list] + self.pow_result_tensor_scalar_(shape_format) + + def test_pow_shape_format_tensor_scala_fp16_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float16, i, [18, 64, 128, 256]] for i in format_list] + self.pow_result_tensor_scalar_(shape_format) + + def test_pow_shape_format_tensor_scalar_fp32_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float32, i, [18, 64, 128, 256]] for i in format_list] + self.pow_result_tensor_scalar_(shape_format) + + # tensor_tensor----------------------------------------------------------- + def test_pow_shape_format_fp16_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float16, i, [5]], [np.float16, i, []]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp32_1d(self, device): + format_list = [-1, 0, 3,] + shape_format = [[[np.float32, i, [5]], [np.float32, i, []]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp16_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [448, 1]], [np.float16, i, []]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp32_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [448, 1]], [np.float32, i, []]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp16_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [16, 640, 640]], [np.float16, i, []]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp32_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [16, 640, 640]], [np.float32, i, []]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp16_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, []]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp32_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [32, 3, 3, 3]], [np.float32, i, []]] for i in format_list] + self.pow_result(shape_format) + + #broadcast + def test_pow_shape_format_fp16_2d_broadcast(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [448, 20]], [np.float16, i, [448,1]]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp32_2d_broadcast(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [448, 20]], [np.float32, i, [448,1]]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp16_3d_broadcast(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [16, 640, 640]], [np.float16, i, [16, 640, 1]]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp32_3d_broadcast(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [16, 640, 640]], [np.float32, i, [16, 1, 1]]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp16_4d_broadcast(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, [32, 1, 1, 1]]] for i in format_list] + self.pow_result(shape_format) + + def test_pow_shape_format_fp32_4d_broadcast(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [32, 3, 3, 3]], [np.float32, i, [32, 3, 1, 1]]] for i in format_list] + self.pow_result(shape_format) + + +instantiate_device_type_tests(TestPow, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_prod.py b/test/test_npu/test_network_ops/test_prod.py old mode 100644 new mode 100755 index 985bf5912d833af19f86093cb59d9b40c1563220..1e97410c12307f2e073bf66e2db3bd4f8b911005 --- a/test/test_npu/test_network_ops/test_prod.py +++ b/test/test_npu/test_network_ops/test_prod.py @@ -1,457 +1,457 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestProd(TestCase): - def create_input_tensor(self, dtype, npu_format, shape, minValue, maxValue): - input1 = np.random.uniform(minValue, maxValue, shape).astype(dtype) - cpu_input = torch.from_numpy(input1) - npu_input = torch.from_numpy(input1).npu() - if npu_format != -1: - npu_input = npu_input.npu_format_cast(npu_format) - return cpu_input, npu_input - - def cpu_op_exec(self, input1): - output = torch.prod(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = torch.prod(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_dataType_exec(self, input1): - output = torch.prod(input1, dtype=torch.float32) - output = output.numpy() - return output - - def npu_op_dataType_exec(self, input1): - output = torch.prod(input1, dtype=torch.float32) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_dim_exec(self, input1, dim, keepdim): - output = torch.prod(input1, dim, keepdim) - output = output.numpy() - return output - - def npu_op_dim_exec(self, input1, dim, keepdim): - output = torch.prod(input1, dim, keepdim) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_dim_out_exec(self, input1, dim, keepdim, output): - output = torch.prod(input1, dim, keepdim, out = output) - output = output.numpy() - return output - - def npu_op_dim_out_exec(self, input1, dim, keepdim, output): - output = torch.prod(input1, dim, keepdim, out = output) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_dimname_exec(self, input1, dim, keepdim): - output = torch.prod(input1, dim, keepdim) - output = output.numpy() - return output - - def npu_op_dimname_exec(self, input1, dim, keepdim): - output = torch.prod(input1, dim, keepdim) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_dimname_out_exec(self, input1, dim, keepdim, output): - output = torch.prod(input1, dim, keepdim, out = output) - output = output.numpy() - return output - - def npu_op_dimname_out_exec(self, input1, dim, keepdim, output): - output = torch.prod(input1, dim, keepdim, out = output) - output = output.to("cpu") - output = output.numpy() - return output - - def prod_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - cpu_output_dataType = self.cpu_op_dataType_exec(cpu_input1) - npu_output_dataType = self.npu_op_dataType_exec(npu_input1) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_output_dataType = cpu_output_dataType.astype(npu_output_dataType.dtype) - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output_dataType, npu_output_dataType) - - def prod_dim_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output_dim = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) - npu_output_dim = self.npu_op_dim_exec(npu_input1, item[1], item[2]) - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - def output_shape(self, item): - output_size = list(item[0][2]) - dims = len(item[0][2]) - keepdim = item[2] - dim = item[1] - if dim < dims and keepdim == True: - output_size[dim] = 1 - if dim < dims and keepdim == False: - output_size.pop(dim) - return output_size - - def prod_dim_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - shapes = self.output_shape(item) - cpu_output, npu_output = self.create_input_tensor(item[0][0],item[0][1],shapes, 0, 1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = cpu_output.to(torch.float32) - cpu_output_dim = self.cpu_op_dim_out_exec(cpu_input1, item[1], item[2], cpu_output) - npu_output_dim = self.npu_op_dim_out_exec(npu_input1, item[1], item[2], npu_output) - - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - cpu_out = torch.tensor(0).to(input1.dtype) - npu_out = cpu_out.npu() - cpu_output_dim = self.cpu_op_dim_out_exec(cpu_input1, item[1], item[2], cpu_out) - npu_output_dim = self.npu_op_dim_out_exec(npu_input1, item[1], item[2], npu_out) - cpu_out = cpu_out.astype(npu_out.dtype) - self.assertRtolEqual(cpu_out, npu_out) - - def prod_dim_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - shapes = self.output_shape(item) - cpu_output, npu_output = self.create_input_tensor(item[0][0],item[0][1],shapes, 0, 1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = cpu_output.to(torch.float32) - cpu_output_dim = self.cpu_op_dim_out_exec(cpu_input1, item[1], item[2], cpu_output) - npu_output_dim = self.npu_op_dim_out_exec(npu_input1, item[1], item[2], npu_output) - - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - def prod_dim_name_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input1.names = item[0][3] - npu_input1.names = item[0][3] - - cpu_output_dim = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) - npu_output_dim = self.npu_op_dim_exec(npu_input1, item[1], item[2]) - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - def prod_dim_name_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - shapes = self.output_shape(item) - cpu_output, npu_output = self.create_input_tensor(item[0][0],item[0][1],shapes, 0, 1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = cpu_output.to(torch.float32) - cpu_input1.names = item[0][3] - npu_input1.names = item[0][3] - cpu_output_dim = self.cpu_op_dim_out_exec(cpu_input1, item[1], item[2], cpu_output) - npu_output_dim = self.npu_op_dim_out_exec(npu_input1, item[1], item[2], npu_output) - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - def test_prod_shape_format_fp16_1d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_result(shape_format) - - def test_prod_shape_format_fp32_1d(self, device): - format_list = [0, 3] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_result(shape_format) - - def test_prod_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list - for j in keepdim_list - ] - self.prod_result(shape_format) - - def test_prod_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 25]], np.random.randint(0, 2), j] for i in format_list - for j in keepdim_list - ] - self.prod_result(shape_format) - - def test_prod_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list - for j in keepdim_list - ] - self.prod_result(shape_format) - - def test_prod_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list - for j in keepdim_list - ] - self.prod_result(shape_format) - - def test_prod_shape_format_fp16_4d(self, device): - format_list = [0] - keepdim_list = [True] - shape_format = [[[np.float16, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] for i in format_list - for j in keepdim_list - ] - self.prod_result(shape_format) - - def test_prod_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] for i in format_list - for j in keepdim_list - ] - self.prod_result(shape_format) - - # dim------------------------------------------------------------- - - def test_prod_dim_shape_format_fp16_1d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_result(shape_format) - - def test_prod_dim_shape_format_fp32_1d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_result(shape_format) - - def test_prod_dim_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_result(shape_format) - - def test_prod_dim_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_result(shape_format) - - def test_prod_dim_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_result(shape_format) - - def test_prod_dim_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_result(shape_format) - - def test_prod_dim_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_result(shape_format) - - def test_prod_dim_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_result(shape_format) - - #prod.int_out - - def test_prod_dim_out_shape_format_fp16_1d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_out_result(shape_format) - - def test_prod_dim_out_shape_format_fp32_1d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_out_result(shape_format) - - def test_prod_dim_out_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_out_result(shape_format) - - def test_prod_dim_out_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64]], np.random.randint(0, 1), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_out_result(shape_format) - - def test_prod_dim_out_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float16, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_out_result(shape_format) - - def test_prod_dim_out_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list - for j in keepdim_list - ] - self.prod_dim_out_result(shape_format) - - def test_prod_dim_out_shape_format_fp16_4d(self, device): - format_list = [0,3,29] - keepdim_list = [True] - shape_format = [[[np.float16, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_out_result(shape_format) - - def test_prod_dim_out_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_out_result(shape_format) - - def test_prod_dim_name_shape_format_fp32_1d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18], ('N',)], np.random.randint(0, 1), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_name_result(shape_format) - - def test_prod_dim_name_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64], ('N','C')], np.random.randint(0, 2), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_name_result(shape_format) - - def test_prod_dim_name_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32], ('N','C','H')], np.random.randint(0, 3), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_name_result(shape_format) - - def test_prod_dim_name_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32, 128], ('N','C','H','W')], np.random.randint(0, 4), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_name_result(shape_format) - - def test_prod_dim_name_out_shape_format_fp32_1d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18], ('N',)], np.random.randint(0, 1), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_name_out_result(shape_format) - - def test_prod_dim_name_out_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64], ('N','C')], np.random.randint(0, 1), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_name_out_result(shape_format) - - def test_prod_dim_name_out_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32], ('N','C','H')], np.random.randint(0, 3), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_name_out_result(shape_format) - - def test_prod_dim_name_out_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - keepdim_list = [True, False] - shape_format = [[[np.float32, i, [18, 64, 32, 128], ('N','C','H','W')], np.random.randint(0, 4), j] - for i in format_list for j in keepdim_list - ] - self.prod_dim_name_out_result(shape_format) - -instantiate_device_type_tests(TestProd, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestProd(TestCase): + def create_input_tensor(self, dtype, npu_format, shape, minValue, maxValue): + input1 = np.random.uniform(minValue, maxValue, shape).astype(dtype) + cpu_input = torch.from_numpy(input1) + npu_input = torch.from_numpy(input1).npu() + if npu_format != -1: + npu_input = npu_input.npu_format_cast(npu_format) + return cpu_input, npu_input + + def cpu_op_exec(self, input1): + output = torch.prod(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = torch.prod(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_dataType_exec(self, input1): + output = torch.prod(input1, dtype=torch.float32) + output = output.numpy() + return output + + def npu_op_dataType_exec(self, input1): + output = torch.prod(input1, dtype=torch.float32) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_dim_exec(self, input1, dim, keepdim): + output = torch.prod(input1, dim, keepdim) + output = output.numpy() + return output + + def npu_op_dim_exec(self, input1, dim, keepdim): + output = torch.prod(input1, dim, keepdim) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_dim_out_exec(self, input1, dim, keepdim, output): + output = torch.prod(input1, dim, keepdim, out = output) + output = output.numpy() + return output + + def npu_op_dim_out_exec(self, input1, dim, keepdim, output): + output = torch.prod(input1, dim, keepdim, out = output) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_dimname_exec(self, input1, dim, keepdim): + output = torch.prod(input1, dim, keepdim) + output = output.numpy() + return output + + def npu_op_dimname_exec(self, input1, dim, keepdim): + output = torch.prod(input1, dim, keepdim) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_dimname_out_exec(self, input1, dim, keepdim, output): + output = torch.prod(input1, dim, keepdim, out = output) + output = output.numpy() + return output + + def npu_op_dimname_out_exec(self, input1, dim, keepdim, output): + output = torch.prod(input1, dim, keepdim, out = output) + output = output.to("cpu") + output = output.numpy() + return output + + def prod_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + cpu_output_dataType = self.cpu_op_dataType_exec(cpu_input1) + npu_output_dataType = self.npu_op_dataType_exec(npu_input1) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_output_dataType = cpu_output_dataType.astype(npu_output_dataType.dtype) + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output_dataType, npu_output_dataType) + + def prod_dim_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output_dim = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) + npu_output_dim = self.npu_op_dim_exec(npu_input1, item[1], item[2]) + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + def output_shape(self, item): + output_size = list(item[0][2]) + dims = len(item[0][2]) + keepdim = item[2] + dim = item[1] + if dim < dims and keepdim == True: + output_size[dim] = 1 + if dim < dims and keepdim == False: + output_size.pop(dim) + return output_size + + def prod_dim_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) + shapes = self.output_shape(item) + cpu_output, npu_output = self.create_input_tensor(item[0][0],item[0][1],shapes, 0, 1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = cpu_output.to(torch.float32) + cpu_output_dim = self.cpu_op_dim_out_exec(cpu_input1, item[1], item[2], cpu_output) + npu_output_dim = self.npu_op_dim_out_exec(npu_input1, item[1], item[2], npu_output) + + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + cpu_out = torch.tensor(0).to(input1.dtype) + npu_out = cpu_out.npu() + cpu_output_dim = self.cpu_op_dim_out_exec(cpu_input1, item[1], item[2], cpu_out) + npu_output_dim = self.npu_op_dim_out_exec(npu_input1, item[1], item[2], npu_out) + cpu_out = cpu_out.astype(npu_out.dtype) + self.assertRtolEqual(cpu_out, npu_out) + + def prod_dim_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) + shapes = self.output_shape(item) + cpu_output, npu_output = self.create_input_tensor(item[0][0],item[0][1],shapes, 0, 1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = cpu_output.to(torch.float32) + cpu_output_dim = self.cpu_op_dim_out_exec(cpu_input1, item[1], item[2], cpu_output) + npu_output_dim = self.npu_op_dim_out_exec(npu_input1, item[1], item[2], npu_output) + + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + def prod_dim_name_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input1.names = item[0][3] + npu_input1.names = item[0][3] + + cpu_output_dim = self.cpu_op_dim_exec(cpu_input1, item[1], item[2]) + npu_output_dim = self.npu_op_dim_exec(npu_input1, item[1], item[2]) + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + def prod_dim_name_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) + shapes = self.output_shape(item) + cpu_output, npu_output = self.create_input_tensor(item[0][0],item[0][1],shapes, 0, 1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = cpu_output.to(torch.float32) + cpu_input1.names = item[0][3] + npu_input1.names = item[0][3] + cpu_output_dim = self.cpu_op_dim_out_exec(cpu_input1, item[1], item[2], cpu_output) + npu_output_dim = self.npu_op_dim_out_exec(npu_input1, item[1], item[2], npu_output) + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + def test_prod_shape_format_fp16_1d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_result(shape_format) + + def test_prod_shape_format_fp32_1d(self, device): + format_list = [0, 3] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_result(shape_format) + + def test_prod_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 2), j] for i in format_list + for j in keepdim_list + ] + self.prod_result(shape_format) + + def test_prod_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 25]], np.random.randint(0, 2), j] for i in format_list + for j in keepdim_list + ] + self.prod_result(shape_format) + + def test_prod_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list + for j in keepdim_list + ] + self.prod_result(shape_format) + + def test_prod_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list + for j in keepdim_list + ] + self.prod_result(shape_format) + + def test_prod_shape_format_fp16_4d(self, device): + format_list = [0] + keepdim_list = [True] + shape_format = [[[np.float16, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] for i in format_list + for j in keepdim_list + ] + self.prod_result(shape_format) + + def test_prod_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] for i in format_list + for j in keepdim_list + ] + self.prod_result(shape_format) + + # dim------------------------------------------------------------- + + def test_prod_dim_shape_format_fp16_1d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_result(shape_format) + + def test_prod_dim_shape_format_fp32_1d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_result(shape_format) + + def test_prod_dim_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_result(shape_format) + + def test_prod_dim_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_result(shape_format) + + def test_prod_dim_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_result(shape_format) + + def test_prod_dim_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_result(shape_format) + + def test_prod_dim_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_result(shape_format) + + def test_prod_dim_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_result(shape_format) + + #prod.int_out + + def test_prod_dim_out_shape_format_fp16_1d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_out_result(shape_format) + + def test_prod_dim_out_shape_format_fp32_1d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_out_result(shape_format) + + def test_prod_dim_out_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 256]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_out_result(shape_format) + + def test_prod_dim_out_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64]], np.random.randint(0, 1), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_out_result(shape_format) + + def test_prod_dim_out_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float16, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_out_result(shape_format) + + def test_prod_dim_out_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32]], np.random.randint(0, 3), j] for i in format_list + for j in keepdim_list + ] + self.prod_dim_out_result(shape_format) + + def test_prod_dim_out_shape_format_fp16_4d(self, device): + format_list = [0,3,29] + keepdim_list = [True] + shape_format = [[[np.float16, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_out_result(shape_format) + + def test_prod_dim_out_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32, 128]], np.random.randint(0, 4), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_out_result(shape_format) + + def test_prod_dim_name_shape_format_fp32_1d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18], ('N',)], np.random.randint(0, 1), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_name_result(shape_format) + + def test_prod_dim_name_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64], ('N','C')], np.random.randint(0, 2), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_name_result(shape_format) + + def test_prod_dim_name_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32], ('N','C','H')], np.random.randint(0, 3), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_name_result(shape_format) + + def test_prod_dim_name_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32, 128], ('N','C','H','W')], np.random.randint(0, 4), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_name_result(shape_format) + + def test_prod_dim_name_out_shape_format_fp32_1d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18], ('N',)], np.random.randint(0, 1), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_name_out_result(shape_format) + + def test_prod_dim_name_out_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64], ('N','C')], np.random.randint(0, 1), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_name_out_result(shape_format) + + def test_prod_dim_name_out_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32], ('N','C','H')], np.random.randint(0, 3), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_name_out_result(shape_format) + + def test_prod_dim_name_out_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + keepdim_list = [True, False] + shape_format = [[[np.float32, i, [18, 64, 32, 128], ('N','C','H','W')], np.random.randint(0, 4), j] + for i in format_list for j in keepdim_list + ] + self.prod_dim_name_out_result(shape_format) + +instantiate_device_type_tests(TestProd, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_ptmuls.py b/test/test_npu/test_network_ops/test_ptmuls.py index e8952eb6c5fcb8470e93fc183d01f1784ceed1e9..5a23432b69b7cb2f27bcf331d3f5b2b22eb69239 100644 --- a/test/test_npu/test_network_ops/test_ptmuls.py +++ b/test/test_npu/test_network_ops/test_ptmuls.py @@ -1,69 +1,69 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestPtMuls(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.mul(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.mul(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def test_ptmuls_shape_format_fp16(self, device): - format_list = [0, 3, 4, 29] - shape_list = [(64, 10), (32, 3, 3), (256, 2048, 7, 7)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) - cpu_input2 = 4.0 - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, cpu_input2) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_ptmuls_shape_format_fp32(self, device): - format_list = [0, 3, 4, 29] - shape_list = [(64, 10), (32, 3, 3), (256, 2048, 7, 7)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) - cpu_input2 = 6.2 - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, cpu_input2) - self.assertRtolEqual(cpu_output, npu_output) - cpu_output1 = self.cpu_op_exec(cpu_input2, cpu_input1) - npu_output1 = self.npu_op_exec(cpu_input2, npu_input1) - self.assertRtolEqual(cpu_output1, npu_output1) - -instantiate_device_type_tests(TestPtMuls, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestPtMuls(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.mul(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.mul(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def test_ptmuls_shape_format_fp16(self, device): + format_list = [0, 3, 4, 29] + shape_list = [(64, 10), (32, 3, 3), (256, 2048, 7, 7)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) + cpu_input2 = 4.0 + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, cpu_input2) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_ptmuls_shape_format_fp32(self, device): + format_list = [0, 3, 4, 29] + shape_list = [(64, 10), (32, 3, 3), (256, 2048, 7, 7)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) + cpu_input2 = 6.2 + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, cpu_input2) + self.assertRtolEqual(cpu_output, npu_output) + cpu_output1 = self.cpu_op_exec(cpu_input2, cpu_input1) + npu_output1 = self.npu_op_exec(cpu_input2, npu_input1) + self.assertRtolEqual(cpu_output1, npu_output1) + +instantiate_device_type_tests(TestPtMuls, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() + diff --git a/test/test_npu/test_network_ops/test_qr.py b/test/test_npu/test_network_ops/test_qr.py index 52b2f94914f000593e7ffcfc83ef7a1ef5235de6..5b73a615e1ff54a23506077b2ee956c063fda56d 100644 --- a/test/test_npu/test_network_ops/test_qr.py +++ b/test/test_npu/test_network_ops/test_qr.py @@ -1,123 +1,123 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestQr(TestCase): - def cpu_op_exec(self, input1, some): - q, r = torch.qr(input1, some) - return q.numpy(), r.numpy() - - def cpu_op_exec_tuple(self, input1, some): - out = torch.qr(input1, some) - output_q = out.Q - output_r = out.R - output_q = output_q.numpy() - output_r = output_r.numpy() - return output_q, output_r, out - - def npu_op_exec(self, input1, some): - q, r = torch.qr(input1, some) - qout = q.to("cpu").numpy() - rout = r.to("cpu").numpy() - return qout, rout - - def npu_op_exec_tuple(self, input1, some): - out = torch.qr(input1.to("npu"), some) - output_q = out.Q - output_r = out.R - output_q = output_q.to("cpu") - output_r = output_r.to("cpu") - output_q = output_q.numpy() - output_r = output_r.numpy() - return output_q, output_r, out - - def npu_op_exec_out(self, input1, some, input2, input3): - torch.qr(input1, some, out=(input2, input3)) - qout = input2.to("cpu").numpy() - rout = input3.to("cpu").numpy() - return qout, rout - - def test_qr_shape_format(self, device): - # TODO(ascend): 算子目前 暂不支持fp16, 后续开发中 - dtype_list = [np.float32] - format_list = [-1] - # Note: - # precision may be lost if the magnitudes of the elements of input are large - shape_list = [ - [3, 4], - [2, 30, 30], - [20, 10, 20], - [8, 6, 50, 20], - [10, 4, 6, 15, 13] - ] - somes_list = [True, False] - shape_format = [ - [[d, i, j], l] for d in dtype_list for i in format_list - for j in shape_list for l in somes_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 10) - npu_input2 = torch.empty(0).npu().to(cpu_input1.dtype) - npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output1, cpu_output2 = self.cpu_op_exec(cpu_input1, item[1]) - npu_output1, npu_output2 = self.npu_op_exec(npu_input1, item[1]) - npu_output1_out, npu_output2_out = self.npu_op_exec_out(npu_input1, item[1], npu_input2, npu_input3) - cpu_output1 = cpu_output1.astype(npu_output1.dtype) - cpu_output2 = cpu_output2.astype(npu_output2.dtype) - - self.assertRtolEqual(cpu_output1, npu_output1) - self.assertRtolEqual(cpu_output2, npu_output2) - self.assertRtolEqual(npu_output1_out, npu_output1) - self.assertRtolEqual(npu_output2_out, npu_output2) - - def test_qr_common_shape_format(self, device): - shape_format = [ - [np.float32, -1, (5, 3)], - [np.float32, -1, (1, 64, 147, 147)], - [np.float32, -1, (65536, 14, 7, 1)], - [np.int32, -1, (1000000, 3, 3, 1)], - [np.int32, -1, (1024, 107, 31, 2)], - [np.int32, -1, (1, 128, 1, 1)] - ] - somes = [True, False] - for item in shape_format: - for some in somes: - cpu_input1, npu_input1 = create_common_tensor(item, -0.001, 0.001) - if cpu_input1.dtype == torch.int32: - cpu_input1 = cpu_input1.to(torch.float32) - if npu_input1.dtype == torch.int32: - npu_input1 = npu_input1.to(torch.float32) - cpu_output_q, cpu_output_r, cpu_out = self.cpu_op_exec_tuple(cpu_input1, some) - npu_output_q, npu_output_r, npu_out = self.npu_op_exec_tuple(npu_input1, some) - npu_output = np.matmul(npu_output_q, npu_output_r) - - self.assertRtolEqual(cpu_output_q, npu_output_q) - self.assertRtolEqual(cpu_output_r, npu_output_r) - self.assertRtolEqual(cpu_input1.numpy(), npu_output) - -instantiate_device_type_tests(TestQr, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestQr(TestCase): + def cpu_op_exec(self, input1, some): + q, r = torch.qr(input1, some) + return q.numpy(), r.numpy() + + def cpu_op_exec_tuple(self, input1, some): + out = torch.qr(input1, some) + output_q = out.Q + output_r = out.R + output_q = output_q.numpy() + output_r = output_r.numpy() + return output_q, output_r, out + + def npu_op_exec(self, input1, some): + q, r = torch.qr(input1, some) + qout = q.to("cpu").numpy() + rout = r.to("cpu").numpy() + return qout, rout + + def npu_op_exec_tuple(self, input1, some): + out = torch.qr(input1.to("npu"), some) + output_q = out.Q + output_r = out.R + output_q = output_q.to("cpu") + output_r = output_r.to("cpu") + output_q = output_q.numpy() + output_r = output_r.numpy() + return output_q, output_r, out + + def npu_op_exec_out(self, input1, some, input2, input3): + torch.qr(input1, some, out=(input2, input3)) + qout = input2.to("cpu").numpy() + rout = input3.to("cpu").numpy() + return qout, rout + + def test_qr_shape_format(self, device): + # TODO(ascend): 算子目前 暂不支持fp16, 后续开发中 + dtype_list = [np.float32] + format_list = [-1] + # Note: + # precision may be lost if the magnitudes of the elements of input are large + shape_list = [ + [3, 4], + [2, 30, 30], + [20, 10, 20], + [8, 6, 50, 20], + [10, 4, 6, 15, 13] + ] + somes_list = [True, False] + shape_format = [ + [[d, i, j], l] for d in dtype_list for i in format_list + for j in shape_list for l in somes_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 10) + npu_input2 = torch.empty(0).npu().to(cpu_input1.dtype) + npu_input3 = torch.empty(0).npu().to(cpu_input1.dtype) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output1, cpu_output2 = self.cpu_op_exec(cpu_input1, item[1]) + npu_output1, npu_output2 = self.npu_op_exec(npu_input1, item[1]) + npu_output1_out, npu_output2_out = self.npu_op_exec_out(npu_input1, item[1], npu_input2, npu_input3) + cpu_output1 = cpu_output1.astype(npu_output1.dtype) + cpu_output2 = cpu_output2.astype(npu_output2.dtype) + + self.assertRtolEqual(cpu_output1, npu_output1) + self.assertRtolEqual(cpu_output2, npu_output2) + self.assertRtolEqual(npu_output1_out, npu_output1) + self.assertRtolEqual(npu_output2_out, npu_output2) + + def test_qr_common_shape_format(self, device): + shape_format = [ + [np.float32, -1, (5, 3)], + [np.float32, -1, (1, 64, 147, 147)], + [np.float32, -1, (65536, 14, 7, 1)], + [np.int32, -1, (1000000, 3, 3, 1)], + [np.int32, -1, (1024, 107, 31, 2)], + [np.int32, -1, (1, 128, 1, 1)] + ] + somes = [True, False] + for item in shape_format: + for some in somes: + cpu_input1, npu_input1 = create_common_tensor(item, -0.001, 0.001) + if cpu_input1.dtype == torch.int32: + cpu_input1 = cpu_input1.to(torch.float32) + if npu_input1.dtype == torch.int32: + npu_input1 = npu_input1.to(torch.float32) + cpu_output_q, cpu_output_r, cpu_out = self.cpu_op_exec_tuple(cpu_input1, some) + npu_output_q, npu_output_r, npu_out = self.npu_op_exec_tuple(npu_input1, some) + npu_output = np.matmul(npu_output_q, npu_output_r) + + self.assertRtolEqual(cpu_output_q, npu_output_q) + self.assertRtolEqual(cpu_output_r, npu_output_r) + self.assertRtolEqual(cpu_input1.numpy(), npu_output) + +instantiate_device_type_tests(TestQr, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_reciprocal.py b/test/test_npu/test_network_ops/test_reciprocal.py old mode 100644 new mode 100755 index fac894fb43ee9e933053c14ff3687a47e551b3d9..058188af46c9ffcf78068fef81fa939008b2be29 --- a/test/test_npu/test_network_ops/test_reciprocal.py +++ b/test/test_npu/test_network_ops/test_reciprocal.py @@ -1,156 +1,156 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestReciprocal(TestCase): - def cpu_op_exec(self, input1): - output = torch.reciprocal(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = torch.reciprocal(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2): - output = input2.to("npu") - torch.reciprocal(input1, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_inp_op_exec(self, input1): - output = torch.reciprocal_(input1) - output = output.numpy() - return output - - def npu_inp_op_exec(self, input1): - output = torch.reciprocal_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def reciprocal_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - cpu_output_inp = self.cpu_inp_op_exec(cpu_input1) - npu_output_inp = self.npu_inp_op_exec(npu_input1) - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def reciprocal_result_out(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2) - cpu_output = cpu_output.astype(npu_output_out.dtype) - - self.assertRtolEqual(cpu_output, npu_output_out) - - def test_reciprocal_shape_format_fp16_out(self, device): - shape_format = [[[np.float16, 0, [18]], [np.float16, 0, [18, 20]]], - [[np.float16, 0, [18, 20, 30]], [np.float16, 0, [18, 20, 30]]], - [[np.float16, 0, [18, 10, 10, 20]], [np.float16, 0, [18, 10, 20]]], - [[np.float16, 3, [18]], [np.float16, 3, [18, 20]]], - [[np.float16, 3, [18, 20, 30]], [np.float16, 3, [18, 20, 30]]], - [[np.float16, 3, [18, 10, 10, 20]], [np.float16, 3, [18, 10, 20]]], - [[np.float16, 4, [18]], [np.float16, 4, [18, 20]]], - [[np.float16, 4, [18, 20, 30]], [np.float16, 4, [18, 20, 30]]], - [[np.float16, 4, [18, 10, 10, 20]], [np.float16, 4, [18, 10, 20]]], - ] - self.reciprocal_result_out(shape_format) - - def test_reciprocal_shape_format_fp32_out(self, device): - shape_format = [[[np.float32, 0, [18]], [np.float32, 0, [18, 20]]], - [[np.float32, 0, [18, 20, 30]], [np.float32, 0, [18, 20, 30]]], - [[np.float32, 0, [18, 10, 10, 20]], [np.float32, 0, [18, 10, 20]]], - [[np.float32, 3, [18]], [np.float32, 3, [18, 20]]], - [[np.float32, 3, [18, 20, 30]], [np.float32, 3, [18, 20, 30]]], - [[np.float32, 3, [18, 10, 10, 20]], [np.float32, 3, [18, 10, 20]]], - [[np.float32, 4, [18]], [np.float32, 4, [18, 20]]], - [[np.float32, 4, [18, 20, 30]], [np.float32, 4, [18, 20, 30]]], - [[np.float32, 4, [18, 10, 10, 20]], [np.float32, 4, [18, 10, 20]]], - ] - self.reciprocal_result_out(shape_format) - - def test_reciprocal_shape_format_fp16_1d(self, device): - format_list = [0, 3, 4] - shape_format = [[np.float16, i, [18]] for i in format_list - ] - self.reciprocal_result(shape_format) - - def test_reciprocal_shape_format_fp32_1d(self, device): - format_list = [0, 3, 4] - shape_format = [[np.float32, i, [256]] for i in format_list - ] - self.reciprocal_result(shape_format) - - def test_reciprocal_shape_format_fp16_2d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [[np.float16, i, [64, 516]] for i in format_list - ] - self.reciprocal_result(shape_format) - - def test_reciprocal_shape_format_fp32_2d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [[np.float32, i, [64, 516]] for i in format_list - ] - self.reciprocal_result(shape_format) - - def test_reciprocal_shape_format_fp16_3d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [[np.float16, i, [64, 124, 516]] for i in format_list - ] - self.reciprocal_result(shape_format) - - def test_reciprocal_shape_format_fp32_3d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [[np.float32, i, [64, 124, 516]] for i in format_list - ] - self.reciprocal_result(shape_format) - - def test_reciprocal_shape_format_fp16_4d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [[np.float16, i, [64, 128, 516, 32]] for i in format_list - ] - self.reciprocal_result(shape_format) - - def test_reciprocal_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [[np.float32, i, [64, 128, 516, 32]] for i in format_list - ] - self.reciprocal_result(shape_format) - - -instantiate_device_type_tests(TestReciprocal, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestReciprocal(TestCase): + def cpu_op_exec(self, input1): + output = torch.reciprocal(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = torch.reciprocal(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2): + output = input2.to("npu") + torch.reciprocal(input1, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_inp_op_exec(self, input1): + output = torch.reciprocal_(input1) + output = output.numpy() + return output + + def npu_inp_op_exec(self, input1): + output = torch.reciprocal_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def reciprocal_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + cpu_output_inp = self.cpu_inp_op_exec(cpu_input1) + npu_output_inp = self.npu_inp_op_exec(npu_input1) + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def reciprocal_result_out(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2) + cpu_output = cpu_output.astype(npu_output_out.dtype) + + self.assertRtolEqual(cpu_output, npu_output_out) + + def test_reciprocal_shape_format_fp16_out(self, device): + shape_format = [[[np.float16, 0, [18]], [np.float16, 0, [18, 20]]], + [[np.float16, 0, [18, 20, 30]], [np.float16, 0, [18, 20, 30]]], + [[np.float16, 0, [18, 10, 10, 20]], [np.float16, 0, [18, 10, 20]]], + [[np.float16, 3, [18]], [np.float16, 3, [18, 20]]], + [[np.float16, 3, [18, 20, 30]], [np.float16, 3, [18, 20, 30]]], + [[np.float16, 3, [18, 10, 10, 20]], [np.float16, 3, [18, 10, 20]]], + [[np.float16, 4, [18]], [np.float16, 4, [18, 20]]], + [[np.float16, 4, [18, 20, 30]], [np.float16, 4, [18, 20, 30]]], + [[np.float16, 4, [18, 10, 10, 20]], [np.float16, 4, [18, 10, 20]]], + ] + self.reciprocal_result_out(shape_format) + + def test_reciprocal_shape_format_fp32_out(self, device): + shape_format = [[[np.float32, 0, [18]], [np.float32, 0, [18, 20]]], + [[np.float32, 0, [18, 20, 30]], [np.float32, 0, [18, 20, 30]]], + [[np.float32, 0, [18, 10, 10, 20]], [np.float32, 0, [18, 10, 20]]], + [[np.float32, 3, [18]], [np.float32, 3, [18, 20]]], + [[np.float32, 3, [18, 20, 30]], [np.float32, 3, [18, 20, 30]]], + [[np.float32, 3, [18, 10, 10, 20]], [np.float32, 3, [18, 10, 20]]], + [[np.float32, 4, [18]], [np.float32, 4, [18, 20]]], + [[np.float32, 4, [18, 20, 30]], [np.float32, 4, [18, 20, 30]]], + [[np.float32, 4, [18, 10, 10, 20]], [np.float32, 4, [18, 10, 20]]], + ] + self.reciprocal_result_out(shape_format) + + def test_reciprocal_shape_format_fp16_1d(self, device): + format_list = [0, 3, 4] + shape_format = [[np.float16, i, [18]] for i in format_list + ] + self.reciprocal_result(shape_format) + + def test_reciprocal_shape_format_fp32_1d(self, device): + format_list = [0, 3, 4] + shape_format = [[np.float32, i, [256]] for i in format_list + ] + self.reciprocal_result(shape_format) + + def test_reciprocal_shape_format_fp16_2d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [[np.float16, i, [64, 516]] for i in format_list + ] + self.reciprocal_result(shape_format) + + def test_reciprocal_shape_format_fp32_2d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [[np.float32, i, [64, 516]] for i in format_list + ] + self.reciprocal_result(shape_format) + + def test_reciprocal_shape_format_fp16_3d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [[np.float16, i, [64, 124, 516]] for i in format_list + ] + self.reciprocal_result(shape_format) + + def test_reciprocal_shape_format_fp32_3d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [[np.float32, i, [64, 124, 516]] for i in format_list + ] + self.reciprocal_result(shape_format) + + def test_reciprocal_shape_format_fp16_4d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [[np.float16, i, [64, 128, 516, 32]] for i in format_list + ] + self.reciprocal_result(shape_format) + + def test_reciprocal_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [[np.float32, i, [64, 128, 516, 32]] for i in format_list + ] + self.reciprocal_result(shape_format) + + +instantiate_device_type_tests(TestReciprocal, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_relu.py b/test/test_npu/test_network_ops/test_relu.py old mode 100644 new mode 100755 index c05e82ec5a045be515930a512f2fa1c21f03272d..7d2f578d46ef792c6909ccafc8f9995ae86d3609 --- a/test/test_npu/test_network_ops/test_relu.py +++ b/test/test_npu/test_network_ops/test_relu.py @@ -1,107 +1,107 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestRelu(TestCase): - def cpu_op_back_exec(self, input1): - w = torch.ones_like(input1) - input1.requires_grad_(True) - output = torch.relu(input1) - output.backward(w) - res = input1.grad - res = res.numpy() - return output.detach().numpy(), res - - def npu_op_back_exec(self, input1): - w = torch.ones_like(input1) - input1.requires_grad_(True) - output = torch.relu(input1) - output.backward(w) - output = output.to("cpu") - res = input1.grad.to("cpu") - res = res.numpy() - return output.detach().numpy(), res - - def cpu_inp_op_exec(self, input1): - output = torch.relu_(input1) - output = output.numpy() - return output - - def npu_inp_op_exec(self, input1): - output = torch.relu_(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_relu_shape_format_fp32(self, device): - format_list = [0, 3, 4, 29] - shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output, cpu_res = self.cpu_op_back_exec(cpu_input1) - npu_output, npu_res = self.npu_op_back_exec(npu_input1) - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_res = cpu_res.astype(npu_res.dtype) - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_res, npu_res) - - def test_relu_shape_format_fp16(self, device): - format_list = [0, 3, 4, 29] - shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output, cpu_res = self.cpu_op_back_exec(cpu_input1) - npu_output, npu_res = self.npu_op_back_exec(npu_input1) - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_res = cpu_res.astype(npu_res.dtype) - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_res, npu_res) - - def test_relu_shape_format_fp16_inp(self, device): - format_list = [0, 3, 4, 29] - shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_inp_op_exec(cpu_input1) - npu_output = self.npu_inp_op_exec(npu_input1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestRelu, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestRelu(TestCase): + def cpu_op_back_exec(self, input1): + w = torch.ones_like(input1) + input1.requires_grad_(True) + output = torch.relu(input1) + output.backward(w) + res = input1.grad + res = res.numpy() + return output.detach().numpy(), res + + def npu_op_back_exec(self, input1): + w = torch.ones_like(input1) + input1.requires_grad_(True) + output = torch.relu(input1) + output.backward(w) + output = output.to("cpu") + res = input1.grad.to("cpu") + res = res.numpy() + return output.detach().numpy(), res + + def cpu_inp_op_exec(self, input1): + output = torch.relu_(input1) + output = output.numpy() + return output + + def npu_inp_op_exec(self, input1): + output = torch.relu_(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def test_relu_shape_format_fp32(self, device): + format_list = [0, 3, 4, 29] + shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output, cpu_res = self.cpu_op_back_exec(cpu_input1) + npu_output, npu_res = self.npu_op_back_exec(npu_input1) + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_res = cpu_res.astype(npu_res.dtype) + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_res, npu_res) + + def test_relu_shape_format_fp16(self, device): + format_list = [0, 3, 4, 29] + shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output, cpu_res = self.cpu_op_back_exec(cpu_input1) + npu_output, npu_res = self.npu_op_back_exec(npu_input1) + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_res = cpu_res.astype(npu_res.dtype) + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_res, npu_res) + + def test_relu_shape_format_fp16_inp(self, device): + format_list = [0, 3, 4, 29] + shape_list = [(1000, 1280), (32, 3, 3), (1024, 464, 7, 7)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_inp_op_exec(cpu_input1) + npu_output = self.npu_inp_op_exec(npu_input1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestRelu, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_remainder.py b/test/test_npu/test_network_ops/test_remainder.py old mode 100644 new mode 100755 index 4d017a53c5189eb0cf6cc43e31676d63a38a9bf5..64f6311b8a6e2856f334bcd29a4f77b4004ac9d2 --- a/test/test_npu/test_network_ops/test_remainder.py +++ b/test/test_npu/test_network_ops/test_remainder.py @@ -1,228 +1,228 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestRemainder(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.remainder(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.remainder(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, out): - # output = out.to("npu") - output = torch.remainder(input1, input2, out=out) - output = out.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec(self, input1, input2): - output = input1.remainder_(input2) - output = output.numpy() - return output - - def npu_op_inplace_exec(self, input1, input2): - output = input1.remainder_(input2) - output = input1.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar(self, input1, input2): - # input1 = input1.to("npu") - output = torch.remainder(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def remainder_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - npu_input3 = torch.randn(6).to("npu") - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - self.assertRtolEqual(cpu_output, npu_output_out) - - def remainder_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) - npu_input3 = copy.deepcopy(cpu_input1).to("npu") - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) - cpu_output_inplace = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) - npu_output_inplace = self.npu_op_inplace_exec(npu_input1, npu_input2) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_output_inplace = cpu_output_inplace.astype(npu_output_inplace.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output_inplace, npu_output_inplace) - self.assertRtolEqual(cpu_output, npu_output_out) - - def remainder_scalar_result(self, shape_format): - for item in shape_format: - scalar = np.random.uniform(0, 100) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 2) - npu_input3 = copy.deepcopy(cpu_input1).to("npu") - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, scalar) - npu_output_scalar = self.npu_op_exec_scalar(npu_input1, scalar) - npu_output_out = self.npu_op_exec_out(npu_input1, scalar, npu_input3) - - cpu_output = cpu_output.astype(npu_output_scalar.dtype) - self.assertRtolEqual(cpu_output, npu_output_scalar) - self.assertRtolEqual(cpu_output, npu_output_out) - - def test_remainder_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float16, i, [4]] for i in format_list - ] - self.remainder_result(shape_format) - - def test_remainder_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float32, i, [4]] for i in format_list - ] - self.remainder_result(shape_format) - - def test_remainder_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [4, 18]] for i in format_list - ] - self.remainder_result(shape_format) - - def test_remainder_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [4, 18]] for i in format_list - ] - self.remainder_result(shape_format) - - def test_remainder_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [4, 18, 32]] for i in format_list - ] - self.remainder_result(shape_format) - - def test_remainder_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [4, 18, 32]] for i in format_list - ] - self.remainder_result(shape_format) - - def test_remainder_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [4, 18, 32, 128]] for i in format_list - ] - self.remainder_result(shape_format) - - def test_remainder_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [4, 18, 32, 128]] for i in format_list - ] - self.remainder_result(shape_format) - - # scalar---------------------------------------------------------- - def test_remainder_scalar_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float16, i, [4]] for i in format_list - ] - self.remainder_scalar_result(shape_format) - - def test_remainder_scalar_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float32, i, [4]] for i in format_list - ] - self.remainder_scalar_result(shape_format) - - def test_remainder_scalar_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [4, 18]] for i in format_list - ] - self.remainder_scalar_result(shape_format) - - def test_remainder_scalar_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [4, 18]] for i in format_list - ] - self.remainder_scalar_result(shape_format) - - def test_remainder_scalar_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [4, 18, 32]] for i in format_list - ] - self.remainder_scalar_result(shape_format) - - def test_remainder_scalar_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [4, 18, 32]] for i in format_list - ] - self.remainder_scalar_result(shape_format) - - def test_remainder_scalar_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [4, 18, 32, 128]] for i in format_list - ] - self.remainder_scalar_result(shape_format) - - def test_remainder_scalar_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [4, 18, 32, 128]] for i in format_list - ] - self.remainder_scalar_result(shape_format) - - def test_remainder_mix_dtype_1(self, device): - npu_input1, npu_input2 = create_common_tensor([np.int32, 0, (2, 3)], 1, 100) - npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - cpu_output = self.cpu_op_exec(npu_input1, npu_input3) - npu_output = self.npu_op_exec(npu_input1, npu_input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_remainder_mix_dtype_2(self, device): - npu_input1, npu_input2 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) - npu_input3 = torch.tensor(3).int() - cpu_output = self.cpu_op_exec(npu_input1, npu_input3) - npu_output = self.npu_op_exec(npu_input1, npu_input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_remainder_scalar_shape_format_fp32_out_4d(self, device): - format_list = [0] - shape_format = [[np.float32, i, [4, 18, 32, 128]] for i in format_list - ] - self.remainder_out_result(shape_format) - - -instantiate_device_type_tests(TestRemainder, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestRemainder(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.remainder(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.remainder(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, out): + # output = out.to("npu") + output = torch.remainder(input1, input2, out=out) + output = out.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec(self, input1, input2): + output = input1.remainder_(input2) + output = output.numpy() + return output + + def npu_op_inplace_exec(self, input1, input2): + output = input1.remainder_(input2) + output = input1.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar(self, input1, input2): + # input1 = input1.to("npu") + output = torch.remainder(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def remainder_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + npu_input3 = torch.randn(6).to("npu") + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + self.assertRtolEqual(cpu_output, npu_output_out) + + def remainder_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 0, 100) + npu_input3 = copy.deepcopy(cpu_input1).to("npu") + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3) + cpu_output_inplace = self.cpu_op_inplace_exec(cpu_input1, cpu_input2) + npu_output_inplace = self.npu_op_inplace_exec(npu_input1, npu_input2) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_output_inplace = cpu_output_inplace.astype(npu_output_inplace.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output_inplace, npu_output_inplace) + self.assertRtolEqual(cpu_output, npu_output_out) + + def remainder_scalar_result(self, shape_format): + for item in shape_format: + scalar = np.random.uniform(0, 100) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 2) + npu_input3 = copy.deepcopy(cpu_input1).to("npu") + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, scalar) + npu_output_scalar = self.npu_op_exec_scalar(npu_input1, scalar) + npu_output_out = self.npu_op_exec_out(npu_input1, scalar, npu_input3) + + cpu_output = cpu_output.astype(npu_output_scalar.dtype) + self.assertRtolEqual(cpu_output, npu_output_scalar) + self.assertRtolEqual(cpu_output, npu_output_out) + + def test_remainder_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float16, i, [4]] for i in format_list + ] + self.remainder_result(shape_format) + + def test_remainder_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float32, i, [4]] for i in format_list + ] + self.remainder_result(shape_format) + + def test_remainder_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [4, 18]] for i in format_list + ] + self.remainder_result(shape_format) + + def test_remainder_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [4, 18]] for i in format_list + ] + self.remainder_result(shape_format) + + def test_remainder_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [4, 18, 32]] for i in format_list + ] + self.remainder_result(shape_format) + + def test_remainder_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [4, 18, 32]] for i in format_list + ] + self.remainder_result(shape_format) + + def test_remainder_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [4, 18, 32, 128]] for i in format_list + ] + self.remainder_result(shape_format) + + def test_remainder_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [4, 18, 32, 128]] for i in format_list + ] + self.remainder_result(shape_format) + + # scalar---------------------------------------------------------- + def test_remainder_scalar_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float16, i, [4]] for i in format_list + ] + self.remainder_scalar_result(shape_format) + + def test_remainder_scalar_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float32, i, [4]] for i in format_list + ] + self.remainder_scalar_result(shape_format) + + def test_remainder_scalar_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [4, 18]] for i in format_list + ] + self.remainder_scalar_result(shape_format) + + def test_remainder_scalar_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [4, 18]] for i in format_list + ] + self.remainder_scalar_result(shape_format) + + def test_remainder_scalar_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [4, 18, 32]] for i in format_list + ] + self.remainder_scalar_result(shape_format) + + def test_remainder_scalar_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [4, 18, 32]] for i in format_list + ] + self.remainder_scalar_result(shape_format) + + def test_remainder_scalar_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [4, 18, 32, 128]] for i in format_list + ] + self.remainder_scalar_result(shape_format) + + def test_remainder_scalar_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [4, 18, 32, 128]] for i in format_list + ] + self.remainder_scalar_result(shape_format) + + def test_remainder_mix_dtype_1(self, device): + npu_input1, npu_input2 = create_common_tensor([np.int32, 0, (2, 3)], 1, 100) + npu_input3, npu_input4 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + cpu_output = self.cpu_op_exec(npu_input1, npu_input3) + npu_output = self.npu_op_exec(npu_input1, npu_input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_remainder_mix_dtype_2(self, device): + npu_input1, npu_input2 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100) + npu_input3 = torch.tensor(3).int() + cpu_output = self.cpu_op_exec(npu_input1, npu_input3) + npu_output = self.npu_op_exec(npu_input1, npu_input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_remainder_scalar_shape_format_fp32_out_4d(self, device): + format_list = [0] + shape_format = [[np.float32, i, [4, 18, 32, 128]] for i in format_list + ] + self.remainder_out_result(shape_format) + + +instantiate_device_type_tests(TestRemainder, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_resize_.py b/test/test_npu/test_network_ops/test_resize_.py index bafaff5300e2777698a8fbc57f1e899c1f305bd9..230ecd156a53dbf0a8354bab2dbdbcdc81aaaf43 100644 --- a/test/test_npu/test_network_ops/test_resize_.py +++ b/test/test_npu/test_network_ops/test_resize_.py @@ -1,69 +1,69 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import itertools -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests - - -class TestResize(TestCase): - def cpu_op_exec(self, cpu_in, cpu_out, shape, op): - cpu_out.resize_(shape) - op(cpu_in, cpu_in, out=cpu_out) - return cpu_out - - def npu_op_exec(self, npu_in, npu_out, shape, op): - npu_out.resize_(shape) - op(npu_in, npu_in, out=npu_out) - return npu_out - - def op_result_cmp_(self, shape_a, shape_b, op, is_contiguous=False): - a = torch.rand(shape_a) - b = torch.full(shape_b, 100.) - if is_contiguous: - b = b.t() - cpu = self.cpu_op_exec(a, b, shape_a, op) - - nb = torch.full(shape_b, 100.) - a_npu = a.npu() - b_npu = nb.npu() - if is_contiguous: - b_npu = b_npu.t() - npu = self.npu_op_exec(a_npu, b_npu, shape_a, op) - - cpu.add_(10) - npu.add_(10) - self.assertRtolEqual(cpu.numpy(), npu.cpu().numpy()) - - def test_op_resize_(self, device): - operators = [torch.add, torch.mul, torch.matmul] - shape_a = (5, 5) - contiguous = [True, False] - - smalls = [(0, ), (1, ), (3, 1), (2, 3)] - for shape_b, op, is_contiguous in itertools.product(smalls, operators, contiguous): - self.op_result_cmp_(shape_a, shape_b, op, is_contiguous) - - bigs = [(10, 9), (11, 11), (8, 11)] - for shape_b, op, is_contiguous in itertools.product(bigs, operators, contiguous): - self.op_result_cmp_(shape_a, shape_b, op, is_contiguous) - - -instantiate_device_type_tests(TestResize, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import itertools +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests + + +class TestResize(TestCase): + def cpu_op_exec(self, cpu_in, cpu_out, shape, op): + cpu_out.resize_(shape) + op(cpu_in, cpu_in, out=cpu_out) + return cpu_out + + def npu_op_exec(self, npu_in, npu_out, shape, op): + npu_out.resize_(shape) + op(npu_in, npu_in, out=npu_out) + return npu_out + + def op_result_cmp_(self, shape_a, shape_b, op, is_contiguous=False): + a = torch.rand(shape_a) + b = torch.full(shape_b, 100.) + if is_contiguous: + b = b.t() + cpu = self.cpu_op_exec(a, b, shape_a, op) + + nb = torch.full(shape_b, 100.) + a_npu = a.npu() + b_npu = nb.npu() + if is_contiguous: + b_npu = b_npu.t() + npu = self.npu_op_exec(a_npu, b_npu, shape_a, op) + + cpu.add_(10) + npu.add_(10) + self.assertRtolEqual(cpu.numpy(), npu.cpu().numpy()) + + def test_op_resize_(self, device): + operators = [torch.add, torch.mul, torch.matmul] + shape_a = (5, 5) + contiguous = [True, False] + + smalls = [(0, ), (1, ), (3, 1), (2, 3)] + for shape_b, op, is_contiguous in itertools.product(smalls, operators, contiguous): + self.op_result_cmp_(shape_a, shape_b, op, is_contiguous) + + bigs = [(10, 9), (11, 11), (8, 11)] + for shape_b, op, is_contiguous in itertools.product(bigs, operators, contiguous): + self.op_result_cmp_(shape_a, shape_b, op, is_contiguous) + + +instantiate_device_type_tests(TestResize, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_resize_as.py b/test/test_npu/test_network_ops/test_resize_as.py index 15363c6a6f6af5bcd30e16652778470c8572f52d..c1fc5835be0a311a4d42565eb4829cc0efefc260 100644 --- a/test/test_npu/test_network_ops/test_resize_as.py +++ b/test/test_npu/test_network_ops/test_resize_as.py @@ -1,61 +1,61 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestResizeAs(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.resize_as_(input1, input2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = torch.resize_as_(input1, input2) - output = output.cpu() - output = output.numpy() - return output - - def test_resize_as_type_format(self, device): - shape_format = [ - [torch.float32, (1, 2), (3, 4)], - [torch.float32, (1, 2, 5), (3, 4, 7)], - [torch.float16, (2, 3, 4), (5, 6, 7)] - ] - - for item in shape_format: - cpu_input1 = torch.randn(item[1]) - cpu_input2 = torch.randn(item[2]) - - if item[0] == torch.float16: - cpu_input1 = cpu_input1.to(torch.float16) - cpu_input2 = cpu_input2.to(torch.float16) - - npu_input1 = cpu_input1.npu() - npu_input2 = cpu_input2.npu() - - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - - self.assertEqual(cpu_output.shape, npu_output.shape) - - -instantiate_device_type_tests(TestResizeAs, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestResizeAs(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.resize_as_(input1, input2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = torch.resize_as_(input1, input2) + output = output.cpu() + output = output.numpy() + return output + + def test_resize_as_type_format(self, device): + shape_format = [ + [torch.float32, (1, 2), (3, 4)], + [torch.float32, (1, 2, 5), (3, 4, 7)], + [torch.float16, (2, 3, 4), (5, 6, 7)] + ] + + for item in shape_format: + cpu_input1 = torch.randn(item[1]) + cpu_input2 = torch.randn(item[2]) + + if item[0] == torch.float16: + cpu_input1 = cpu_input1.to(torch.float16) + cpu_input2 = cpu_input2.to(torch.float16) + + npu_input1 = cpu_input1.npu() + npu_input2 = cpu_input2.npu() + + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + + self.assertEqual(cpu_output.shape, npu_output.shape) + + +instantiate_device_type_tests(TestResizeAs, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() + diff --git a/test/test_npu/test_network_ops/test_round.py b/test/test_npu/test_network_ops/test_round.py index 70dd7ca2d24ec6182f5d9f1bd1d525b6c2eadcbb..a9b137471034bd83eb0172d5b7215c977a1c2311 100644 --- a/test/test_npu/test_network_ops/test_round.py +++ b/test/test_npu/test_network_ops/test_round.py @@ -1,111 +1,111 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestRound(TestCase): - - def cpu_op_exec(self,input1): - output = torch.round(input1) - output = output.numpy() - return output - - def npu_op_exec(self,input1): - output = torch.round(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_(self,input1): - output = torch.round_(input1) - output = input1.numpy() - return output - - def npu_op_exec_(self,input1): - output = torch.round_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_out(self,input1,cpu_out): - output = torch.round(input1, out = cpu_out) - output = cpu_out.numpy() - return output - - def npu_op_exec_out(self,input1,npu_out): - output = torch.round(input1, out = npu_out) - output = npu_out.to("cpu") - output = output.numpy() - return output - - def test_round_float32_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (3)]], - [[np.float32, -1, (4, 23)]], - [[np.float32, -1, (2, 3)]], - [[np.float32, -1, (12, 23)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_round_inp_float32_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (14)]], - [[np.float32, -1, (4, 3)]], - [[np.float32, -1, (12, 32)]], - [[np.float32, -1, (22, 38)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec_(cpu_input1) - npu_output = self.npu_op_exec_(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_round_out_common_shape_format(self, device): - shape_format = [ - [[np.float16, -1, (10, 5)], [np.float16, -1, (5, 2)]], - [[np.float16, -1, (4, 1, 5)], [np.float16, -1, (8, 1, 10)]], - [[np.float32, -1, (10)], [np.float32, -1, (5)]], - [[np.float32, -1, (4, 1, 5)], [np.float32, -1, (8, 1, 3)]], - [[np.float32, -1, (2, 3, 8)], [np.float32, -1, (2, 3, 16)]], - [[np.float32, -1, (2, 13, 56)], [np.float32, -1, (1, 26, 56)]], - [[np.float32, -1, (2, 13, 56)], [np.float32, -1, (1, 26)]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_out1, npu_out1 = create_common_tensor(item[0], 1, 100) - cpu_out2, npu_out2 = create_common_tensor(item[1], 1, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - if cpu_out1.dtype == torch.float16: - cpu_out1 = cpu_out1.to(torch.float32) - cpu_output = self.cpu_op_exec_out(cpu_input1,cpu_out1) - npu_output1 = self.npu_op_exec_out(npu_input1,npu_out1) - npu_output2 = self.npu_op_exec_out(npu_input1,npu_out2) - cpu_output = cpu_output.astype(npu_output1.dtype) - self.assertRtolEqual(cpu_output, npu_output1) - self.assertRtolEqual(cpu_output, npu_output2) - -instantiate_device_type_tests(TestRound, globals(), except_for="cpu") - -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestRound(TestCase): + + def cpu_op_exec(self,input1): + output = torch.round(input1) + output = output.numpy() + return output + + def npu_op_exec(self,input1): + output = torch.round(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_(self,input1): + output = torch.round_(input1) + output = input1.numpy() + return output + + def npu_op_exec_(self,input1): + output = torch.round_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_out(self,input1,cpu_out): + output = torch.round(input1, out = cpu_out) + output = cpu_out.numpy() + return output + + def npu_op_exec_out(self,input1,npu_out): + output = torch.round(input1, out = npu_out) + output = npu_out.to("cpu") + output = output.numpy() + return output + + def test_round_float32_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (3)]], + [[np.float32, -1, (4, 23)]], + [[np.float32, -1, (2, 3)]], + [[np.float32, -1, (12, 23)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_round_inp_float32_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (14)]], + [[np.float32, -1, (4, 3)]], + [[np.float32, -1, (12, 32)]], + [[np.float32, -1, (22, 38)]] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_output = self.cpu_op_exec_(cpu_input1) + npu_output = self.npu_op_exec_(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + def test_round_out_common_shape_format(self, device): + shape_format = [ + [[np.float16, -1, (10, 5)], [np.float16, -1, (5, 2)]], + [[np.float16, -1, (4, 1, 5)], [np.float16, -1, (8, 1, 10)]], + [[np.float32, -1, (10)], [np.float32, -1, (5)]], + [[np.float32, -1, (4, 1, 5)], [np.float32, -1, (8, 1, 3)]], + [[np.float32, -1, (2, 3, 8)], [np.float32, -1, (2, 3, 16)]], + [[np.float32, -1, (2, 13, 56)], [np.float32, -1, (1, 26, 56)]], + [[np.float32, -1, (2, 13, 56)], [np.float32, -1, (1, 26)]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_out1, npu_out1 = create_common_tensor(item[0], 1, 100) + cpu_out2, npu_out2 = create_common_tensor(item[1], 1, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + if cpu_out1.dtype == torch.float16: + cpu_out1 = cpu_out1.to(torch.float32) + cpu_output = self.cpu_op_exec_out(cpu_input1,cpu_out1) + npu_output1 = self.npu_op_exec_out(npu_input1,npu_out1) + npu_output2 = self.npu_op_exec_out(npu_input1,npu_out2) + cpu_output = cpu_output.astype(npu_output1.dtype) + self.assertRtolEqual(cpu_output, npu_output1) + self.assertRtolEqual(cpu_output, npu_output2) + +instantiate_device_type_tests(TestRound, globals(), except_for="cpu") + +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_rsqrt.py b/test/test_npu/test_network_ops/test_rsqrt.py old mode 100644 new mode 100755 index 5c3e453e088a391c8c60bd5df3fcc4269c045f26..348b1c5a293a8f5743549345fff8b4bd8a143045 --- a/test/test_npu/test_network_ops/test_rsqrt.py +++ b/test/test_npu/test_network_ops/test_rsqrt.py @@ -1,139 +1,139 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestRsqrt(TestCase): - def cpu_op_exec(self, input1): - output = torch.rsqrt(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = torch.rsqrt(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2): - output = input2 - torch.rsqrt(input1, out=output) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_inp_op_exec(self, input1): - output = torch.rsqrt_(input1) - output = output.numpy() - return output - - def npu_inp_op_exec(self, input1): - output = torch.rsqrt_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def rsqrt_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - - cpu_output_inp = self.cpu_inp_op_exec(cpu_input1) - npu_output_inp = self.npu_inp_op_exec(npu_input1) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def rsqrt_out_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) - cpu_input3, npu_input3 = create_common_tensor(item[1], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output_out1 = self.npu_op_exec_out(npu_input1, npu_input2) - npu_output_out2 = self.npu_op_exec_out(npu_input1, npu_input3) - cpu_output = cpu_output.astype(npu_output_out1.dtype) - self.assertRtolEqual(cpu_output, npu_output_out1) - self.assertRtolEqual(cpu_output, npu_output_out2) - - def test_rsqrt_out_result(self, device): - shape_format = [ - [[np.float16, -1, [128, 116, 14, 14]], [np.float16, -1, [256, 116, 1, 1]]], - [[np.float16, 0, [128, 58, 28, 28]], [np.float16, 0, [58, 58, 1, 1]]], - [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3, 3]]], - [[np.float16, -1, [128, 116, 14, 14]], [np.float16, -1, [128, 116, 14, 14]]], - [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 128, 3, 3]]], - [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [3, 3, 7, 7]]], - [[np.float32, -1, [2, 3, 3, 3]], [np.float32, -1, [3, 1, 3, 3]]], - [[np.float32, -1, [128, 232, 7, 7]], [np.float32, -1, [128, 232, 7, 7]]], - ] - self.rsqrt_out_result(shape_format) - - def test_rsqrt_shape_format_fp16_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[np.float16, i, [16]] for i in format_list] - self.rsqrt_result(shape_format) - - def test_rsqrt_shape_format_fp32_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[np.float32, i, [16]] for i in format_list] - self.rsqrt_result(shape_format) - - def test_rsqrt_shape_format_fp16_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float16, i, [16, 32]] for i in format_list] - self.rsqrt_result(shape_format) - - def test_rsqrt_shape_format_fp32_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float32, i, [16, 32]] for i in format_list] - self.rsqrt_result(shape_format) - - def test_rsqrt_shape_format_fp16_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float16, i, [16, 32, 64]] for i in format_list] - self.rsqrt_result(shape_format) - - def test_rsqrt_shape_format_fp32_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float32, i, [16, 32, 64]] for i in format_list] - self.rsqrt_result(shape_format) - - def test_rsqrt_shape_format_fp16_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float16, i, [16, 32, 64, 128]] for i in format_list] - self.rsqrt_result(shape_format) - - def test_rsqrt_shape_format_fp32_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[np.float32, i, [16, 32, 64, 128]] for i in format_list] - self.rsqrt_result(shape_format) - - -instantiate_device_type_tests(TestRsqrt, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestRsqrt(TestCase): + def cpu_op_exec(self, input1): + output = torch.rsqrt(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = torch.rsqrt(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2): + output = input2 + torch.rsqrt(input1, out=output) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_inp_op_exec(self, input1): + output = torch.rsqrt_(input1) + output = output.numpy() + return output + + def npu_inp_op_exec(self, input1): + output = torch.rsqrt_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def rsqrt_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + + cpu_output_inp = self.cpu_inp_op_exec(cpu_input1) + npu_output_inp = self.npu_inp_op_exec(npu_input1) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def rsqrt_out_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) + cpu_input3, npu_input3 = create_common_tensor(item[1], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output_out1 = self.npu_op_exec_out(npu_input1, npu_input2) + npu_output_out2 = self.npu_op_exec_out(npu_input1, npu_input3) + cpu_output = cpu_output.astype(npu_output_out1.dtype) + self.assertRtolEqual(cpu_output, npu_output_out1) + self.assertRtolEqual(cpu_output, npu_output_out2) + + def test_rsqrt_out_result(self, device): + shape_format = [ + [[np.float16, -1, [128, 116, 14, 14]], [np.float16, -1, [256, 116, 1, 1]]], + [[np.float16, 0, [128, 58, 28, 28]], [np.float16, 0, [58, 58, 1, 1]]], + [[np.float16, 0, [128, 3, 224, 224]], [np.float16, 0, [3, 3, 3, 3]]], + [[np.float16, -1, [128, 116, 14, 14]], [np.float16, -1, [128, 116, 14, 14]]], + [[np.float32, 0, [256, 128, 7, 7]], [np.float32, 0, [128, 128, 3, 3]]], + [[np.float32, 0, [256, 3, 224, 224]], [np.float32, 0, [3, 3, 7, 7]]], + [[np.float32, -1, [2, 3, 3, 3]], [np.float32, -1, [3, 1, 3, 3]]], + [[np.float32, -1, [128, 232, 7, 7]], [np.float32, -1, [128, 232, 7, 7]]], + ] + self.rsqrt_out_result(shape_format) + + def test_rsqrt_shape_format_fp16_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[np.float16, i, [16]] for i in format_list] + self.rsqrt_result(shape_format) + + def test_rsqrt_shape_format_fp32_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[np.float32, i, [16]] for i in format_list] + self.rsqrt_result(shape_format) + + def test_rsqrt_shape_format_fp16_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float16, i, [16, 32]] for i in format_list] + self.rsqrt_result(shape_format) + + def test_rsqrt_shape_format_fp32_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float32, i, [16, 32]] for i in format_list] + self.rsqrt_result(shape_format) + + def test_rsqrt_shape_format_fp16_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float16, i, [16, 32, 64]] for i in format_list] + self.rsqrt_result(shape_format) + + def test_rsqrt_shape_format_fp32_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float32, i, [16, 32, 64]] for i in format_list] + self.rsqrt_result(shape_format) + + def test_rsqrt_shape_format_fp16_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float16, i, [16, 32, 64, 128]] for i in format_list] + self.rsqrt_result(shape_format) + + def test_rsqrt_shape_format_fp32_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[np.float32, i, [16, 32, 64, 128]] for i in format_list] + self.rsqrt_result(shape_format) + + +instantiate_device_type_tests(TestRsqrt, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_rsub.py b/test/test_npu/test_network_ops/test_rsub.py old mode 100644 new mode 100755 index afd70d99758e41048218132f8a41179330d212f6..50a173c3ecb17d5f1aa42b44ac300dd470e70e16 --- a/test/test_npu/test_network_ops/test_rsub.py +++ b/test/test_npu/test_network_ops/test_rsub.py @@ -1,170 +1,170 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestRsub(TestCase): - def cpu_op_exec(self, input1, input2): - output = input2 - input1 - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = input2 - input1 - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar(self, input1, input2): - output = input1 - input2 - output = output.to("cpu") - output = output.numpy() - output = -output - return output - - def rsub_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def rsub_scalar_result(self, shape_format): - for item in shape_format: - scalar = np.random.uniform(0, 100) - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input1, scalar) - npu_output_scalar = self.npu_op_exec_scalar(npu_input1, scalar) - - cpu_output = cpu_output.astype(npu_output_scalar.dtype) - self.assertRtolEqual(cpu_output, npu_output_scalar) - - def test_sub_shape_format_fp16_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float16, i, [32]], [np.float16, i, [32]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_fp32_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float16, i, [32]], [np.float16, i, [32]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_fp16_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [5, 3]], [np.float16, i, [5, 3]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_fp32_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [5, 3]], [np.float16, i, [5, 3]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_fp16_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [256, 480, 14]], [np.float16, i, [256, 480, 14]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_fp32_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [256, 480, 14]], [np.float16, i, [256, 480, 14]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_fp16_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, [32, 3, 3, 3]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_fp32_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, [32, 3, 3, 3]]] for i in format_list] - self.rsub_result(shape_format) - - # int------------------------------------------------------------------------------- - def test_sub_shape_format_int32_1d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [32]], [np.int32, i, [32]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_int32_2d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [5, 3]], [np.int32, i, [5, 3]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_int32_3d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [256, 480, 14]], [np.int32, i, [256, 480, 14]]] for i in format_list] - self.rsub_result(shape_format) - - def test_sub_shape_format_int32_4d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [32, 3, 3, 3]], [np.int32, i, [32, 3, 3, 3]]] for i in format_list] - self.rsub_result(shape_format) - - # scalar---------------------------------------------------------------------------- - def test_sub_scalar_shape_format_fp16_1d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float16, i, [32]]] for i in format_list] - self.rsub_scalar_result(shape_format) - - def test_sub_scalar_shape_format_fp32_1d(self, device): - format_list = [-1, 0] - shape_format = [[[np.float16, i, [32]]] for i in format_list] - self.rsub_scalar_result(shape_format) - - def test_sub_scalar_shape_format_fp16_2d(self, device): - format_list = [] - shape_format = [[[np.float16, i, [32, 64]]] for i in format_list] - self.rsub_scalar_result(shape_format) - - def test_sub_scalar_shape_format_fp32_2d(self, device): - format_list = [] - shape_format = [[[np.float16, i, [32, 64]]] for i in format_list] - self.rsub_scalar_result(shape_format) - - def test_sub_scalar_shape_format_fp16_3d(self, device): - format_list = [] - shape_format = [[[np.float16, i, [32, 64, 128]]] for i in format_list] - self.rsub_scalar_result(shape_format) - - def test_sub_scalar_shape_format_fp32_3d(self, device): - format_list = [] - shape_format = [[[np.float16, i, [32, 64, 128]]] for i in format_list] - self.rsub_scalar_result(shape_format) - - def test_sub_scalar_shape_format_fp16_4d(self, device): - format_list = [] - shape_format = [[[np.float16, i, [32, 64, 128, 28]]] for i in format_list] - self.rsub_scalar_result(shape_format) - - def test_sub_scalar_shape_format_fp32_4d(self, device): - format_list = [] - shape_format = [[[np.float16, i, [32, 64, 128, 28]]] for i in format_list] - self.rsub_scalar_result(shape_format) - - -instantiate_device_type_tests(TestRsub, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestRsub(TestCase): + def cpu_op_exec(self, input1, input2): + output = input2 - input1 + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = input2 - input1 + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar(self, input1, input2): + output = input1 - input2 + output = output.to("cpu") + output = output.numpy() + output = -output + return output + + def rsub_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def rsub_scalar_result(self, shape_format): + for item in shape_format: + scalar = np.random.uniform(0, 100) + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input1, scalar) + npu_output_scalar = self.npu_op_exec_scalar(npu_input1, scalar) + + cpu_output = cpu_output.astype(npu_output_scalar.dtype) + self.assertRtolEqual(cpu_output, npu_output_scalar) + + def test_sub_shape_format_fp16_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float16, i, [32]], [np.float16, i, [32]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_fp32_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float16, i, [32]], [np.float16, i, [32]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_fp16_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [5, 3]], [np.float16, i, [5, 3]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_fp32_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [5, 3]], [np.float16, i, [5, 3]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_fp16_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [256, 480, 14]], [np.float16, i, [256, 480, 14]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_fp32_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [256, 480, 14]], [np.float16, i, [256, 480, 14]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_fp16_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, [32, 3, 3, 3]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_fp32_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [32, 3, 3, 3]], [np.float16, i, [32, 3, 3, 3]]] for i in format_list] + self.rsub_result(shape_format) + + # int------------------------------------------------------------------------------- + def test_sub_shape_format_int32_1d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [32]], [np.int32, i, [32]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_int32_2d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [5, 3]], [np.int32, i, [5, 3]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_int32_3d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [256, 480, 14]], [np.int32, i, [256, 480, 14]]] for i in format_list] + self.rsub_result(shape_format) + + def test_sub_shape_format_int32_4d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [32, 3, 3, 3]], [np.int32, i, [32, 3, 3, 3]]] for i in format_list] + self.rsub_result(shape_format) + + # scalar---------------------------------------------------------------------------- + def test_sub_scalar_shape_format_fp16_1d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float16, i, [32]]] for i in format_list] + self.rsub_scalar_result(shape_format) + + def test_sub_scalar_shape_format_fp32_1d(self, device): + format_list = [-1, 0] + shape_format = [[[np.float16, i, [32]]] for i in format_list] + self.rsub_scalar_result(shape_format) + + def test_sub_scalar_shape_format_fp16_2d(self, device): + format_list = [] + shape_format = [[[np.float16, i, [32, 64]]] for i in format_list] + self.rsub_scalar_result(shape_format) + + def test_sub_scalar_shape_format_fp32_2d(self, device): + format_list = [] + shape_format = [[[np.float16, i, [32, 64]]] for i in format_list] + self.rsub_scalar_result(shape_format) + + def test_sub_scalar_shape_format_fp16_3d(self, device): + format_list = [] + shape_format = [[[np.float16, i, [32, 64, 128]]] for i in format_list] + self.rsub_scalar_result(shape_format) + + def test_sub_scalar_shape_format_fp32_3d(self, device): + format_list = [] + shape_format = [[[np.float16, i, [32, 64, 128]]] for i in format_list] + self.rsub_scalar_result(shape_format) + + def test_sub_scalar_shape_format_fp16_4d(self, device): + format_list = [] + shape_format = [[[np.float16, i, [32, 64, 128, 28]]] for i in format_list] + self.rsub_scalar_result(shape_format) + + def test_sub_scalar_shape_format_fp32_4d(self, device): + format_list = [] + shape_format = [[[np.float16, i, [32, 64, 128, 28]]] for i in format_list] + self.rsub_scalar_result(shape_format) + + +instantiate_device_type_tests(TestRsub, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_scalar_tensor.py b/test/test_npu/test_network_ops/test_scalar_tensor.py index d7f5acc0ae513fa7e434ddd99e8c149407fc5007..abf64be8016531c5acddddd0842da7bd0224b00b 100644 --- a/test/test_npu/test_network_ops/test_scalar_tensor.py +++ b/test/test_npu/test_network_ops/test_scalar_tensor.py @@ -1,51 +1,51 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestScalarTensor(TestCase): - def cpu_op_exec(self, scalar, dtype): - output = torch.scalar_tensor(scalar, dtype=dtype, device="cpu") - output = output.numpy() - return output - - def npu_op_exec(self, scalar, dtype): - output = torch.scalar_tensor(scalar, dtype=dtype, device="npu") - output = output.cpu() - output = output.numpy() - return output - - def test_scalar_tensor_shape_format(self, device): - scalars = [-50, 0, 50] - dtypes = [torch.float16, torch.float32, torch.int32] - - shape_format = [ - [i, j] for i in scalars for j in dtypes - ] - - for item in shape_format: - cpu_output = self.cpu_op_exec(item[0], item[1]) - npu_output = self.npu_op_exec(item[0], item[1]) - - self.assertEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestScalarTensor, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestScalarTensor(TestCase): + def cpu_op_exec(self, scalar, dtype): + output = torch.scalar_tensor(scalar, dtype=dtype, device="cpu") + output = output.numpy() + return output + + def npu_op_exec(self, scalar, dtype): + output = torch.scalar_tensor(scalar, dtype=dtype, device="npu") + output = output.cpu() + output = output.numpy() + return output + + def test_scalar_tensor_shape_format(self, device): + scalars = [-50, 0, 50] + dtypes = [torch.float16, torch.float32, torch.int32] + + shape_format = [ + [i, j] for i in scalars for j in dtypes + ] + + for item in shape_format: + cpu_output = self.cpu_op_exec(item[0], item[1]) + npu_output = self.npu_op_exec(item[0], item[1]) + + self.assertEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestScalarTensor, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_scatter_add.py b/test/test_npu/test_network_ops/test_scatter_add.py index e7edf77d536630de799f1eea1d0ad83fd063f5b9..a2f2ee59b800be5303ecdda19394d8f2ab77b33a 100644 --- a/test/test_npu/test_network_ops/test_scatter_add.py +++ b/test/test_npu/test_network_ops/test_scatter_add.py @@ -1,108 +1,108 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestScatterAdd(TestCase): - def cpu_op_exec_inp(self, input1, dim, index, src): - input1.scatter_add_(dim, index, src) - output = input1.numpy() - return output - - def npu_op_exec_inp(self, input, dim, index, src): - input.scatter_add_(dim, index, src) - input = input.to("cpu") - output = input.numpy() - return output - - def cpu_op_exec(self, input1, dim, index, src): - output = torch.scatter_add(input1, dim, index, src) - output = output.numpy() - return output - - def npu_op_exec(self, input1, dim, index, src): - output = torch.scatter_add(input1,dim, index, src) - output = output.to("cpu") - output = output.numpy() - return output - - def test_scatter_add_common_shape_format(self, device): - shape_format = [ - [0, [np.int64, 0, [10, 20]], [np.float32, 0, [10, 20]], [np.float32, 0, [10, 20]]], - [1, [np.int64, 0, [10, 20]], [np.float32, 0, [10, 20]], [np.float32, 0, [10, 20]]], - [0, [np.int64, 0, [2, 6]], [np.float32, 0, [2, 6]], [np.float32, 0, [2, 6]]], - [1, [np.int64, 0, [2, 6]], [np.float32, 0, [2, 6]], [np.float32, 0, [2, 6]]], - [0, [np.int64, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]]], - [1, [np.int64, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]]], - [2, [np.int64, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]]], - ] - - for item in shape_format: - cpu_input2, npu_input2 = create_common_tensor(item[2], 1, 100) - cpu_input1, npu_input1 = create_common_tensor(item[1], 1, (item[1][2][item[0]] - 1)) - cpu_input3, npu_input3 = create_common_tensor(item[3], 1, 100) - - cpu_output = self.cpu_op_exec(cpu_input3, item[0], cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input3, item[0], npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - cpu_inp_output = self.cpu_op_exec_inp(cpu_input3, item[0], cpu_input1, cpu_input2) - npu_inp_output = self.npu_op_exec_inp(npu_input3, item[0], npu_input1, npu_input2) - self.assertRtolEqual(cpu_inp_output, npu_inp_output) - - def test_scatter_add_float16_shape_format(self, device): - def cpu_op_exec_inp_fp16(input, dim, index, src): - input = input.to(torch.float32) - src = src.to(torch.float32) - input.scatter_add_(dim, index, src) - output = input.numpy() - output = output.astype(np.float16) - return output - - def cpu_op_exec_fp16(input1, dim, index, src): - output = torch.scatter_add(input1,dim, index, src) - output = output.numpy() - output = output.astype(np.float16) - return output - - shape_format = [ - [0, [np.int64, 0, [10, 20]], [np.float16, 0, [10, 20]], [np.float16, 0, [10, 20]]], - [1, [np.int64, 0, [10, 20]], [np.float16, 0, [10, 20]], [np.float16, 0, [10, 20]]], - [0, [np.int64, 0, [2, 6]], [np.float16, 0, [2, 6]], [np.float16, 0, [2, 6]]], - [1, [np.int64, 0, [2, 6]], [np.float16, 0, [2, 6]], [np.float16, 0, [2, 6]]], - [0, [np.int64, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]]], - [1, [np.int64, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]]], - [2, [np.int64, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]]], - ] - - for item in shape_format: - cpu_input2, npu_input2 = create_common_tensor(item[2], 1, 100) - cpu_input1, npu_input1 = create_common_tensor(item[1], 1, (item[1][2][item[0]] - 1)) - cpu_input3, npu_input3 = create_common_tensor(item[3], 1, 100) - - cpu_output = cpu_op_exec_fp16(cpu_input3, item[0], cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input3, item[0], npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - cpu_inp_output = cpu_op_exec_inp_fp16(cpu_input3, item[0], cpu_input1, cpu_input2) - npu_inp_output = self.npu_op_exec_inp(npu_input3, item[0], npu_input1, npu_input2) - self.assertRtolEqual(cpu_inp_output, npu_inp_output) - -instantiate_device_type_tests(TestScatterAdd, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestScatterAdd(TestCase): + def cpu_op_exec_inp(self, input1, dim, index, src): + input1.scatter_add_(dim, index, src) + output = input1.numpy() + return output + + def npu_op_exec_inp(self, input, dim, index, src): + input.scatter_add_(dim, index, src) + input = input.to("cpu") + output = input.numpy() + return output + + def cpu_op_exec(self, input1, dim, index, src): + output = torch.scatter_add(input1, dim, index, src) + output = output.numpy() + return output + + def npu_op_exec(self, input1, dim, index, src): + output = torch.scatter_add(input1,dim, index, src) + output = output.to("cpu") + output = output.numpy() + return output + + def test_scatter_add_common_shape_format(self, device): + shape_format = [ + [0, [np.int64, 0, [10, 20]], [np.float32, 0, [10, 20]], [np.float32, 0, [10, 20]]], + [1, [np.int64, 0, [10, 20]], [np.float32, 0, [10, 20]], [np.float32, 0, [10, 20]]], + [0, [np.int64, 0, [2, 6]], [np.float32, 0, [2, 6]], [np.float32, 0, [2, 6]]], + [1, [np.int64, 0, [2, 6]], [np.float32, 0, [2, 6]], [np.float32, 0, [2, 6]]], + [0, [np.int64, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]]], + [1, [np.int64, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]]], + [2, [np.int64, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]], [np.float32, 0, [10, 20, 30]]], + ] + + for item in shape_format: + cpu_input2, npu_input2 = create_common_tensor(item[2], 1, 100) + cpu_input1, npu_input1 = create_common_tensor(item[1], 1, (item[1][2][item[0]] - 1)) + cpu_input3, npu_input3 = create_common_tensor(item[3], 1, 100) + + cpu_output = self.cpu_op_exec(cpu_input3, item[0], cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input3, item[0], npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + cpu_inp_output = self.cpu_op_exec_inp(cpu_input3, item[0], cpu_input1, cpu_input2) + npu_inp_output = self.npu_op_exec_inp(npu_input3, item[0], npu_input1, npu_input2) + self.assertRtolEqual(cpu_inp_output, npu_inp_output) + + def test_scatter_add_float16_shape_format(self, device): + def cpu_op_exec_inp_fp16(input, dim, index, src): + input = input.to(torch.float32) + src = src.to(torch.float32) + input.scatter_add_(dim, index, src) + output = input.numpy() + output = output.astype(np.float16) + return output + + def cpu_op_exec_fp16(input1, dim, index, src): + output = torch.scatter_add(input1,dim, index, src) + output = output.numpy() + output = output.astype(np.float16) + return output + + shape_format = [ + [0, [np.int64, 0, [10, 20]], [np.float16, 0, [10, 20]], [np.float16, 0, [10, 20]]], + [1, [np.int64, 0, [10, 20]], [np.float16, 0, [10, 20]], [np.float16, 0, [10, 20]]], + [0, [np.int64, 0, [2, 6]], [np.float16, 0, [2, 6]], [np.float16, 0, [2, 6]]], + [1, [np.int64, 0, [2, 6]], [np.float16, 0, [2, 6]], [np.float16, 0, [2, 6]]], + [0, [np.int64, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]]], + [1, [np.int64, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]]], + [2, [np.int64, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]], [np.float16, 0, [10, 20, 30]]], + ] + + for item in shape_format: + cpu_input2, npu_input2 = create_common_tensor(item[2], 1, 100) + cpu_input1, npu_input1 = create_common_tensor(item[1], 1, (item[1][2][item[0]] - 1)) + cpu_input3, npu_input3 = create_common_tensor(item[3], 1, 100) + + cpu_output = cpu_op_exec_fp16(cpu_input3, item[0], cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input3, item[0], npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + cpu_inp_output = cpu_op_exec_inp_fp16(cpu_input3, item[0], cpu_input1, cpu_input2) + npu_inp_output = self.npu_op_exec_inp(npu_input3, item[0], npu_input1, npu_input2) + self.assertRtolEqual(cpu_inp_output, npu_inp_output) + +instantiate_device_type_tests(TestScatterAdd, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_scatterv1.py b/test/test_npu/test_network_ops/test_scatterv1.py index 139aa04221c87d69d2ed9b94f4c87041afd96d5b..0f8ec6a5f846d99c44dafd0c9d91bf9d8f69a5c7 100644 --- a/test/test_npu/test_network_ops/test_scatterv1.py +++ b/test/test_npu/test_network_ops/test_scatterv1.py @@ -1,39 +1,39 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestScatterV1(TestCase): - def npu_op_exec(self, input1, indices, updates, dim): - output = torch.npu_scatter(input1, indices, updates, dim) - output = output.to("cpu") - output = output.numpy() - return output - - def test_scatterv1(self, device): - input = torch.tensor([[1.6279, 0.1226], [0.9041, 1.0980]]).npu() - indices = torch.tensor([0, 1]).npu().to(torch.int32) - updates = torch.tensor([-1.1993, -1.5247]).npu() - dim = 0 - exoutput = torch.tensor([[-1.1993, 0.1226], [0.9041, -1.5247]]) - output = self.npu_op_exec(input, indices, updates, dim) - self.assertRtolEqual(exoutput.numpy(), output) - -instantiate_device_type_tests(TestScatterV1, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestScatterV1(TestCase): + def npu_op_exec(self, input1, indices, updates, dim): + output = torch.npu_scatter(input1, indices, updates, dim) + output = output.to("cpu") + output = output.numpy() + return output + + def test_scatterv1(self, device): + input = torch.tensor([[1.6279, 0.1226], [0.9041, 1.0980]]).npu() + indices = torch.tensor([0, 1]).npu().to(torch.int32) + updates = torch.tensor([-1.1993, -1.5247]).npu() + dim = 0 + exoutput = torch.tensor([[-1.1993, 0.1226], [0.9041, -1.5247]]) + output = self.npu_op_exec(input, indices, updates, dim) + self.assertRtolEqual(exoutput.numpy(), output) + +instantiate_device_type_tests(TestScatterV1, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_select.py b/test/test_npu/test_network_ops/test_select.py index 2cf6b00de326812a821d85ddc573b2f4c140ab4f..10a262839e41f47f2d43f8653837c8bf26e1f518 100644 --- a/test/test_npu/test_network_ops/test_select.py +++ b/test/test_npu/test_network_ops/test_select.py @@ -1,48 +1,48 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestSelect(TestCase): - def test_select_common_shape_format(self, device): - def cpu_op_exec(input, dim, index): - output = input.select(dim, index) - output = output.numpy() - return output - - def npu_op_exec(input, dim, index): - output = input.select(dim,index) - output = output.to("cpu") - output = output.numpy() - return output - - shape_format = [ - [[np.float16, 0, (64, 10)]], - [[np.float32, 4, (32, 1, 3, 3)]], - [[np.float32, 29, (10, 128)]] - ] - for shape in shape_format: - cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) - cpu_output = cpu_op_exec(cpu_input, 0, 2) - npu_output = npu_op_exec(npu_input, 0, 2) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestSelect, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestSelect(TestCase): + def test_select_common_shape_format(self, device): + def cpu_op_exec(input, dim, index): + output = input.select(dim, index) + output = output.numpy() + return output + + def npu_op_exec(input, dim, index): + output = input.select(dim,index) + output = output.to("cpu") + output = output.numpy() + return output + + shape_format = [ + [[np.float16, 0, (64, 10)]], + [[np.float32, 4, (32, 1, 3, 3)]], + [[np.float32, 29, (10, 128)]] + ] + for shape in shape_format: + cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) + cpu_output = cpu_op_exec(cpu_input, 0, 2) + npu_output = npu_op_exec(npu_input, 0, 2) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestSelect, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_select_to_contiguous.py b/test/test_npu/test_network_ops/test_select_to_contiguous.py index 1249e255a308ea1a282fb0ec6a9865dcb24f06ac..d5af660a232ebcc051f429ea68b37f1aa6251717 100644 --- a/test/test_npu/test_network_ops/test_select_to_contiguous.py +++ b/test/test_npu/test_network_ops/test_select_to_contiguous.py @@ -1,39 +1,39 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestSelectToContiguous(TestCase): - def test_SelectToContiguous(self, device): - dtype_list = [ np.float16 ,np.float32 ] - format_list = [0,3,4] - shape_list = [[200, 100, 300],[200,200,100,100]] - shape_format = [ - [i, j, k] for i in dtype_list for j in format_list for k in shape_list - ] - for item in shape_format: - a1_cpu, a1_npu = create_common_tensor(item, 0, 100) - for dim in range(1,len(item[2])): - npu_out = a1_npu.select(dim,1).contiguous() - cpu_out = a1_cpu.select(dim,1).contiguous() - self.assertRtolEqual(npu_out.to("cpu").numpy(), cpu_out.numpy()) - - -instantiate_device_type_tests(TestSelectToContiguous, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestSelectToContiguous(TestCase): + def test_SelectToContiguous(self, device): + dtype_list = [ np.float16 ,np.float32 ] + format_list = [0,3,4] + shape_list = [[200, 100, 300],[200,200,100,100]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + for item in shape_format: + a1_cpu, a1_npu = create_common_tensor(item, 0, 100) + for dim in range(1,len(item[2])): + npu_out = a1_npu.select(dim,1).contiguous() + cpu_out = a1_cpu.select(dim,1).contiguous() + self.assertRtolEqual(npu_out.to("cpu").numpy(), cpu_out.numpy()) + + +instantiate_device_type_tests(TestSelectToContiguous, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_sign.py b/test/test_npu/test_network_ops/test_sign.py old mode 100644 new mode 100755 index 5f85e478873a365a4aa33fbca8d0d4ab3900f978..2b9525f83bebb211d070b3dc7c9b6ea4e4326b42 --- a/test/test_npu/test_network_ops/test_sign.py +++ b/test/test_npu/test_network_ops/test_sign.py @@ -1,134 +1,134 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestSign(TestCase): - def cpu_op_exec(self, input1): - cpu_output = torch.sign(input1) - cpu_output = cpu_output.numpy() - return cpu_output - - def npu_op_exec(self, input1): - output = torch.sign(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2): - torch.sign(input1, out=input2) - output = input2.to("cpu") - output = output.numpy() - return output - - def cpu_inp_op_exec(self, input1): - input1.sign_() - output = input1.numpy() - return output - - def npu_inp_op_exec(self, input1): - input1.sign_() - output = input1.to("cpu") - output = output.numpy() - return output - - def sign_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item, -100, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2) - cpu_output_inp = self.cpu_inp_op_exec(cpu_input1) - npu_output_inp = self.npu_inp_op_exec(npu_input1) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output, npu_output_out) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def test_sign_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float16, i, [18]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [5, 256]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float32, i, [18]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [5, 256]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_int32_1d(self, device): - format_list = [0] - shape_format = [[np.int32, i, [18]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_int32_2d(self, device): - format_list = [0] - shape_format = [[np.int32, i, [5, 256]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_int32_3d(self, device): - format_list = [0] - shape_format = [[np.int32, i, [32, 3, 3]] for i in format_list] - self.sign_result(shape_format) - - def test_sign_shape_format_int32_4d(self, device): - format_list = [0] - shape_format = [[np.int32, i, [64, 112, 7, 7]] for i in format_list] - self.sign_result(shape_format) - - -instantiate_device_type_tests(TestSign, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestSign(TestCase): + def cpu_op_exec(self, input1): + cpu_output = torch.sign(input1) + cpu_output = cpu_output.numpy() + return cpu_output + + def npu_op_exec(self, input1): + output = torch.sign(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2): + torch.sign(input1, out=input2) + output = input2.to("cpu") + output = output.numpy() + return output + + def cpu_inp_op_exec(self, input1): + input1.sign_() + output = input1.numpy() + return output + + def npu_inp_op_exec(self, input1): + input1.sign_() + output = input1.to("cpu") + output = output.numpy() + return output + + def sign_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item, -100, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2) + cpu_output_inp = self.cpu_inp_op_exec(cpu_input1) + npu_output_inp = self.npu_inp_op_exec(npu_input1) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output_out) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def test_sign_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float16, i, [18]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [5, 256]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float32, i, [18]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [5, 256]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_int32_1d(self, device): + format_list = [0] + shape_format = [[np.int32, i, [18]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_int32_2d(self, device): + format_list = [0] + shape_format = [[np.int32, i, [5, 256]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_int32_3d(self, device): + format_list = [0] + shape_format = [[np.int32, i, [32, 3, 3]] for i in format_list] + self.sign_result(shape_format) + + def test_sign_shape_format_int32_4d(self, device): + format_list = [0] + shape_format = [[np.int32, i, [64, 112, 7, 7]] for i in format_list] + self.sign_result(shape_format) + + +instantiate_device_type_tests(TestSign, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_size.py b/test/test_npu/test_network_ops/test_size.py index fef31a6c8687ca96b076ba47d8ffce59910357f4..ec354c63cea95a73132001191c3179ce1c4213d9 100644 --- a/test/test_npu/test_network_ops/test_size.py +++ b/test/test_npu/test_network_ops/test_size.py @@ -1,42 +1,42 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - -class TestSize(TestCase): - def test_size_common_shape_format(self, device): - def op_exec(input): - output = input.size() - output = np.array(output, dtype=np.int32) - return output - - shape_format = [ - [[np.float16, 0, (64)]], - [[np.float32, 4, (32, 1, 3, 3)]], - [[np.float32, 3, (10, 128)]] - ] - for shape in shape_format: - cpu_input, npu_input = create_common_tensor(shape[0], -100, 100) - cpu_output = op_exec(cpu_input) - npu_output = op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestSize, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + +class TestSize(TestCase): + def test_size_common_shape_format(self, device): + def op_exec(input): + output = input.size() + output = np.array(output, dtype=np.int32) + return output + + shape_format = [ + [[np.float16, 0, (64)]], + [[np.float32, 4, (32, 1, 3, 3)]], + [[np.float32, 3, (10, 128)]] + ] + for shape in shape_format: + cpu_input, npu_input = create_common_tensor(shape[0], -100, 100) + cpu_output = op_exec(cpu_input) + npu_output = op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestSize, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_slice.py b/test/test_npu/test_network_ops/test_slice.py index 7a29d001e32d939fca6d06ca8eccb04166b50d00..13b28de868144584c836f6d8f9ef6ea36523fd5e 100644 --- a/test/test_npu/test_network_ops/test_slice.py +++ b/test/test_npu/test_network_ops/test_slice.py @@ -1,36 +1,36 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestSlice(TestCase): - def npu_op_exec(self, input1, offset, sizes): - output = torch.npu_slice(input1, offset, sizes) - output = output.to("cpu") - output = output.numpy() - return output - - def test_slice_int32(self, device): - input = torch.tensor([[1,2,3,4,5], [6,7,8,9,10]]).npu() - exoutput = torch.tensor([[1,2],[6,7]]) - output = self.npu_op_exec(input, [0, 0], [2, 2]) - self.assertRtolEqual(exoutput.numpy(), output) - -instantiate_device_type_tests(TestSlice, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestSlice(TestCase): + def npu_op_exec(self, input1, offset, sizes): + output = torch.npu_slice(input1, offset, sizes) + output = output.to("cpu") + output = output.numpy() + return output + + def test_slice_int32(self, device): + input = torch.tensor([[1,2,3,4,5], [6,7,8,9,10]]).npu() + exoutput = torch.tensor([[1,2],[6,7]]) + output = self.npu_op_exec(input, [0, 0], [2, 2]) + self.assertRtolEqual(exoutput.numpy(), output) + +instantiate_device_type_tests(TestSlice, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_slogdet.py b/test/test_npu/test_network_ops/test_slogdet.py index dafcd7431976925d1e27a667bba2e145d3904fa7..8164efda4b32f9317633be70d6c381c7d7cc98a9 100644 --- a/test/test_npu/test_network_ops/test_slogdet.py +++ b/test/test_npu/test_network_ops/test_slogdet.py @@ -1,55 +1,55 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestSlogdet(TestCase): - def cpu_op_exec(self, input): - sign, logabsdet = torch.slogdet(input) - sign = sign.numpy() - logabsdet = logabsdet.numpy() - return sign, logabsdet - - def npu_op_exec(self, input): - sign, logabsdet = torch.slogdet(input) - sign = sign.cpu() - logabsdet = logabsdet.cpu() - sign = sign.numpy() - logabsdet = logabsdet.numpy() - return sign, logabsdet - - def test_slogdet_shape_format(self, device): - shape_format = [ - [np.float32, -1, (3, 3)], - [np.float32, -1, (4, 3, 3)], - [np.float32, -1, (5, 5, 5, 5)], - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, -100, 100) - cpu_output, cpu_indices = self.cpu_op_exec(cpu_input) - npu_output, npu_indices = self.npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_indices, npu_indices) - - - -instantiate_device_type_tests(TestSlogdet, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestSlogdet(TestCase): + def cpu_op_exec(self, input): + sign, logabsdet = torch.slogdet(input) + sign = sign.numpy() + logabsdet = logabsdet.numpy() + return sign, logabsdet + + def npu_op_exec(self, input): + sign, logabsdet = torch.slogdet(input) + sign = sign.cpu() + logabsdet = logabsdet.cpu() + sign = sign.numpy() + logabsdet = logabsdet.numpy() + return sign, logabsdet + + def test_slogdet_shape_format(self, device): + shape_format = [ + [np.float32, -1, (3, 3)], + [np.float32, -1, (4, 3, 3)], + [np.float32, -1, (5, 5, 5, 5)], + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, -100, 100) + cpu_output, cpu_indices = self.cpu_op_exec(cpu_input) + npu_output, npu_indices = self.npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_indices, npu_indices) + + + +instantiate_device_type_tests(TestSlogdet, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_smoothl1loss.py b/test/test_npu/test_network_ops/test_smoothl1loss.py index 977192f46cac46c78bd2009f2f0837fb57743a5f..bf3d50d26875d7a9ea4efa66d3752904d10a4827 100644 --- a/test/test_npu/test_network_ops/test_smoothl1loss.py +++ b/test/test_npu/test_network_ops/test_smoothl1loss.py @@ -1,74 +1,74 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -#torch.nn.functional.smooth_l1_loss 接口没有.out入参。未做.out测试 -class TestSmoothL1loss(TestCase): - def cpu_op_exec_new(self, input1, target, reduction): - output = torch.nn.functional.smooth_l1_loss(input1, target, reduction = reduction) - return output.numpy() - - def npu_op_exec_new(self, input1, target, reduction): - target = target.npu() - output = torch.nn.functional.smooth_l1_loss(input1, target,reduction = reduction) - return output.cpu().numpy() - - def test_smoothl1loss_shape_format_fp32(self, device): - format_list = [0] - shape_list = [[256, 10], [256, 1000], [256, 10000], - [64, 10, 10], [64, 100, 100], [64, 200, 200], - [32, 3, 10, 10], [32, 3, 100, 100], [32, 3, 200, 200]] - reduction_list = ['none', 'mean', 'sum'] - shape_format = [ - [[np.float32, i, j], [np.float32, 0, j], k] for i in format_list - for j in shape_list for k in reduction_list - ] - for item in shape_format: - np_target = np.random.uniform(0, 10, (item[1][2])).astype(item[1][0]) - target = torch.from_numpy(np_target) - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_output = self.cpu_op_exec_new(cpu_input1, target, item[2]) - npu_output = self.npu_op_exec_new(npu_input1, target, item[2]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_nllloss_shape_format_fp16(self, device): - format_list = [0] - shape_list = [[256, 10], [256, 1000], [256, 10000], - [64, 10, 10], [64, 100, 100], [64, 200, 200], - [32, 3, 10, 10], [32, 3, 100, 100], [32, 3, 200, 200]] - reduction_list = ['none', 'mean'] - shape_format = [ - [[np.float16, i, j], [np.float16, 0, j], k] for i in format_list - for j in shape_list for k in reduction_list - ] - - for item in shape_format: - np_target = np.random.uniform(0, 10, (item[1][2])).astype(item[1][0]) - target = torch.from_numpy(np_target) - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = self.cpu_op_exec_new(cpu_input1, target, item[2]) - npu_output = self.npu_op_exec_new(npu_input1, target, item[2]) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestSmoothL1loss, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +#torch.nn.functional.smooth_l1_loss 接口没有.out入参。未做.out测试 +class TestSmoothL1loss(TestCase): + def cpu_op_exec_new(self, input1, target, reduction): + output = torch.nn.functional.smooth_l1_loss(input1, target, reduction = reduction) + return output.numpy() + + def npu_op_exec_new(self, input1, target, reduction): + target = target.npu() + output = torch.nn.functional.smooth_l1_loss(input1, target,reduction = reduction) + return output.cpu().numpy() + + def test_smoothl1loss_shape_format_fp32(self, device): + format_list = [0] + shape_list = [[256, 10], [256, 1000], [256, 10000], + [64, 10, 10], [64, 100, 100], [64, 200, 200], + [32, 3, 10, 10], [32, 3, 100, 100], [32, 3, 200, 200]] + reduction_list = ['none', 'mean', 'sum'] + shape_format = [ + [[np.float32, i, j], [np.float32, 0, j], k] for i in format_list + for j in shape_list for k in reduction_list + ] + for item in shape_format: + np_target = np.random.uniform(0, 10, (item[1][2])).astype(item[1][0]) + target = torch.from_numpy(np_target) + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_output = self.cpu_op_exec_new(cpu_input1, target, item[2]) + npu_output = self.npu_op_exec_new(npu_input1, target, item[2]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_nllloss_shape_format_fp16(self, device): + format_list = [0] + shape_list = [[256, 10], [256, 1000], [256, 10000], + [64, 10, 10], [64, 100, 100], [64, 200, 200], + [32, 3, 10, 10], [32, 3, 100, 100], [32, 3, 200, 200]] + reduction_list = ['none', 'mean'] + shape_format = [ + [[np.float16, i, j], [np.float16, 0, j], k] for i in format_list + for j in shape_list for k in reduction_list + ] + + for item in shape_format: + np_target = np.random.uniform(0, 10, (item[1][2])).astype(item[1][0]) + target = torch.from_numpy(np_target) + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = self.cpu_op_exec_new(cpu_input1, target, item[2]) + npu_output = self.npu_op_exec_new(npu_input1, target, item[2]) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestSmoothL1loss, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_soft_margin_loss.py b/test/test_npu/test_network_ops/test_soft_margin_loss.py index 9c8308738fee4d74c75dd8bcb07f848c68cc025a..e1ac2921a77283ba3dc854054d2dea3ade7672e1 100644 --- a/test/test_npu/test_network_ops/test_soft_margin_loss.py +++ b/test/test_npu/test_network_ops/test_soft_margin_loss.py @@ -1,127 +1,127 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestSoftMarginLoss(TestCase): - def generate_data(self,min_d, max_d, shape1, shape2, dtype): - input1 = np.random.uniform(min_d, max_d, shape1).astype(dtype) - npu_input1 = torch.from_numpy(input1) - if dtype == np.float16: - stype = torch.float16 - if dtype == np.float32: - stype = torch.float32 - npu_input2 = torch.ones(size=shape2, dtype=stype) - return npu_input1, npu_input2 - - def cpu_op_exec_default(self,input1, input2): - stype=input1.dtype - if stype==torch.float16: - input1=input1.float() - input2=input2.float() - loss = torch.nn.SoftMarginLoss() - output=loss(input1, input2) - if stype==torch.float16: - output=output.half() - output = output.numpy() - return output - - def npu_op_exec_default(self,input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - loss = torch.nn.SoftMarginLoss() - output = loss(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec(self,input1, input2, reduct): - stype=input1.dtype - if stype==torch.float16: - input1=input1.float() - input2=input2.float() - loss = torch.nn.SoftMarginLoss(reduction=reduct) - output = loss(input1, input2) - if stype==torch.float16: - output=output.half() - output = output.numpy() - return output - - def npu_op_exec(self,input1, input2, reduct): - input1 = input1.to("npu") - input2 = input2.to("npu") - loss = torch.nn.SoftMarginLoss(reduction=reduct) - output = loss(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def test_soft_margin_loss_float16(self, device): - npu_input1, npu_input2 =self.generate_data(-2, 2, (5, 13, 2, 7, 18, 83, 5, 22), (5, 13, 2, 7, 18, 83, 5, 22), np.float16) - cpu_output = self.cpu_op_exec_default(npu_input1, npu_input2) - npu_output = self.npu_op_exec_default(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_soft_margin_loss_float16_mean(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (3, 19, 19, 3, 11, 11, 2), (3, 1, 19, 3, 11, 11, 1), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "mean") - npu_output = self.npu_op_exec(npu_input1, npu_input2, "mean") - self.assertRtolEqual(cpu_output, npu_output) - - def test_soft_margin_loss_float16_none(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (5, 13, 2, 7, 18, 83, 5, 22), (5, 13, 2, 1, 18, 83, 1, 22), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "none") - npu_output = self.npu_op_exec(npu_input1, npu_input2, "none") - self.assertRtolEqual(cpu_output, npu_output) - - def test_soft_margin_loss_float16_sum(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (1, 8, 2, 2, 5, 8, 2, 8), - (1, 8, 2, 2, 1, 1, 1, 1), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "sum") - npu_output = self.npu_op_exec(npu_input1, npu_input2, "sum") - self.assertRtolEqual(cpu_output, npu_output) - - def test_soft_margin_loss_float32(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (416, 192, 272), (416, 1, 272), np.float32) - cpu_output = self.cpu_op_exec_default(npu_input1, npu_input2) - npu_output = self.npu_op_exec_default(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_soft_margin_loss_float32_mean(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (416, 192, 272), (416, 192, 272), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "mean") - npu_output = self.npu_op_exec(npu_input1, npu_input2, "mean") - self.assertRtolEqual(cpu_output, npu_output) - - def test_soft_margin_loss_float32_none(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (25, 25, 25), (25, 1, 25), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "none") - npu_output = self.npu_op_exec(npu_input1, npu_input2, "none") - self.assertRtolEqual(cpu_output, npu_output) - - def test_soft_margin_loss_float32_sum(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (148, 110, 148), (148, 1, 148), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "sum") - npu_output = self.npu_op_exec(npu_input1, npu_input2, "sum") - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestSoftMarginLoss, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestSoftMarginLoss(TestCase): + def generate_data(self,min_d, max_d, shape1, shape2, dtype): + input1 = np.random.uniform(min_d, max_d, shape1).astype(dtype) + npu_input1 = torch.from_numpy(input1) + if dtype == np.float16: + stype = torch.float16 + if dtype == np.float32: + stype = torch.float32 + npu_input2 = torch.ones(size=shape2, dtype=stype) + return npu_input1, npu_input2 + + def cpu_op_exec_default(self,input1, input2): + stype=input1.dtype + if stype==torch.float16: + input1=input1.float() + input2=input2.float() + loss = torch.nn.SoftMarginLoss() + output=loss(input1, input2) + if stype==torch.float16: + output=output.half() + output = output.numpy() + return output + + def npu_op_exec_default(self,input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + loss = torch.nn.SoftMarginLoss() + output = loss(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec(self,input1, input2, reduct): + stype=input1.dtype + if stype==torch.float16: + input1=input1.float() + input2=input2.float() + loss = torch.nn.SoftMarginLoss(reduction=reduct) + output = loss(input1, input2) + if stype==torch.float16: + output=output.half() + output = output.numpy() + return output + + def npu_op_exec(self,input1, input2, reduct): + input1 = input1.to("npu") + input2 = input2.to("npu") + loss = torch.nn.SoftMarginLoss(reduction=reduct) + output = loss(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def test_soft_margin_loss_float16(self, device): + npu_input1, npu_input2 =self.generate_data(-2, 2, (5, 13, 2, 7, 18, 83, 5, 22), (5, 13, 2, 7, 18, 83, 5, 22), np.float16) + cpu_output = self.cpu_op_exec_default(npu_input1, npu_input2) + npu_output = self.npu_op_exec_default(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_soft_margin_loss_float16_mean(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (3, 19, 19, 3, 11, 11, 2), (3, 1, 19, 3, 11, 11, 1), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "mean") + npu_output = self.npu_op_exec(npu_input1, npu_input2, "mean") + self.assertRtolEqual(cpu_output, npu_output) + + def test_soft_margin_loss_float16_none(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (5, 13, 2, 7, 18, 83, 5, 22), (5, 13, 2, 1, 18, 83, 1, 22), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "none") + npu_output = self.npu_op_exec(npu_input1, npu_input2, "none") + self.assertRtolEqual(cpu_output, npu_output) + + def test_soft_margin_loss_float16_sum(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (1, 8, 2, 2, 5, 8, 2, 8), + (1, 8, 2, 2, 1, 1, 1, 1), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "sum") + npu_output = self.npu_op_exec(npu_input1, npu_input2, "sum") + self.assertRtolEqual(cpu_output, npu_output) + + def test_soft_margin_loss_float32(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (416, 192, 272), (416, 1, 272), np.float32) + cpu_output = self.cpu_op_exec_default(npu_input1, npu_input2) + npu_output = self.npu_op_exec_default(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_soft_margin_loss_float32_mean(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (416, 192, 272), (416, 192, 272), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "mean") + npu_output = self.npu_op_exec(npu_input1, npu_input2, "mean") + self.assertRtolEqual(cpu_output, npu_output) + + def test_soft_margin_loss_float32_none(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (25, 25, 25), (25, 1, 25), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "none") + npu_output = self.npu_op_exec(npu_input1, npu_input2, "none") + self.assertRtolEqual(cpu_output, npu_output) + + def test_soft_margin_loss_float32_sum(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (148, 110, 148), (148, 1, 148), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, "sum") + npu_output = self.npu_op_exec(npu_input1, npu_input2, "sum") + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestSoftMarginLoss, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_softmax.py b/test/test_npu/test_network_ops/test_softmax.py old mode 100644 new mode 100755 index 692dba18a13647e3793b3605ccc62c8bf13fd72d..ce624ab6b11d34620bfbd297685033289b6087bb --- a/test/test_npu/test_network_ops/test_softmax.py +++ b/test/test_npu/test_network_ops/test_softmax.py @@ -1,126 +1,126 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestSoftMax(TestCase): - def cpu_op_exec(self, input1, dim): - output = torch.nn.functional.softmax(input1, dim) - output = output.numpy() - return output - - def npu_op_exec(self, input1, dim): - output = torch.nn.functional.softmax(input1, dim) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_dtype(self, input1, dim, dtype): - output = torch.nn.functional.softmax(input1, dim, dtype=dtype) - output = output.numpy() - return output - - def npu_op_exec_dtype(self, input1, dim, dtype): - output = torch.nn.functional.softmax(input1, dim, dtype=dtype) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_half_float(self, input1, dim): - output = torch._softmax(input1, dim, True) - output = output.to("cpu") - output = output.numpy() - return output - - def softmax_result(self, shape_format): - for item in shape_format: - dim = np.random.randint(0, len(item[2])) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1, dim) - npu_output = self.npu_op_exec(npu_input1, dim) - - if npu_input1.dtype == torch.float16: - npu_output_half = self.npu_op_exec_half_float(npu_input1, dim) - npu_output_half = npu_output_half.astype(np.float16) - - cpu_output_inp = self.cpu_op_exec_dtype(cpu_input1, dim, torch.float32) - npu_output_inp = self.npu_op_exec_dtype(npu_input1, dim, torch.float32) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - if npu_input1.dtype == torch.float16: - self.assertRtolEqual(cpu_output, npu_output_half) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def test_softmax_shape_format_fp16_1d(self, device): - format_list = [0] - shape_format = [[np.float16, i, [18]] for i in format_list] - self.softmax_result(shape_format) - - def test_softmax_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [5, 256]] for i in format_list] - self.softmax_result(shape_format) - - def test_softmax_shape_format_fp16_3d(self, device): - format_list = [0, 29] - shape_format = [[np.float16, i, [32, 8, 8]] for i in format_list] - self.softmax_result(shape_format) - - def test_softmax_shape_format_fp16_4d(self, device): - format_list = [0, 29] - shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] - self.softmax_result(shape_format) - - def test_softmax_shape_format_fp32_1d(self, device): - format_list = [0] - shape_format = [[np.float32, i, [18]] for i in format_list] - self.softmax_result(shape_format) - - def test_softmax_shape_format_fp32_2d(self, device): - format_list = [3, 29] - shape_format = [[np.float32, i, [5, 256]] for i in format_list] - self.softmax_result(shape_format) - - def test_softmax_shape_format_fp32_3d(self, device): - format_list = [0, 29] - shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] - self.softmax_result(shape_format) - - def test_softmax_shape_format_fp32_4d(self, device): - format_list = [3, 29] - shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] - self.softmax_result(shape_format) - - def test_softmax_dimname_shape_format(self, device): - cpu_input1 = torch.randn(4, 3, names=('N', 'C')) - npu_input1 = cpu_input1.npu() - cpu_output = self.cpu_op_exec(cpu_input1, 'N') - npu_output = self.npu_op_exec(npu_input1, 'N') - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestSoftMax, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestSoftMax(TestCase): + def cpu_op_exec(self, input1, dim): + output = torch.nn.functional.softmax(input1, dim) + output = output.numpy() + return output + + def npu_op_exec(self, input1, dim): + output = torch.nn.functional.softmax(input1, dim) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_dtype(self, input1, dim, dtype): + output = torch.nn.functional.softmax(input1, dim, dtype=dtype) + output = output.numpy() + return output + + def npu_op_exec_dtype(self, input1, dim, dtype): + output = torch.nn.functional.softmax(input1, dim, dtype=dtype) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_half_float(self, input1, dim): + output = torch._softmax(input1, dim, True) + output = output.to("cpu") + output = output.numpy() + return output + + def softmax_result(self, shape_format): + for item in shape_format: + dim = np.random.randint(0, len(item[2])) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1, dim) + npu_output = self.npu_op_exec(npu_input1, dim) + + if npu_input1.dtype == torch.float16: + npu_output_half = self.npu_op_exec_half_float(npu_input1, dim) + npu_output_half = npu_output_half.astype(np.float16) + + cpu_output_inp = self.cpu_op_exec_dtype(cpu_input1, dim, torch.float32) + npu_output_inp = self.npu_op_exec_dtype(npu_input1, dim, torch.float32) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + if npu_input1.dtype == torch.float16: + self.assertRtolEqual(cpu_output, npu_output_half) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def test_softmax_shape_format_fp16_1d(self, device): + format_list = [0] + shape_format = [[np.float16, i, [18]] for i in format_list] + self.softmax_result(shape_format) + + def test_softmax_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [5, 256]] for i in format_list] + self.softmax_result(shape_format) + + def test_softmax_shape_format_fp16_3d(self, device): + format_list = [0, 29] + shape_format = [[np.float16, i, [32, 8, 8]] for i in format_list] + self.softmax_result(shape_format) + + def test_softmax_shape_format_fp16_4d(self, device): + format_list = [0, 29] + shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] + self.softmax_result(shape_format) + + def test_softmax_shape_format_fp32_1d(self, device): + format_list = [0] + shape_format = [[np.float32, i, [18]] for i in format_list] + self.softmax_result(shape_format) + + def test_softmax_shape_format_fp32_2d(self, device): + format_list = [3, 29] + shape_format = [[np.float32, i, [5, 256]] for i in format_list] + self.softmax_result(shape_format) + + def test_softmax_shape_format_fp32_3d(self, device): + format_list = [0, 29] + shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] + self.softmax_result(shape_format) + + def test_softmax_shape_format_fp32_4d(self, device): + format_list = [3, 29] + shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] + self.softmax_result(shape_format) + + def test_softmax_dimname_shape_format(self, device): + cpu_input1 = torch.randn(4, 3, names=('N', 'C')) + npu_input1 = cpu_input1.npu() + cpu_output = self.cpu_op_exec(cpu_input1, 'N') + npu_output = self.npu_op_exec(npu_input1, 'N') + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestSoftMax, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_softmaxcrossentropywithlogits.py b/test/test_npu/test_network_ops/test_softmaxcrossentropywithlogits.py index 2ee6e942b5ad4da2483441c99af3c286316d5ba7..4a4ca5b78e59b22d30e16fb8f683d91050b24919 100644 --- a/test/test_npu/test_network_ops/test_softmaxcrossentropywithlogits.py +++ b/test/test_npu/test_network_ops/test_softmaxcrossentropywithlogits.py @@ -1,37 +1,37 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestSoftmaxCrossentropyWithLogits(TestCase): - def npu_op_exec(self, input1, label): - output = torch.npu_softmax_cross_entropy_with_logits(input1, label) - output = output.to("cpu") - output = output.numpy() - return output - - def test_SoftmaxCross(self, device): - input = torch.tensor([[1.,2.,3.,4.]]).npu() - label = torch.tensor([[1.,2.,3.,4.]]).npu() - exresult = torch.tensor([14.4019]) - output = self.npu_op_exec(input, label) - self.assertRtolEqual(exresult.numpy(), output) - -instantiate_device_type_tests(TestSoftmaxCrossentropyWithLogits, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestSoftmaxCrossentropyWithLogits(TestCase): + def npu_op_exec(self, input1, label): + output = torch.npu_softmax_cross_entropy_with_logits(input1, label) + output = output.to("cpu") + output = output.numpy() + return output + + def test_SoftmaxCross(self, device): + input = torch.tensor([[1.,2.,3.,4.]]).npu() + label = torch.tensor([[1.,2.,3.,4.]]).npu() + exresult = torch.tensor([14.4019]) + output = self.npu_op_exec(input, label) + self.assertRtolEqual(exresult.numpy(), output) + +instantiate_device_type_tests(TestSoftmaxCrossentropyWithLogits, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_split.py b/test/test_npu/test_network_ops/test_split.py old mode 100644 new mode 100755 index 7def0d59dd77d811c5555134d44b24ebed5802a2..ebfe50aa7e1fe9e01394fa1f0c19435050e88035 --- a/test/test_npu/test_network_ops/test_split.py +++ b/test/test_npu/test_network_ops/test_split.py @@ -1,111 +1,111 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestSplit(TestCase): - def cpu_op_exec(self, input1, sections, dim): - output = torch.split(input1, sections, dim) - output = list(output) - for i in range(len(output)): - output[i] = output[i].numpy() - return output - - def npu_op_exec(self, input1, sections, dim): - output = torch.split(input1, sections, dim) - output = list(output) - for i in range(len(output)): - output[i] = output[i].to("cpu").numpy() - return output - - def split_result(self, shape_format): - for item in shape_format: - dim = np.random.randint(0, len(item[2])) - size1 = int(item[2][dim] / 2) - size2 = int(item[2][dim] - size1) - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - - cpu_output = self.cpu_op_exec(cpu_input1, [size1, size2], dim) - npu_output = self.npu_op_exec(npu_input1, [size1, size2], dim) - - for i in range(len(cpu_output)): - self.assertRtolEqual(cpu_output[i], npu_output[i]) - - def test_split_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float16, i, [18]] for i in format_list] - self.split_result(shape_format) - - def test_split_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [5, 256]] for i in format_list] - self.split_result(shape_format) - - def test_split_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] - self.split_result(shape_format) - - def test_split_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] - self.split_result(shape_format) - - def test_split_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float32, i, [18]] for i in format_list] - self.split_result(shape_format) - - def test_split_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [5, 256]] for i in format_list] - self.split_result(shape_format) - - def test_split_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] - self.split_result(shape_format) - - def test_split_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] - self.split_result(shape_format) - - def test_split_common_shape_format(self, device): - shape_format = [ - [[np.float32, 0 , (1, 4, 2, 3)], 3, 1], - [[np.float32, 0, (8,4)], [1,2,1,2,2],0], - [[np.float16, 0 , (1, 4, 2, 3)], 3, 1], - [[np.float16, 0, (8,4)], [1,2,1,2,2],0], - [[np.int32, 0 , (1, 4, 2, 3)], 3, 1], - [[np.int32, 0, (8,4)], [1,2,1,2,2],0], - [[np.int64, 0 , (1, 4, 2, 3)], 3, 1], - [[np.int64, 0, (8,4)], [1,2,1,2,2],0], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) - cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) - for i in range(len(cpu_output)): - self.assertRtolEqual(cpu_output[i], npu_output[i]) - - -instantiate_device_type_tests(TestSplit, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestSplit(TestCase): + def cpu_op_exec(self, input1, sections, dim): + output = torch.split(input1, sections, dim) + output = list(output) + for i in range(len(output)): + output[i] = output[i].numpy() + return output + + def npu_op_exec(self, input1, sections, dim): + output = torch.split(input1, sections, dim) + output = list(output) + for i in range(len(output)): + output[i] = output[i].to("cpu").numpy() + return output + + def split_result(self, shape_format): + for item in shape_format: + dim = np.random.randint(0, len(item[2])) + size1 = int(item[2][dim] / 2) + size2 = int(item[2][dim] - size1) + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + + cpu_output = self.cpu_op_exec(cpu_input1, [size1, size2], dim) + npu_output = self.npu_op_exec(npu_input1, [size1, size2], dim) + + for i in range(len(cpu_output)): + self.assertRtolEqual(cpu_output[i], npu_output[i]) + + def test_split_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float16, i, [18]] for i in format_list] + self.split_result(shape_format) + + def test_split_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [5, 256]] for i in format_list] + self.split_result(shape_format) + + def test_split_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] + self.split_result(shape_format) + + def test_split_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] + self.split_result(shape_format) + + def test_split_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float32, i, [18]] for i in format_list] + self.split_result(shape_format) + + def test_split_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [5, 256]] for i in format_list] + self.split_result(shape_format) + + def test_split_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] + self.split_result(shape_format) + + def test_split_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] + self.split_result(shape_format) + + def test_split_common_shape_format(self, device): + shape_format = [ + [[np.float32, 0 , (1, 4, 2, 3)], 3, 1], + [[np.float32, 0, (8,4)], [1,2,1,2,2],0], + [[np.float16, 0 , (1, 4, 2, 3)], 3, 1], + [[np.float16, 0, (8,4)], [1,2,1,2,2],0], + [[np.int32, 0 , (1, 4, 2, 3)], 3, 1], + [[np.int32, 0, (8,4)], [1,2,1,2,2],0], + [[np.int64, 0 , (1, 4, 2, 3)], 3, 1], + [[np.int64, 0, (8,4)], [1,2,1,2,2],0], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) + cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) + for i in range(len(cpu_output)): + self.assertRtolEqual(cpu_output[i], npu_output[i]) + + +instantiate_device_type_tests(TestSplit, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_sqrt.py b/test/test_npu/test_network_ops/test_sqrt.py old mode 100644 new mode 100755 index f9378d82efbae220f58341fb9902c996da187738..1b9e859644d6d851d2a378ae89dd4cb7e03b2c28 --- a/test/test_npu/test_network_ops/test_sqrt.py +++ b/test/test_npu/test_network_ops/test_sqrt.py @@ -1,143 +1,143 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import torch -import copy -from common_device_type import instantiate_device_type_tests -from common_utils import TestCase, run_tests -from util_test import create_common_tensor - - -class TestSqrt(TestCase): - def cpu_op_exec(self, input1): - cpu_output = torch.sqrt(input1) - cpu_output = cpu_output.numpy() - return cpu_output - - def npu_op_exec(self, input1): - output = torch.sqrt(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2): - torch.sqrt(input1, out=input2) - output = input2.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out_shape(self, input1): - input2 = torch.empty(0, dtype=input1.dtype).npu() - torch.sqrt(input1, out=input2) - output = input2.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out_contiguous(self, input1): - input2 = copy.deepcopy(input1) - if input2.dim() > 1 : - input2 = input2.transpose(0, 1); - torch.sqrt(input1, out=input2) - output = input2.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out_input_equal_output(self, input1): - input2 = copy.deepcopy(input1) - torch.sqrt(input2, out=input2) - output = input2.to("cpu") - output = output.numpy() - return output - - def cpu_inp_op_exec(self, input1): - torch.sqrt_(input1) - output = input1.numpy() - return output - - def npu_inp_op_exec(self, input1): - torch.sqrt_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def sqrt_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 2, 100) - cpu_input2, npu_input2 = create_common_tensor(item, 2, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2) - npu_output_out1 = self.npu_op_exec_out_shape(npu_input1) - npu_output_out2 = self.npu_op_exec_out_contiguous(npu_input1) - npu_output_out3 = self.npu_op_exec_out_input_equal_output(npu_input1) - cpu_output_inp = self.cpu_inp_op_exec(cpu_input1) - npu_output_inp = self.npu_inp_op_exec(npu_input1) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output, npu_output_out) - self.assertRtolEqual(cpu_output, npu_output_out1) - self.assertRtolEqual(cpu_output, npu_output_out2) - self.assertRtolEqual(cpu_output, npu_output_out3) - self.assertRtolEqual(cpu_output_inp, npu_output_inp) - - def test_sqrt_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float16, i, [18]] for i in format_list] - self.sqrt_result(shape_format) - - def test_sqrt_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [5, 256]] for i in format_list] - self.sqrt_result(shape_format) - - def test_sqrt_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] - self.sqrt_result(shape_format) - - def test_sqrt_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] - self.sqrt_result(shape_format) - - def test_sqrt_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float32, i, [18]] for i in format_list] - self.sqrt_result(shape_format) - - def test_sqrt_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [5, 256]] for i in format_list] - self.sqrt_result(shape_format) - - def test_sqrt_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] - self.sqrt_result(shape_format) - - def test_sqrt_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] - self.sqrt_result(shape_format) - - -instantiate_device_type_tests(TestSqrt, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch +import copy +from common_device_type import instantiate_device_type_tests +from common_utils import TestCase, run_tests +from util_test import create_common_tensor + + +class TestSqrt(TestCase): + def cpu_op_exec(self, input1): + cpu_output = torch.sqrt(input1) + cpu_output = cpu_output.numpy() + return cpu_output + + def npu_op_exec(self, input1): + output = torch.sqrt(input1) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2): + torch.sqrt(input1, out=input2) + output = input2.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out_shape(self, input1): + input2 = torch.empty(0, dtype=input1.dtype).npu() + torch.sqrt(input1, out=input2) + output = input2.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out_contiguous(self, input1): + input2 = copy.deepcopy(input1) + if input2.dim() > 1 : + input2 = input2.transpose(0, 1); + torch.sqrt(input1, out=input2) + output = input2.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out_input_equal_output(self, input1): + input2 = copy.deepcopy(input1) + torch.sqrt(input2, out=input2) + output = input2.to("cpu") + output = output.numpy() + return output + + def cpu_inp_op_exec(self, input1): + torch.sqrt_(input1) + output = input1.numpy() + return output + + def npu_inp_op_exec(self, input1): + torch.sqrt_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def sqrt_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 2, 100) + cpu_input2, npu_input2 = create_common_tensor(item, 2, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2) + npu_output_out1 = self.npu_op_exec_out_shape(npu_input1) + npu_output_out2 = self.npu_op_exec_out_contiguous(npu_input1) + npu_output_out3 = self.npu_op_exec_out_input_equal_output(npu_input1) + cpu_output_inp = self.cpu_inp_op_exec(cpu_input1) + npu_output_inp = self.npu_inp_op_exec(npu_input1) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_output_inp = cpu_output_inp.astype(npu_output_inp.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output_out) + self.assertRtolEqual(cpu_output, npu_output_out1) + self.assertRtolEqual(cpu_output, npu_output_out2) + self.assertRtolEqual(cpu_output, npu_output_out3) + self.assertRtolEqual(cpu_output_inp, npu_output_inp) + + def test_sqrt_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float16, i, [18]] for i in format_list] + self.sqrt_result(shape_format) + + def test_sqrt_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [5, 256]] for i in format_list] + self.sqrt_result(shape_format) + + def test_sqrt_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] + self.sqrt_result(shape_format) + + def test_sqrt_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] + self.sqrt_result(shape_format) + + def test_sqrt_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float32, i, [18]] for i in format_list] + self.sqrt_result(shape_format) + + def test_sqrt_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [5, 256]] for i in format_list] + self.sqrt_result(shape_format) + + def test_sqrt_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] + self.sqrt_result(shape_format) + + def test_sqrt_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] + self.sqrt_result(shape_format) + + +instantiate_device_type_tests(TestSqrt, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_squeeze.py b/test/test_npu/test_network_ops/test_squeeze.py index 073f508c26eab05e1fbc6ec9711c6d68ade209e5..a5ba8c01e1815a8daafa92b75c138ef025d46864 100644 --- a/test/test_npu/test_network_ops/test_squeeze.py +++ b/test/test_npu/test_network_ops/test_squeeze.py @@ -1,46 +1,46 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -class TestSqueeze(TestCase): - def test_squeeze_common_shape_format(self, device): - def cpu_op_exec(input): - output = torch.squeeze(input) - output = output.numpy() - return output - - def npu_op_exec(input): - output = torch.squeeze(input) - output = output.to("cpu") - output = output.numpy() - return output - - shape_format = [ - [torch.float16, (2, 1, 2, 1, 2)], - [torch.float32, (2, 1, 2, 1, 2)] - ] - for shape in shape_format: - cpu_input = torch.zeros(shape[1],dtype=shape[0]) - npu_input = torch.zeros(shape[1],dtype=shape[0]).npu() - cpu_output = cpu_op_exec(cpu_input) - npu_output = npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestSqueeze, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +class TestSqueeze(TestCase): + def test_squeeze_common_shape_format(self, device): + def cpu_op_exec(input): + output = torch.squeeze(input) + output = output.numpy() + return output + + def npu_op_exec(input): + output = torch.squeeze(input) + output = output.to("cpu") + output = output.numpy() + return output + + shape_format = [ + [torch.float16, (2, 1, 2, 1, 2)], + [torch.float32, (2, 1, 2, 1, 2)] + ] + for shape in shape_format: + cpu_input = torch.zeros(shape[1],dtype=shape[0]) + npu_input = torch.zeros(shape[1],dtype=shape[0]).npu() + cpu_output = cpu_op_exec(cpu_input) + npu_output = npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestSqueeze, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_stack.py b/test/test_npu/test_network_ops/test_stack.py old mode 100644 new mode 100755 index 118951970b4840771f162cfc206b1632b837ac78..4e64baff1d7086b84a1eb68797ca605c924582a7 --- a/test/test_npu/test_network_ops/test_stack.py +++ b/test/test_npu/test_network_ops/test_stack.py @@ -1,158 +1,158 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestStack(TestCase): - def cpu_op_exec(self, input1, input2, dim): - cpu_output = torch.stack((input1, input2), dim) - cpu_output = cpu_output.numpy() - return cpu_output - - def npu_op_exec(self, input1, input2, dim): - output = torch.stack((input1, input2), dim) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_out(self, input1, input2, dim, input3): - torch.stack((input1, input2), dim, out=input3) - output = input3.numpy() - return output - - def npu_op_exec_out(self, input1, input2, dim, input3): - torch.stack((input1, input2), dim, out=input3) - output = input3.to("cpu") - output = output.numpy() - return output - - def npu_output_size(self, inputs = [], dim = 0): - shape = [] - for i in range(dim): - shape.append(inputs[0].size(i)) - shape.append(len(inputs)) - for i in range(dim, inputs[0].dim()): - shape.append(inputs[0].size(i)) - - return shape - - def stack_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) - shape = self.npu_output_size([npu_input1,npu_input2], item[1]) - npu_input3 = torch.ones(shape, dtype = cpu_input1.dtype).npu() - cpu_input3 = torch.ones(shape, dtype = cpu_input1.dtype) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - cpu_input3 = cpu_input3.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, item[1]) - npu_output = self.npu_op_exec(npu_input1, npu_input2, item[1]) - cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, item[1], cpu_input3) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, item[1], npu_input3) - - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output, npu_output_out) - - def test_stack_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float16, i, [5, 256]], np.random.randint(0, 2)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float16, i, [32, 3, 3]], np.random.randint(0, 3)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_fp16_4d(self, device): - format_list = [0, 29] - shape_format = [[[np.float16, i, [32, 32, 3, 3]], np.random.randint(0, 4)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float32, i, [5, 256]], np.random.randint(0, 2)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[[np.float32, i, [32, 3, 3]], np.random.randint(0, 3)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_fp32_4d(self, device): - format_list = [0, 29] - shape_format = [[[np.float32, i, [32, 32, 3, 3]], np.random.randint(0, 4)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_int32_1d(self, device): - format_list = [0] - shape_format = [[[np.int32, i, [18]], np.random.randint(0, 1)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_int32_2d(self, device): - format_list = [0] - shape_format = [[[np.int32, i, [5, 256]], np.random.randint(0, 2)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_int32_3d(self, device): - format_list = [0] - shape_format = [[[np.int32, i, [32, 3, 3]], np.random.randint(0, 3)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_shape_format_int32_4d(self, device): - format_list = [-1] - shape_format = [[[np.int32, i, [32, 32, 3, 3]], np.random.randint(0, 4)] for i in format_list] - self.stack_result(shape_format) - - def test_stack_size_dim(self, device): - def cpu_op_exec(input1): - output = torch.stack((input1, input1, input1, input1, input1, input1, input1, input1, input1)) - return output.numpy() - - def npu_op_exec(input1): - output = torch.stack((input1, input1, input1, input1, input1, input1, input1, input1, input1)) - output = output.to("cpu") - return output.numpy() - shape_format = [ - [[np.int32, 0, ()]], - [[np.float32, 0, ()]], - [[np.float16, 0, ()]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_exec(cpu_input1) - npu_output = npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestStack, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestStack(TestCase): + def cpu_op_exec(self, input1, input2, dim): + cpu_output = torch.stack((input1, input2), dim) + cpu_output = cpu_output.numpy() + return cpu_output + + def npu_op_exec(self, input1, input2, dim): + output = torch.stack((input1, input2), dim) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_out(self, input1, input2, dim, input3): + torch.stack((input1, input2), dim, out=input3) + output = input3.numpy() + return output + + def npu_op_exec_out(self, input1, input2, dim, input3): + torch.stack((input1, input2), dim, out=input3) + output = input3.to("cpu") + output = output.numpy() + return output + + def npu_output_size(self, inputs = [], dim = 0): + shape = [] + for i in range(dim): + shape.append(inputs[0].size(i)) + shape.append(len(inputs)) + for i in range(dim, inputs[0].dim()): + shape.append(inputs[0].size(i)) + + return shape + + def stack_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + cpu_input2, npu_input2 = create_common_tensor(item[0], 0, 100) + shape = self.npu_output_size([npu_input1,npu_input2], item[1]) + npu_input3 = torch.ones(shape, dtype = cpu_input1.dtype).npu() + cpu_input3 = torch.ones(shape, dtype = cpu_input1.dtype) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + cpu_input3 = cpu_input3.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, item[1]) + npu_output = self.npu_op_exec(npu_input1, npu_input2, item[1]) + cpu_output_out = self.cpu_op_exec_out(cpu_input1, cpu_input2, item[1], cpu_input3) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, item[1], npu_input3) + + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output_out) + + def test_stack_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [[[np.float16, i, [18]], np.random.randint(0, 1)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float16, i, [5, 256]], np.random.randint(0, 2)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float16, i, [32, 3, 3]], np.random.randint(0, 3)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_fp16_4d(self, device): + format_list = [0, 29] + shape_format = [[[np.float16, i, [32, 32, 3, 3]], np.random.randint(0, 4)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [[[np.float32, i, [18]], np.random.randint(0, 1)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float32, i, [5, 256]], np.random.randint(0, 2)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[[np.float32, i, [32, 3, 3]], np.random.randint(0, 3)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_fp32_4d(self, device): + format_list = [0, 29] + shape_format = [[[np.float32, i, [32, 32, 3, 3]], np.random.randint(0, 4)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_int32_1d(self, device): + format_list = [0] + shape_format = [[[np.int32, i, [18]], np.random.randint(0, 1)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_int32_2d(self, device): + format_list = [0] + shape_format = [[[np.int32, i, [5, 256]], np.random.randint(0, 2)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_int32_3d(self, device): + format_list = [0] + shape_format = [[[np.int32, i, [32, 3, 3]], np.random.randint(0, 3)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_shape_format_int32_4d(self, device): + format_list = [-1] + shape_format = [[[np.int32, i, [32, 32, 3, 3]], np.random.randint(0, 4)] for i in format_list] + self.stack_result(shape_format) + + def test_stack_size_dim(self, device): + def cpu_op_exec(input1): + output = torch.stack((input1, input1, input1, input1, input1, input1, input1, input1, input1)) + return output.numpy() + + def npu_op_exec(input1): + output = torch.stack((input1, input1, input1, input1, input1, input1, input1, input1, input1)) + output = output.to("cpu") + return output.numpy() + shape_format = [ + [[np.int32, 0, ()]], + [[np.float32, 0, ()]], + [[np.float16, 0, ()]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_output = cpu_op_exec(cpu_input1) + npu_output = npu_op_exec(npu_input1) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestStack, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_std.py b/test/test_npu/test_network_ops/test_std.py index 179532e2b55e726ab3faca09404862d8603551a2..728f769b2a20f216e4161f991ff4cd9657b70e94 100644 --- a/test/test_npu/test_network_ops/test_std.py +++ b/test/test_npu/test_network_ops/test_std.py @@ -1,301 +1,301 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import random -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestStd(TestCase): - def cpu_op_exec(self, input, unbiased=True): - output = torch.std(input, unbiased=unbiased) - output = output.numpy() - return output - - def npu_op_exec(self, input, unbiased=True): - output = torch.std(input, unbiased=unbiased) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_dim_exec(self, input, dim, unbiased=True, keepdim=False): - output = torch.std(input, dim, unbiased=unbiased, keepdim=keepdim) - output = output.numpy() - return output - - def npu_op_dim_exec(self, input, dim, unbiased=True, keepdim=False): - output = torch.std(input, dim, unbiased=unbiased, keepdim=keepdim) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_dim_out_exec(self, input, dim, output1, unbiased=True, keepdim=False): - torch.std(input, dim, unbiased=unbiased, keepdim=keepdim,out=output1) - output1 = output1.numpy() - return output1 - - def npu_op_dim_out_exec(self, input, dim, output1, unbiased=True, keepdim=False): - torch.std(input, dim, unbiased=unbiased, keepdim=keepdim,out=output1) - output1 = output1.to("cpu") - output1 = output1.numpy() - return output1 - - def output_shape(self, inputshape, dim, unbiased=True, keepdim=False): - shape = list(inputshape) - if dim < len(inputshape) and keepdim == True: - shape[dim] = 1 - elif dim < len(inputshape) and keepdim == False: - shape.pop(dim) - return shape - - def create_output_tensor(self, minvalue,maxvalue,shape,npuformat,dtype): - input1 = np.random.uniform(minvalue, maxvalue, shape).astype(dtype) - cpu_input = torch.from_numpy(input1) - npu_input = torch.from_numpy(input1).npu() - if npuformat != -1: - npu_input = npu_input.npu_format_cast(npuformat) - return cpu_input, npu_input - - def test_std_shape_format_fp16(self, device): - format_list = [0] - shape_list = [[16], [32, 1024], [32, 8, 1024], [128, 32, 8, 1023]] - unbiased_list = [True, False] - shape_format = [ - [np.float16, i, j, k] for i in format_list for j in shape_list for k in unbiased_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output1 = self.cpu_op_exec(cpu_input1, item[3]) - cpu_output1 = cpu_output1.astype(np.float16) - npu_output1 = self.npu_op_exec(npu_input1, item[3]) - self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.1) - - def test_std_shape_format_fp32(self, device): - format_list = [0] - shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024]] - unbiased_list = [True, False] - shape_format = [ - [np.float32, i, j, k] for i in format_list for j in shape_list for k in unbiased_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_op_exec(cpu_input1, item[3]) - npu_output = self.npu_op_exec(npu_input1, item[3]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_std_dim_shape_format_fp16(self, device): - format_list = [0] - shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1023]] - dim_list = [0] - unbiased_list = [True, False] - keepdim_list = [True, False] - shape_format = [ - [np.float16, i, j, k, l, m] for i in format_list for j in shape_list - for k in dim_list for l in unbiased_list for m in keepdim_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) - cpu_output1 = cpu_output1.astype(np.float16) - npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) - self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.003) - - def test_std_dim_shape_format_fp32(self, device): - format_list = [0] - shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1023]] - dim_list = [0] - unbiased_list = [True, False] - keepdim_list = [True, False] - shape_format = [ - [np.float32, i, j, k, l, m] for i in format_list for j in shape_list - for k in dim_list for l in unbiased_list for m in keepdim_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) - npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) - self.assertRtolEqual(cpu_output1, npu_output1) - - def test_std_dim_out_shape_format_fp16(self, device): - format_list = [0] - shape_list = [[1024], [32, 24], [32, 8, 24], [12, 32, 8, 24]] - dim_list = [0] - unbiased_list = [True, False] - keepdim_list = [True, False] - shape_format = [ - [np.float16, i, j, k, l, m] for i in format_list for j in shape_list - for k in dim_list for l in unbiased_list for m in keepdim_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - outputshape = self.output_shape(item[2],item[3],item[4],item[5]) - cpu_output,npu_output = self.create_output_tensor(0,1,outputshape,item[1],item[0]) - if item[0] == np.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = cpu_output.to(torch.float32) - cpu_output1 = self.cpu_op_dim_out_exec(cpu_input1, item[3], cpu_output, item[4], item[5]) - npu_output1 = self.npu_op_dim_out_exec(npu_input1, item[3], npu_output, item[4], item[5]) - if item[0] == np.float16: - cpu_output1 = cpu_output1.astype(np.float16) - self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.002) - - random_outputshape = [random.randint(1, 100)] - cpu_output, npu_output = self.create_output_tensor(0, 1, random_outputshape,item[1],item[0]) - if item[0] == np.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output = cpu_output.to(torch.float32) - cpu_output1 = self.cpu_op_dim_out_exec(cpu_input1, item[3], cpu_output, item[4], item[5]) - npu_output1 = self.npu_op_dim_out_exec(npu_input1, item[3], npu_output, item[4], item[5]) - if item[0] == np.float16: - cpu_output1 = cpu_output1.astype(np.float16) - self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.002) - - def test_std_dim_out_shape_format_fp32(self, device): - format_list = [0] - shape_list = [[1024], [32, 24], [32, 8, 24], [12, 32, 8, 24]] - dim_list = [0] - unbiased_list = [True, False] - keepdim_list = [True, False] - shape_format = [ - [np.float32, i, j, k, l, m] for i in format_list for j in shape_list - for k in dim_list for l in unbiased_list for m in keepdim_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - outputshape = self.output_shape(item[2],item[3],item[4],item[5]) - cpu_output,npu_output = self.create_output_tensor(0,1,outputshape,item[1],item[0]) - cpu_output1 = self.cpu_op_dim_out_exec(cpu_input1, item[3], cpu_output, item[4], item[5]) - npu_output1 = self.npu_op_dim_out_exec(npu_input1, item[3], npu_output, item[4], item[5]) - self.assertRtolEqual(cpu_output1, npu_output1) - - random_outputshape = [random.randint(1, 100)] - cpu_output, npu_output = self.create_output_tensor(0, 1, random_outputshape, item[1], item[0]) - cpu_output2 = self.cpu_op_dim_out_exec(cpu_input1, item[3], cpu_output.clone(), item[4], item[5]) - npu_output2 = self.npu_op_dim_out_exec(npu_input1, item[3], npu_output, item[4], item[5]) - self.assertRtolEqual(cpu_output2, npu_output2) - - def test_std_dim_name_fp16(self, device): - shape = (1024, 8, 32) - cpu_input = torch.rand(shape, dtype=torch.float32) - npu_input = cpu_input.npu().to(torch.float16) - cpu_input.names = ['N','C','H'] - npu_input.names = ['N','C','H'] - dim = np.random.choice(['N', 'C', 'H']) - cpu_output = torch.std(cpu_input, dim=dim) - npu_output = torch.std(npu_input, dim=dim) - self.assertRtolEqual(cpu_output.to(torch.float16).numpy(), npu_output.cpu().numpy()) - - def test_std_dim_name_fp32(self, device): - shape = (1024, 8, 32) - cpu_input = torch.rand(shape, dtype=torch.float32, names=('N', 'C', 'H')) - npu_input = cpu_input.npu() - dim = np.random.choice(['N', 'C', 'H']) - cpu_output = torch.std(cpu_input, dim=dim) - npu_output = torch.std(npu_input, dim=dim) - self.assertRtolEqual(cpu_output.numpy(), npu_output.cpu().numpy()) - - def test_std_dim_out_name_fp16(self, device): - shape = (1024, 8, 32) - dimlist = ['N', 'C', 'H'] - cpu_input = torch.rand(shape, dtype=torch.float32) - npu_input = cpu_input.npu() - dim = np.random.choice(dimlist) - dims = dimlist.index(dim) - outputshape = self.output_shape(shape, dims) - cpu_output,npu_output = self.create_output_tensor(0, 1, outputshape, -1, np.float32) - npu_input = npu_input.to(torch.float16) - npu_output = npu_output.to(torch.float16) - cpu_input.names = ['N','C','H'] - npu_input.names = ['N','C','H'] - - cpu_output = torch.std(cpu_input, dim=dim,out=cpu_output) - npu_output = torch.std(npu_input, dim=dim,out=npu_output) - cpu_output = cpu_output.to(torch.float16) - self.assertRtolEqual(cpu_output.numpy(), npu_output.cpu().numpy()) - - def test_std_dim_out_name_fp32(self, device): - shape = (1024, 8, 32) - dimlist = ['N', 'C', 'H'] - cpu_input = torch.rand(shape, dtype=torch.float32, names=('N', 'C', 'H')) - npu_input = cpu_input.npu() - dim = np.random.choice(dimlist) - dims = dimlist.index(dim) - outputshape = self.output_shape(shape, dims) - cpu_output,npu_output = self.create_output_tensor(0, 1, outputshape, -1, np.float32) - cpu_output = torch.std(cpu_input, dim=dim,out=cpu_output) - npu_output = torch.std(npu_input, dim=dim,out=npu_output) - self.assertRtolEqual(cpu_output.numpy(), npu_output.cpu().numpy()) - - def test_std_n_dim_shape_format_fp16(self, device): - format_list = [0] - shape_list = [[128, 32, 8, 1023]] - dim_list = [(3, 1)] - unbiased_list = [True, False] - keepdim_list = [True, False] - shape_format = [ - [np.float16, i, j, k, l, m] for i in format_list for j in shape_list - for k in dim_list for l in unbiased_list for m in keepdim_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) - cpu_output1 = cpu_output1.astype(np.float16) - npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) - self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.003) - - def test_std_n_dim_shape_format_fp32(self, device): - format_list = [0] - shape_list = [[128, 32, 8, 1023]] - dim_list = [(3, 1)] - unbiased_list = [True, False] - keepdim_list = [True, False] - shape_format = [ - [np.float32, i, j, k, l, m] for i in format_list for j in shape_list - for k in dim_list for l in unbiased_list for m in keepdim_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) - npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) - self.assertRtolEqual(cpu_output1, npu_output1) - - def test_std_dim_shape_format_5d_fp16(self, device): - format_list = [-1] - shape_list = [[2, 94, 4, 52, 192]] - dim_list = [0] - unbiased_list = [True, False] - keepdim_list = [True, False] - shape_format = [ - [np.float16, i, j, k, l, m] for i in format_list for j in shape_list - for k in dim_list for l in unbiased_list for m in keepdim_list - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - cpu_input1 = cpu_input1.to(torch.float32) - cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) - cpu_output1 = cpu_output1.astype(np.float16) - npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) - self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.006) - -instantiate_device_type_tests(TestStd, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import random +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestStd(TestCase): + def cpu_op_exec(self, input, unbiased=True): + output = torch.std(input, unbiased=unbiased) + output = output.numpy() + return output + + def npu_op_exec(self, input, unbiased=True): + output = torch.std(input, unbiased=unbiased) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_dim_exec(self, input, dim, unbiased=True, keepdim=False): + output = torch.std(input, dim, unbiased=unbiased, keepdim=keepdim) + output = output.numpy() + return output + + def npu_op_dim_exec(self, input, dim, unbiased=True, keepdim=False): + output = torch.std(input, dim, unbiased=unbiased, keepdim=keepdim) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_dim_out_exec(self, input, dim, output1, unbiased=True, keepdim=False): + torch.std(input, dim, unbiased=unbiased, keepdim=keepdim,out=output1) + output1 = output1.numpy() + return output1 + + def npu_op_dim_out_exec(self, input, dim, output1, unbiased=True, keepdim=False): + torch.std(input, dim, unbiased=unbiased, keepdim=keepdim,out=output1) + output1 = output1.to("cpu") + output1 = output1.numpy() + return output1 + + def output_shape(self, inputshape, dim, unbiased=True, keepdim=False): + shape = list(inputshape) + if dim < len(inputshape) and keepdim == True: + shape[dim] = 1 + elif dim < len(inputshape) and keepdim == False: + shape.pop(dim) + return shape + + def create_output_tensor(self, minvalue,maxvalue,shape,npuformat,dtype): + input1 = np.random.uniform(minvalue, maxvalue, shape).astype(dtype) + cpu_input = torch.from_numpy(input1) + npu_input = torch.from_numpy(input1).npu() + if npuformat != -1: + npu_input = npu_input.npu_format_cast(npuformat) + return cpu_input, npu_input + + def test_std_shape_format_fp16(self, device): + format_list = [0] + shape_list = [[16], [32, 1024], [32, 8, 1024], [128, 32, 8, 1023]] + unbiased_list = [True, False] + shape_format = [ + [np.float16, i, j, k] for i in format_list for j in shape_list for k in unbiased_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output1 = self.cpu_op_exec(cpu_input1, item[3]) + cpu_output1 = cpu_output1.astype(np.float16) + npu_output1 = self.npu_op_exec(npu_input1, item[3]) + self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.1) + + def test_std_shape_format_fp32(self, device): + format_list = [0] + shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1024]] + unbiased_list = [True, False] + shape_format = [ + [np.float32, i, j, k] for i in format_list for j in shape_list for k in unbiased_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input1, item[3]) + npu_output = self.npu_op_exec(npu_input1, item[3]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_std_dim_shape_format_fp16(self, device): + format_list = [0] + shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1023]] + dim_list = [0] + unbiased_list = [True, False] + keepdim_list = [True, False] + shape_format = [ + [np.float16, i, j, k, l, m] for i in format_list for j in shape_list + for k in dim_list for l in unbiased_list for m in keepdim_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) + cpu_output1 = cpu_output1.astype(np.float16) + npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) + self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.003) + + def test_std_dim_shape_format_fp32(self, device): + format_list = [0] + shape_list = [[1024], [32, 1024], [32, 8, 1024], [128, 32, 8, 1023]] + dim_list = [0] + unbiased_list = [True, False] + keepdim_list = [True, False] + shape_format = [ + [np.float32, i, j, k, l, m] for i in format_list for j in shape_list + for k in dim_list for l in unbiased_list for m in keepdim_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) + npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) + self.assertRtolEqual(cpu_output1, npu_output1) + + def test_std_dim_out_shape_format_fp16(self, device): + format_list = [0] + shape_list = [[1024], [32, 24], [32, 8, 24], [12, 32, 8, 24]] + dim_list = [0] + unbiased_list = [True, False] + keepdim_list = [True, False] + shape_format = [ + [np.float16, i, j, k, l, m] for i in format_list for j in shape_list + for k in dim_list for l in unbiased_list for m in keepdim_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + outputshape = self.output_shape(item[2],item[3],item[4],item[5]) + cpu_output,npu_output = self.create_output_tensor(0,1,outputshape,item[1],item[0]) + if item[0] == np.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = cpu_output.to(torch.float32) + cpu_output1 = self.cpu_op_dim_out_exec(cpu_input1, item[3], cpu_output, item[4], item[5]) + npu_output1 = self.npu_op_dim_out_exec(npu_input1, item[3], npu_output, item[4], item[5]) + if item[0] == np.float16: + cpu_output1 = cpu_output1.astype(np.float16) + self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.002) + + random_outputshape = [random.randint(1, 100)] + cpu_output, npu_output = self.create_output_tensor(0, 1, random_outputshape,item[1],item[0]) + if item[0] == np.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output = cpu_output.to(torch.float32) + cpu_output1 = self.cpu_op_dim_out_exec(cpu_input1, item[3], cpu_output, item[4], item[5]) + npu_output1 = self.npu_op_dim_out_exec(npu_input1, item[3], npu_output, item[4], item[5]) + if item[0] == np.float16: + cpu_output1 = cpu_output1.astype(np.float16) + self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.002) + + def test_std_dim_out_shape_format_fp32(self, device): + format_list = [0] + shape_list = [[1024], [32, 24], [32, 8, 24], [12, 32, 8, 24]] + dim_list = [0] + unbiased_list = [True, False] + keepdim_list = [True, False] + shape_format = [ + [np.float32, i, j, k, l, m] for i in format_list for j in shape_list + for k in dim_list for l in unbiased_list for m in keepdim_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + outputshape = self.output_shape(item[2],item[3],item[4],item[5]) + cpu_output,npu_output = self.create_output_tensor(0,1,outputshape,item[1],item[0]) + cpu_output1 = self.cpu_op_dim_out_exec(cpu_input1, item[3], cpu_output, item[4], item[5]) + npu_output1 = self.npu_op_dim_out_exec(npu_input1, item[3], npu_output, item[4], item[5]) + self.assertRtolEqual(cpu_output1, npu_output1) + + random_outputshape = [random.randint(1, 100)] + cpu_output, npu_output = self.create_output_tensor(0, 1, random_outputshape, item[1], item[0]) + cpu_output2 = self.cpu_op_dim_out_exec(cpu_input1, item[3], cpu_output.clone(), item[4], item[5]) + npu_output2 = self.npu_op_dim_out_exec(npu_input1, item[3], npu_output, item[4], item[5]) + self.assertRtolEqual(cpu_output2, npu_output2) + + def test_std_dim_name_fp16(self, device): + shape = (1024, 8, 32) + cpu_input = torch.rand(shape, dtype=torch.float32) + npu_input = cpu_input.npu().to(torch.float16) + cpu_input.names = ['N','C','H'] + npu_input.names = ['N','C','H'] + dim = np.random.choice(['N', 'C', 'H']) + cpu_output = torch.std(cpu_input, dim=dim) + npu_output = torch.std(npu_input, dim=dim) + self.assertRtolEqual(cpu_output.to(torch.float16).numpy(), npu_output.cpu().numpy()) + + def test_std_dim_name_fp32(self, device): + shape = (1024, 8, 32) + cpu_input = torch.rand(shape, dtype=torch.float32, names=('N', 'C', 'H')) + npu_input = cpu_input.npu() + dim = np.random.choice(['N', 'C', 'H']) + cpu_output = torch.std(cpu_input, dim=dim) + npu_output = torch.std(npu_input, dim=dim) + self.assertRtolEqual(cpu_output.numpy(), npu_output.cpu().numpy()) + + def test_std_dim_out_name_fp16(self, device): + shape = (1024, 8, 32) + dimlist = ['N', 'C', 'H'] + cpu_input = torch.rand(shape, dtype=torch.float32) + npu_input = cpu_input.npu() + dim = np.random.choice(dimlist) + dims = dimlist.index(dim) + outputshape = self.output_shape(shape, dims) + cpu_output,npu_output = self.create_output_tensor(0, 1, outputshape, -1, np.float32) + npu_input = npu_input.to(torch.float16) + npu_output = npu_output.to(torch.float16) + cpu_input.names = ['N','C','H'] + npu_input.names = ['N','C','H'] + + cpu_output = torch.std(cpu_input, dim=dim,out=cpu_output) + npu_output = torch.std(npu_input, dim=dim,out=npu_output) + cpu_output = cpu_output.to(torch.float16) + self.assertRtolEqual(cpu_output.numpy(), npu_output.cpu().numpy()) + + def test_std_dim_out_name_fp32(self, device): + shape = (1024, 8, 32) + dimlist = ['N', 'C', 'H'] + cpu_input = torch.rand(shape, dtype=torch.float32, names=('N', 'C', 'H')) + npu_input = cpu_input.npu() + dim = np.random.choice(dimlist) + dims = dimlist.index(dim) + outputshape = self.output_shape(shape, dims) + cpu_output,npu_output = self.create_output_tensor(0, 1, outputshape, -1, np.float32) + cpu_output = torch.std(cpu_input, dim=dim,out=cpu_output) + npu_output = torch.std(npu_input, dim=dim,out=npu_output) + self.assertRtolEqual(cpu_output.numpy(), npu_output.cpu().numpy()) + + def test_std_n_dim_shape_format_fp16(self, device): + format_list = [0] + shape_list = [[128, 32, 8, 1023]] + dim_list = [(3, 1)] + unbiased_list = [True, False] + keepdim_list = [True, False] + shape_format = [ + [np.float16, i, j, k, l, m] for i in format_list for j in shape_list + for k in dim_list for l in unbiased_list for m in keepdim_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) + cpu_output1 = cpu_output1.astype(np.float16) + npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) + self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.003) + + def test_std_n_dim_shape_format_fp32(self, device): + format_list = [0] + shape_list = [[128, 32, 8, 1023]] + dim_list = [(3, 1)] + unbiased_list = [True, False] + keepdim_list = [True, False] + shape_format = [ + [np.float32, i, j, k, l, m] for i in format_list for j in shape_list + for k in dim_list for l in unbiased_list for m in keepdim_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) + npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) + self.assertRtolEqual(cpu_output1, npu_output1) + + def test_std_dim_shape_format_5d_fp16(self, device): + format_list = [-1] + shape_list = [[2, 94, 4, 52, 192]] + dim_list = [0] + unbiased_list = [True, False] + keepdim_list = [True, False] + shape_format = [ + [np.float16, i, j, k, l, m] for i in format_list for j in shape_list + for k in dim_list for l in unbiased_list for m in keepdim_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) + cpu_output1 = cpu_output1.astype(np.float16) + npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) + self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.006) + +instantiate_device_type_tests(TestStd, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_stride.py b/test/test_npu/test_network_ops/test_stride.py index 72ed2f7d248610c3e0875f453cdf9ab75832a46e..a849d5d1ba2a6f401b683d6429315d998e6a0816 100644 --- a/test/test_npu/test_network_ops/test_stride.py +++ b/test/test_npu/test_network_ops/test_stride.py @@ -1,42 +1,42 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - -class TestStride(TestCase): - def test_stride_common_shape_format(self, device): - def op_exec(input): - output = input.stride() - output = np.array(output, dtype=np.int32) - return output - - shape_format = [ - [[np.float16, 0, (64)]], - [[np.float32, 4, (32, 1, 3, 3)]], - [[np.float32, 3, (10, 128)]] - ] - for shape in shape_format: - cpu_input, npu_input = create_common_tensor(shape[0], -100, 100) - cpu_output = op_exec(cpu_input) - npu_output = op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestStride, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + +class TestStride(TestCase): + def test_stride_common_shape_format(self, device): + def op_exec(input): + output = input.stride() + output = np.array(output, dtype=np.int32) + return output + + shape_format = [ + [[np.float16, 0, (64)]], + [[np.float32, 4, (32, 1, 3, 3)]], + [[np.float32, 3, (10, 128)]] + ] + for shape in shape_format: + cpu_input, npu_input = create_common_tensor(shape[0], -100, 100) + cpu_output = op_exec(cpu_input) + npu_output = op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestStride, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_strideadd.py b/test/test_npu/test_network_ops/test_strideadd.py index 99959f52afa400c6e87a22a543e8db94208671b2..19b5719759551ed0909c193fb8e5fa3f87cb7c47 100644 --- a/test/test_npu/test_network_ops/test_strideadd.py +++ b/test/test_npu/test_network_ops/test_strideadd.py @@ -1,39 +1,39 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -from common_utils import TestCase, run_tests - - -class TestStrideAdd(TestCase): - def npu_op_exec(self, input1, input2, offset1, offset2, c1_len): - output = torch.npu_stride_add(input1, input2, offset1, offset2, c1_len) - output = output.to("cpu") - output = output.numpy() - - return output - - def test_StrideAdd(self, device): - input1 = torch.tensor([[[[[1.]]]]]).npu() - input2 = input1 - exoutput = torch.tensor([[[[[2.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]], - [[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]]]]) - output = self.npu_op_exec(input1, input2, 0, 0, 1) - self.assertRtolEqual(exoutput.numpy(), output) - -instantiate_device_type_tests(TestStrideAdd, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +from common_utils import TestCase, run_tests + + +class TestStrideAdd(TestCase): + def npu_op_exec(self, input1, input2, offset1, offset2, c1_len): + output = torch.npu_stride_add(input1, input2, offset1, offset2, c1_len) + output = output.to("cpu") + output = output.numpy() + + return output + + def test_StrideAdd(self, device): + input1 = torch.tensor([[[[[1.]]]]]).npu() + input2 = input1 + exoutput = torch.tensor([[[[[2.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]], + [[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]],[[[0.]]]]]) + output = self.npu_op_exec(input1, input2, 0, 0, 1) + self.assertRtolEqual(exoutput.numpy(), output) + +instantiate_device_type_tests(TestStrideAdd, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_sub.py b/test/test_npu/test_network_ops/test_sub.py old mode 100644 new mode 100755 index 7fa1d7aced19bf0813a2fe1c8b6a4a971d3bff81..6731ffefa5ed0dadf7b0aefcc3e36f103c4643e5 --- a/test/test_npu/test_network_ops/test_sub.py +++ b/test/test_npu/test_network_ops/test_sub.py @@ -1,236 +1,236 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestSub(TestCase): - def cpu_op_exec(self, input1, input2): - output = input1 - input2 - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - output = input1 - input2 - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_t(self, input1, input2): - output = torch.sub(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_t_out(self, input1, input2, input3): - torch.sub(input1, input2, out=input3) - output = input3.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_tensor(self, input1, input2): - output = input1.sub(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_inp_tensor(self, input1, input2): - input1.sub_(input2) - output = input1.numpy() - return output - - def npu_op_exec_inp_tensor(self, input1, input2): - input1.sub_(input2) - output = input1.to("cpu") - output = output.numpy() - return output - - def sub_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - npu_input3 = copy.deepcopy(cpu_input1).to("npu") - npu_input4 = torch.randn(6).to("npu").to(npu_input3.dtype) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - if type(item[1]) == list: - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) - if cpu_input2.dtype == torch.float16: - cpu_input2 = cpu_input2.to(torch.float32) - else: - cpu_input2 = item[1] - npu_input2 = item[1] - - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - cpu_output = cpu_output.astype(npu_output.dtype) - - npu_output_t = self.npu_op_exec_t(npu_input1, npu_input2) - npu_output_t_out = self.npu_op_exec_t_out(npu_input1, npu_input2, npu_input3) - npu_output_tensor = self.npu_op_exec_tensor(npu_input1, npu_input2) - npu_output_t_out_chk = self.npu_op_exec_t_out(npu_input1, npu_input2, npu_input4)#out tensor shape not shape as self - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output, npu_output_t) - self.assertRtolEqual(cpu_output, npu_output_t_out) - self.assertRtolEqual(cpu_output, npu_output_tensor) - self.assertRtolEqual(cpu_output, npu_output_t_out_chk) - - # test for tensor - cpu_input1_tensor, npu_input1_tensor = create_common_tensor(item[0], 0, 100) - if cpu_input1_tensor.dtype == torch.float16: - cpu_input1_tensor = cpu_input1_tensor.to(torch.float32) - - if type(item[1]) == list: - cpu_input2_tensor, npu_input2_tensor = create_common_tensor(item[1], 0, 100) - if cpu_input2_tensor.dtype == torch.float16: - cpu_input2_tensor = cpu_input2_tensor.to(torch.float32) - else: - cpu_input2_tensor = item[1] - npu_input2_tensor = item[1] - - cpu_output_inp_tensor = self.cpu_op_exec_inp_tensor(cpu_input1_tensor, cpu_input2_tensor) - npu_output_inp_tensor = self.npu_op_exec_inp_tensor(npu_input1_tensor, npu_input2_tensor) - cpu_output_inp_tensor = cpu_output_inp_tensor.astype(npu_output_inp_tensor.dtype) - self.assertRtolEqual(cpu_output_inp_tensor, npu_output_inp_tensor) - - def test_sub_scalar_shape_format_fp32_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float32, i, [448]], np.random.uniform(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_fp32_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [1000, 1280]], np.random.uniform(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_fp32_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [32, 3, 3]], np.random.uniform(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_fp32_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [256, 480, 14, 14]], np.random.uniform(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_int32_1d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [448]], np.random.randint(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_int32_2d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [64, 7]], np.random.randint(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_int32_3d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [64, 7, 58]], np.random.randint(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_int32_4d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [256, 480, 14, 14]], np.random.randint(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_fp16_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float16, i, [448]], [np.float16, i, [448]]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_fp16_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [1000, 1280]], [np.float16, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_fp16_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [32, 3, 3]], [np.float16, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_fp16_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float16, i, [256, 480, 14, 14]], [np.float16, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_fp32_1d(self, device): - format_list = [-1, 0, 3] - shape_format = [[[np.float32, i, [448]], [np.float32, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_fp32_2d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [1000, 1280]], [np.float32, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_fp32_3d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [32, 3, 3]], [np.float32, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_fp32_4d(self, device): - format_list = [-1, 0, 3, 29] - shape_format = [[[np.float32, i, [256, 480, 14, 14]], [np.float32, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_int32_1d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [448]], [np.int32, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_int32_2d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [64, 7]], [np.int32, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_int32_3d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [64, 7, 58]], [np.int32, i, []]] for i in format_list] - self.sub_result(shape_format) - - def test_sub_shape_format_int32_4d(self, device): - format_list = [-1, 0] - shape_format = [[[np.int32, i, [256, 480, 14, 14]], [np.int32, i, []]] for i in format_list] - self.sub_result(shape_format) -''' - # unsupport - def test_sub_scalar_shape_format_fp16_1d(self, device): - format_list = [-1, 0, 3, 4] - shape_format = [[[np.float16, i, [448]], np.random.uniform(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_fp16_2d(self, device): - format_list = [-1, 0, 3, 4, 29] - shape_format = [[[np.float16, i, [1000, 1280]], np.random.uniform(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_fp16_3d(self, device): - format_list = [-1, 0, 3, 4, 29] - shape_format = [[[np.float16, i, [32, 3, 3]], np.random.uniform(0, 100)] for i in format_list] - self.sub_result(shape_format) - - def test_sub_scalar_shape_format_fp16_4d(self, device): - format_list = [-1, 0, 3, 4, 29] - shape_format = [[[np.float16, i, [256, 480, 14, 14]], np.random.uniform(0, 100)] for i in format_list] - self.sub_result(shape_format) -''' -instantiate_device_type_tests(TestSub, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestSub(TestCase): + def cpu_op_exec(self, input1, input2): + output = input1 - input2 + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + output = input1 - input2 + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_t(self, input1, input2): + output = torch.sub(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_t_out(self, input1, input2, input3): + torch.sub(input1, input2, out=input3) + output = input3.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_tensor(self, input1, input2): + output = input1.sub(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_inp_tensor(self, input1, input2): + input1.sub_(input2) + output = input1.numpy() + return output + + def npu_op_exec_inp_tensor(self, input1, input2): + input1.sub_(input2) + output = input1.to("cpu") + output = output.numpy() + return output + + def sub_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + npu_input3 = copy.deepcopy(cpu_input1).to("npu") + npu_input4 = torch.randn(6).to("npu").to(npu_input3.dtype) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + if type(item[1]) == list: + cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100) + if cpu_input2.dtype == torch.float16: + cpu_input2 = cpu_input2.to(torch.float32) + else: + cpu_input2 = item[1] + npu_input2 = item[1] + + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + cpu_output = cpu_output.astype(npu_output.dtype) + + npu_output_t = self.npu_op_exec_t(npu_input1, npu_input2) + npu_output_t_out = self.npu_op_exec_t_out(npu_input1, npu_input2, npu_input3) + npu_output_tensor = self.npu_op_exec_tensor(npu_input1, npu_input2) + npu_output_t_out_chk = self.npu_op_exec_t_out(npu_input1, npu_input2, npu_input4)#out tensor shape not shape as self + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output_t) + self.assertRtolEqual(cpu_output, npu_output_t_out) + self.assertRtolEqual(cpu_output, npu_output_tensor) + self.assertRtolEqual(cpu_output, npu_output_t_out_chk) + + # test for tensor + cpu_input1_tensor, npu_input1_tensor = create_common_tensor(item[0], 0, 100) + if cpu_input1_tensor.dtype == torch.float16: + cpu_input1_tensor = cpu_input1_tensor.to(torch.float32) + + if type(item[1]) == list: + cpu_input2_tensor, npu_input2_tensor = create_common_tensor(item[1], 0, 100) + if cpu_input2_tensor.dtype == torch.float16: + cpu_input2_tensor = cpu_input2_tensor.to(torch.float32) + else: + cpu_input2_tensor = item[1] + npu_input2_tensor = item[1] + + cpu_output_inp_tensor = self.cpu_op_exec_inp_tensor(cpu_input1_tensor, cpu_input2_tensor) + npu_output_inp_tensor = self.npu_op_exec_inp_tensor(npu_input1_tensor, npu_input2_tensor) + cpu_output_inp_tensor = cpu_output_inp_tensor.astype(npu_output_inp_tensor.dtype) + self.assertRtolEqual(cpu_output_inp_tensor, npu_output_inp_tensor) + + def test_sub_scalar_shape_format_fp32_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float32, i, [448]], np.random.uniform(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_fp32_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [1000, 1280]], np.random.uniform(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_fp32_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [32, 3, 3]], np.random.uniform(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_fp32_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [256, 480, 14, 14]], np.random.uniform(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_int32_1d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [448]], np.random.randint(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_int32_2d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [64, 7]], np.random.randint(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_int32_3d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [64, 7, 58]], np.random.randint(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_int32_4d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [256, 480, 14, 14]], np.random.randint(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_fp16_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float16, i, [448]], [np.float16, i, [448]]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_fp16_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [1000, 1280]], [np.float16, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_fp16_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [32, 3, 3]], [np.float16, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_fp16_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float16, i, [256, 480, 14, 14]], [np.float16, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_fp32_1d(self, device): + format_list = [-1, 0, 3] + shape_format = [[[np.float32, i, [448]], [np.float32, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_fp32_2d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [1000, 1280]], [np.float32, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_fp32_3d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [32, 3, 3]], [np.float32, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_fp32_4d(self, device): + format_list = [-1, 0, 3, 29] + shape_format = [[[np.float32, i, [256, 480, 14, 14]], [np.float32, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_int32_1d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [448]], [np.int32, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_int32_2d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [64, 7]], [np.int32, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_int32_3d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [64, 7, 58]], [np.int32, i, []]] for i in format_list] + self.sub_result(shape_format) + + def test_sub_shape_format_int32_4d(self, device): + format_list = [-1, 0] + shape_format = [[[np.int32, i, [256, 480, 14, 14]], [np.int32, i, []]] for i in format_list] + self.sub_result(shape_format) +''' + # unsupport + def test_sub_scalar_shape_format_fp16_1d(self, device): + format_list = [-1, 0, 3, 4] + shape_format = [[[np.float16, i, [448]], np.random.uniform(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_fp16_2d(self, device): + format_list = [-1, 0, 3, 4, 29] + shape_format = [[[np.float16, i, [1000, 1280]], np.random.uniform(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_fp16_3d(self, device): + format_list = [-1, 0, 3, 4, 29] + shape_format = [[[np.float16, i, [32, 3, 3]], np.random.uniform(0, 100)] for i in format_list] + self.sub_result(shape_format) + + def test_sub_scalar_shape_format_fp16_4d(self, device): + format_list = [-1, 0, 3, 4, 29] + shape_format = [[[np.float16, i, [256, 480, 14, 14]], np.random.uniform(0, 100)] for i in format_list] + self.sub_result(shape_format) +''' +instantiate_device_type_tests(TestSub, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_sub_sample.py b/test/test_npu/test_network_ops/test_sub_sample.py index fa71597d1a0d784c22fe57be6b1f96662ab3c51e..197de22ace0103534e426807bcba43fa6b03c4d5 100644 --- a/test/test_npu/test_network_ops/test_sub_sample.py +++ b/test/test_npu/test_network_ops/test_sub_sample.py @@ -1,75 +1,75 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch -import numpy as np -import torch.nn as nn -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestSubSample(TestCase): - def get_num(self, input1, output): - input_num1 = 0 - input_num0 = 0 - output_num1 = 0 - output_num0 = 0 - for i in range(input1.size()[0]): - if input1[i] == 1: - input_num1 = input_num1 + 1 - if input1[i] == 0: - input_num0 = input_num0 + 1 - for i in range(output.size()[0]): - if output[i] == 1: - output_num1 = output_num1 + 1 - if output[i] == 0: - output_num0 = output_num0 + 1 - return input_num1, input_num0, output_num1, output_num0 - - def numless_equal(self, input_num1, input_num0, output_num1, output_num0, size, fraction): - error_name = "result error" - if input_num1 < size * fraction: - if output_num1 != input_num1: - self.fail(error_name) - if input_num0 < size - input_num1 and output_num0 != input_num0: - self.fail(error_name) - if input_num0 >= size - input_num1 and output_num0 != size - input_num1: - self.fail(error_name) - - def nummore_equal(self, input_num1, input_num0, output_num1, output_num0, size, fraction): - error_name = "result error" - if input_num1 >=size * fraction : - if output_num1 != size * fraction: - self.fail(error_name) - if input_num0 < size - size * fraction and output_num0 != input_num0: - self.fail(error_name) - if input_num0 >= size - size * fraction and output_num0 != size - size * fraction: - self.fail(error_name) - - def test_subsample(self, device): - for _ in range(20): - input1 = np.random.randint(-1,2,size = (10)) - npu_input = torch.from_numpy(input1).to("npu") - #input only suport int32 - npu_input = npu_input.to(torch.int32) - npu_output1 = torch.npu_sub_sample(npu_input,5,0.6) - input_num1, input_num0, output_num1, output_num0 = self.get_num(npu_input, npu_output1) - self.numless_equal(input_num1, input_num0, output_num1, output_num0,5,0.6) - self.nummore_equal(input_num1, input_num0, output_num1, output_num0,5,0.6) - - -instantiate_device_type_tests(TestSubSample, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch +import numpy as np +import torch.nn as nn +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestSubSample(TestCase): + def get_num(self, input1, output): + input_num1 = 0 + input_num0 = 0 + output_num1 = 0 + output_num0 = 0 + for i in range(input1.size()[0]): + if input1[i] == 1: + input_num1 = input_num1 + 1 + if input1[i] == 0: + input_num0 = input_num0 + 1 + for i in range(output.size()[0]): + if output[i] == 1: + output_num1 = output_num1 + 1 + if output[i] == 0: + output_num0 = output_num0 + 1 + return input_num1, input_num0, output_num1, output_num0 + + def numless_equal(self, input_num1, input_num0, output_num1, output_num0, size, fraction): + error_name = "result error" + if input_num1 < size * fraction: + if output_num1 != input_num1: + self.fail(error_name) + if input_num0 < size - input_num1 and output_num0 != input_num0: + self.fail(error_name) + if input_num0 >= size - input_num1 and output_num0 != size - input_num1: + self.fail(error_name) + + def nummore_equal(self, input_num1, input_num0, output_num1, output_num0, size, fraction): + error_name = "result error" + if input_num1 >=size * fraction : + if output_num1 != size * fraction: + self.fail(error_name) + if input_num0 < size - size * fraction and output_num0 != input_num0: + self.fail(error_name) + if input_num0 >= size - size * fraction and output_num0 != size - size * fraction: + self.fail(error_name) + + def test_subsample(self, device): + for _ in range(20): + input1 = np.random.randint(-1,2,size = (10)) + npu_input = torch.from_numpy(input1).to("npu") + #input only suport int32 + npu_input = npu_input.to(torch.int32) + npu_output1 = torch.npu_sub_sample(npu_input,5,0.6) + input_num1, input_num0, output_num1, output_num0 = self.get_num(npu_input, npu_output1) + self.numless_equal(input_num1, input_num0, output_num1, output_num0,5,0.6) + self.nummore_equal(input_num1, input_num0, output_num1, output_num0,5,0.6) + + +instantiate_device_type_tests(TestSubSample, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_sum.py b/test/test_npu/test_network_ops/test_sum.py old mode 100644 new mode 100755 index b513ce3a702014f9889c90bf2bd5086d9c735cdc..9b1310e89e2783b177567e45e0ccb05192f11fd5 --- a/test/test_npu/test_network_ops/test_sum.py +++ b/test/test_npu/test_network_ops/test_sum.py @@ -1,219 +1,219 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('..') -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestSum(TestCase): - def cpu_op_exec(self, input1): - output = input1.sum() - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = input1.sum() - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_dim(self, input1, dim, dtype): - output = torch.sum(input1, dim, keepdim=True, dtype=dtype) - output = output.numpy() - return output - - def npu_op_exec_dim(self, input1, dim, dtype): - output = torch.sum(input1, dim, keepdim=True, dtype=dtype) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_dim_exec_out(self, input1, dim, keepdim): - out = torch.tensor(0).to(input1.dtype) - torch.sum(input1, dim=dim, keepdim=keepdim, out=out) - out = out.numpy() - return out - - def npu_op_dim_exec_out(self, input1, dim, keepdim): - out = torch.tensor(0).to(input1.dtype).npu() - torch.sum(input1, dim=dim, keepdim=keepdim, out=out) - out = out.to("cpu").numpy() - output = torch.sum(input1, dim=dim, keepdim=keepdim) - output = output.to("cpu").numpy() - return out, output - - def sum_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, -1, 1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def sum_dim_result(self, shape_format): - for item in shape_format: - dim = np.random.randint(0, len(item[2])) - cpu_input1, npu_input1 = create_common_tensor(item, -1, 1) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_out_dim = self.cpu_op_dim_exec_out(cpu_input1, dim=[0], keepdim=True) - npu_out_dim, npu_output_dim = self.npu_op_dim_exec_out(npu_input1, dim=[0], keepdim=True) - cpu_out_dim = cpu_out_dim.astype(npu_out_dim.dtype) - if npu_out_dim.dtype != np.float16: - self.assertRtolEqual(npu_out_dim, cpu_out_dim) - else: - self.assertRtolEqual(npu_out_dim, npu_output_dim) - - cpu_output_dim = self.cpu_op_exec_dim(cpu_input1, dim, cpu_input1.dtype) - npu_output_dim = self.npu_op_exec_dim(npu_input1, dim, npu_input1.dtype) - cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) - self.assertRtolEqual(cpu_output_dim, npu_output_dim) - - def test_sum_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [18]] for i in format_list - ] - self.sum_result(shape_format) - - def test_sum_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [18]] for i in format_list - ] - self.sum_result(shape_format) - - def test_sum_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [256, 1000]] for i in format_list - ] - self.sum_result(shape_format) - - def test_sum_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [256, 1000]] for i in format_list - ] - self.sum_result(shape_format) - - def test_sum_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [32, 48, 64]] for i in format_list - ] - self.sum_result(shape_format) - - def test_sum_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [32, 48, 64]] for i in format_list - ] - self.sum_result(shape_format) - - def test_sum_shape_format_fp16_4d(self, device): - format_list = [0, 4, 29] - shape_format = [ - [np.float16, i, [32, 24, 18, 18]] for i in format_list - ] - self.sum_result(shape_format) - - def test_sum_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [ - [np.float32, i, [32, 24, 18, 18]] for i in format_list - ] - self.sum_result(shape_format) - - # --------sum dim--------------------- - - def test_sum_dim_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [18]] for i in format_list - ] - self.sum_dim_result(shape_format) - - def test_sum_dim_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float32, i, [18]] for i in format_list - ] - self.sum_dim_result(shape_format) - - def test_sum_dim_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float16, i, [256, 1000]] for i in format_list - ] - self.sum_dim_result(shape_format) - - def test_sum_dim_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [256, 1000]] for i in format_list - ] - self.sum_dim_result(shape_format) - - def test_sum_dim_shape_format_fp16_3d(self, device): - # TODO(ascend): Insufficient precision - #format=29精度不满足 format_list = [0, 3, 29] - format_list = [0, 3] - shape_format = [ - [np.float16, i, [32, 48, 64]] for i in format_list - ] - self.sum_dim_result(shape_format) - - def test_sum_dim_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [ - [np.float32, i, [32, 48, 64]] for i in format_list - ] - self.sum_dim_result(shape_format) - - def test_sum_dim_shape_format_fp16_4d(self, device): - format_list = [0, 3] - shape_format = [ - [np.float16, i, [16, 16, 9, 9]] for i in format_list - ] - self.sum_dim_result(shape_format) - - def test_sum_dim_shape_format_fp32_4d(self, device): - format_list = [0, 3, 4] - shape_format = [ - [np.float32, i, [32, 24, 18, 18]] for i in format_list - ] - self.sum_dim_result(shape_format) - - def test_sum_dim_with_zero_shape_format(self, device): - format_list = [0, 3, 4] - shape_format = [ - [np.float32, i, [2, 0, 3]] for i in format_list - ] - self.sum_dim_result(shape_format) - self.sum_result(shape_format) - - -instantiate_device_type_tests(TestSum, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append('..') +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestSum(TestCase): + def cpu_op_exec(self, input1): + output = input1.sum() + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = input1.sum() + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_dim(self, input1, dim, dtype): + output = torch.sum(input1, dim, keepdim=True, dtype=dtype) + output = output.numpy() + return output + + def npu_op_exec_dim(self, input1, dim, dtype): + output = torch.sum(input1, dim, keepdim=True, dtype=dtype) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_dim_exec_out(self, input1, dim, keepdim): + out = torch.tensor(0).to(input1.dtype) + torch.sum(input1, dim=dim, keepdim=keepdim, out=out) + out = out.numpy() + return out + + def npu_op_dim_exec_out(self, input1, dim, keepdim): + out = torch.tensor(0).to(input1.dtype).npu() + torch.sum(input1, dim=dim, keepdim=keepdim, out=out) + out = out.to("cpu").numpy() + output = torch.sum(input1, dim=dim, keepdim=keepdim) + output = output.to("cpu").numpy() + return out, output + + def sum_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, -1, 1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def sum_dim_result(self, shape_format): + for item in shape_format: + dim = np.random.randint(0, len(item[2])) + cpu_input1, npu_input1 = create_common_tensor(item, -1, 1) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_out_dim = self.cpu_op_dim_exec_out(cpu_input1, dim=[0], keepdim=True) + npu_out_dim, npu_output_dim = self.npu_op_dim_exec_out(npu_input1, dim=[0], keepdim=True) + cpu_out_dim = cpu_out_dim.astype(npu_out_dim.dtype) + if npu_out_dim.dtype != np.float16: + self.assertRtolEqual(npu_out_dim, cpu_out_dim) + else: + self.assertRtolEqual(npu_out_dim, npu_output_dim) + + cpu_output_dim = self.cpu_op_exec_dim(cpu_input1, dim, cpu_input1.dtype) + npu_output_dim = self.npu_op_exec_dim(npu_input1, dim, npu_input1.dtype) + cpu_output_dim = cpu_output_dim.astype(npu_output_dim.dtype) + self.assertRtolEqual(cpu_output_dim, npu_output_dim) + + def test_sum_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [18]] for i in format_list + ] + self.sum_result(shape_format) + + def test_sum_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [18]] for i in format_list + ] + self.sum_result(shape_format) + + def test_sum_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [256, 1000]] for i in format_list + ] + self.sum_result(shape_format) + + def test_sum_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [256, 1000]] for i in format_list + ] + self.sum_result(shape_format) + + def test_sum_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [32, 48, 64]] for i in format_list + ] + self.sum_result(shape_format) + + def test_sum_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [32, 48, 64]] for i in format_list + ] + self.sum_result(shape_format) + + def test_sum_shape_format_fp16_4d(self, device): + format_list = [0, 4, 29] + shape_format = [ + [np.float16, i, [32, 24, 18, 18]] for i in format_list + ] + self.sum_result(shape_format) + + def test_sum_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [ + [np.float32, i, [32, 24, 18, 18]] for i in format_list + ] + self.sum_result(shape_format) + + # --------sum dim--------------------- + + def test_sum_dim_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [18]] for i in format_list + ] + self.sum_dim_result(shape_format) + + def test_sum_dim_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float32, i, [18]] for i in format_list + ] + self.sum_dim_result(shape_format) + + def test_sum_dim_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float16, i, [256, 1000]] for i in format_list + ] + self.sum_dim_result(shape_format) + + def test_sum_dim_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [256, 1000]] for i in format_list + ] + self.sum_dim_result(shape_format) + + def test_sum_dim_shape_format_fp16_3d(self, device): + # TODO(ascend): Insufficient precision + #format=29精度不满足 format_list = [0, 3, 29] + format_list = [0, 3] + shape_format = [ + [np.float16, i, [32, 48, 64]] for i in format_list + ] + self.sum_dim_result(shape_format) + + def test_sum_dim_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [ + [np.float32, i, [32, 48, 64]] for i in format_list + ] + self.sum_dim_result(shape_format) + + def test_sum_dim_shape_format_fp16_4d(self, device): + format_list = [0, 3] + shape_format = [ + [np.float16, i, [16, 16, 9, 9]] for i in format_list + ] + self.sum_dim_result(shape_format) + + def test_sum_dim_shape_format_fp32_4d(self, device): + format_list = [0, 3, 4] + shape_format = [ + [np.float32, i, [32, 24, 18, 18]] for i in format_list + ] + self.sum_dim_result(shape_format) + + def test_sum_dim_with_zero_shape_format(self, device): + format_list = [0, 3, 4] + shape_format = [ + [np.float32, i, [2, 0, 3]] for i in format_list + ] + self.sum_dim_result(shape_format) + self.sum_result(shape_format) + + +instantiate_device_type_tests(TestSum, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_tensor_equal.py b/test/test_npu/test_network_ops/test_tensor_equal.py index 830c6ed93ff5b6cb51ff46647a78c54d1c314979..d43db04421d4c98794c7f0744f93633070b60b24 100644 --- a/test/test_npu/test_network_ops/test_tensor_equal.py +++ b/test/test_npu/test_network_ops/test_tensor_equal.py @@ -1,76 +1,76 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestEqual(TestCase): - def cpu_op_exec(self, input1, input2): - output = torch.equal(input1, input2) - output = np.array(output, dtype=np.int32) - return output - - def npu_op_exec(self, input1, input2): - output = torch.equal(input1, input2) - output = np.array(output, dtype=np.int32) - return output - - def test_equal_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, 0, [5]], [np.float16, 0, [5]]], - [[np.float16, 0, [2, 4]], [np.float16, 0, [2, 4, 4]]], - [[np.float16, 0, [2, 2, 4]], [np.float16, 0, [2, 3, 4]]], - [[np.float16, 0, [2, 3, 3, 4]], [np.float16, 0, [2, 3, 3, 4]]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) - - cpu_input1 = cpu_input1.to(torch.float32) - cpu_input2 = cpu_input2.to(torch.float32) - - cpu_output1 = self.cpu_op_exec(cpu_input1, cpu_input1) - npu_output1 = self.npu_op_exec(npu_input1, npu_input1) - self.assertRtolEqual(cpu_output1, npu_output1) - - cpu_output0 = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output0 = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output0, npu_output0) - - def test_equal_shape_format_fp32(self, device): - shape_format = [ - [[np.float32, 0, [5]], [np.float32, 0, [5]]], - [[np.float32, 0, [2, 4]], [np.float32, 0, [2, 4, 4]]], - [[np.float32, 0, [2, 2, 4]], [np.float32, 0, [2, 3, 4]]], - [[np.float32, 0, [2, 3, 3, 4]], [np.float32, 0, [2, 3, 3, 4]]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) - - cpu_output1 = self.cpu_op_exec(cpu_input1, cpu_input1) - npu_output1 = self.npu_op_exec(npu_input1, npu_input1) - self.assertRtolEqual(cpu_output1, npu_output1) - - cpu_output0 = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output0 = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output0, npu_output0) - - -instantiate_device_type_tests(TestEqual, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestEqual(TestCase): + def cpu_op_exec(self, input1, input2): + output = torch.equal(input1, input2) + output = np.array(output, dtype=np.int32) + return output + + def npu_op_exec(self, input1, input2): + output = torch.equal(input1, input2) + output = np.array(output, dtype=np.int32) + return output + + def test_equal_shape_format_fp16(self, device): + shape_format = [ + [[np.float16, 0, [5]], [np.float16, 0, [5]]], + [[np.float16, 0, [2, 4]], [np.float16, 0, [2, 4, 4]]], + [[np.float16, 0, [2, 2, 4]], [np.float16, 0, [2, 3, 4]]], + [[np.float16, 0, [2, 3, 3, 4]], [np.float16, 0, [2, 3, 3, 4]]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) + + cpu_input1 = cpu_input1.to(torch.float32) + cpu_input2 = cpu_input2.to(torch.float32) + + cpu_output1 = self.cpu_op_exec(cpu_input1, cpu_input1) + npu_output1 = self.npu_op_exec(npu_input1, npu_input1) + self.assertRtolEqual(cpu_output1, npu_output1) + + cpu_output0 = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output0 = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output0, npu_output0) + + def test_equal_shape_format_fp32(self, device): + shape_format = [ + [[np.float32, 0, [5]], [np.float32, 0, [5]]], + [[np.float32, 0, [2, 4]], [np.float32, 0, [2, 4, 4]]], + [[np.float32, 0, [2, 2, 4]], [np.float32, 0, [2, 3, 4]]], + [[np.float32, 0, [2, 3, 3, 4]], [np.float32, 0, [2, 3, 3, 4]]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], -100, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], -100, 100) + + cpu_output1 = self.cpu_op_exec(cpu_input1, cpu_input1) + npu_output1 = self.npu_op_exec(npu_input1, npu_input1) + self.assertRtolEqual(cpu_output1, npu_output1) + + cpu_output0 = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output0 = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output0, npu_output0) + + +instantiate_device_type_tests(TestEqual, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_tensor_npu.py b/test/test_npu/test_network_ops/test_tensor_npu.py index e0fdf11d358bf61bc29cd61188829017c8155b7f..c679316e5083b281b8f86fec51d15a795c9b7988 100644 --- a/test/test_npu/test_network_ops/test_tensor_npu.py +++ b/test/test_npu/test_network_ops/test_tensor_npu.py @@ -1,75 +1,75 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - -class TestTensorNpu(TestCase): - - def cpu_op_exec(self, input): - output = input.to("cpu") - return output - - def npu_op_exec(self, input): - output = input.npu() - output = output.to("cpu") - return output - - def cpu_type_exec(self, input): - output = input.to("cpu") - output = output.is_npu - return output - - def npu_type_exec(self, input): - output = input.npu() - output = output.is_npu - return output - - def test_tensor_npu_shape_format(self, device): - shape_format = [ - [np.float32, 0, 1], - [np.float32, 0, (64, 10)], - [np.float32, 3, (256, 2048, 7, 7)], - [np.float32, 4, (32, 1, 3, 3)], - [np.float32, 29, (10, 128)] - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 100) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output.cpu()) - - def test_is_npu_shape_format(self, device): - shape_format = [ - [np.float32, 0, 1], - [np.float32, 0, (64, 10)], - [np.float32, 3, (256, 2048, 7, 7)], - [np.float32, 4, (32, 1, 3, 3)], - [np.float32, 29, (10, 128)] - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 100) - cpu_output = self.cpu_type_exec(cpu_input) - npu_output = self.npu_type_exec(npu_input) - self.assertEqual(cpu_output, False) - self.assertEqual(npu_output, True) - -instantiate_device_type_tests(TestTensorNpu, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + +class TestTensorNpu(TestCase): + + def cpu_op_exec(self, input): + output = input.to("cpu") + return output + + def npu_op_exec(self, input): + output = input.npu() + output = output.to("cpu") + return output + + def cpu_type_exec(self, input): + output = input.to("cpu") + output = output.is_npu + return output + + def npu_type_exec(self, input): + output = input.npu() + output = output.is_npu + return output + + def test_tensor_npu_shape_format(self, device): + shape_format = [ + [np.float32, 0, 1], + [np.float32, 0, (64, 10)], + [np.float32, 3, (256, 2048, 7, 7)], + [np.float32, 4, (32, 1, 3, 3)], + [np.float32, 29, (10, 128)] + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 100) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output.cpu()) + + def test_is_npu_shape_format(self, device): + shape_format = [ + [np.float32, 0, 1], + [np.float32, 0, (64, 10)], + [np.float32, 3, (256, 2048, 7, 7)], + [np.float32, 4, (32, 1, 3, 3)], + [np.float32, 29, (10, 128)] + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 100) + cpu_output = self.cpu_type_exec(cpu_input) + npu_output = self.npu_type_exec(npu_input) + self.assertEqual(cpu_output, False) + self.assertEqual(npu_output, True) + +instantiate_device_type_tests(TestTensorNpu, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_threshold.py b/test/test_npu/test_network_ops/test_threshold.py index e773cbaa4db06942d81e45126ce654c43bb7e379..5289993f086acbb7cf2f3339c55c944b98bc4dd5 100644 --- a/test/test_npu/test_network_ops/test_threshold.py +++ b/test/test_npu/test_network_ops/test_threshold.py @@ -1,75 +1,75 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestThreshold(TestCase): - - def cpu_op_exec(self,input1, threshold, value): - output = torch.nn.functional.threshold(input1, threshold, value) - output = output.numpy() - return output - - def npu_op_exec(self,input1, threshold, value): - output = torch.nn.functional.threshold(input1, threshold, value) - output = output.to("cpu") - output = output.numpy() - return output - - def test_threshold_common_shape_format(self, device): - shape_format = [ - [[np.float32, 0, (1,5)], [1.0], [20.0]], - [[np.int32, 0, (1,5)], [2], [20]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 3) - cpu_threshold = npu_threshold = item[1][0] - cpu_value = npu_value = item[2][0] - cpu_output = self.cpu_op_exec(cpu_input1, cpu_threshold, cpu_value) - npu_output = self.npu_op_exec(npu_input1, npu_threshold, npu_value) - self.assertRtolEqual(cpu_output, npu_output) - - def test_threshold_inplace_common_shape_format(self, device): - def cpu_op_inplace_exec(input1, threshold, value): - torch.nn.functional.threshold_(input1, threshold, value) - output = input1.numpy() - return output - - def npu_op_inplace_exec(input1, threshold, value): - torch.nn.functional.threshold_(input1, threshold, value) - output = input1.to("cpu") - output = output.numpy() - return output - - shape_format = [ - [[np.float32, 0, (1,5)], [1.0], [20.0]], - [[np.int32, 0, (1,5)], [2], [20]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 3) - cpu_threshold = npu_threshold = item[1][0] - cpu_value = npu_value = item[2][0] - cpu_output = cpu_op_inplace_exec(cpu_input1, cpu_threshold, cpu_value) - npu_output = npu_op_inplace_exec(npu_input1, npu_threshold, npu_value) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestThreshold, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestThreshold(TestCase): + + def cpu_op_exec(self,input1, threshold, value): + output = torch.nn.functional.threshold(input1, threshold, value) + output = output.numpy() + return output + + def npu_op_exec(self,input1, threshold, value): + output = torch.nn.functional.threshold(input1, threshold, value) + output = output.to("cpu") + output = output.numpy() + return output + + def test_threshold_common_shape_format(self, device): + shape_format = [ + [[np.float32, 0, (1,5)], [1.0], [20.0]], + [[np.int32, 0, (1,5)], [2], [20]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 3) + cpu_threshold = npu_threshold = item[1][0] + cpu_value = npu_value = item[2][0] + cpu_output = self.cpu_op_exec(cpu_input1, cpu_threshold, cpu_value) + npu_output = self.npu_op_exec(npu_input1, npu_threshold, npu_value) + self.assertRtolEqual(cpu_output, npu_output) + + def test_threshold_inplace_common_shape_format(self, device): + def cpu_op_inplace_exec(input1, threshold, value): + torch.nn.functional.threshold_(input1, threshold, value) + output = input1.numpy() + return output + + def npu_op_inplace_exec(input1, threshold, value): + torch.nn.functional.threshold_(input1, threshold, value) + output = input1.to("cpu") + output = output.numpy() + return output + + shape_format = [ + [[np.float32, 0, (1,5)], [1.0], [20.0]], + [[np.int32, 0, (1,5)], [2], [20]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 3) + cpu_threshold = npu_threshold = item[1][0] + cpu_value = npu_value = item[2][0] + cpu_output = cpu_op_inplace_exec(cpu_input1, cpu_threshold, cpu_value) + npu_output = npu_op_inplace_exec(npu_input1, npu_threshold, npu_value) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestThreshold, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_threshold_backward.py b/test/test_npu/test_network_ops/test_threshold_backward.py index d191f8e9ce9d970954c409e80286623bae22b219..3f66ad31a1524da22f1c06fa86d8073c64856673 100644 --- a/test/test_npu/test_network_ops/test_threshold_backward.py +++ b/test/test_npu/test_network_ops/test_threshold_backward.py @@ -1,68 +1,68 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestThresholdBackward(TestCase): - - def cpu_op_exec(self, input1, threshold, value): - input1.requires_grad_() - output = torch.nn.functional.threshold(input1, threshold, value) - w = torch.ones_like(output) - output.backward(w) - out = input1.grad - output = output.detach() - return output.numpy(), out.numpy() - - def npu_op_exec(self, input1, threshold, value): - input1.requires_grad_() - output = torch.nn.functional.threshold(input1, threshold, value) - w = torch.ones_like(output) - output.backward(w) - out = input1.grad.to("cpu") - output = output.detach().to("cpu") - return output.numpy(), out.numpy() - - def test_threshold_backward_common_shape_format(self, device): - shape_format = [ - [[np.float32, 0, (1,5)], [1.0], [20.0]], - [[np.float32, 0, (2,3,5)], [2.0], [20.0]], - [[np.float32, 0, (2,3,4,5)], [0], [0]], - [[np.float32, 3, (1,5)], [1.0], [20.0]], - [[np.float32, 3, (2,3,5)], [2.0], [20.0]], - [[np.float32, 3, (2,3,4,5)], [0], [0]], - [[np.float16, 0, (1,5)], [1.0], [20.0]], - [[np.float16, 0, (2,3,5)], [2.0], [20.0]], - [[np.float16, 3, (2,3,4,5)], [0], [0]], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 3) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_threshold = npu_threshold = item[1][0] - cpu_value = npu_value = item[2][0] - cpu_output1, cpu_output2 = self.cpu_op_exec(cpu_input1, cpu_threshold, cpu_value) - npu_output1, npu_output2 = self.npu_op_exec(npu_input1, npu_threshold, npu_value) - self.assertRtolEqual(npu_output1.astype(np.float32), cpu_output1) - self.assertRtolEqual(npu_output2.astype(np.float32), cpu_output2) - -instantiate_device_type_tests(TestThresholdBackward, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestThresholdBackward(TestCase): + + def cpu_op_exec(self, input1, threshold, value): + input1.requires_grad_() + output = torch.nn.functional.threshold(input1, threshold, value) + w = torch.ones_like(output) + output.backward(w) + out = input1.grad + output = output.detach() + return output.numpy(), out.numpy() + + def npu_op_exec(self, input1, threshold, value): + input1.requires_grad_() + output = torch.nn.functional.threshold(input1, threshold, value) + w = torch.ones_like(output) + output.backward(w) + out = input1.grad.to("cpu") + output = output.detach().to("cpu") + return output.numpy(), out.numpy() + + def test_threshold_backward_common_shape_format(self, device): + shape_format = [ + [[np.float32, 0, (1,5)], [1.0], [20.0]], + [[np.float32, 0, (2,3,5)], [2.0], [20.0]], + [[np.float32, 0, (2,3,4,5)], [0], [0]], + [[np.float32, 3, (1,5)], [1.0], [20.0]], + [[np.float32, 3, (2,3,5)], [2.0], [20.0]], + [[np.float32, 3, (2,3,4,5)], [0], [0]], + [[np.float16, 0, (1,5)], [1.0], [20.0]], + [[np.float16, 0, (2,3,5)], [2.0], [20.0]], + [[np.float16, 3, (2,3,4,5)], [0], [0]], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 3) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_threshold = npu_threshold = item[1][0] + cpu_value = npu_value = item[2][0] + cpu_output1, cpu_output2 = self.cpu_op_exec(cpu_input1, cpu_threshold, cpu_value) + npu_output1, npu_output2 = self.npu_op_exec(npu_input1, npu_threshold, npu_value) + self.assertRtolEqual(npu_output1.astype(np.float32), cpu_output1) + self.assertRtolEqual(npu_output2.astype(np.float32), cpu_output2) + +instantiate_device_type_tests(TestThresholdBackward, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_to.py b/test/test_npu/test_network_ops/test_to.py index 1aca41a22203d327a2117dc57658e393817c60e5..b5d16df049d63db06d4bf37a1e6ef2d9350caecc 100644 --- a/test/test_npu/test_network_ops/test_to.py +++ b/test/test_npu/test_network_ops/test_to.py @@ -1,55 +1,55 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestTo(TestCase): - def cpu_op_exec(self, input1, target): - output = input1.to(target) - output = output.cpu().numpy() - return output - - def npu_op_exec(self,input1, target): - output = input1.to(target) - output = output.cpu().numpy() - return output - - def test_to(self, device): - shape_format = [ - [np.float32, 0, [3, 3]], - [np.float16, 0, [4, 3]], - [np.int32, 0, [3, 5]], - ] - - targets = [torch.float16, torch.float32, torch.int32, 'cpu', 'npu'] - for item in shape_format: - for target in targets: - cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) - cpu_output = self.cpu_op_exec(cpu_input1, target) - npu_output = self.npu_op_exec(npu_input1, target) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestTo, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestTo(TestCase): + def cpu_op_exec(self, input1, target): + output = input1.to(target) + output = output.cpu().numpy() + return output + + def npu_op_exec(self,input1, target): + output = input1.to(target) + output = output.cpu().numpy() + return output + + def test_to(self, device): + shape_format = [ + [np.float32, 0, [3, 3]], + [np.float16, 0, [4, 3]], + [np.int32, 0, [3, 5]], + ] + + targets = [torch.float16, torch.float32, torch.int32, 'cpu', 'npu'] + for item in shape_format: + for target in targets: + cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) + cpu_output = self.cpu_op_exec(cpu_input1, target) + npu_output = self.npu_op_exec(npu_input1, target) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestTo, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_tril.py b/test/test_npu/test_network_ops/test_tril.py index 79134acaa833a965bf2edf94b06847d2f288049a..620623edb86505648e7d833b2e7d9770d990fc3c 100644 --- a/test/test_npu/test_network_ops/test_tril.py +++ b/test/test_npu/test_network_ops/test_tril.py @@ -1,77 +1,77 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestTril(TestCase): - def test_tril(self, device): - dtype_list = [np.float32, np.float16] - format_list = [0, 3, 4] - shape_list = [[5, 5],[4, 5, 6]] - diagonal_list = [-1, 0, 1] - shape_format = [ - [i, j, k, l] for i in dtype_list for j in format_list for k in shape_list for l in diagonal_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[:-1], 0, 100) - cpu_output = self.cpu_op_exec(cpu_input, item[-1]) - npu_output = self.npu_op_exec(npu_input, item[-1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_tril_inplace(self, device): - dtype_list = [np.float32, np.float16] - format_list = [0, 3, 4] - shape_list = [[5, 5], [4, 5, 6]] - diagonal_list = [-1, 0, 1] - shape_format = [ - [i, j, k, l] for i in dtype_list for j in format_list for k in shape_list for l in diagonal_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[:-1], 0, 100) - cpu_output = self.cpu_op_inplace_exec(cpu_input, item[-1]) - npu_output = self.npu_op_inplace_exec(npu_input, item[-1]) - self.assertRtolEqual(cpu_output, npu_output) - - def cpu_op_exec(self, input, diagonal=0): - output = torch.tril(input, diagonal) - output = output.numpy() - return output - - def npu_op_exec(self, input, diagonal=0): - output = torch.tril(input, diagonal) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec(self, input, diagonal=0): - output = input.tril_(diagonal) - output = output.numpy() - return output - - def npu_op_inplace_exec(self, input, diagonal=0): - output = input.tril_(diagonal) - output = output.to("cpu") - output = output.numpy() - return output - -instantiate_device_type_tests(TestTril, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestTril(TestCase): + def test_tril(self, device): + dtype_list = [np.float32, np.float16] + format_list = [0, 3, 4] + shape_list = [[5, 5],[4, 5, 6]] + diagonal_list = [-1, 0, 1] + shape_format = [ + [i, j, k, l] for i in dtype_list for j in format_list for k in shape_list for l in diagonal_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[:-1], 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, item[-1]) + npu_output = self.npu_op_exec(npu_input, item[-1]) + self.assertRtolEqual(cpu_output, npu_output) + + def test_tril_inplace(self, device): + dtype_list = [np.float32, np.float16] + format_list = [0, 3, 4] + shape_list = [[5, 5], [4, 5, 6]] + diagonal_list = [-1, 0, 1] + shape_format = [ + [i, j, k, l] for i in dtype_list for j in format_list for k in shape_list for l in diagonal_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[:-1], 0, 100) + cpu_output = self.cpu_op_inplace_exec(cpu_input, item[-1]) + npu_output = self.npu_op_inplace_exec(npu_input, item[-1]) + self.assertRtolEqual(cpu_output, npu_output) + + def cpu_op_exec(self, input, diagonal=0): + output = torch.tril(input, diagonal) + output = output.numpy() + return output + + def npu_op_exec(self, input, diagonal=0): + output = torch.tril(input, diagonal) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec(self, input, diagonal=0): + output = input.tril_(diagonal) + output = output.numpy() + return output + + def npu_op_inplace_exec(self, input, diagonal=0): + output = input.tril_(diagonal) + output = output.to("cpu") + output = output.numpy() + return output + +instantiate_device_type_tests(TestTril, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_triu.py b/test/test_npu/test_network_ops/test_triu.py index 50cadc56ef55e6145dda9e9055e6ef2daa1cc5f1..c95dc291f53623439b7ad12d02afb9d94d3b9647 100644 --- a/test/test_npu/test_network_ops/test_triu.py +++ b/test/test_npu/test_network_ops/test_triu.py @@ -1,79 +1,79 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestTriu(TestCase): - def test_triu(self, device): - dtype_list = [np.float32, np.float16] - format_list = [0, 3] - shape_list = [[5, 5]] - shape_format = [ - [i, j, k] for i in dtype_list for j in format_list for k in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - print(cpu_input) - print(npu_input) - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - print(cpu_output) - print(npu_output) - self.assertRtolEqual(cpu_output, npu_output) - - def test_triu_inplace(self, device): - dtype_list = [np.float32, np.float16] - format_list = [0, 3] - shape_list = [[5, 5]] - shape_format = [ - [i, j, k] for i in dtype_list for j in format_list for k in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_op_inplace_exec(cpu_input) - npu_output = self.npu_op_inplace_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - def cpu_op_exec(self, input): - output = torch.triu(input, 1) - output = output.numpy() - return output - - def npu_op_exec(self, input): - output = torch.triu(input, 1) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_inplace_exec(self, input): - output = input.triu_(1) - output = output.numpy() - return output - - def npu_op_inplace_exec(self, input): - output = input.triu_(1) - output = output.to("cpu") - output = output.numpy() - return output - -instantiate_device_type_tests(TestTriu, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestTriu(TestCase): + def test_triu(self, device): + dtype_list = [np.float32, np.float16] + format_list = [0, 3] + shape_list = [[5, 5]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + print(cpu_input) + print(npu_input) + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + print(cpu_output) + print(npu_output) + self.assertRtolEqual(cpu_output, npu_output) + + def test_triu_inplace(self, device): + dtype_list = [np.float32, np.float16] + format_list = [0, 3] + shape_list = [[5, 5]] + shape_format = [ + [i, j, k] for i in dtype_list for j in format_list for k in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_inplace_exec(cpu_input) + npu_output = self.npu_op_inplace_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + def cpu_op_exec(self, input): + output = torch.triu(input, 1) + output = output.numpy() + return output + + def npu_op_exec(self, input): + output = torch.triu(input, 1) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_inplace_exec(self, input): + output = input.triu_(1) + output = output.numpy() + return output + + def npu_op_inplace_exec(self, input): + output = input.triu_(1) + output = output.to("cpu") + output = output.numpy() + return output + +instantiate_device_type_tests(TestTriu, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_unbind.py b/test/test_npu/test_network_ops/test_unbind.py index dbd889508fbf8688e1c855117a80bfd65f394eae..ff22e7acae8be4c5459b25bd3f6d5236cf8c0166 100644 --- a/test/test_npu/test_network_ops/test_unbind.py +++ b/test/test_npu/test_network_ops/test_unbind.py @@ -1,42 +1,42 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests -from util_test import create_common_tensor - -class TestUnbind(TestCase): - def test_unbind_common_shape_format(self, device): - def op_exec(input): - output = torch.unbind(input,0) - return output - - shape_format = [ - [[np.float16, 0, (64, 10)]], - [[np.float32, 4, (32, 1, 3, 3)]], - [[np.float32, 29, (10, 128)]] - ] - for shape in shape_format: - cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) - cpu_output = op_exec(cpu_input) - npu_output = op_exec(npu_input) - self.assertRtolEqual(cpu_output[0], npu_output[0].cpu()) - self.assertRtolEqual(cpu_output[-1], npu_output[-1].cpu()) - - -instantiate_device_type_tests(TestUnbind, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests +from util_test import create_common_tensor + +class TestUnbind(TestCase): + def test_unbind_common_shape_format(self, device): + def op_exec(input): + output = torch.unbind(input,0) + return output + + shape_format = [ + [[np.float16, 0, (64, 10)]], + [[np.float32, 4, (32, 1, 3, 3)]], + [[np.float32, 29, (10, 128)]] + ] + for shape in shape_format: + cpu_input, npu_input = create_common_tensor(shape[0], -1, 1) + cpu_output = op_exec(cpu_input) + npu_output = op_exec(npu_input) + self.assertRtolEqual(cpu_output[0], npu_output[0].cpu()) + self.assertRtolEqual(cpu_output[-1], npu_output[-1].cpu()) + + +instantiate_device_type_tests(TestUnbind, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_uniform_.py b/test/test_npu/test_network_ops/test_uniform_.py index 723e02791cc13839681087c11582644680734254..38ec32e098bafeda54e706f6d9b2fd265b21999d 100644 --- a/test/test_npu/test_network_ops/test_uniform_.py +++ b/test/test_npu/test_network_ops/test_uniform_.py @@ -1,46 +1,46 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -from common_utils import TestCase, run_tests -from common_device_type import instantiate_device_type_tests - -class TestUniform(TestCase): - def test_uniform(self, device): - shape_format = [ - [(20,300), -100, 100, torch.float32], - [(20,300), -100, 100, torch.float16] - ] - - for item in shape_format: - input1 = torch.zeros(item[0], dtype=item[3]).npu() - input1.uniform_(item[1], item[2]) - self.assertTrue(item[1] <= input1.min()) - self.assertTrue(item[2] >= input1.max()) - - def test_uniform_trans(self, device): - shape_format = [ - [(20,300), -100, 100, torch.float32], - ] - - for item in shape_format: - input1 = torch.zeros(item[0], dtype=item[3]).npu() - input1.npu_format_cast(3) - input1.uniform_(item[1], item[2]) - self.assertTrue(item[1] <= input1.min()) - self.assertTrue(item[2] >= input1.max()) - - -instantiate_device_type_tests(TestUniform, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from common_utils import TestCase, run_tests +from common_device_type import instantiate_device_type_tests + +class TestUniform(TestCase): + def test_uniform(self, device): + shape_format = [ + [(20,300), -100, 100, torch.float32], + [(20,300), -100, 100, torch.float16] + ] + + for item in shape_format: + input1 = torch.zeros(item[0], dtype=item[3]).npu() + input1.uniform_(item[1], item[2]) + self.assertTrue(item[1] <= input1.min()) + self.assertTrue(item[2] >= input1.max()) + + def test_uniform_trans(self, device): + shape_format = [ + [(20,300), -100, 100, torch.float32], + ] + + for item in shape_format: + input1 = torch.zeros(item[0], dtype=item[3]).npu() + input1.npu_format_cast(3) + input1.uniform_(item[1], item[2]) + self.assertTrue(item[1] <= input1.min()) + self.assertTrue(item[2] >= input1.max()) + + +instantiate_device_type_tests(TestUniform, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_unique2.py b/test/test_npu/test_network_ops/test_unique2.py index ac6b18611cf3e66e03d9b8ae424ae44dba300ad9..993779a1a302c00ad1afd617200bbaf7211b4aa6 100644 --- a/test/test_npu/test_network_ops/test_unique2.py +++ b/test/test_npu/test_network_ops/test_unique2.py @@ -1,58 +1,58 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestUnique2(TestCase): - def test_unique2(self, device): - shape_format = [ - [[np.uint8, (2, 3)], True, True, True], - [[np.int8, (2, 3)], True, True, True], - [[np.int16, (2, 3)], True, True, True], - [[np.int32, (2, 3)], True, True, True], - [[np.long, (2, 3)], True, True, False], - [[np.long, (5, 3)], True, False, True], - [[np.long, (2, 3, 4)], True, False, False], - [[np.long, (3, 3)], False, True, True], - [[np.long, (2, 3)], False, False, False], - [[np.float32, (2, 3)], True, False, False], - [[np.bool, (2, 3)], True, True, True], - [[np.float16, (2, 3)], True, True, True] - ] - - for item in shape_format: - input1 = np.random.uniform(-10, 10, item[0][1]).astype(item[0][0]) - cpu_input1 = torch.from_numpy(input1) - if item[0][0] == np.float16: - cpu_input1 = torch.from_numpy(input1.astype(np.float32)) - npu_input1 = torch.from_numpy(input1).npu() - - cpu_output_y, cpu_yInverse, cpu_yCounts = torch._unique2(cpu_input1, item[1], item[2], item[3]) - npu_output_y, npu_yInverse, npu_yCounts = torch._unique2(npu_input1, item[1], item[2], item[3]) - - self.assertRtolEqual(cpu_output_y.numpy().astype(np.float32), npu_output_y.cpu().numpy().astype(np.float32)) - self.assertRtolEqual(cpu_yInverse.numpy().astype(np.float32), npu_yInverse.cpu().numpy().astype(np.float32)) - self.assertRtolEqual(cpu_yCounts.numpy().astype(np.float32), npu_yCounts.cpu().numpy().astype(np.float32)) - -instantiate_device_type_tests(TestUnique2, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestUnique2(TestCase): + def test_unique2(self, device): + shape_format = [ + [[np.uint8, (2, 3)], True, True, True], + [[np.int8, (2, 3)], True, True, True], + [[np.int16, (2, 3)], True, True, True], + [[np.int32, (2, 3)], True, True, True], + [[np.long, (2, 3)], True, True, False], + [[np.long, (5, 3)], True, False, True], + [[np.long, (2, 3, 4)], True, False, False], + [[np.long, (3, 3)], False, True, True], + [[np.long, (2, 3)], False, False, False], + [[np.float32, (2, 3)], True, False, False], + [[np.bool, (2, 3)], True, True, True], + [[np.float16, (2, 3)], True, True, True] + ] + + for item in shape_format: + input1 = np.random.uniform(-10, 10, item[0][1]).astype(item[0][0]) + cpu_input1 = torch.from_numpy(input1) + if item[0][0] == np.float16: + cpu_input1 = torch.from_numpy(input1.astype(np.float32)) + npu_input1 = torch.from_numpy(input1).npu() + + cpu_output_y, cpu_yInverse, cpu_yCounts = torch._unique2(cpu_input1, item[1], item[2], item[3]) + npu_output_y, npu_yInverse, npu_yCounts = torch._unique2(npu_input1, item[1], item[2], item[3]) + + self.assertRtolEqual(cpu_output_y.numpy().astype(np.float32), npu_output_y.cpu().numpy().astype(np.float32)) + self.assertRtolEqual(cpu_yInverse.numpy().astype(np.float32), npu_yInverse.cpu().numpy().astype(np.float32)) + self.assertRtolEqual(cpu_yCounts.numpy().astype(np.float32), npu_yCounts.cpu().numpy().astype(np.float32)) + +instantiate_device_type_tests(TestUnique2, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_upsample_bicubic2d.py b/test/test_npu/test_network_ops/test_upsample_bicubic2d.py index 5d7eeec4ecd51d0074a664b4578d726d314f7184..95a0acbb1060296fab750020c8bfa99b632e41a9 100644 --- a/test/test_npu/test_network_ops/test_upsample_bicubic2d.py +++ b/test/test_npu/test_network_ops/test_upsample_bicubic2d.py @@ -1,126 +1,126 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestUpsampleBicubic2d(TestCase): - - def cpu_op_exec(self, input1, output_size, align_corners, scale_h, scale_w): - output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) - output = output.numpy() - return output - - def npu_op_exec(self, input1, output_size, align_corners, scale_h, scale_w): - output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) - output = output.to("cpu") - output = output.numpy() - return output - - # float32 [0.0002, 0.0001] - #pylint: disable=unused-argument - def test_upsample_bicubic2d_common_shape_format(self, device): - shape_format = [ - # same size - [[np.float32, -1, (1, 1, 1, 1)], (1, 1), True, 0, 0, 0, 255], # case 1 - [[np.float32, -1, (2, 65535, 2, 2)], (2, 2), True, 0, 0, 0, 255], # case 2 - [[np.float32, -1, (65535, 2, 4, 8)], (4, 8), True, 0, 0, 0, 255], # case 3 - [[np.float32, -1, (2, 4, 65535, 2)], (65535, 2), True, 0, 0, 0, 255], # case 4 - [[np.float32, -1, (1, 31, 149, 2)], (149, 2), True, 0, 0, 0, 255], # case 5 - [[np.float32, -1, (10, 10, 786432, 8)], (786432, 8), True, 0, 0, 0, 255], # case 6 - [[np.float32, -1, (32, 32, 32, 32)], (32, 32), True, 0, 0, 0, 3402823500.0], # case 7 - [[np.float32, -1, (10, 10, 786432, 8)], (786432, 8), False, 0, 0, 0, 255], # case 8 - [[np.float32, -1, (32, 32, 32, 32)], (32, 32), False, 0, 0, 0, 3402823500.0], # case 9 - - # align_corners = True - [[np.float32, -1, (1, 1, 1, 1)], (2, 2), True, 0, 0, 0, 255], # case 10 - [[np.float32, -1, (1, 1, 2, 2)], (4, 4), True, 0, 0, 0, 255], # case 11 - [[np.float32, -1, (2, 2, 1, 1)], (2, 2), True, 0, 0, 0, 255], # case 12 - [[np.float32, -1, (2, 2, 2, 2)], (10, 10), True, 0, 0, 0, 255], # case 13 - [[np.float32, -1, (1, 31, 149, 2)], (2, 149), True, 0, 0, 0, 255], # case 14 - #[[np.float32, -1, (32, 32, 32, 32)], (64, 64), True, 0, 0, 0, 255], # case 15 - #[[np.float32, -1, (32, 32, 32, 32)], (64, 64), True, 0, 0, 0, 3402823500.0], # case 16 - - # align_corners = False - [[np.float32, -1, (1, 1, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], # case 17 - [[np.float32, -1, (1, 1, 2, 2)], (4, 4), False, 0.5, 0.5, 0, 255], # case 18 - [[np.float32, -1, (2, 2, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], # case 19 - [[np.float32, -1, (2, 2, 2, 2)], (10, 10), False, 0.5, 0.5, 0, 255], # case 20 - [[np.float32, -1, (1, 31, 149, 2)], (2, 149), False, 0.5, 0.5, 0, 255], # case 21 - [[np.float32, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 255], # case 22 - [[np.float32, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 3402823500.0] # case 23 - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], item[5], item[6]) - cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2], item[3], item[4]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3], item[4]) - self.assertRtolEqual(cpu_output, npu_output) - - # float16 [0.002, 0.001] - #pylint: disable=unused-argument - def test_upsample_bicubic2d_float16_shape_format(self, device): - def cpu_op_exec_fp16(input1, output_size, align_corners, scale_h, scale_w): - input1 = input1.to(torch.float32) - output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) - output = output.numpy() - output = output.astype(np.float16) - return output - - shape_format = [ - # same size - [[np.float16, -1, (1, 1, 1, 1)], (1, 1), True, 0, 0, 0, 255], # case 24 - [[np.float16, -1, (2, 65535, 2, 2)], (2, 2), True, 0, 0, 0, 255], # case 25 - [[np.float16, -1, (65535, 2, 4, 8)], (4, 8), True, 0, 0, 0, 255], # case 26 - [[np.float16, -1, (2, 4, 65535, 2)], (65535, 2), True, 0, 0, 0, 255], # case 27 - [[np.float16, -1, (1, 31, 149, 2)], (149, 2), True, 0, 0, 0, 255], # case 28 - [[np.float16, -1, (10, 10, 786432, 8)], (786432, 8), True, 0, 0, 0, 255], # case 29 - [[np.float16, -1, (32, 32, 32, 32)], (32, 32), True, 0, 0, 0, 6550.0], # case 30 - [[np.float16, -1, (10, 10, 786432, 8)], (786432, 8), False, 0, 0, 0, 255], # case 31 - [[np.float16, -1, (32, 32, 32, 32)], (32, 32), False, 0, 0, 0, 6550.0], # case 32 - - # align_corners = True - [[np.float16, -1, (1, 1, 1, 1)], (2, 2), True, 0, 0, 0, 255], # case 33 - [[np.float16, -1, (1, 1, 2, 2)], (4, 4), True, 0, 0, 0, 255], # case 34 - [[np.float16, -1, (2, 2, 1, 1)], (2, 2), True, 0, 0, 0, 255], # case 35 - [[np.float16, -1, (2, 2, 2, 2)], (10, 10), True, 0, 0, 0, 255], # case 36 - [[np.float16, -1, (1, 31, 149, 2)], (2, 149), True, 0, 0, 0, 255], # case 37 - [[np.float16, -1, (32, 32, 32, 32)], (64, 64), True, 0, 0, 0, 255], # case 38 - [[np.float16, -1, (32, 32, 32, 32)], (64, 64), True, 0, 0, 0, 6550.0], # case 39 - - # align_corners = False - [[np.float16, -1, (1, 1, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], # case 40 - [[np.float16, -1, (1, 1, 2, 2)], (4, 4), False, 0.5, 0.5, 0, 255], # case 41 - [[np.float16, -1, (2, 2, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], # case 42 - [[np.float16, -1, (2, 2, 2, 2)], (10, 10), False, 0.5, 0.5, 0, 255], # case 43 - [[np.float16, -1, (1, 31, 149, 2)], (2, 149), False, 0.5, 0.5, 0, 255], # case 44 - [[np.float16, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 255], # case 45 - [[np.float16, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 6550.0] # case 46 - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], item[5], item[6]) - cpu_output = cpu_op_exec_fp16(cpu_input1, item[1], item[2], item[3], item[4]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3], item[4]) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestUpsampleBicubic2d, globals(), except_for='cpu') -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestUpsampleBicubic2d(TestCase): + + def cpu_op_exec(self, input1, output_size, align_corners, scale_h, scale_w): + output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) + output = output.numpy() + return output + + def npu_op_exec(self, input1, output_size, align_corners, scale_h, scale_w): + output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) + output = output.to("cpu") + output = output.numpy() + return output + + # float32 [0.0002, 0.0001] + #pylint: disable=unused-argument + def test_upsample_bicubic2d_common_shape_format(self, device): + shape_format = [ + # same size + [[np.float32, -1, (1, 1, 1, 1)], (1, 1), True, 0, 0, 0, 255], # case 1 + [[np.float32, -1, (2, 65535, 2, 2)], (2, 2), True, 0, 0, 0, 255], # case 2 + [[np.float32, -1, (65535, 2, 4, 8)], (4, 8), True, 0, 0, 0, 255], # case 3 + [[np.float32, -1, (2, 4, 65535, 2)], (65535, 2), True, 0, 0, 0, 255], # case 4 + [[np.float32, -1, (1, 31, 149, 2)], (149, 2), True, 0, 0, 0, 255], # case 5 + [[np.float32, -1, (10, 10, 786432, 8)], (786432, 8), True, 0, 0, 0, 255], # case 6 + [[np.float32, -1, (32, 32, 32, 32)], (32, 32), True, 0, 0, 0, 3402823500.0], # case 7 + [[np.float32, -1, (10, 10, 786432, 8)], (786432, 8), False, 0, 0, 0, 255], # case 8 + [[np.float32, -1, (32, 32, 32, 32)], (32, 32), False, 0, 0, 0, 3402823500.0], # case 9 + + # align_corners = True + [[np.float32, -1, (1, 1, 1, 1)], (2, 2), True, 0, 0, 0, 255], # case 10 + [[np.float32, -1, (1, 1, 2, 2)], (4, 4), True, 0, 0, 0, 255], # case 11 + [[np.float32, -1, (2, 2, 1, 1)], (2, 2), True, 0, 0, 0, 255], # case 12 + [[np.float32, -1, (2, 2, 2, 2)], (10, 10), True, 0, 0, 0, 255], # case 13 + [[np.float32, -1, (1, 31, 149, 2)], (2, 149), True, 0, 0, 0, 255], # case 14 + #[[np.float32, -1, (32, 32, 32, 32)], (64, 64), True, 0, 0, 0, 255], # case 15 + #[[np.float32, -1, (32, 32, 32, 32)], (64, 64), True, 0, 0, 0, 3402823500.0], # case 16 + + # align_corners = False + [[np.float32, -1, (1, 1, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], # case 17 + [[np.float32, -1, (1, 1, 2, 2)], (4, 4), False, 0.5, 0.5, 0, 255], # case 18 + [[np.float32, -1, (2, 2, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], # case 19 + [[np.float32, -1, (2, 2, 2, 2)], (10, 10), False, 0.5, 0.5, 0, 255], # case 20 + [[np.float32, -1, (1, 31, 149, 2)], (2, 149), False, 0.5, 0.5, 0, 255], # case 21 + [[np.float32, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 255], # case 22 + [[np.float32, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 3402823500.0] # case 23 + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], item[5], item[6]) + cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2], item[3], item[4]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3], item[4]) + self.assertRtolEqual(cpu_output, npu_output) + + # float16 [0.002, 0.001] + #pylint: disable=unused-argument + def test_upsample_bicubic2d_float16_shape_format(self, device): + def cpu_op_exec_fp16(input1, output_size, align_corners, scale_h, scale_w): + input1 = input1.to(torch.float32) + output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) + output = output.numpy() + output = output.astype(np.float16) + return output + + shape_format = [ + # same size + [[np.float16, -1, (1, 1, 1, 1)], (1, 1), True, 0, 0, 0, 255], # case 24 + [[np.float16, -1, (2, 65535, 2, 2)], (2, 2), True, 0, 0, 0, 255], # case 25 + [[np.float16, -1, (65535, 2, 4, 8)], (4, 8), True, 0, 0, 0, 255], # case 26 + [[np.float16, -1, (2, 4, 65535, 2)], (65535, 2), True, 0, 0, 0, 255], # case 27 + [[np.float16, -1, (1, 31, 149, 2)], (149, 2), True, 0, 0, 0, 255], # case 28 + [[np.float16, -1, (10, 10, 786432, 8)], (786432, 8), True, 0, 0, 0, 255], # case 29 + [[np.float16, -1, (32, 32, 32, 32)], (32, 32), True, 0, 0, 0, 6550.0], # case 30 + [[np.float16, -1, (10, 10, 786432, 8)], (786432, 8), False, 0, 0, 0, 255], # case 31 + [[np.float16, -1, (32, 32, 32, 32)], (32, 32), False, 0, 0, 0, 6550.0], # case 32 + + # align_corners = True + [[np.float16, -1, (1, 1, 1, 1)], (2, 2), True, 0, 0, 0, 255], # case 33 + [[np.float16, -1, (1, 1, 2, 2)], (4, 4), True, 0, 0, 0, 255], # case 34 + [[np.float16, -1, (2, 2, 1, 1)], (2, 2), True, 0, 0, 0, 255], # case 35 + [[np.float16, -1, (2, 2, 2, 2)], (10, 10), True, 0, 0, 0, 255], # case 36 + [[np.float16, -1, (1, 31, 149, 2)], (2, 149), True, 0, 0, 0, 255], # case 37 + [[np.float16, -1, (32, 32, 32, 32)], (64, 64), True, 0, 0, 0, 255], # case 38 + [[np.float16, -1, (32, 32, 32, 32)], (64, 64), True, 0, 0, 0, 6550.0], # case 39 + + # align_corners = False + [[np.float16, -1, (1, 1, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], # case 40 + [[np.float16, -1, (1, 1, 2, 2)], (4, 4), False, 0.5, 0.5, 0, 255], # case 41 + [[np.float16, -1, (2, 2, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], # case 42 + [[np.float16, -1, (2, 2, 2, 2)], (10, 10), False, 0.5, 0.5, 0, 255], # case 43 + [[np.float16, -1, (1, 31, 149, 2)], (2, 149), False, 0.5, 0.5, 0, 255], # case 44 + [[np.float16, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 255], # case 45 + [[np.float16, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 6550.0] # case 46 + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], item[5], item[6]) + cpu_output = cpu_op_exec_fp16(cpu_input1, item[1], item[2], item[3], item[4]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3], item[4]) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestUpsampleBicubic2d, globals(), except_for='cpu') +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_upsample_bicubic2d_backward.py b/test/test_npu/test_network_ops/test_upsample_bicubic2d_backward.py index f75f627040a738f9c4ee208c98458fb6f2966ba2..0d3f04813775d77fba8238666fd02c1c61423385 100644 --- a/test/test_npu/test_network_ops/test_upsample_bicubic2d_backward.py +++ b/test/test_npu/test_network_ops/test_upsample_bicubic2d_backward.py @@ -1,91 +1,91 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestUpsampleBicubic2dBackward(TestCase): - - def cpu_op_exec(self, input1, output_size, align_corners, scale_h, scale_w): - input1.requires_grad = True - output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) - output.backward(torch.ones_like(output)) - output_grad = input1.grad - output_grad = output_grad.detach().numpy() - return output_grad - - def npu_op_exec(self, input1, output_size, align_corners, scale_h, scale_w): - input1.requires_grad = True - output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) - output.backward(torch.ones_like(output)) - output_grad = input1.grad - output_grad = output_grad.to("cpu").detach().numpy() - return output_grad - - - def test_upsample_bicubic2d_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (1, 1, 1, 1)], (1, 1), True, 0, 0, 0, 255], - [[np.float32, -1, (2, 65535, 2, 2)], (2, 2), True, 0, 0, 0, 255], - [[np.float32, -1, (10, 10, 786432, 8)], (786432, 8), False, 0, 0, 0, 255], - [[np.float32, -1, (1, 1, 1, 1)], (2, 2), True, 0, 0, 0, 255], - [[np.float32, -1, (1, 1, 2, 2)], (4, 4), True, 0, 0, 0, 255], - [[np.float32, -1, (1, 1, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], - [[np.float32, -1, (1, 1, 2, 2)], (4, 4), False, 0.5, 0.5, 0, 255], - [[np.float32, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 3402823500.0] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], item[5], item[6]) - cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2], item[3], item[4]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3], item[4]) - self.assertRtolEqual(cpu_output, npu_output) - - - def test_upsample_bicubic2d_float16_shape_format(self, device): - def cpu_op_exec_fp16(input1, output_size, align_corners, scale_h, scale_w): - input1 = input1.to(torch.float32) - input1.requires_grad = True - output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) - output.backward(torch.ones_like(output)) - output_grad = input1.grad - output_grad = output_grad.detach().numpy() - output_grad = output_grad.astype(np.float16) - return output_grad - - shape_format = [ - [[np.float16, -1, (1, 1, 1, 1)], (1, 1), True, 0, 0, 0, 255], - [[np.float16, -1, (2, 65535, 2, 2)], (2, 2), True, 0, 0, 0, 255], - [[np.float16, -1, (32, 32, 32, 32)], (32, 32), False, 0, 0, 0, 6550.0], - [[np.float16, -1, (1, 1, 1, 1)], (2, 2), True, 0, 0, 0, 255], - [[np.float16, -1, (1, 1, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], - [[np.float16, -1, (1, 1, 2, 2)], (4, 4), False, 0.5, 0.5, 0, 255], - [[np.float16, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 6550.0] - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], item[5], item[6]) - cpu_output = cpu_op_exec_fp16(cpu_input1, item[1], item[2], item[3], item[4]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3], item[4]) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestUpsampleBicubic2dBackward, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestUpsampleBicubic2dBackward(TestCase): + + def cpu_op_exec(self, input1, output_size, align_corners, scale_h, scale_w): + input1.requires_grad = True + output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) + output.backward(torch.ones_like(output)) + output_grad = input1.grad + output_grad = output_grad.detach().numpy() + return output_grad + + def npu_op_exec(self, input1, output_size, align_corners, scale_h, scale_w): + input1.requires_grad = True + output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) + output.backward(torch.ones_like(output)) + output_grad = input1.grad + output_grad = output_grad.to("cpu").detach().numpy() + return output_grad + + + def test_upsample_bicubic2d_common_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (1, 1, 1, 1)], (1, 1), True, 0, 0, 0, 255], + [[np.float32, -1, (2, 65535, 2, 2)], (2, 2), True, 0, 0, 0, 255], + [[np.float32, -1, (10, 10, 786432, 8)], (786432, 8), False, 0, 0, 0, 255], + [[np.float32, -1, (1, 1, 1, 1)], (2, 2), True, 0, 0, 0, 255], + [[np.float32, -1, (1, 1, 2, 2)], (4, 4), True, 0, 0, 0, 255], + [[np.float32, -1, (1, 1, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], + [[np.float32, -1, (1, 1, 2, 2)], (4, 4), False, 0.5, 0.5, 0, 255], + [[np.float32, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 3402823500.0] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], item[5], item[6]) + cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2], item[3], item[4]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3], item[4]) + self.assertRtolEqual(cpu_output, npu_output) + + + def test_upsample_bicubic2d_float16_shape_format(self, device): + def cpu_op_exec_fp16(input1, output_size, align_corners, scale_h, scale_w): + input1 = input1.to(torch.float32) + input1.requires_grad = True + output = torch._C._nn.upsample_bicubic2d(input1, output_size, align_corners, scale_h, scale_w) + output.backward(torch.ones_like(output)) + output_grad = input1.grad + output_grad = output_grad.detach().numpy() + output_grad = output_grad.astype(np.float16) + return output_grad + + shape_format = [ + [[np.float16, -1, (1, 1, 1, 1)], (1, 1), True, 0, 0, 0, 255], + [[np.float16, -1, (2, 65535, 2, 2)], (2, 2), True, 0, 0, 0, 255], + [[np.float16, -1, (32, 32, 32, 32)], (32, 32), False, 0, 0, 0, 6550.0], + [[np.float16, -1, (1, 1, 1, 1)], (2, 2), True, 0, 0, 0, 255], + [[np.float16, -1, (1, 1, 1, 1)], (2, 2), False, 0.5, 0.5, 0, 255], + [[np.float16, -1, (1, 1, 2, 2)], (4, 4), False, 0.5, 0.5, 0, 255], + [[np.float16, -1, (32, 32, 32, 32)], (64, 64), False, 0.5, 0.5, 0, 6550.0] + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], item[5], item[6]) + cpu_output = cpu_op_exec_fp16(cpu_input1, item[1], item[2], item[3], item[4]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2], item[3], item[4]) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestUpsampleBicubic2dBackward, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_upsample_bilinear.py b/test/test_npu/test_network_ops/test_upsample_bilinear.py index 9bac334778b37b6ab773a24fe23d67689479e601..8ac02b7cab5f8a5971dfc65ed63da18516bd54af 100644 --- a/test/test_npu/test_network_ops/test_upsample_bilinear.py +++ b/test/test_npu/test_network_ops/test_upsample_bilinear.py @@ -1,76 +1,76 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -import torch.nn.functional as F -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestUpsampleBilinear(TestCase): - def cpu_op_exec(self, input1, size): - input1.requires_grad_(True) - output = F.interpolate(input1, size, mode="bilinear") - w = torch.ones_like(output) - output.backward(w) - res = input1.grad - res = res.numpy() - output = output.detach().numpy() - return output, res - - def npu_op_exec(self, input1, size): - input1.requires_grad_(True) - output = F.interpolate(input1, size, mode="bilinear") - w = torch.ones_like(output) - w = w.to("npu") - output.backward(w) - output = output.to("cpu").detach().numpy() - res = input1.grad - res = res.to("cpu").numpy() - return output, res - - def upsample_bilinear_backward_result(self, shape_format): - for item in shape_format: - input_cpu, input_npu = create_common_tensor(item[0], 0, 100) - if input_cpu.dtype == torch.float16: - input_cpu = input_cpu.to(torch.float32) - - cpu_output, cpu_grad = self.cpu_op_exec(input_cpu, item[1]) - npu_output, npu_grad = self.npu_op_exec(input_npu, item[1]) - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_grad = cpu_grad.astype(npu_grad.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_grad, npu_grad) - - def test_upsample_bilinear_backward_shape_format_aicpu(self, device): - format_list = [0, 3] - size_list = [[10001, 2]] - shape_format = [[[np.float32, i, [2, 10020, 100, 1]], s] for i in format_list for s in size_list] - - self.upsample_bilinear_backward_result(shape_format) - - def test_upsample_bilinear_backward_shape_format_aicore(self, device): - format_list = [0, 3] - size_list = [[100, 2]] - shape_format = [[[np.float32, i, [2, 10020, 100, 1]], s] for i in format_list for s in size_list] - - self.upsample_bilinear_backward_result(shape_format) - -instantiate_device_type_tests(TestUpsampleBilinear, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +import torch.nn.functional as F +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestUpsampleBilinear(TestCase): + def cpu_op_exec(self, input1, size): + input1.requires_grad_(True) + output = F.interpolate(input1, size, mode="bilinear") + w = torch.ones_like(output) + output.backward(w) + res = input1.grad + res = res.numpy() + output = output.detach().numpy() + return output, res + + def npu_op_exec(self, input1, size): + input1.requires_grad_(True) + output = F.interpolate(input1, size, mode="bilinear") + w = torch.ones_like(output) + w = w.to("npu") + output.backward(w) + output = output.to("cpu").detach().numpy() + res = input1.grad + res = res.to("cpu").numpy() + return output, res + + def upsample_bilinear_backward_result(self, shape_format): + for item in shape_format: + input_cpu, input_npu = create_common_tensor(item[0], 0, 100) + if input_cpu.dtype == torch.float16: + input_cpu = input_cpu.to(torch.float32) + + cpu_output, cpu_grad = self.cpu_op_exec(input_cpu, item[1]) + npu_output, npu_grad = self.npu_op_exec(input_npu, item[1]) + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_grad = cpu_grad.astype(npu_grad.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_grad, npu_grad) + + def test_upsample_bilinear_backward_shape_format_aicpu(self, device): + format_list = [0, 3] + size_list = [[10001, 2]] + shape_format = [[[np.float32, i, [2, 10020, 100, 1]], s] for i in format_list for s in size_list] + + self.upsample_bilinear_backward_result(shape_format) + + def test_upsample_bilinear_backward_shape_format_aicore(self, device): + format_list = [0, 3] + size_list = [[100, 2]] + shape_format = [[[np.float32, i, [2, 10020, 100, 1]], s] for i in format_list for s in size_list] + + self.upsample_bilinear_backward_result(shape_format) + +instantiate_device_type_tests(TestUpsampleBilinear, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_upsample_bilinear_backward.py b/test/test_npu/test_network_ops/test_upsample_bilinear_backward.py old mode 100644 new mode 100755 index 2d1ae7e4e3679bf22d27acfed7e7ac6c621a5b51..ce01e8f55d48fb1fd33fc9f063a48c5af3ab2d22 --- a/test/test_npu/test_network_ops/test_upsample_bilinear_backward.py +++ b/test/test_npu/test_network_ops/test_upsample_bilinear_backward.py @@ -1,79 +1,79 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -import torch.nn as nn -import torch.nn.functional as F -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestUpsampleBilinear(TestCase): - def cpu_op_exec(self, input1, size): - input1.requires_grad_(True) - output = F.interpolate(input1, size, mode="bilinear") - w = torch.ones_like(output) - output.backward(w) - res = input1.grad - res = res.numpy() - output = output.detach().numpy() - return output, res - - def npu_op_exec(self, input1, size): - input1.requires_grad_(True) - output = F.interpolate(input1, size, mode="bilinear") - w = torch.ones_like(output) - w = w.to("npu") - output.backward(w) - output = output.to("cpu").detach().numpy() - res = input1.grad - res = res.to("cpu").numpy() - return output, res - - def upsample_bilinear_backward_result(self, shape_format): - for item in shape_format: - input_cpu, input_npu = create_common_tensor(item[0], 0, 100) - if input_cpu.dtype == torch.float16: - input_cpu = input_cpu.to(torch.float32) - - cpu_output, cpu_grad = self.cpu_op_exec(input_cpu, item[1]) - npu_output, npu_grad = self.npu_op_exec(input_npu, item[1]) - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_grad = cpu_grad.astype(npu_grad.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_grad, npu_grad) - - def test_upsample_bilinear_backward_shape_format_fp16(self, device): - format_list = [0, 3, 29] - size_list = [[2, 2], [3, 3], [6, 6]] - shape_format = [[[np.float16, i, [24, 56, 18, 18]], s] for i in format_list for s in size_list] - - self.upsample_bilinear_backward_result(shape_format) - - def test_upsample_bilinear_backward_shape_format_fp32(self, device): - type_list = [np.float16, np.float32] - format_list = [0, 3, 29] - size_list = [[2, 2], [3, 3], [6, 6]] - shape_format = [[[np.float32, i, [24, 56, 18, 18]], s] for i in format_list for s in size_list] - - self.upsample_bilinear_backward_result(shape_format) - - -instantiate_device_type_tests(TestUpsampleBilinear, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +import torch.nn as nn +import torch.nn.functional as F +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestUpsampleBilinear(TestCase): + def cpu_op_exec(self, input1, size): + input1.requires_grad_(True) + output = F.interpolate(input1, size, mode="bilinear") + w = torch.ones_like(output) + output.backward(w) + res = input1.grad + res = res.numpy() + output = output.detach().numpy() + return output, res + + def npu_op_exec(self, input1, size): + input1.requires_grad_(True) + output = F.interpolate(input1, size, mode="bilinear") + w = torch.ones_like(output) + w = w.to("npu") + output.backward(w) + output = output.to("cpu").detach().numpy() + res = input1.grad + res = res.to("cpu").numpy() + return output, res + + def upsample_bilinear_backward_result(self, shape_format): + for item in shape_format: + input_cpu, input_npu = create_common_tensor(item[0], 0, 100) + if input_cpu.dtype == torch.float16: + input_cpu = input_cpu.to(torch.float32) + + cpu_output, cpu_grad = self.cpu_op_exec(input_cpu, item[1]) + npu_output, npu_grad = self.npu_op_exec(input_npu, item[1]) + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_grad = cpu_grad.astype(npu_grad.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_grad, npu_grad) + + def test_upsample_bilinear_backward_shape_format_fp16(self, device): + format_list = [0, 3, 29] + size_list = [[2, 2], [3, 3], [6, 6]] + shape_format = [[[np.float16, i, [24, 56, 18, 18]], s] for i in format_list for s in size_list] + + self.upsample_bilinear_backward_result(shape_format) + + def test_upsample_bilinear_backward_shape_format_fp32(self, device): + type_list = [np.float16, np.float32] + format_list = [0, 3, 29] + size_list = [[2, 2], [3, 3], [6, 6]] + shape_format = [[[np.float32, i, [24, 56, 18, 18]], s] for i in format_list for s in size_list] + + self.upsample_bilinear_backward_result(shape_format) + + +instantiate_device_type_tests(TestUpsampleBilinear, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_upsample_linear1d_backward.py b/test/test_npu/test_network_ops/test_upsample_linear1d_backward.py index 9b492e355d9d55c09e30b94b91ce18842090cb85..f18187e0887d5cb8556dfbba46a6e69839c1a330 100644 --- a/test/test_npu/test_network_ops/test_upsample_linear1d_backward.py +++ b/test/test_npu/test_network_ops/test_upsample_linear1d_backward.py @@ -1,129 +1,129 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestUpsampleLinear1DBackward(TestCase): - def cpu_op_exec(self, input, grads, size, align_corners): - input.requires_grad_(True) - output = torch._C._nn.upsample_linear1d(input, size, align_corners=align_corners) - output.backward(grads) - gradcpu = input.grad - return output.detach().numpy(), gradcpu.detach().numpy() - - def npu_op_exec(self, input, grads, size, align_corners): - input.requires_grad_(True) - output = torch._C._nn.upsample_linear1d(input, size, align_corners=align_corners) - output = output.to("npu") - output.backward(grads) - gradnpu = input.grad - gradnpu = gradnpu.to("cpu") - output = output.to("cpu") - return output.detach().numpy(), gradnpu.detach().numpy() - - def test_upsample_linear1d_backward_shape_format(self, device): - test_cases = [ - [[np.float16, 0, (1, 1, 1, 2)], [4, ], True], - [[np.float16, 0, (2, 1, 1, 4)], [8, ], True], - [[np.float16, 0, (2, 2, 1, 3)], [1, ], True], - [[np.float16, 0, (2, 1, 1, 1)], [4, ], False], - [[np.float16, 0, (4, 1, 1, 2)], [4, ], False], - [[np.float16, 0, (1, 1, 1, 1)], [1, ], False], - - [[np.float32, 0, (1, 1, 1, 2)], [4, ], True], - [[np.float32, 0, (2, 1, 1, 2)], [4, ], True], - [[np.float32, 0, (2, 2, 1, 3)], [1, ], True], - [[np.float32, 0, (3, 1, 1, 1)], [2, ], False], - [[np.float32, 0, (4, 1, 1, 1)], [2, ], False], - [[np.float32, 0, (1, 1, 1, 1)], [1, ], False], - - [[np.float32, 0, (9, 7, 1, 2)], [15, ], True], - [[np.float16, 0, (8, 7, 1, 1)], [2, ], True], - [[np.float16, 0, (17, 2, 1, 3)], [1, ], True], - [[np.float16, 0, (6, 4, 1, 1)], [3, ], False], - [[np.float16, 0, (8, 7, 1, 2)], [4, ], False], - [[np.float16, 0, (2, 7, 1, 7)], [1, ], False], - - [[np.float32, 0, (9, 7, 1, 2)], [7, ], True], - [[np.float32, 0, (8, 3, 1, 1)], [2, ], True], - [[np.float32, 0, (8, 3, 1, 1)], [2, ], True], - [[np.float32, 0, (17, 2, 1, 3)], [1, ], True], - [[np.float32, 0, (9, 7, 1, 2)], [7, ], False], - [[np.float32, 0, (8, 3, 1, 3)], [2, ], False], - [[np.float32, 0, (2, 7, 1, 7)], [1, ], False], - - [[np.float16, 0, (9, 7, 1, 2)], [17, ], True], - [[np.float16, 0, (17, 13, 1, 15)], [16, ], True], - [[np.float16, 0, (61, 41, 1, 1)], [7, ], False], - [[np.float16, 0, (38, 7, 1, 7)], [16, ], False], - [[np.float32, 0, (997, 3, 1, 1)], [32, ], True], - [[np.float32, 0, (627, 2, 1, 3)], [17, ], False], - [[np.float32, 0, (78, 73, 1, 1)], [48, ], False], - [[np.float32, 0, (6553, 2, 1, 2)], [4, ], False], - [[np.float16, 0, (6553, 2, 1, 2)], [4, ], False], - [[np.float32, 0, (1008, 3, 1, 2)], [4, ], False], - [[np.float16, 0, (1008, 3, 1, 2)], [4, ], False] - ] - for item in test_cases: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - - size = list(item[0][2]) - size[3] = item[1][0] - - grad_item = [] - grad_item.append(item[0][0]) - grad_item.append(item[0][1]) - grad_item.append(size) - cpu_grads, npu_grads = create_common_tensor(grad_item, 0, 100) - - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - - if cpu_grads.dtype == torch.float16: - cpu_grads = cpu_grads.to(torch.float32) - - if cpu_input.dim() == 4: - cpu_input = cpu_input.squeeze(2) - - if npu_input.dim() == 4: - npu_input = npu_input.squeeze(2) - - if cpu_grads.dim() == 4: - cpu_grads = cpu_grads.squeeze(2) - - if npu_grads.dim() == 4: - npu_grads = npu_grads.squeeze(2) - - size = item[1] - align_corners = item[2] - - cpu_output, cpu_grad = self.cpu_op_exec(cpu_input, cpu_grads, size, align_corners) - npu_output, npu_grad = self.npu_op_exec(npu_input, npu_grads, size, align_corners) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_grad = cpu_grad.astype(npu_grad.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_grad, npu_grad) - - - -instantiate_device_type_tests(TestUpsampleLinear1DBackward, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestUpsampleLinear1DBackward(TestCase): + def cpu_op_exec(self, input, grads, size, align_corners): + input.requires_grad_(True) + output = torch._C._nn.upsample_linear1d(input, size, align_corners=align_corners) + output.backward(grads) + gradcpu = input.grad + return output.detach().numpy(), gradcpu.detach().numpy() + + def npu_op_exec(self, input, grads, size, align_corners): + input.requires_grad_(True) + output = torch._C._nn.upsample_linear1d(input, size, align_corners=align_corners) + output = output.to("npu") + output.backward(grads) + gradnpu = input.grad + gradnpu = gradnpu.to("cpu") + output = output.to("cpu") + return output.detach().numpy(), gradnpu.detach().numpy() + + def test_upsample_linear1d_backward_shape_format(self, device): + test_cases = [ + [[np.float16, 0, (1, 1, 1, 2)], [4, ], True], + [[np.float16, 0, (2, 1, 1, 4)], [8, ], True], + [[np.float16, 0, (2, 2, 1, 3)], [1, ], True], + [[np.float16, 0, (2, 1, 1, 1)], [4, ], False], + [[np.float16, 0, (4, 1, 1, 2)], [4, ], False], + [[np.float16, 0, (1, 1, 1, 1)], [1, ], False], + + [[np.float32, 0, (1, 1, 1, 2)], [4, ], True], + [[np.float32, 0, (2, 1, 1, 2)], [4, ], True], + [[np.float32, 0, (2, 2, 1, 3)], [1, ], True], + [[np.float32, 0, (3, 1, 1, 1)], [2, ], False], + [[np.float32, 0, (4, 1, 1, 1)], [2, ], False], + [[np.float32, 0, (1, 1, 1, 1)], [1, ], False], + + [[np.float32, 0, (9, 7, 1, 2)], [15, ], True], + [[np.float16, 0, (8, 7, 1, 1)], [2, ], True], + [[np.float16, 0, (17, 2, 1, 3)], [1, ], True], + [[np.float16, 0, (6, 4, 1, 1)], [3, ], False], + [[np.float16, 0, (8, 7, 1, 2)], [4, ], False], + [[np.float16, 0, (2, 7, 1, 7)], [1, ], False], + + [[np.float32, 0, (9, 7, 1, 2)], [7, ], True], + [[np.float32, 0, (8, 3, 1, 1)], [2, ], True], + [[np.float32, 0, (8, 3, 1, 1)], [2, ], True], + [[np.float32, 0, (17, 2, 1, 3)], [1, ], True], + [[np.float32, 0, (9, 7, 1, 2)], [7, ], False], + [[np.float32, 0, (8, 3, 1, 3)], [2, ], False], + [[np.float32, 0, (2, 7, 1, 7)], [1, ], False], + + [[np.float16, 0, (9, 7, 1, 2)], [17, ], True], + [[np.float16, 0, (17, 13, 1, 15)], [16, ], True], + [[np.float16, 0, (61, 41, 1, 1)], [7, ], False], + [[np.float16, 0, (38, 7, 1, 7)], [16, ], False], + [[np.float32, 0, (997, 3, 1, 1)], [32, ], True], + [[np.float32, 0, (627, 2, 1, 3)], [17, ], False], + [[np.float32, 0, (78, 73, 1, 1)], [48, ], False], + [[np.float32, 0, (6553, 2, 1, 2)], [4, ], False], + [[np.float16, 0, (6553, 2, 1, 2)], [4, ], False], + [[np.float32, 0, (1008, 3, 1, 2)], [4, ], False], + [[np.float16, 0, (1008, 3, 1, 2)], [4, ], False] + ] + for item in test_cases: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + + size = list(item[0][2]) + size[3] = item[1][0] + + grad_item = [] + grad_item.append(item[0][0]) + grad_item.append(item[0][1]) + grad_item.append(size) + cpu_grads, npu_grads = create_common_tensor(grad_item, 0, 100) + + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + + if cpu_grads.dtype == torch.float16: + cpu_grads = cpu_grads.to(torch.float32) + + if cpu_input.dim() == 4: + cpu_input = cpu_input.squeeze(2) + + if npu_input.dim() == 4: + npu_input = npu_input.squeeze(2) + + if cpu_grads.dim() == 4: + cpu_grads = cpu_grads.squeeze(2) + + if npu_grads.dim() == 4: + npu_grads = npu_grads.squeeze(2) + + size = item[1] + align_corners = item[2] + + cpu_output, cpu_grad = self.cpu_op_exec(cpu_input, cpu_grads, size, align_corners) + npu_output, npu_grad = self.npu_op_exec(npu_input, npu_grads, size, align_corners) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_grad = cpu_grad.astype(npu_grad.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_grad, npu_grad) + + + +instantiate_device_type_tests(TestUpsampleLinear1DBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_upsample_nearest1d.py b/test/test_npu/test_network_ops/test_upsample_nearest1d.py index bb4c76edb7660e668dfee98d224ff13920683e1a..724ddc688c68ac14fe7d53586ecff34bd06d701d 100644 --- a/test/test_npu/test_network_ops/test_upsample_nearest1d.py +++ b/test/test_npu/test_network_ops/test_upsample_nearest1d.py @@ -1,90 +1,90 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import torch.nn.functional as F -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestUpsampleNearest1DBackward(TestCase): - def cpu_op_exec(self, input, size): - output = F.interpolate(input, size, mode="nearest") - return output.detach().numpy() - - def cpu_op_scale_exec(self, input, scale): - output = F.interpolate(input, scale_factor = scale, mode="nearest") - return output.detach().numpy() - - def npu_op_exec(self, input, size): - output = F.interpolate(input, size, mode="nearest") - output = output.cpu() - return output.detach().numpy() - - def npu_op_scale_exec(self, input, scale): - output = F.interpolate(input, scale_factor = scale, mode="nearest") - output = output.cpu() - return output.detach().numpy() - - def test_upsample_nearest1d_backward_shape_format(self, device): - test_cases = [ - [[np.float16, 0, (1, 1, 2)], [4, ]], - [[np.float16, 0, (2, 1, 4)], [8, ]], - [[np.float32, 3, (2, 2, 3)], [1, ]], - [[np.float32, 0, (2, 1, 1)], [4, ]], - [[np.float32, 0, (4, 1, 2)], [4, ]], - [[np.float32, 0, (1, 1, 1)], [1, ]] - ] - for item in test_cases: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - size = list(item[0][2]) - size[2] = item[1][0] - - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input, item[1]) - npu_output = self.npu_op_exec(npu_input, item[1]) - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - - def test_upsample_nearest1d_backward_shape_format_scale(self, device): - test_cases = [ - [[np.float16, 0, (1, 1, 2)], 2], - [[np.float16, 0, (2, 1, 4)], 2.2], - [[np.float32, 3, (2, 2, 3)], 0.4], - [[np.float32, 0, (2, 1, 1)], 4], - [[np.float32, 0, (4, 1, 2)], 2], - [[np.float32, 0, (1, 1, 1)], 1] - ] - for item in test_cases: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - - cpu_output = self.cpu_op_scale_exec(cpu_input, item[1]) - npu_output = self.npu_op_scale_exec(npu_input, item[1]) - - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestUpsampleNearest1DBackward, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import torch.nn.functional as F +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestUpsampleNearest1DBackward(TestCase): + def cpu_op_exec(self, input, size): + output = F.interpolate(input, size, mode="nearest") + return output.detach().numpy() + + def cpu_op_scale_exec(self, input, scale): + output = F.interpolate(input, scale_factor = scale, mode="nearest") + return output.detach().numpy() + + def npu_op_exec(self, input, size): + output = F.interpolate(input, size, mode="nearest") + output = output.cpu() + return output.detach().numpy() + + def npu_op_scale_exec(self, input, scale): + output = F.interpolate(input, scale_factor = scale, mode="nearest") + output = output.cpu() + return output.detach().numpy() + + def test_upsample_nearest1d_backward_shape_format(self, device): + test_cases = [ + [[np.float16, 0, (1, 1, 2)], [4, ]], + [[np.float16, 0, (2, 1, 4)], [8, ]], + [[np.float32, 3, (2, 2, 3)], [1, ]], + [[np.float32, 0, (2, 1, 1)], [4, ]], + [[np.float32, 0, (4, 1, 2)], [4, ]], + [[np.float32, 0, (1, 1, 1)], [1, ]] + ] + for item in test_cases: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + size = list(item[0][2]) + size[2] = item[1][0] + + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input, item[1]) + npu_output = self.npu_op_exec(npu_input, item[1]) + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + + def test_upsample_nearest1d_backward_shape_format_scale(self, device): + test_cases = [ + [[np.float16, 0, (1, 1, 2)], 2], + [[np.float16, 0, (2, 1, 4)], 2.2], + [[np.float32, 3, (2, 2, 3)], 0.4], + [[np.float32, 0, (2, 1, 1)], 4], + [[np.float32, 0, (4, 1, 2)], 2], + [[np.float32, 0, (1, 1, 1)], 1] + ] + for item in test_cases: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + + cpu_output = self.cpu_op_scale_exec(cpu_input, item[1]) + npu_output = self.npu_op_scale_exec(npu_input, item[1]) + + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestUpsampleNearest1DBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() + diff --git a/test/test_npu/test_network_ops/test_upsample_nearest1d_backward.py b/test/test_npu/test_network_ops/test_upsample_nearest1d_backward.py index 05f55517dadef4afa2a7ae0431d4e780a2e16387..0741f7dfcc6cc19915315e296b63bd1975dca38c 100644 --- a/test/test_npu/test_network_ops/test_upsample_nearest1d_backward.py +++ b/test/test_npu/test_network_ops/test_upsample_nearest1d_backward.py @@ -1,131 +1,131 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import math -import numpy as np -import torch.nn.functional as F -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestUpsampleNearest1DBackward(TestCase): - def cpu_op_exec(self, input, grads, size): - input.requires_grad_(True) - output = F.interpolate(input, size, mode="nearest") - output.backward(grads) - gradcpu = input.grad - return output.detach().numpy(), gradcpu.detach().numpy() - - def cpu_op_scale_exec(self, input, grads, scale): - input.requires_grad_(True) - output = F.interpolate(input, scale_factor = scale, mode="nearest") - output.backward(grads) - gradcpu = input.grad - return output.detach().numpy(), gradcpu.detach().numpy() - - def npu_op_exec(self, input, grads, size): - input.requires_grad_(True) - output = F.interpolate(input, size, mode="nearest") - output.backward(grads) - gradnpu = input.grad - gradnpu = gradnpu.to("cpu") - output = output.to("cpu") - return output.detach().numpy(), gradnpu.detach().numpy() - - def npu_op_scale_exec(self, input, grads, scale): - input.requires_grad_(True) - output = F.interpolate(input, scale_factor = scale, mode="nearest") - output.backward(grads) - gradnpu = input.grad - gradnpu = gradnpu.to("cpu") - output = output.to("cpu") - return output.detach().numpy(), gradnpu.detach().numpy() - - def test_upsample_nearest1d_backward_shape_format(self, device): - test_cases = [ - [[np.float16, 0, (1, 1, 2)], [4, ]], - [[np.float16, 0, (2, 1, 4)], [8, ]], - [[np.float32, 3, (2, 2, 3)], [1, ]], - [[np.float32, 0, (2, 1, 1)], [4, ]], - [[np.float32, 0, (4, 1, 2)], [4, ]], - [[np.float32, 0, (1, 1, 1)], [1, ]] - ] - for item in test_cases: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - size = list(item[0][2]) - size[2] = item[1][0] - - grad_item = [] - grad_item.append(item[0][0]) - grad_item.append(item[0][1]) - grad_item.append(size) - cpu_grads, npu_grads = create_common_tensor(grad_item, 0, 100) - - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - - if cpu_grads.dtype == torch.float16: - cpu_grads = cpu_grads.to(torch.float32) - - cpu_output, cpu_grad = self.cpu_op_exec(cpu_input, cpu_grads, item[1]) - npu_output, npu_grad = self.npu_op_exec(npu_input, npu_grads, item[1]) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_grad = cpu_grad.astype(npu_grad.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_grad, npu_grad) - - def test_upsample_nearest1d_backward_shape_format_scale(self, device): - test_cases = [ - [[np.float16, 0, (1, 1, 2)], 2], - [[np.float16, 0, (2, 1, 4)], 2.2], - [[np.float32, 3, (2, 2, 3)], 0.4], - [[np.float32, 0, (2, 1, 1)], 4], - [[np.float32, 0, (4, 1, 2)], 2], - [[np.float32, 0, (1, 1, 1)], 1] - ] - for item in test_cases: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - - size = list(item[0][2]) - size[2] = item[1] * item[0][2][2] - size[2] = math.floor(size[2]) - - grad_item = [] - grad_item.append(item[0][0]) - grad_item.append(item[0][1]) - grad_item.append(size) - cpu_grads, npu_grads = create_common_tensor(grad_item, 0, 100) - - if cpu_input.dtype == torch.float16: - cpu_input = cpu_input.to(torch.float32) - - if cpu_grads.dtype == torch.float16: - cpu_grads = cpu_grads.to(torch.float32) - - cpu_output, cpu_grad = self.cpu_op_scale_exec(cpu_input, cpu_grads, item[1]) - npu_output, npu_grad = self.npu_op_scale_exec(npu_input, npu_grads, item[1]) - - cpu_output = cpu_output.astype(npu_output.dtype) - cpu_grad = cpu_grad.astype(npu_grad.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_grad, npu_grad) - -instantiate_device_type_tests(TestUpsampleNearest1DBackward, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import math +import numpy as np +import torch.nn.functional as F +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestUpsampleNearest1DBackward(TestCase): + def cpu_op_exec(self, input, grads, size): + input.requires_grad_(True) + output = F.interpolate(input, size, mode="nearest") + output.backward(grads) + gradcpu = input.grad + return output.detach().numpy(), gradcpu.detach().numpy() + + def cpu_op_scale_exec(self, input, grads, scale): + input.requires_grad_(True) + output = F.interpolate(input, scale_factor = scale, mode="nearest") + output.backward(grads) + gradcpu = input.grad + return output.detach().numpy(), gradcpu.detach().numpy() + + def npu_op_exec(self, input, grads, size): + input.requires_grad_(True) + output = F.interpolate(input, size, mode="nearest") + output.backward(grads) + gradnpu = input.grad + gradnpu = gradnpu.to("cpu") + output = output.to("cpu") + return output.detach().numpy(), gradnpu.detach().numpy() + + def npu_op_scale_exec(self, input, grads, scale): + input.requires_grad_(True) + output = F.interpolate(input, scale_factor = scale, mode="nearest") + output.backward(grads) + gradnpu = input.grad + gradnpu = gradnpu.to("cpu") + output = output.to("cpu") + return output.detach().numpy(), gradnpu.detach().numpy() + + def test_upsample_nearest1d_backward_shape_format(self, device): + test_cases = [ + [[np.float16, 0, (1, 1, 2)], [4, ]], + [[np.float16, 0, (2, 1, 4)], [8, ]], + [[np.float32, 3, (2, 2, 3)], [1, ]], + [[np.float32, 0, (2, 1, 1)], [4, ]], + [[np.float32, 0, (4, 1, 2)], [4, ]], + [[np.float32, 0, (1, 1, 1)], [1, ]] + ] + for item in test_cases: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + size = list(item[0][2]) + size[2] = item[1][0] + + grad_item = [] + grad_item.append(item[0][0]) + grad_item.append(item[0][1]) + grad_item.append(size) + cpu_grads, npu_grads = create_common_tensor(grad_item, 0, 100) + + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + + if cpu_grads.dtype == torch.float16: + cpu_grads = cpu_grads.to(torch.float32) + + cpu_output, cpu_grad = self.cpu_op_exec(cpu_input, cpu_grads, item[1]) + npu_output, npu_grad = self.npu_op_exec(npu_input, npu_grads, item[1]) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_grad = cpu_grad.astype(npu_grad.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_grad, npu_grad) + + def test_upsample_nearest1d_backward_shape_format_scale(self, device): + test_cases = [ + [[np.float16, 0, (1, 1, 2)], 2], + [[np.float16, 0, (2, 1, 4)], 2.2], + [[np.float32, 3, (2, 2, 3)], 0.4], + [[np.float32, 0, (2, 1, 1)], 4], + [[np.float32, 0, (4, 1, 2)], 2], + [[np.float32, 0, (1, 1, 1)], 1] + ] + for item in test_cases: + cpu_input, npu_input = create_common_tensor(item[0], 0, 100) + + size = list(item[0][2]) + size[2] = item[1] * item[0][2][2] + size[2] = math.floor(size[2]) + + grad_item = [] + grad_item.append(item[0][0]) + grad_item.append(item[0][1]) + grad_item.append(size) + cpu_grads, npu_grads = create_common_tensor(grad_item, 0, 100) + + if cpu_input.dtype == torch.float16: + cpu_input = cpu_input.to(torch.float32) + + if cpu_grads.dtype == torch.float16: + cpu_grads = cpu_grads.to(torch.float32) + + cpu_output, cpu_grad = self.cpu_op_scale_exec(cpu_input, cpu_grads, item[1]) + npu_output, npu_grad = self.npu_op_scale_exec(npu_input, npu_grads, item[1]) + + cpu_output = cpu_output.astype(npu_output.dtype) + cpu_grad = cpu_grad.astype(npu_grad.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_grad, npu_grad) + +instantiate_device_type_tests(TestUpsampleNearest1DBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_where.py b/test/test_npu/test_network_ops/test_where.py old mode 100644 new mode 100755 index 971735f457b6ef16a2747cdd58bbc80227da5708..e9c21dadc9c8f1fd1bf25001f8d4f152d8288231 --- a/test/test_npu/test_network_ops/test_where.py +++ b/test/test_npu/test_network_ops/test_where.py @@ -1,128 +1,128 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestWhere(TestCase): - def cpu_op_exec(self, input1): - output = torch.where(input1) - output = list(output) - for i in range(len(output)): - output[i] = output[i].numpy().astype(np.int32) - return output - - def npu_op_exec(self, input1): - output = torch.where(input1) - output = list(output) - for i in range(len(output)): - output[i] = output[i].to("cpu").numpy().astype(np.int32) - return output - - def cpu_op_exec_condition(self, input1, ones): - output = torch.where(input1 > 0, input1, ones) - output = output.numpy() - return output - - def npu_op_exec_condition(self, input1, ones): - output = torch.where(input1 > 0, input1, ones) - output = output.to("cpu").numpy() - return output - - def cpu_op_exec_s(self, input1, ones): - output = torch._s_where(input1 > 0, input1, ones) - output = output.numpy() - return output - - def npu_op_exec_s(self, input1, ones): - output = torch._s_where(input1 > 0, input1, ones) - output = output.to("cpu").numpy() - return output - - def where_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) - cpu_ones = torch.ones_like(cpu_input1) - npu_ones = cpu_ones.to("npu") - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - cpu_ones = cpu_ones.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - - cpu_output_cond = self.cpu_op_exec_condition(cpu_input1, cpu_ones) - npu_output_cond = self.npu_op_exec_condition(npu_input1, npu_ones) - cpu_output_cond = cpu_output_cond.astype(npu_output_cond.dtype) - - cpu_output_s = self.cpu_op_exec_s(cpu_input1, cpu_ones) - npu_output_s = self.npu_op_exec_s(npu_input1, npu_ones) - cpu_output_s = cpu_output_s.astype(npu_output_s.dtype) - - for i in range(len(cpu_output)): - cpu_output[i] = cpu_output[i].astype(npu_output[i].dtype) - self.assertRtolEqual(cpu_output[i], npu_output[i]) - - self.assertRtolEqual(cpu_output_cond, npu_output_cond) - self.assertRtolEqual(cpu_output_s, npu_output_s) - - def test_where_shape_format_fp32_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float32, i, [18]] for i in format_list] - self.where_result(shape_format) - - def test_where_shape_format_fp32_2d(self, device): - format_list = [0] - shape_format = [[np.float32, i, [5, 256]] for i in format_list] - self.where_result(shape_format) - - def test_where_shape_format_fp32_3d(self, device): - format_list = [0] - shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] - self.where_result(shape_format) - - def test_where_shape_format_fp32_4d(self, device): - format_list = [0, 3] - shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] - self.where_result(shape_format) - - def test_where_shape_format_fp16_1d(self, device): - format_list = [0, 3] - shape_format = [[np.float16, i, [18]] for i in format_list] - self.where_result(shape_format) - - def test_where_shape_format_fp16_2d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [[np.float16, i, [5, 256]] for i in format_list] - self.where_result(shape_format) - - def test_where_shape_format_fp16_3d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] - self.where_result(shape_format) - - def test_where_shape_format_fp16_4d(self, device): - format_list = [0, 3, 4, 29] - shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] - self.where_result(shape_format) - - - -instantiate_device_type_tests(TestWhere, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestWhere(TestCase): + def cpu_op_exec(self, input1): + output = torch.where(input1) + output = list(output) + for i in range(len(output)): + output[i] = output[i].numpy().astype(np.int32) + return output + + def npu_op_exec(self, input1): + output = torch.where(input1) + output = list(output) + for i in range(len(output)): + output[i] = output[i].to("cpu").numpy().astype(np.int32) + return output + + def cpu_op_exec_condition(self, input1, ones): + output = torch.where(input1 > 0, input1, ones) + output = output.numpy() + return output + + def npu_op_exec_condition(self, input1, ones): + output = torch.where(input1 > 0, input1, ones) + output = output.to("cpu").numpy() + return output + + def cpu_op_exec_s(self, input1, ones): + output = torch._s_where(input1 > 0, input1, ones) + output = output.numpy() + return output + + def npu_op_exec_s(self, input1, ones): + output = torch._s_where(input1 > 0, input1, ones) + output = output.to("cpu").numpy() + return output + + def where_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, -100, 100) + cpu_ones = torch.ones_like(cpu_input1) + npu_ones = cpu_ones.to("npu") + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + cpu_ones = cpu_ones.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + + cpu_output_cond = self.cpu_op_exec_condition(cpu_input1, cpu_ones) + npu_output_cond = self.npu_op_exec_condition(npu_input1, npu_ones) + cpu_output_cond = cpu_output_cond.astype(npu_output_cond.dtype) + + cpu_output_s = self.cpu_op_exec_s(cpu_input1, cpu_ones) + npu_output_s = self.npu_op_exec_s(npu_input1, npu_ones) + cpu_output_s = cpu_output_s.astype(npu_output_s.dtype) + + for i in range(len(cpu_output)): + cpu_output[i] = cpu_output[i].astype(npu_output[i].dtype) + self.assertRtolEqual(cpu_output[i], npu_output[i]) + + self.assertRtolEqual(cpu_output_cond, npu_output_cond) + self.assertRtolEqual(cpu_output_s, npu_output_s) + + def test_where_shape_format_fp32_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float32, i, [18]] for i in format_list] + self.where_result(shape_format) + + def test_where_shape_format_fp32_2d(self, device): + format_list = [0] + shape_format = [[np.float32, i, [5, 256]] for i in format_list] + self.where_result(shape_format) + + def test_where_shape_format_fp32_3d(self, device): + format_list = [0] + shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] + self.where_result(shape_format) + + def test_where_shape_format_fp32_4d(self, device): + format_list = [0, 3] + shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] + self.where_result(shape_format) + + def test_where_shape_format_fp16_1d(self, device): + format_list = [0, 3] + shape_format = [[np.float16, i, [18]] for i in format_list] + self.where_result(shape_format) + + def test_where_shape_format_fp16_2d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [[np.float16, i, [5, 256]] for i in format_list] + self.where_result(shape_format) + + def test_where_shape_format_fp16_3d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] + self.where_result(shape_format) + + def test_where_shape_format_fp16_4d(self, device): + format_list = [0, 3, 4, 29] + shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] + self.where_result(shape_format) + + + +instantiate_device_type_tests(TestWhere, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_xor.py b/test/test_npu/test_network_ops/test_xor.py index 438f0a486321d2efb26b0147a36ee34e74295e5e..aea632524b7d1b63b60069895f35507b4ab74517 100644 --- a/test/test_npu/test_network_ops/test_xor.py +++ b/test/test_npu/test_network_ops/test_xor.py @@ -1,174 +1,174 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestXor(TestCase): - - def generate_bool_data(self, shape): - input1 = np.random.uniform(0, 1, shape) - input2 = np.random.uniform(0, 1, shape) - input1 = input1.reshape(-1) - input2 = input2.reshape(-1) - for i in range(len(input1)): - if input1[i] < 0.5: - input1[i] = 0 - for i in range(len(input2)): - if input2[i] < 0.5: - input2[i] = 0 - input1 = input1.astype(np.bool) - input2 = input2.astype(np.bool) - input1 = input1.reshape(shape) - input2 = input2.reshape(shape) - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - - return npu_input1, npu_input2 - - def generate_single_bool_data(self, shape): - input1 = np.random.uniform(0, 1, shape) - input1 = input1.reshape(-1) - for i in range(len(input1)): - if input1[i] < 0.5: - input1[i] = 0 - input1 = input1.astype(np.bool) - input1 = input1.reshape(shape) - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - - return npu_input1, npu_input2 - - def generate_single_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - - return npu_input1 - - def cpu_op_exec(self, input1, input2): - output = input1 ^ input2 - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = input1.__xor__(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_scalar(self, input1, input2): - input1 = input1.to("npu") - output = input1.__xor__(input2) - output = output.to("cpu") - output = output.numpy() - return output - - def test_xor_tensor_int32(self, device): - npu_input1 = self.generate_single_data(0, 100, (10, 10), np.int32) - npu_input2 = self.generate_single_data(0, 100, (10, 10), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_xor_tensor_int16(self, device): - npu_input1 = self.generate_single_data(0, 100, (10, 10), np.int16) - npu_input2 = self.generate_single_data(0, 100, (10, 10), np.int16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_xor_tensor_int8(self, device): - npu_input1 = self.generate_single_data(0, 100, (10, 10), np.int8) - npu_input2 = self.generate_single_data(0, 100, (10, 10), np.int8) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_xor_scalar_int32(self, device): - npu_input = self.generate_single_data(0, 100, (1, 10), np.int32) - npu_input_scalr = np.random.randint(0, 100) - cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) - npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) - self.assertEqual(cpu_output, npu_output) - - def test_xor_scalar_int16(self, device): - npu_input = self.generate_single_data(0, 100, (10, 20), np.int16) - npu_input_scalr = np.random.randint(0, 100) - cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) - npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) - self.assertEqual(cpu_output, npu_output) - - def test_xor_scalar_int8(self, device): - npu_input = self.generate_single_data(0, 100, (20, 10), np.int8) - npu_input_scalr = np.random.randint(0, 100) - cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) - npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) - self.assertEqual(cpu_output, npu_output) - - def test_xor_tensor_uint8(self, device): - npu_input1 = self.generate_single_data(0, 100, (10, 10), np.uint8) - npu_input2 = self.generate_single_data(0, 100, (10, 10), np.uint8) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - def test_xor_scalar_uint8(self, device): - npu_input = self.generate_single_data(0, 100, (5, 10), np.uint8) - npu_input_scalr = np.random.randint(0, 100) - cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) - npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) - self.assertEqual(cpu_output, npu_output) - - def test_xor_scalar_bool1(self, device): - npu_input = self.generate_single_bool_data((10, 10)) - npu_input_scalr = True - cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) - npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) - self.assertEqual(cpu_output, npu_output) - - def test_xor_scalar_bool2(self, device): - npu_input = self.generate_single_bool_data((10, 10)) - npu_input_scalr = False - cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) - npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) - self.assertEqual(cpu_output, npu_output) - - def test_xor_tensor_bool(self, device): - npu_input1, npu_input2 = self.generate_bool_data((10, 10)) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestXor, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestXor(TestCase): + + def generate_bool_data(self, shape): + input1 = np.random.uniform(0, 1, shape) + input2 = np.random.uniform(0, 1, shape) + input1 = input1.reshape(-1) + input2 = input2.reshape(-1) + for i in range(len(input1)): + if input1[i] < 0.5: + input1[i] = 0 + for i in range(len(input2)): + if input2[i] < 0.5: + input2[i] = 0 + input1 = input1.astype(np.bool) + input2 = input2.astype(np.bool) + input1 = input1.reshape(shape) + input2 = input2.reshape(shape) + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + + return npu_input1, npu_input2 + + def generate_single_bool_data(self, shape): + input1 = np.random.uniform(0, 1, shape) + input1 = input1.reshape(-1) + for i in range(len(input1)): + if input1[i] < 0.5: + input1[i] = 0 + input1 = input1.astype(np.bool) + input1 = input1.reshape(shape) + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + + return npu_input1, npu_input2 + + def generate_single_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + + return npu_input1 + + def cpu_op_exec(self, input1, input2): + output = input1 ^ input2 + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = input1.__xor__(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_scalar(self, input1, input2): + input1 = input1.to("npu") + output = input1.__xor__(input2) + output = output.to("cpu") + output = output.numpy() + return output + + def test_xor_tensor_int32(self, device): + npu_input1 = self.generate_single_data(0, 100, (10, 10), np.int32) + npu_input2 = self.generate_single_data(0, 100, (10, 10), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_xor_tensor_int16(self, device): + npu_input1 = self.generate_single_data(0, 100, (10, 10), np.int16) + npu_input2 = self.generate_single_data(0, 100, (10, 10), np.int16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_xor_tensor_int8(self, device): + npu_input1 = self.generate_single_data(0, 100, (10, 10), np.int8) + npu_input2 = self.generate_single_data(0, 100, (10, 10), np.int8) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_xor_scalar_int32(self, device): + npu_input = self.generate_single_data(0, 100, (1, 10), np.int32) + npu_input_scalr = np.random.randint(0, 100) + cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) + npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) + self.assertEqual(cpu_output, npu_output) + + def test_xor_scalar_int16(self, device): + npu_input = self.generate_single_data(0, 100, (10, 20), np.int16) + npu_input_scalr = np.random.randint(0, 100) + cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) + npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) + self.assertEqual(cpu_output, npu_output) + + def test_xor_scalar_int8(self, device): + npu_input = self.generate_single_data(0, 100, (20, 10), np.int8) + npu_input_scalr = np.random.randint(0, 100) + cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) + npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) + self.assertEqual(cpu_output, npu_output) + + def test_xor_tensor_uint8(self, device): + npu_input1 = self.generate_single_data(0, 100, (10, 10), np.uint8) + npu_input2 = self.generate_single_data(0, 100, (10, 10), np.uint8) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + def test_xor_scalar_uint8(self, device): + npu_input = self.generate_single_data(0, 100, (5, 10), np.uint8) + npu_input_scalr = np.random.randint(0, 100) + cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) + npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) + self.assertEqual(cpu_output, npu_output) + + def test_xor_scalar_bool1(self, device): + npu_input = self.generate_single_bool_data((10, 10)) + npu_input_scalr = True + cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) + npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) + self.assertEqual(cpu_output, npu_output) + + def test_xor_scalar_bool2(self, device): + npu_input = self.generate_single_bool_data((10, 10)) + npu_input_scalr = False + cpu_output = self.cpu_op_exec(npu_input, npu_input_scalr) + npu_output = self.npu_op_exec_scalar(npu_input, npu_input_scalr) + self.assertEqual(cpu_output, npu_output) + + def test_xor_tensor_bool(self, device): + npu_input1, npu_input2 = self.generate_bool_data((10, 10)) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestXor, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_yolo_boxes_encode.py b/test/test_npu/test_network_ops/test_yolo_boxes_encode.py index 57a0a7b30e40b0704b79fffcee376465eed89aca..71dce425b6c8258c817056810bf6a521797308c7 100644 --- a/test/test_npu/test_network_ops/test_yolo_boxes_encode.py +++ b/test/test_npu/test_network_ops/test_yolo_boxes_encode.py @@ -1,39 +1,39 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import copy -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -class TestYoloBoxesEncode(TestCase): - def npu_op_exec(self, anchor_boxes, gt_bboxes, stride, impl_mode=False): - out = torch.npu_yolo_boxes_encode(anchor_boxes, gt_bboxes, stride, impl_mode) - out = out.to("cpu") - return out.detach().numpy() - - def test_yolo_boxes_encode(self, device): - anchor_boxes = torch.rand((2, 4), dtype=torch.float32).to("npu") - gt_bboxes = torch.rand((2, 4), dtype=torch.float32).to("npu") - stride = torch.tensor([2, 2], dtype=torch.int32).to("npu") - expect_cpu = torch.tensor([[0.7921727, 0.5314963, -0.74224466, -13.815511], - [0.7360072, 0.58343244, 4.3334002, -0.51378196]], dtype=torch.float32) - npu_output = self.npu_op_exec(anchor_boxes, gt_bboxes, stride, False) - self.assertRtolEqual(expect_cpu.numpy(), npu_output) - - -instantiate_device_type_tests(TestYoloBoxesEncode, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np +import copy +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +class TestYoloBoxesEncode(TestCase): + def npu_op_exec(self, anchor_boxes, gt_bboxes, stride, impl_mode=False): + out = torch.npu_yolo_boxes_encode(anchor_boxes, gt_bboxes, stride, impl_mode) + out = out.to("cpu") + return out.detach().numpy() + + def test_yolo_boxes_encode(self, device): + anchor_boxes = torch.rand((2, 4), dtype=torch.float32).to("npu") + gt_bboxes = torch.rand((2, 4), dtype=torch.float32).to("npu") + stride = torch.tensor([2, 2], dtype=torch.int32).to("npu") + expect_cpu = torch.tensor([[0.7921727, 0.5314963, -0.74224466, -13.815511], + [0.7360072, 0.58343244, 4.3334002, -0.51378196]], dtype=torch.float32) + npu_output = self.npu_op_exec(anchor_boxes, gt_bboxes, stride, False) + self.assertRtolEqual(expect_cpu.numpy(), npu_output) + + +instantiate_device_type_tests(TestYoloBoxesEncode, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_zero.py b/test/test_npu/test_network_ops/test_zero.py old mode 100644 new mode 100755 index 3326b938aabb77dde1df548fc404822bc98c04ef..0781c411d300cd5ff1a5928cf5edc20b51d7608c --- a/test/test_npu/test_network_ops/test_zero.py +++ b/test/test_npu/test_network_ops/test_zero.py @@ -1,109 +1,109 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestZero(TestCase): - def cpu_op_exec(self, input1): - torch.zero_(input1) - output = input1.numpy() - return output - - def npu_op_exec(self, input1): - torch.zero_(input1) - output = input1.to("cpu") - output = output.numpy() - return output - - def zero_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - - cpu_output = cpu_output.astype(npu_output.dtype) - self.assertRtolEqual(cpu_output, npu_output) - - def test_zero_shape_format_fp16_1d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [18]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [5, 256]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_fp32_1d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [18]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [5, 256]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_int32_1d(self, device): - format_list = [-1, 0] - shape_format = [[np.int32, i, [18]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_int32_2d(self, device): - format_list = [-1, 0] - shape_format = [[np.int32, i, [5, 256]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_int32_3d(self, device): - format_list = [-1, 0] - shape_format = [[np.int32, i, [32, 3, 3]] for i in format_list] - self.zero_result(shape_format) - - def test_zero_shape_format_int32_4d(self, device): - format_list = [-1, 0] - shape_format = [[np.int32, i, [64, 112, 7, 7]] for i in format_list] - self.zero_result(shape_format) - - -instantiate_device_type_tests(TestZero, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestZero(TestCase): + def cpu_op_exec(self, input1): + torch.zero_(input1) + output = input1.numpy() + return output + + def npu_op_exec(self, input1): + torch.zero_(input1) + output = input1.to("cpu") + output = output.numpy() + return output + + def zero_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1) + npu_output = self.npu_op_exec(npu_input1) + + cpu_output = cpu_output.astype(npu_output.dtype) + self.assertRtolEqual(cpu_output, npu_output) + + def test_zero_shape_format_fp16_1d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [18]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [5, 256]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [32, 3, 3]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float16, i, [64, 112, 7, 7]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_fp32_1d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [18]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [5, 256]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [32, 3, 3]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_int32_1d(self, device): + format_list = [-1, 0] + shape_format = [[np.int32, i, [18]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_int32_2d(self, device): + format_list = [-1, 0] + shape_format = [[np.int32, i, [5, 256]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_int32_3d(self, device): + format_list = [-1, 0] + shape_format = [[np.int32, i, [32, 3, 3]] for i in format_list] + self.zero_result(shape_format) + + def test_zero_shape_format_int32_4d(self, device): + format_list = [-1, 0] + shape_format = [[np.int32, i, [64, 112, 7, 7]] for i in format_list] + self.zero_result(shape_format) + + +instantiate_device_type_tests(TestZero, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_zeros.py b/test/test_npu/test_network_ops/test_zeros.py old mode 100644 new mode 100755 index 66cdbf95d0494fa365dec3bf513c8cd3a7a88e35..bab4366186c6abb37b992914ea5cac04c259d158 --- a/test/test_npu/test_network_ops/test_zeros.py +++ b/test/test_npu/test_network_ops/test_zeros.py @@ -1,151 +1,151 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestZeros(TestCase): - def cpu_op_exec(self, input1, dtype): - output = torch.zeros(input1.size(), dtype=dtype, device="cpu") - output = output.numpy() - return output - - def npu_op_exec(self, input1, dtype): - output = torch.zeros(input1.size(), dtype=dtype, device="npu") - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_out(self, input1, input2, dtype): - torch.zeros(input1.size(), dtype=dtype, device="npu", out=input2) - output = input2.to("cpu") - output = output.numpy() - return output - - def zeros_result(self, shape_format): - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - npu_input2 = copy.deepcopy(cpu_input1) - npu_input2 = npu_input2.to(item[1]).to('npu') - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, item[1]) - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - self.assertRtolEqual(cpu_output, npu_output_out) - - def test_zeros_shape_format_names(self, device): - format_list = [0, 3, 29] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.float32, i, [18, 24, 8, 8]], j] for i in format_list for j in dtype_list] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) - if cpu_input1.dtype == torch.float16: - cpu_input1 = cpu_input1.to(torch.float32) - - cpu_output = torch.zeros(cpu_input1.size(), names=('N', 'C', 'H', 'W'), dtype=item[1], device="cpu") - cpu_output = cpu_output.numpy() - npu_output = torch.zeros(cpu_input1.size(), names=('N', 'C', 'H', 'W'), dtype=item[1], device="npu") - npu_output = npu_output.to("cpu") - npu_output = npu_output.numpy() - cpu_output = cpu_output.astype(npu_output.dtype) - - self.assertRtolEqual(cpu_output, npu_output) - - def test_zeros_shape_format_fp16_1d(self, device): - format_list = [0, 3, 29] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.float16, i, [18]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_fp16_2d(self, device): - format_list = [0, 3, 29] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.float16, i, [5, 256]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_fp16_3d(self, device): - format_list = [0, 3, 29] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.float16, i, [32, 3, 3]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_fp16_4d(self, device): - format_list = [0, 3, 29] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.float16, i, [64, 112, 7, 7]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_fp32_1d(self, device): - format_list = [0, 3, 29] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.float32, i, [18]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_fp32_2d(self, device): - format_list = [0, 3, 29] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.float32, i, [5, 256]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_fp32_3d(self, device): - format_list = [0, 3, 29] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.float32, i, [32, 3, 3]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_fp32_4d(self, device): - format_list = [0, 3, 29] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.float32, i, [64, 112, 7, 7]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_int32_1d(self, device): - format_list = [-1, 0] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.int32, i, [18]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_int32_2d(self, device): - format_list = [-1, 0] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.int32, i, [5, 256]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_int32_3d(self, device): - format_list = [-1, 0] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.int32, i, [32, 3, 3]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - def test_zeros_shape_format_int32_4d(self, device): - format_list = [-1, 0] - dtype_list = [torch.float16, torch.float32, torch.int32] - shape_format = [[[np.int32, i, [64, 112, 7, 7]], j] for i in format_list for j in dtype_list] - self.zeros_result(shape_format) - - -instantiate_device_type_tests(TestZeros, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import torch.nn as nn +import numpy as np +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestZeros(TestCase): + def cpu_op_exec(self, input1, dtype): + output = torch.zeros(input1.size(), dtype=dtype, device="cpu") + output = output.numpy() + return output + + def npu_op_exec(self, input1, dtype): + output = torch.zeros(input1.size(), dtype=dtype, device="npu") + output = output.to("cpu") + output = output.numpy() + return output + + def npu_op_exec_out(self, input1, input2, dtype): + torch.zeros(input1.size(), dtype=dtype, device="npu", out=input2) + output = input2.to("cpu") + output = output.numpy() + return output + + def zeros_result(self, shape_format): + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + npu_input2 = copy.deepcopy(cpu_input1) + npu_input2 = npu_input2.to(item[1]).to('npu') + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input1, item[1]) + npu_output = self.npu_op_exec(npu_input1, item[1]) + npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, item[1]) + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output_out) + + def test_zeros_shape_format_names(self, device): + format_list = [0, 3, 29] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.float32, i, [18, 24, 8, 8]], j] for i in format_list for j in dtype_list] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100) + if cpu_input1.dtype == torch.float16: + cpu_input1 = cpu_input1.to(torch.float32) + + cpu_output = torch.zeros(cpu_input1.size(), names=('N', 'C', 'H', 'W'), dtype=item[1], device="cpu") + cpu_output = cpu_output.numpy() + npu_output = torch.zeros(cpu_input1.size(), names=('N', 'C', 'H', 'W'), dtype=item[1], device="npu") + npu_output = npu_output.to("cpu") + npu_output = npu_output.numpy() + cpu_output = cpu_output.astype(npu_output.dtype) + + self.assertRtolEqual(cpu_output, npu_output) + + def test_zeros_shape_format_fp16_1d(self, device): + format_list = [0, 3, 29] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.float16, i, [18]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_fp16_2d(self, device): + format_list = [0, 3, 29] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.float16, i, [5, 256]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_fp16_3d(self, device): + format_list = [0, 3, 29] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.float16, i, [32, 3, 3]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_fp16_4d(self, device): + format_list = [0, 3, 29] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.float16, i, [64, 112, 7, 7]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_fp32_1d(self, device): + format_list = [0, 3, 29] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.float32, i, [18]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_fp32_2d(self, device): + format_list = [0, 3, 29] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.float32, i, [5, 256]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_fp32_3d(self, device): + format_list = [0, 3, 29] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.float32, i, [32, 3, 3]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_fp32_4d(self, device): + format_list = [0, 3, 29] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.float32, i, [64, 112, 7, 7]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_int32_1d(self, device): + format_list = [-1, 0] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.int32, i, [18]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_int32_2d(self, device): + format_list = [-1, 0] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.int32, i, [5, 256]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_int32_3d(self, device): + format_list = [-1, 0] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.int32, i, [32, 3, 3]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + def test_zeros_shape_format_int32_4d(self, device): + format_list = [-1, 0] + dtype_list = [torch.float16, torch.float32, torch.int32] + shape_format = [[[np.int32, i, [64, 112, 7, 7]], j] for i in format_list for j in dtype_list] + self.zeros_result(shape_format) + + +instantiate_device_type_tests(TestZeros, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_zeroslike.py b/test/test_npu/test_network_ops/test_zeroslike.py old mode 100644 new mode 100755 index 30a7b2a0777c65700b1316bef432e11833d0ab34..3dcd1c3c826d4a24cd696cd8e50f64133eb3b310 --- a/test/test_npu/test_network_ops/test_zeroslike.py +++ b/test/test_npu/test_network_ops/test_zeroslike.py @@ -1,65 +1,65 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestZerosLike(TestCase): - def cpu_op_exec(self, input1, dtype): - output = torch.zeros_like(input1, dtype=dtype) - output = output.numpy() - return output - - def npu_op_exec(self, input1, dtype): - output = torch.zeros_like(input1, dtype=dtype) - output = output.to("cpu") - output = output.numpy() - return output - - def test_zeroslike_fp32(self, device): - format_list = [0, 3, 29] - shape_list = [1, (1000, 1280), (32, 3, 3), (32, 144, 1, 1)] - shape_format = [ - [np.float32, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_output = self.cpu_op_exec(cpu_input, torch.float32) - npu_output = self.npu_op_exec(npu_input, torch.float32) - self.assertRtolEqual(cpu_output, npu_output) - - def test_zeroslike_fp16(self, device): - format_list = [0, 3, 29] - shape_list = [1, (1000, 1280), (32, 3, 3), (32, 144, 1, 1)] - shape_format = [ - [np.float16, i, j] for i in format_list for j in shape_list - ] - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 0, 100) - cpu_input = cpu_input.to(torch.float32) - cpu_output = self.cpu_op_exec(cpu_input, torch.float16) - npu_output = self.npu_op_exec(npu_input, torch.float16) - cpu_output = cpu_output.astype(np.float16) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestZerosLike, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestZerosLike(TestCase): + def cpu_op_exec(self, input1, dtype): + output = torch.zeros_like(input1, dtype=dtype) + output = output.numpy() + return output + + def npu_op_exec(self, input1, dtype): + output = torch.zeros_like(input1, dtype=dtype) + output = output.to("cpu") + output = output.numpy() + return output + + def test_zeroslike_fp32(self, device): + format_list = [0, 3, 29] + shape_list = [1, (1000, 1280), (32, 3, 3), (32, 144, 1, 1)] + shape_format = [ + [np.float32, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_output = self.cpu_op_exec(cpu_input, torch.float32) + npu_output = self.npu_op_exec(npu_input, torch.float32) + self.assertRtolEqual(cpu_output, npu_output) + + def test_zeroslike_fp16(self, device): + format_list = [0, 3, 29] + shape_list = [1, (1000, 1280), (32, 3, 3), (32, 144, 1, 1)] + shape_format = [ + [np.float16, i, j] for i in format_list for j in shape_list + ] + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 0, 100) + cpu_input = cpu_input.to(torch.float32) + cpu_output = self.cpu_op_exec(cpu_input, torch.float16) + npu_output = self.npu_op_exec(npu_input, torch.float16) + cpu_output = cpu_output.astype(np.float16) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestZerosLike, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/util_test.py b/test/test_npu/test_network_ops/util_test.py old mode 100644 new mode 100755 index fd0682982f94a94558efc1d331217e5467978aa3..e60fe1997e3affac44a6540a97d62acc79d95c98 --- a/test/test_npu/test_network_ops/util_test.py +++ b/test/test_npu/test_network_ops/util_test.py @@ -1,22 +1,22 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -common_path = os.path.dirname("../common/") -if common_path not in sys.path: - sys.path.append(common_path) -from util_test_new import create_common_tensor, test_2args_broadcast, create_dtype_tensor, UT_FAST_MODE - +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +common_path = os.path.dirname("../common/") +if common_path not in sys.path: + sys.path.append(common_path) +from util_test_new import create_common_tensor, test_2args_broadcast, create_dtype_tensor, UT_FAST_MODE + diff --git a/test/test_npu/test_ones_like.py b/test/test_npu/test_ones_like.py index 512b0ff03116ecbbfa2490a86b31c8bef5a16fbc..6b075e0fcee26aa11514964ae5ae4c3b39b101a8 100644 --- a/test/test_npu/test_ones_like.py +++ b/test/test_npu/test_ones_like.py @@ -1,81 +1,81 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from util_test import create_common_tensor - -class TestOnesLike(TestCase): - - def cpu_op_exec(self, input1): - output = torch.ones_like(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = torch.ones_like(input1) - output = output.to('cpu') - output = output.numpy() - return output - - def test_ones_like_shape_format(self, device): - shape_format = [ - [np.float32, -1, (3, )], - [np.float32, -1, (2, 4)], - [np.float32, -1, (3, 6, 9)], - [np.int8, -1, (3,)], - [np.int8, -1, (2, 4)], - [np.int8, -1, (3, 6, 9)], - [np.int32, -1, (3,)], - [np.int32, -1, (2, 4)], - [np.int32, -1, (3, 6, 9)], - [np.uint8, -1, (3,)], - [np.uint8, -1, (2, 4)], - [np.uint8, -1, (3, 6, 9)] - ] - - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 100) - - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - - self.assertRtolEqual(cpu_output, npu_output) - - - def test_ones_like_float16_shape_format(self, device): - shape_format = [ - [np.float16, -1, (3, )], - [np.float16, -1, (2, 4)], - [np.float16, -1, (3, 6, 9)], - [np.float16, -1, (3, 4, 5, 12)] - ] - - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item, 1, 100) - - cpu_input = cpu_input.to(torch.float32) - - cpu_output = self.cpu_op_exec(cpu_input) - npu_output = self.npu_op_exec(npu_input) - - cpu_output = cpu_output.astype(np.float16) - - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestOnesLike, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:5") +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from util_test import create_common_tensor + +class TestOnesLike(TestCase): + + def cpu_op_exec(self, input1): + output = torch.ones_like(input1) + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = torch.ones_like(input1) + output = output.to('cpu') + output = output.numpy() + return output + + def test_ones_like_shape_format(self, device): + shape_format = [ + [np.float32, -1, (3, )], + [np.float32, -1, (2, 4)], + [np.float32, -1, (3, 6, 9)], + [np.int8, -1, (3,)], + [np.int8, -1, (2, 4)], + [np.int8, -1, (3, 6, 9)], + [np.int32, -1, (3,)], + [np.int32, -1, (2, 4)], + [np.int32, -1, (3, 6, 9)], + [np.uint8, -1, (3,)], + [np.uint8, -1, (2, 4)], + [np.uint8, -1, (3, 6, 9)] + ] + + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 100) + + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + + self.assertRtolEqual(cpu_output, npu_output) + + + def test_ones_like_float16_shape_format(self, device): + shape_format = [ + [np.float16, -1, (3, )], + [np.float16, -1, (2, 4)], + [np.float16, -1, (3, 6, 9)], + [np.float16, -1, (3, 4, 5, 12)] + ] + + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item, 1, 100) + + cpu_input = cpu_input.to(torch.float32) + + cpu_output = self.cpu_op_exec(cpu_input) + npu_output = self.npu_op_exec(npu_input) + + cpu_output = cpu_output.astype(np.float16) + + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestOnesLike, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:5") run_tests() \ No newline at end of file diff --git a/test/test_npu/test_onnx/torch.onnx/eval/onnx/cp_onnx_eval.py b/test/test_npu/test_onnx/torch.onnx/eval/onnx/cp_onnx_eval.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/eval/onnxrt/onnxrt_eval.py b/test/test_npu/test_onnx/torch.onnx/eval/onnxrt/onnxrt_eval.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/export/cp_parser.py b/test/test_npu/test_onnx/torch.onnx/export/cp_parser.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/export/export_onnx.py b/test/test_npu/test_onnx/torch.onnx/export/export_onnx.py old mode 100644 new mode 100755 index f7d79c7d9ee2293a7ffb1d260d3e444f2d2708b4..4c3a0e64506aba2d06dfc0ff355d84ef417f7376 --- a/test/test_npu/test_onnx/torch.onnx/export/export_onnx.py +++ b/test/test_npu/test_onnx/torch.onnx/export/export_onnx.py @@ -1,90 +1,90 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torchvision -from export.cp_parser import * - -def getDeviceStr(deviceStr, DeviceNo): - #print("cp_getDeviceId test device : ","(", deviceStr," ", DeviceNo, ")") - if DeviceNo == None: - return deviceStr - if deviceStr == 'cpu': - return deviceStr - elif deviceStr == 'npu' or deviceStr == 'cuda': - loc = '{}:{}'.format(deviceStr, DeviceNo) - return loc - else: - return deviceStr - - -def cp2onnx(model,cpfile,onnxfile, input_data, ispth=False,device="cpu",dno=None): - if os.path.isfile(cpfile): - #model = torchvision.models.resnet50(pretrained=False) - model = cp_load(model,cpfile,ispth=ispth,device=device,dno=dno) - else : - print("warning : \"",cpfile,"\"not exist!") - model.state_dict() - deviceStr = getDeviceStr(device,dno) - print("cp2onnx device: ",deviceStr,"(",device," ",dno,")") - #torch.npu.set_device("npu:0") - #dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') - dummy_input = input_data.to(deviceStr) - - # Providing input and output names sets the display names for values - # within the model's graph. Setting these does not change the semantics - # of the graph; it is only for readability. - # - # The inputs to the network consist of the flat list of inputs (i.e. - # the values you would pass to the forward() method) followed by the - # flat list of parameters. You can partially specify names, i.e. provide - # a list here shorter than the number of inputs to the model, and we will - # only set that subset of names, starting from the beginning. - input_names = [ "actual_input_1" ] #+ [ "learned_%d" % i for i in range(16) ] - output_names = [ "output1" ] - model = model.to(deviceStr) - torch.onnx.export(model, dummy_input, onnxfile, verbose=True, input_names=input_names, output_names=output_names,opset_version=11) - - -def cp2onnx_dynamic_axes(model,cpfile,onnxfile,device="cuda",dno=None): - if os.path.isfile(cpfile): - #model = torchvision.models.resnet50(pretrained=False) - model = cp_load(model,cpfile) - else : - print("warning : \"",cpfile,"\"not exist!") - model.state_dict() - deviceStr = getDeviceStr(device,dno) - #torch.npu.set_device("npu:0") - #dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') - dummy_input = torch.randn(10, 3, 224, 224) - dummy_input = dummy_input.to(deviceStr) - - # Providing input and output names sets the display names for values - # within the model's graph. Setting these does not change the semantics - # of the graph; it is only for readability. - # - # The inputs to the network consist of the flat list of inputs (i.e. - # the values you would pass to the forward() method) followed by the - # flat list of parameters. You can partially specify names, i.e. provide - # a list here shorter than the number of inputs to the model, and we will - # only set that subset of names, starting from the beginning. - input_names = [ "actual_input_1" ] #+ [ "learned_%d" % i for i in range(16) ] - output_names = [ "output1" ] - model = model.to(deviceStr) - dynamic_axes = {'actual_input_1': {0: '-1'}, 'output1': {0: '-1'}} - torch.onnx.export(model, dummy_input, onnxfile, verbose=True, input_names=input_names, output_names=output_names,dynamic_axes=dynamic_axes,opset_version=11) - - +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torchvision +from export.cp_parser import * + +def getDeviceStr(deviceStr, DeviceNo): + #print("cp_getDeviceId test device : ","(", deviceStr," ", DeviceNo, ")") + if DeviceNo == None: + return deviceStr + if deviceStr == 'cpu': + return deviceStr + elif deviceStr == 'npu' or deviceStr == 'cuda': + loc = '{}:{}'.format(deviceStr, DeviceNo) + return loc + else: + return deviceStr + + +def cp2onnx(model,cpfile,onnxfile, input_data, ispth=False,device="cpu",dno=None): + if os.path.isfile(cpfile): + #model = torchvision.models.resnet50(pretrained=False) + model = cp_load(model,cpfile,ispth=ispth,device=device,dno=dno) + else : + print("warning : \"",cpfile,"\"not exist!") + model.state_dict() + deviceStr = getDeviceStr(device,dno) + print("cp2onnx device: ",deviceStr,"(",device," ",dno,")") + #torch.npu.set_device("npu:0") + #dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') + dummy_input = input_data.to(deviceStr) + + # Providing input and output names sets the display names for values + # within the model's graph. Setting these does not change the semantics + # of the graph; it is only for readability. + # + # The inputs to the network consist of the flat list of inputs (i.e. + # the values you would pass to the forward() method) followed by the + # flat list of parameters. You can partially specify names, i.e. provide + # a list here shorter than the number of inputs to the model, and we will + # only set that subset of names, starting from the beginning. + input_names = [ "actual_input_1" ] #+ [ "learned_%d" % i for i in range(16) ] + output_names = [ "output1" ] + model = model.to(deviceStr) + torch.onnx.export(model, dummy_input, onnxfile, verbose=True, input_names=input_names, output_names=output_names,opset_version=11) + + +def cp2onnx_dynamic_axes(model,cpfile,onnxfile,device="cuda",dno=None): + if os.path.isfile(cpfile): + #model = torchvision.models.resnet50(pretrained=False) + model = cp_load(model,cpfile) + else : + print("warning : \"",cpfile,"\"not exist!") + model.state_dict() + deviceStr = getDeviceStr(device,dno) + #torch.npu.set_device("npu:0") + #dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') + dummy_input = torch.randn(10, 3, 224, 224) + dummy_input = dummy_input.to(deviceStr) + + # Providing input and output names sets the display names for values + # within the model's graph. Setting these does not change the semantics + # of the graph; it is only for readability. + # + # The inputs to the network consist of the flat list of inputs (i.e. + # the values you would pass to the forward() method) followed by the + # flat list of parameters. You can partially specify names, i.e. provide + # a list here shorter than the number of inputs to the model, and we will + # only set that subset of names, starting from the beginning. + input_names = [ "actual_input_1" ] #+ [ "learned_%d" % i for i in range(16) ] + output_names = [ "output1" ] + model = model.to(deviceStr) + dynamic_axes = {'actual_input_1': {0: '-1'}, 'output1': {0: '-1'}} + torch.onnx.export(model, dummy_input, onnxfile, verbose=True, input_names=input_names, output_names=output_names,dynamic_axes=dynamic_axes,opset_version=11) + + diff --git a/test/test_npu/test_onnx/torch.onnx/export/model_export-cpu.py b/test/test_npu/test_onnx/torch.onnx/export/model_export-cpu.py old mode 100644 new mode 100755 index 85a422688d14aab2d9870c32c957ee3ddd26d509..c6049ec810867bdb0b18d36089a7ccd58f41793e --- a/test/test_npu/test_onnx/torch.onnx/export/model_export-cpu.py +++ b/test/test_npu/test_onnx/torch.onnx/export/model_export-cpu.py @@ -1,48 +1,48 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torchvision -# torch.cpu.set_device("cpu:0") -#dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') -dummy_input = torch.randn(10, 3, 224, 224) -dummy_input = dummy_input.to("cpu") -model = torchvision.models.resnet50(pretrained=False) - -# Providing input and output names sets the display names for values -# within the model's graph. Setting these does not change the semantics -# of the graph; it is only for readability. -# -# The inputs to the network consist of the flat list of inputs (i.e. -# the values you would pass to the forward() method) followed by the -# flat list of parameters. You can partially specify names, i.e. provide -# a list here shorter than the number of inputs to the model, and we will -# only set that subset of names, starting from the beginning. -input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ] -output_names = [ "output1" ] -model = model.to("cpu") -torch.onnx.export(model, dummy_input, "resnet50.onnx", verbose=True, input_names=input_names, output_names=output_names) - - - -# 有坑 会提示下载不下来 修改下resnet.py,手动下载下来,然后放到 D:/Pytorch/models 目录下。 -def _resnet(arch, block, layers, pretrained, progress, **kwargs): - model = ResNet(block, layers, **kwargs) - if pretrained: - state_dict = load_state_dict_from_url(model_urls[arch], model_dir="D:/Pytorch/models", - progress=progress) - model.load_state_dict(state_dict) - return model +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torchvision +# torch.cpu.set_device("cpu:0") +#dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') +dummy_input = torch.randn(10, 3, 224, 224) +dummy_input = dummy_input.to("cpu") +model = torchvision.models.resnet50(pretrained=False) + +# Providing input and output names sets the display names for values +# within the model's graph. Setting these does not change the semantics +# of the graph; it is only for readability. +# +# The inputs to the network consist of the flat list of inputs (i.e. +# the values you would pass to the forward() method) followed by the +# flat list of parameters. You can partially specify names, i.e. provide +# a list here shorter than the number of inputs to the model, and we will +# only set that subset of names, starting from the beginning. +input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ] +output_names = [ "output1" ] +model = model.to("cpu") +torch.onnx.export(model, dummy_input, "resnet50.onnx", verbose=True, input_names=input_names, output_names=output_names) + + + +# 有坑 会提示下载不下来 修改下resnet.py,手动下载下来,然后放到 D:/Pytorch/models 目录下。 +def _resnet(arch, block, layers, pretrained, progress, **kwargs): + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], model_dir="D:/Pytorch/models", + progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/test/test_npu/test_onnx/torch.onnx/export/model_export-gpu.py b/test/test_npu/test_onnx/torch.onnx/export/model_export-gpu.py old mode 100644 new mode 100755 index 9e2e11bcb02493a2a3f12ea544e53cc1e40d1d51..31e2f9c7ddc2003a4f160ec54fd40eea35036176 --- a/test/test_npu/test_onnx/torch.onnx/export/model_export-gpu.py +++ b/test/test_npu/test_onnx/torch.onnx/export/model_export-gpu.py @@ -1,48 +1,48 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torchvision -#torch.npu.set_device("npu:0") -#dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') -dummy_input = torch.randn(10, 3, 224, 224) -dummy_input = dummy_input.to("cuda") -model = torchvision.models.resnet50(pretrained=False) - -# Providing input and output names sets the display names for values -# within the model's graph. Setting these does not change the semantics -# of the graph; it is only for readability. -# -# The inputs to the network consist of the flat list of inputs (i.e. -# the values you would pass to the forward() method) followed by the -# flat list of parameters. You can partially specify names, i.e. provide -# a list here shorter than the number of inputs to the model, and we will -# only set that subset of names, starting from the beginning. -input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ] -output_names = [ "output1" ] -model = model.to("cuda") -torch.onnx.export(model, dummy_input, "resnet50.onnx", verbose=True, input_names=input_names, output_names=output_names) - - - -# 有坑 会提示下载不下来 修改下resnet.py,手动下载下来,然后放到 D:/Pytorch/models 目录下。 -def _resnet(arch, block, layers, pretrained, progress, **kwargs): - model = ResNet(block, layers, **kwargs) - if pretrained: - state_dict = load_state_dict_from_url(model_urls[arch], model_dir="D:/Pytorch/models", - progress=progress) - model.load_state_dict(state_dict) - return model +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torchvision +#torch.npu.set_device("npu:0") +#dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') +dummy_input = torch.randn(10, 3, 224, 224) +dummy_input = dummy_input.to("cuda") +model = torchvision.models.resnet50(pretrained=False) + +# Providing input and output names sets the display names for values +# within the model's graph. Setting these does not change the semantics +# of the graph; it is only for readability. +# +# The inputs to the network consist of the flat list of inputs (i.e. +# the values you would pass to the forward() method) followed by the +# flat list of parameters. You can partially specify names, i.e. provide +# a list here shorter than the number of inputs to the model, and we will +# only set that subset of names, starting from the beginning. +input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ] +output_names = [ "output1" ] +model = model.to("cuda") +torch.onnx.export(model, dummy_input, "resnet50.onnx", verbose=True, input_names=input_names, output_names=output_names) + + + +# 有坑 会提示下载不下来 修改下resnet.py,手动下载下来,然后放到 D:/Pytorch/models 目录下。 +def _resnet(arch, block, layers, pretrained, progress, **kwargs): + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], model_dir="D:/Pytorch/models", + progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/test/test_npu/test_onnx/torch.onnx/export/model_export-npu.py b/test/test_npu/test_onnx/torch.onnx/export/model_export-npu.py old mode 100644 new mode 100755 index 030d7492613c47f64bdf61f624767bed123687d5..aeac7133b6496daf43023b190c452779c574a393 --- a/test/test_npu/test_onnx/torch.onnx/export/model_export-npu.py +++ b/test/test_npu/test_onnx/torch.onnx/export/model_export-npu.py @@ -1,48 +1,48 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torchvision -torch.npu.set_device("npu:0") -#dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') -dummy_input = torch.randn(10, 3, 224, 224) -dummy_input = dummy_input.to("npu") -model = torchvision.models.resnet50(pretrained=False) - -# Providing input and output names sets the display names for values -# within the model's graph. Setting these does not change the semantics -# of the graph; it is only for readability. -# -# The inputs to the network consist of the flat list of inputs (i.e. -# the values you would pass to the forward() method) followed by the -# flat list of parameters. You can partially specify names, i.e. provide -# a list here shorter than the number of inputs to the model, and we will -# only set that subset of names, starting from the beginning. -input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ] -output_names = [ "output1" ] -model = model.to("npu") -torch.onnx.export(model, dummy_input, "resnet50.onnx", verbose=True, input_names=input_names, output_names=output_names) - - - -# 有坑 会提示下载不下来 修改下resnet.py,手动下载下来,然后放到 D:/Pytorch/models 目录下。 -def _resnet(arch, block, layers, pretrained, progress, **kwargs): - model = ResNet(block, layers, **kwargs) - if pretrained: - state_dict = load_state_dict_from_url(model_urls[arch], model_dir="D:/Pytorch/models", - progress=progress) - model.load_state_dict(state_dict) - return model +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torchvision +torch.npu.set_device("npu:0") +#dummy_input = torch.randn(10, 3, 224, 224, device='npu:0') +dummy_input = torch.randn(10, 3, 224, 224) +dummy_input = dummy_input.to("npu") +model = torchvision.models.resnet50(pretrained=False) + +# Providing input and output names sets the display names for values +# within the model's graph. Setting these does not change the semantics +# of the graph; it is only for readability. +# +# The inputs to the network consist of the flat list of inputs (i.e. +# the values you would pass to the forward() method) followed by the +# flat list of parameters. You can partially specify names, i.e. provide +# a list here shorter than the number of inputs to the model, and we will +# only set that subset of names, starting from the beginning. +input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ] +output_names = [ "output1" ] +model = model.to("npu") +torch.onnx.export(model, dummy_input, "resnet50.onnx", verbose=True, input_names=input_names, output_names=output_names) + + + +# 有坑 会提示下载不下来 修改下resnet.py,手动下载下来,然后放到 D:/Pytorch/models 目录下。 +def _resnet(arch, block, layers, pretrained, progress, **kwargs): + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], model_dir="D:/Pytorch/models", + progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/test/test_npu/test_onnx/torch.onnx/export/model_export.py b/test/test_npu/test_onnx/torch.onnx/export/model_export.py old mode 100644 new mode 100755 index a5b1b6b3d3503c3b6aed180e065d6388bee9960e..66101388d186896b7c0f13473eb7ceafdaeb0e7e --- a/test/test_npu/test_onnx/torch.onnx/export/model_export.py +++ b/test/test_npu/test_onnx/torch.onnx/export/model_export.py @@ -1,48 +1,48 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torchvision - -dummy_input = torch.randn(10, 3, 224, 224, device='cpu') -#dummy_input = torch.randn(10, 3, 224, 224) -#dummy_input = dummy_input.to("npu") -model = torchvision.models.resnet50(pretrained=False) - -# Providing input and output names sets the display names for values -# within the model's graph. Setting these does not change the semantics -# of the graph; it is only for readability. -# -# The inputs to the network consist of the flat list of inputs (i.e. -# the values you would pass to the forward() method) followed by the -# flat list of parameters. You can partially specify names, i.e. provide -# a list here shorter than the number of inputs to the model, and we will -# only set that subset of names, starting from the beginning. -input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ] -output_names = [ "output1" ] - -torch.onnx.export(model, dummy_input, "resnet50.onnx", verbose=True, input_names=input_names, output_names=output_names) - - - -# 有坑 会提示下载不下来 修改下resnet.py,手动下载下来,然后放到 D:/Pytorch/models 目录下。 -def _resnet(arch, block, layers, pretrained, progress, **kwargs): - model = ResNet(block, layers, **kwargs) - if pretrained: - state_dict = load_state_dict_from_url(model_urls[arch], model_dir="D:/Pytorch/models", - progress=progress) - model.load_state_dict(state_dict) - return model +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torchvision + +dummy_input = torch.randn(10, 3, 224, 224, device='cpu') +#dummy_input = torch.randn(10, 3, 224, 224) +#dummy_input = dummy_input.to("npu") +model = torchvision.models.resnet50(pretrained=False) + +# Providing input and output names sets the display names for values +# within the model's graph. Setting these does not change the semantics +# of the graph; it is only for readability. +# +# The inputs to the network consist of the flat list of inputs (i.e. +# the values you would pass to the forward() method) followed by the +# flat list of parameters. You can partially specify names, i.e. provide +# a list here shorter than the number of inputs to the model, and we will +# only set that subset of names, starting from the beginning. +input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ] +output_names = [ "output1" ] + +torch.onnx.export(model, dummy_input, "resnet50.onnx", verbose=True, input_names=input_names, output_names=output_names) + + + +# 有坑 会提示下载不下来 修改下resnet.py,手动下载下来,然后放到 D:/Pytorch/models 目录下。 +def _resnet(arch, block, layers, pretrained, progress, **kwargs): + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], model_dir="D:/Pytorch/models", + progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/test/test_npu/test_onnx/torch.onnx/export/onnx_parser.py b/test/test_npu/test_onnx/torch.onnx/export/onnx_parser.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/main.py b/test/test_npu/test_onnx/torch.onnx/main.py old mode 100644 new mode 100755 index 28317d74ef5ff0f9b4bc602667261a045f95422e..b340984068d8050af9225149ef09ab96a7fa5bbf --- a/test/test_npu/test_onnx/torch.onnx/main.py +++ b/test/test_npu/test_onnx/torch.onnx/main.py @@ -1,41 +1,41 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: utf8 - -import logging - -logger_level_relations = { - 'debug':logging.DEBUG, - 'info':logging.INFO, - 'warning':logging.WARNING, - 'error':logging.ERROR, - 'crit':logging.CRITICAL - }#־ϵӳ - -loggerScreanHander = logging.StreamHandler() -if 0 : - loggerScreanHander.setFormatter(logging.Formatter('%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s') ) -else: - loggerScreanHander.setFormatter(logging.Formatter('%(message)s') ) - -logger = logging.getLogger('torch.onnx.export.test.main') -logger.addHandler(loggerScreanHander) -logger.setLevel(logger_level_relations.get('debug')) -logger.debug('__file__={0:<35} | __name__={1:<20} | __package__={2:<20}'.format(__file__,__name__,str(__package__))) - -import export.cp2cp -import export.model2onnx +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf8 + +import logging + +logger_level_relations = { + 'debug':logging.DEBUG, + 'info':logging.INFO, + 'warning':logging.WARNING, + 'error':logging.ERROR, + 'crit':logging.CRITICAL + }#־ϵӳ + +loggerScreanHander = logging.StreamHandler() +if 0 : + loggerScreanHander.setFormatter(logging.Formatter('%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s') ) +else: + loggerScreanHander.setFormatter(logging.Formatter('%(message)s') ) + +logger = logging.getLogger('torch.onnx.export.test.main') +logger.addHandler(loggerScreanHander) +logger.setLevel(logger_level_relations.get('debug')) +logger.debug('__file__={0:<35} | __name__={1:<20} | __package__={2:<20}'.format(__file__,__name__,str(__package__))) + +import export.cp2cp +import export.model2onnx diff --git a/test/test_npu/test_pairwise_distance.py b/test/test_npu/test_pairwise_distance.py index bfdf729e6c711c756183591cc13825a93fe88c17..6acc37211646412f92a07a26456729fac283c4b1 100644 --- a/test/test_npu/test_pairwise_distance.py +++ b/test/test_npu/test_pairwise_distance.py @@ -1,160 +1,160 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestPairwiseDistance(TestCase): - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - return npu_input1, npu_input2 - - def cpu_op_exec_default(self, input1, input2): - stype = input1.dtype - if stype == torch.float16: - input1 = input1.float() - input2 = input2.float() - pdist = torch.nn.PairwiseDistance() - output = pdist(input1, input2) - if stype == torch.float16: - output = output.half() - output = output.numpy() - return output - - def npu_op_exec_default(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - pdist = torch.nn.PairwiseDistance() - output = pdist(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec(self, input1, input2, p, eps, keepdim): - stype = input1.dtype - if stype == torch.float16: - input1 = input1.float() - input2 = input2.float() - pdist = torch.nn.PairwiseDistance(p=p, eps=eps, keepdim=keepdim) - output = pdist(input1, input2) - if stype == torch.float16: - output = output.half() - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2, p, eps, keepdim): - input1 = input1.to("npu") - input2 = input2.to("npu") - pdist = torch.nn.PairwiseDistance(p=p, eps=eps, keepdim=keepdim) - output = pdist(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def test_pairwise_distance_5_360_float16(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (5, 360), np.float16) - cpu_output = self.cpu_op_exec_default(npu_input1, npu_input2) - npu_output = self.npu_op_exec_default(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_10_3600_30_float32(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (10, 3600, 30), np.float32) - cpu_output = self.cpu_op_exec_default(npu_input1, npu_input2) - npu_output = self.npu_op_exec_default(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_505_10_30_float32(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 10, 30), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, 1e-6, True) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, 1e-6, True) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_505_10_30_23_float16(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 10, 30, 23), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, 1e-5, True) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, 1e-5, True) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_505_1_30_23_float32(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 1, 30, 23), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, 0, False) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, 0, False) - self.assertRtolEqual(cpu_output, npu_output) - - - def test_pairwise_distance_55_10_30_float32(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (55, 10, 30), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, -1e-6, False) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, -1e-6, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_30_23_float16(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (30, 23), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, 5, False) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, 5, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_505_1_23_19_float16(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 1, 23, 19), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, 10, True) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, 10, True) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_30_23_19_float32(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (30, 23, 19), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, -10, True) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, -10, True) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_505_1_float32(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 1), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, -1e-4, False) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, -1e-4, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_1_520_float32(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (1, 520), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, 1e-4, False) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, 1e-4, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_1_1_float32(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (1, 1), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 1, 2, True) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 1, 2, True) - self.assertRtolEqual(cpu_output, npu_output) - - #can't pass test - def test_pairwise_distance_505_12_float16(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 12), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, -1e-4, False) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, -1e-4, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_pairwise_distance_509_35_float16(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, (509, 35), np.float16) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0.0, -1, False) - npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.0, -1, False) - self.assertRtolEqual(cpu_output, npu_output) -instantiate_device_type_tests(TestPairwiseDistance, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:2") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestPairwiseDistance(TestCase): + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + return npu_input1, npu_input2 + + def cpu_op_exec_default(self, input1, input2): + stype = input1.dtype + if stype == torch.float16: + input1 = input1.float() + input2 = input2.float() + pdist = torch.nn.PairwiseDistance() + output = pdist(input1, input2) + if stype == torch.float16: + output = output.half() + output = output.numpy() + return output + + def npu_op_exec_default(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + pdist = torch.nn.PairwiseDistance() + output = pdist(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec(self, input1, input2, p, eps, keepdim): + stype = input1.dtype + if stype == torch.float16: + input1 = input1.float() + input2 = input2.float() + pdist = torch.nn.PairwiseDistance(p=p, eps=eps, keepdim=keepdim) + output = pdist(input1, input2) + if stype == torch.float16: + output = output.half() + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2, p, eps, keepdim): + input1 = input1.to("npu") + input2 = input2.to("npu") + pdist = torch.nn.PairwiseDistance(p=p, eps=eps, keepdim=keepdim) + output = pdist(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def test_pairwise_distance_5_360_float16(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (5, 360), np.float16) + cpu_output = self.cpu_op_exec_default(npu_input1, npu_input2) + npu_output = self.npu_op_exec_default(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_10_3600_30_float32(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (10, 3600, 30), np.float32) + cpu_output = self.cpu_op_exec_default(npu_input1, npu_input2) + npu_output = self.npu_op_exec_default(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_505_10_30_float32(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 10, 30), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, 1e-6, True) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, 1e-6, True) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_505_10_30_23_float16(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 10, 30, 23), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, 1e-5, True) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, 1e-5, True) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_505_1_30_23_float32(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 1, 30, 23), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, 0, False) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, 0, False) + self.assertRtolEqual(cpu_output, npu_output) + + + def test_pairwise_distance_55_10_30_float32(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (55, 10, 30), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, -1e-6, False) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, -1e-6, False) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_30_23_float16(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (30, 23), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, 5, False) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, 5, False) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_505_1_23_19_float16(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 1, 23, 19), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, 10, True) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, 10, True) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_30_23_19_float32(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (30, 23, 19), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, -10, True) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, -10, True) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_505_1_float32(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 1), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, -1e-4, False) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, -1e-4, False) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_1_520_float32(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (1, 520), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 10, 1e-4, False) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 10, 1e-4, False) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_1_1_float32(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (1, 1), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 1, 2, True) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 1, 2, True) + self.assertRtolEqual(cpu_output, npu_output) + + #can't pass test + def test_pairwise_distance_505_12_float16(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (505, 12), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0, -1e-4, False) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0, -1e-4, False) + self.assertRtolEqual(cpu_output, npu_output) + + def test_pairwise_distance_509_35_float16(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, (509, 35), np.float16) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, 0.0, -1, False) + npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.0, -1, False) + self.assertRtolEqual(cpu_output, npu_output) +instantiate_device_type_tests(TestPairwiseDistance, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:2") + run_tests() diff --git a/test/test_npu/test_reflection_pad2d.py b/test/test_npu/test_reflection_pad2d.py deleted file mode 100644 index d150c4c955b8d3670c033e7056f6d71810d6baef..0000000000000000000000000000000000000000 --- a/test/test_npu/test_reflection_pad2d.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestReflectionPad2d(TestCase): - def cpu_op_out_exec(self, input1, pad, output): - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - return m - - def npu_op_out_exec(self, input1, pad, output): - m_n = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m_n = m_n.to("cpu") - m_n = m_n.numpy() - return m_n - - def cpu_op_exec(self, input1, pad): - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1, pad): - m = torch.nn.ReflectionPad2d(pad).to("npu") - output = m(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_reflectionPad2d_out_shape_format(self, device): - shape_format = [ - [[np.float32, 0, (1, 1, 3, 3)], [2, 2, 2, 2]], - [[np.float32, 3, (1, 1, 4, 3)], 2] - ] - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_out_exec(cpu_input1, item[1], cpuout) - npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_out_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.float16, 3, (1, 1, 4, 3)], 2] - ] - - def cpu_op_out_exec_fp16(input1, pad, output): - input1 = input1.to(torch.float32) - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - m = m.astype(np.float16) - return m - - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_out_exec_fp16(cpu_input1, item[1], cpuout) - npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_out_shape_format_int8(self, device): - shape_format = [ - [[np.int8, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.int8, 0, (1, 1, 5, 3)], 2] - ] - - def cpu_op_out_exec_int8(input1, pad, output): - input1 = input1.to(torch.float32) - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - m = m.astype(np.int8) - return m - - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_out_exec_int8(cpu_input1, item[1], cpuout) - npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_out_shape_format_uint8(self, device): - shape_format = [ - [[np.uint8, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.uint8, 0, (1, 1, 4, 9)], 3] - ] - - def cpu_op_out_exec_uint8(input1, pad, output): - input1 = input1.to(torch.float32) - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - m = m.astype(np.uint8) - return m - - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_out_exec_uint8(cpu_input1, item[1], cpuout) - npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_out_shape_format_int32(self, device): - shape_format = [ - [[np.int32, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.int32, 0, (1, 1, 4, 9)], 2] - ] - - def cpu_op_out_exec_int32(input1, pad, output): - input1 = input1.to(torch.float32) - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - m = m.astype(np.int32) - return m - - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_out_exec_int32(cpu_input1, item[1], cpuout) - npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format(self, device): - shape_format = [ - [[np.float32, 0, (1, 1, 3, 3)], [2, 2, 2, 2]], - [[np.float32, 3, (1, 1, 4, 3)], 2] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.float16, 3, (1, 1, 4, 3)], 2] - ] - - def cpu_op_exec_fp16(input1, pad): - input1 = input1.to(torch.float32) - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - output = output.astype(np.float16) - return output - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_exec_fp16(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format_int8(self, device): - shape_format = [ - [[np.int8, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.int8, 0, (1, 1, 5, 3)], 2] - ] - - def cpu_op_exec_int8(input1, pad): - input1 = input1.to(torch.float32) - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - output = output.astype(np.int8) - return output - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_exec_int8(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format_uint8(self, device): - shape_format = [ - [[np.uint8, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.uint8, 0, (1, 1, 4, 9)], 3] - ] - - def cpu_op_exec_uint8(input1, pad): - input1 = input1.to(torch.float32) - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - output = output.astype(np.uint8) - return output - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_exec_uint8(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format_int32(self, device): - shape_format = [ - [[np.int32, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.int32, 0, (1, 1, 4, 9)], 2] - ] - - def cpu_op_exec_int32(input1, pad): - input1 = input1.to(torch.float32) - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - output = output.astype(np.int32) - return output - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_exec_int32(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestReflectionPad2d, globals(), except_for="cpu") -if __name__ == "__main__": - torch.npu.set_device("npu:2") - run_tests() diff --git a/test/test_npu/test_scatter_dim_update.py b/test/test_npu/test_scatter_dim_update.py index 630dbe48e41a2b019994a19b5d014a29d6936fa3..0fab3b7d9b9919bf0479602e17b14c0fcb3d344e 100644 --- a/test/test_npu/test_scatter_dim_update.py +++ b/test/test_npu/test_scatter_dim_update.py @@ -1,96 +1,96 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestScatterDimUpdate(TestCase): - - def generate_data(self, min, max, shape_var, shape_indices, shape_updates, dtype_var, - dtype_indices, dtype_updates, dim): - var = np.random.uniform(min, max, shape_var).astype(dtype_var) - updates = np.random.uniform(min, max, shape_updates).astype(dtype_updates) - indices = np.random.randint(0, shape_var[dim], shape_indices).astype(dtype_indices) - - #modify from numpy.ndarray to torch.tensor - var = torch.from_numpy(var) - indices = torch.from_numpy(indices) - updates = torch.from_numpy(updates) - - return var, indices, updates, dim - - def cpu_op_exec(self, var, indices, updates, dim): - output = var.scatter(dim=dim, index=indices.long(), src=updates) - return output.numpy() - - def npu_op_exec(self, var, indices, updates, dim): - var = var.to("npu") - indices = indices.to("npu") - updates = updates.to("npu") - output = torch.scatter(var, dim, indices, updates) - output = output.to("cpu") - output = output.numpy() - return output - - def test_scatter_dim_update_32_float32(self, device): - var, indices, updates, dim = self.generate_data(-2, 2, (32, ), (32, ), (32, ), - "float32", "int32", "float32", 0) - cpu_output = self.cpu_op_exec(var, indices, updates, dim) - npu_output = self.npu_op_exec(var, indices, updates, dim) - self.assertRtolEqual(cpu_output, npu_output) - - def test_scatter_dim_update_32_32_float16(self, device): - var, indices, updates, dim = self.generate_data(-2, 2, (32, 32), (32, 32), (32, 32), - "float16", "int32", "float16", 0) - cpu_output = self.cpu_op_exec(var, indices, updates, dim) - npu_output = self.npu_op_exec(var, indices, updates, dim) - self.assertRtolEqual(cpu_output, npu_output) - - def test_scatter_dim_update_32_32_float32(self, device): - var, indices, updates, dim = self.generate_data(-2, 2, (32, 32), (24, 24), (24, 24), - "float32", "int32", "float32", 1) - cpu_output = self.cpu_op_exec(var, indices, updates, dim) - npu_output = self.npu_op_exec(var, indices, updates, dim) - self.assertRtolEqual(cpu_output, npu_output) - - def test_scatter_dim_update_32_32_32_int8(self, device): - var, indices, updates, dim = self.generate_data(-2, 2, (32, 32, 32), (24, 24, 24), (32, 32, 32), - "int8", "int32", "int8", 1) - cpu_output = self.cpu_op_exec(var, indices, updates, dim) - npu_output = self.npu_op_exec(var, indices, updates, dim) - self.assertRtolEqual(cpu_output, npu_output) - - def test_scatter_dim_update_16_16_16_16_float16(self, device): - var, indices, updates, dim = self.generate_data(-2, 2, (16, 16, 16, 16), (8, 8, 8, 8), (12, 12, 12, 12), - "float16", "int32", "float16", 2) - cpu_output = self.cpu_op_exec(var, indices, updates, dim) - npu_output = self.npu_op_exec(var, indices, updates, dim) - self.assertRtolEqual(cpu_output, npu_output) - - def test_scatter_dim_update_8_8_8_8_8_floa32(self, device): - var, indices, updates, dim = self.generate_data(-2, 2, (8, 8, 8, 8, 8), (3, 3, 3, 3, 3), (8, 8, 8, 8, 8), - "float32", "int32", "float32", 3) - cpu_output = self.cpu_op_exec(var, indices, updates, dim) - npu_output = self.npu_op_exec(var, indices, updates, dim) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestScatterDimUpdate, globals(), except_for='cpu') -if __name__ == '__main__': - torch.npu.set_device("npu:2") +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestScatterDimUpdate(TestCase): + + def generate_data(self, min, max, shape_var, shape_indices, shape_updates, dtype_var, + dtype_indices, dtype_updates, dim): + var = np.random.uniform(min, max, shape_var).astype(dtype_var) + updates = np.random.uniform(min, max, shape_updates).astype(dtype_updates) + indices = np.random.randint(0, shape_var[dim], shape_indices).astype(dtype_indices) + + #modify from numpy.ndarray to torch.tensor + var = torch.from_numpy(var) + indices = torch.from_numpy(indices) + updates = torch.from_numpy(updates) + + return var, indices, updates, dim + + def cpu_op_exec(self, var, indices, updates, dim): + output = var.scatter(dim=dim, index=indices.long(), src=updates) + return output.numpy() + + def npu_op_exec(self, var, indices, updates, dim): + var = var.to("npu") + indices = indices.to("npu") + updates = updates.to("npu") + output = torch.scatter(var, dim, indices, updates) + output = output.to("cpu") + output = output.numpy() + return output + + def test_scatter_dim_update_32_float32(self, device): + var, indices, updates, dim = self.generate_data(-2, 2, (32, ), (32, ), (32, ), + "float32", "int32", "float32", 0) + cpu_output = self.cpu_op_exec(var, indices, updates, dim) + npu_output = self.npu_op_exec(var, indices, updates, dim) + self.assertRtolEqual(cpu_output, npu_output) + + def test_scatter_dim_update_32_32_float16(self, device): + var, indices, updates, dim = self.generate_data(-2, 2, (32, 32), (32, 32), (32, 32), + "float16", "int32", "float16", 0) + cpu_output = self.cpu_op_exec(var, indices, updates, dim) + npu_output = self.npu_op_exec(var, indices, updates, dim) + self.assertRtolEqual(cpu_output, npu_output) + + def test_scatter_dim_update_32_32_float32(self, device): + var, indices, updates, dim = self.generate_data(-2, 2, (32, 32), (24, 24), (24, 24), + "float32", "int32", "float32", 1) + cpu_output = self.cpu_op_exec(var, indices, updates, dim) + npu_output = self.npu_op_exec(var, indices, updates, dim) + self.assertRtolEqual(cpu_output, npu_output) + + def test_scatter_dim_update_32_32_32_int8(self, device): + var, indices, updates, dim = self.generate_data(-2, 2, (32, 32, 32), (24, 24, 24), (32, 32, 32), + "int8", "int32", "int8", 1) + cpu_output = self.cpu_op_exec(var, indices, updates, dim) + npu_output = self.npu_op_exec(var, indices, updates, dim) + self.assertRtolEqual(cpu_output, npu_output) + + def test_scatter_dim_update_16_16_16_16_float16(self, device): + var, indices, updates, dim = self.generate_data(-2, 2, (16, 16, 16, 16), (8, 8, 8, 8), (12, 12, 12, 12), + "float16", "int32", "float16", 2) + cpu_output = self.cpu_op_exec(var, indices, updates, dim) + npu_output = self.npu_op_exec(var, indices, updates, dim) + self.assertRtolEqual(cpu_output, npu_output) + + def test_scatter_dim_update_8_8_8_8_8_floa32(self, device): + var, indices, updates, dim = self.generate_data(-2, 2, (8, 8, 8, 8, 8), (3, 3, 3, 3, 3), (8, 8, 8, 8, 8), + "float32", "int32", "float32", 3) + cpu_output = self.cpu_op_exec(var, indices, updates, dim) + npu_output = self.npu_op_exec(var, indices, updates, dim) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestScatterDimUpdate, globals(), except_for='cpu') +if __name__ == '__main__': + torch.npu.set_device("npu:2") run_tests() \ No newline at end of file diff --git a/test/test_npu/test_sign.py b/test/test_npu/test_sign.py index 2b4d7a42749bddcb36fe89f047682141498d93df..205a24e9b9d5f47b1cabfbfce964a878fa7cbe19 100644 --- a/test/test_npu/test_sign.py +++ b/test/test_npu/test_sign.py @@ -1,64 +1,64 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestSign(TestCase): - - def generate_data(self, min_d, max_d, shape, dtype): - input= np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input = torch.from_numpy(input) - return npu_input - - def cpu_op_exec(self, input_x): - output = torch.sign(input_x) - output = output.numpy() - return output - - def npu_op_exec(self, input_x): - input = input_x.to("npu") - output= torch.sign(input) - output = output.to("cpu") - output = output.numpy() - return output - - def test_sign_float16(self, device): - def cpu_op_exec_fp16(input): - input = input.to(torch.float32) - output = torch.sign(input) - output = output.numpy() - output = output.astype(np.float16) - return output - - npu_input = self.generate_data(0, 100, (5,3), np.float16) - cpu_output = cpu_op_exec_fp16(npu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sign_float32(self, device): - npu_input = self.generate_data(0, 100, (4,3), np.float32) - cpu_output = self.cpu_op_exec(npu_input) - npu_output = self.npu_op_exec(npu_input) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestSign, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestSign(TestCase): + + def generate_data(self, min_d, max_d, shape, dtype): + input= np.random.uniform(min_d, max_d, shape).astype(dtype) + npu_input = torch.from_numpy(input) + return npu_input + + def cpu_op_exec(self, input_x): + output = torch.sign(input_x) + output = output.numpy() + return output + + def npu_op_exec(self, input_x): + input = input_x.to("npu") + output= torch.sign(input) + output = output.to("cpu") + output = output.numpy() + return output + + def test_sign_float16(self, device): + def cpu_op_exec_fp16(input): + input = input.to(torch.float32) + output = torch.sign(input) + output = output.numpy() + output = output.astype(np.float16) + return output + + npu_input = self.generate_data(0, 100, (5,3), np.float16) + cpu_output = cpu_op_exec_fp16(npu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sign_float32(self, device): + npu_input = self.generate_data(0, 100, (4,3), np.float32) + cpu_output = self.cpu_op_exec(npu_input) + npu_output = self.npu_op_exec(npu_input) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestSign, globals(), except_for='cpu') +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_solve.py b/test/test_npu/test_solve.py index 53a986db75a89ce07c10fdebe1e45461cae93860..0f0e6ade83c6cec2648d204d7ad21b971deb9063 100644 --- a/test/test_npu/test_solve.py +++ b/test/test_npu/test_solve.py @@ -1,89 +1,89 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import random -import copy -from torch.autograd import Variable -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestSolve(TestCase): - def generate_data(self, min, max, shape, dtype): - input = np.random.uniform(min, max, shape).astype(dtype) - npu_input = torch.from_numpy(input) - return npu_input - - def cpu_op_exec(self, input1, input2): - X, LU = torch.solve(input2, input1) - return X - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - X, LU = torch.solve(input2, input1) - X = X.to("cpu") - return X - - def test_solve_float16_2(self, device): - def cpu_op_exec_float16_2(input1, input2): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - X, LU = torch.solve(input2, input1) - X = X.numpy() - X = X.astype(np.float16) - return X - npu_input1 = self.generate_data(0, 100, (2, 2), np.float16) - npu_input2 = self.generate_data(0, 100, (2, 1), np.float16) - cpu_output = cpu_op_exec_float16_2(npu_input1, npu_input2) - # npu_output = self.npu_op_exec(npu_input1, npu_input2) - #self.assertRtolEqual(cpu_output, npu_output) - - def test_solve_float16_1(self, device): - def cpu_op_exec_float16_1(input1, input2): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - X, LU = torch.solve(input2, input1) - X = X.numpy() - X = X.astype(np.float16) - return X - npu_input1 = self.generate_data(0, 100, (5, 5), np.float16) - npu_input2 = self.generate_data(0, 100, (5, 5), np.float16) - cpu_output = cpu_op_exec_float16_1(npu_input1, npu_input2) - # npu_output = self.npu_op_exec(npu_input1, npu_input2) - #self.assertRtolEqual(cpu_output, npu_output) - - def test_solve_float32_1(self, device): - npu_input1 = self.generate_data(0, 100, (2, 3, 2, 2), np.float32) - npu_input2 = self.generate_data(0, 100, (2, 1, 2, 1), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - # npu_output = self.npu_op_exec(npu_input1, npu_input2) - # self.assertRtolEqual(cpu_output, npu_output) - - def test_solve_float32_2(self, device): - npu_input1 = self.generate_data(0, 100, (3, 3, 3), np.float32) - npu_input2 = self.generate_data(0, 100, (3, 3, 2), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - # npu_output = self.npu_op_exec(npu_input1, npu_input2) - # self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestSolve, globals(), except_for='cpu') -if __name__ == '__main__': +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import random +import copy +from torch.autograd import Variable +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestSolve(TestCase): + def generate_data(self, min, max, shape, dtype): + input = np.random.uniform(min, max, shape).astype(dtype) + npu_input = torch.from_numpy(input) + return npu_input + + def cpu_op_exec(self, input1, input2): + X, LU = torch.solve(input2, input1) + return X + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + X, LU = torch.solve(input2, input1) + X = X.to("cpu") + return X + + def test_solve_float16_2(self, device): + def cpu_op_exec_float16_2(input1, input2): + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + X, LU = torch.solve(input2, input1) + X = X.numpy() + X = X.astype(np.float16) + return X + npu_input1 = self.generate_data(0, 100, (2, 2), np.float16) + npu_input2 = self.generate_data(0, 100, (2, 1), np.float16) + cpu_output = cpu_op_exec_float16_2(npu_input1, npu_input2) + # npu_output = self.npu_op_exec(npu_input1, npu_input2) + #self.assertRtolEqual(cpu_output, npu_output) + + def test_solve_float16_1(self, device): + def cpu_op_exec_float16_1(input1, input2): + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + X, LU = torch.solve(input2, input1) + X = X.numpy() + X = X.astype(np.float16) + return X + npu_input1 = self.generate_data(0, 100, (5, 5), np.float16) + npu_input2 = self.generate_data(0, 100, (5, 5), np.float16) + cpu_output = cpu_op_exec_float16_1(npu_input1, npu_input2) + # npu_output = self.npu_op_exec(npu_input1, npu_input2) + #self.assertRtolEqual(cpu_output, npu_output) + + def test_solve_float32_1(self, device): + npu_input1 = self.generate_data(0, 100, (2, 3, 2, 2), np.float32) + npu_input2 = self.generate_data(0, 100, (2, 1, 2, 1), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + # npu_output = self.npu_op_exec(npu_input1, npu_input2) + # self.assertRtolEqual(cpu_output, npu_output) + + def test_solve_float32_2(self, device): + npu_input1 = self.generate_data(0, 100, (3, 3, 3), np.float32) + npu_input2 = self.generate_data(0, 100, (3, 3, 2), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + # npu_output = self.npu_op_exec(npu_input1, npu_input2) + # self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestSolve, globals(), except_for='cpu') +if __name__ == '__main__': run_tests() \ No newline at end of file diff --git a/test/test_npu/test_sub.py b/test/test_npu/test_sub.py index 41580f85bdc2a2d822058e79cc6ab85cf724e4b5..b2a17b4eb7fa2c16cd9a4e70d34a286ad1e85578 100644 --- a/test/test_npu/test_sub.py +++ b/test/test_npu/test_sub.py @@ -1,193 +1,193 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestSub(TestCase): - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) - - # modify from numpy.ndarray to torch.tensor - npu_input1 = torch.from_numpy(input1) - npu_input2 = torch.from_numpy(input2) - return npu_input1, npu_input2 - - def cpu_op_exec(self, input1, input2): - output = torch.sub(input1, input2) - output = output.numpy() - return output - - def cpu_op_exec_fp16(self, input1, input2): - input1 = input1.to(torch.float32) - output = torch.sub(input1, input2) - output = output.numpy() - output = output.astype(np.float16) - return output - - def npu_op_exec(self, input1, input2): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.sub(input1, input2) - output = output.to("cpu") - output = output.numpy() - return output - - def test_sub_int32_1(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_int32_2(self, device): - npu_input1, npu_input2 = self.generate_data(2147483647, 2147483648, (2, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_int32_3(self, device): - npu_input1, npu_input2 = self.generate_data(-100, 0, (2, 3), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_int32_4(self, device): - npu_input1, npu_input2 = self.generate_data(0, 100, (500, 500), np.int32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float16_1(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, ((65535, 1, 1, 1)), np.float16) - cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float16_2(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, ((1, 1, 1, 8192)), np.float16) - cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float16_3(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, ((1, 1, 1, 65535)), np.float16) - cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float16_4(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, ((1, 1, 1, 524288)), np.float16) - cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float16_5(self, device): - npu_input1, npu_input2 = self.generate_data(-2, 2, ((1, 1, 1, 786432)), np.float16) - cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float16_6(self, device): - npu_input1, npu_input2 = self.generate_data(-5, 5, ((1, 1, 1, 786432)), np.float16) - cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float32_1(self, device): - npu_input1, npu_input2 = self.generate_data(-1.1754943508e-38, -1.1754943508e-38, ((1, 31, 149, 2)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float32_2(self, device): - npu_input1, npu_input2 = self.generate_data(-0.000030517578125, 0.000030517578125, ((2, 32, 149, 31)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float32_3(self, device): - npu_input1, npu_input2 = self.generate_data(-9.313225746154785e-10, 9.313225746154785e-10, ((184965, 1)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float32_4(self, device): - npu_input1, npu_input2 = self.generate_data(-3, 3, ((1, 31, 149, 2)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float32_5(self, device): - npu_input1, npu_input2 = self.generate_data(-9.313225746154785e-10, 9.313225746154785e-10, ((1, 31, 149, 2)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float32_6(self, device): - npu_input1, npu_input2 = self.generate_data(-0.000000000000000000000000000000000000011754943508, - 0.000000000000000000000000000000000000011754943508, ((2, 31, 149, 2)), - np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float32_7(self, device): - npu_input1, npu_input2 = self.generate_data(0.000000000000000000000000000000000000011754943508, - 0.000000000000000000000000000000000000011754943508, ((4, 31, 149, 2)), - np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float32_8(self, device): - npu_input1, npu_input2 = self.generate_data(-0.000000000000000000000000000000000000011754943508, - -0.000000000000000000000000000000000000011754943508, ((2048, 31, 1, 2)), - np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_float32_9(self, device): - npu_input1, npu_input2 = self.generate_data(-0.000000000000000000000000000000000000011754943508, - 0.000000000000000000000000000000000000011754943508, ((8, 7, 149)), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_mix_dtype_1(self,device): - npu_input1, npu_input2= self.generate_data(0, 100, (2, 3), np.int32) - npu_input3, npu_input4 = self.generate_data(0, 100, (2, 3), np.float32) - cpu_output = self.cpu_op_exec(npu_input1, npu_input3) - npu_output = self.npu_op_exec(npu_input1, npu_input3) - self.assertRtolEqual(cpu_output, npu_output) - - def test_sub_mix_dtype_2(self,device): - npu_input1, npu_input2 = self.generate_data(0, 100, (2, 3), np.float32) - npu_input3 = torch.tensor(3).int() - cpu_output = self.cpu_op_exec(npu_input1, npu_input3) - npu_output = self.npu_op_exec(npu_input1, npu_input3) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestSub, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:5") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestSub(TestCase): + def generate_data(self, min_d, max_d, shape, dtype): + input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) + input2 = np.random.uniform(min_d, max_d, shape).astype(dtype) + + # modify from numpy.ndarray to torch.tensor + npu_input1 = torch.from_numpy(input1) + npu_input2 = torch.from_numpy(input2) + return npu_input1, npu_input2 + + def cpu_op_exec(self, input1, input2): + output = torch.sub(input1, input2) + output = output.numpy() + return output + + def cpu_op_exec_fp16(self, input1, input2): + input1 = input1.to(torch.float32) + output = torch.sub(input1, input2) + output = output.numpy() + output = output.astype(np.float16) + return output + + def npu_op_exec(self, input1, input2): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = torch.sub(input1, input2) + output = output.to("cpu") + output = output.numpy() + return output + + def test_sub_int32_1(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_int32_2(self, device): + npu_input1, npu_input2 = self.generate_data(2147483647, 2147483648, (2, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_int32_3(self, device): + npu_input1, npu_input2 = self.generate_data(-100, 0, (2, 3), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_int32_4(self, device): + npu_input1, npu_input2 = self.generate_data(0, 100, (500, 500), np.int32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float16_1(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, ((65535, 1, 1, 1)), np.float16) + cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float16_2(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, ((1, 1, 1, 8192)), np.float16) + cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float16_3(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, ((1, 1, 1, 65535)), np.float16) + cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float16_4(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, ((1, 1, 1, 524288)), np.float16) + cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float16_5(self, device): + npu_input1, npu_input2 = self.generate_data(-2, 2, ((1, 1, 1, 786432)), np.float16) + cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float16_6(self, device): + npu_input1, npu_input2 = self.generate_data(-5, 5, ((1, 1, 1, 786432)), np.float16) + cpu_output = self.cpu_op_exec_fp16(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float32_1(self, device): + npu_input1, npu_input2 = self.generate_data(-1.1754943508e-38, -1.1754943508e-38, ((1, 31, 149, 2)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float32_2(self, device): + npu_input1, npu_input2 = self.generate_data(-0.000030517578125, 0.000030517578125, ((2, 32, 149, 31)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float32_3(self, device): + npu_input1, npu_input2 = self.generate_data(-9.313225746154785e-10, 9.313225746154785e-10, ((184965, 1)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float32_4(self, device): + npu_input1, npu_input2 = self.generate_data(-3, 3, ((1, 31, 149, 2)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float32_5(self, device): + npu_input1, npu_input2 = self.generate_data(-9.313225746154785e-10, 9.313225746154785e-10, ((1, 31, 149, 2)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float32_6(self, device): + npu_input1, npu_input2 = self.generate_data(-0.000000000000000000000000000000000000011754943508, + 0.000000000000000000000000000000000000011754943508, ((2, 31, 149, 2)), + np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float32_7(self, device): + npu_input1, npu_input2 = self.generate_data(0.000000000000000000000000000000000000011754943508, + 0.000000000000000000000000000000000000011754943508, ((4, 31, 149, 2)), + np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float32_8(self, device): + npu_input1, npu_input2 = self.generate_data(-0.000000000000000000000000000000000000011754943508, + -0.000000000000000000000000000000000000011754943508, ((2048, 31, 1, 2)), + np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_float32_9(self, device): + npu_input1, npu_input2 = self.generate_data(-0.000000000000000000000000000000000000011754943508, + 0.000000000000000000000000000000000000011754943508, ((8, 7, 149)), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_mix_dtype_1(self,device): + npu_input1, npu_input2= self.generate_data(0, 100, (2, 3), np.int32) + npu_input3, npu_input4 = self.generate_data(0, 100, (2, 3), np.float32) + cpu_output = self.cpu_op_exec(npu_input1, npu_input3) + npu_output = self.npu_op_exec(npu_input1, npu_input3) + self.assertRtolEqual(cpu_output, npu_output) + + def test_sub_mix_dtype_2(self,device): + npu_input1, npu_input2 = self.generate_data(0, 100, (2, 3), np.float32) + npu_input3 = torch.tensor(3).int() + cpu_output = self.cpu_op_exec(npu_input1, npu_input3) + npu_output = self.npu_op_exec(npu_input1, npu_input3) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestSub, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:5") + run_tests() diff --git a/test/test_npu/test_sum.py b/test/test_npu/test_sum.py index 3daa2d4dbda88a70773f43df9d4395c993ce3c6a..abbc85d2482f327d494630b29c585331e5bea983 100644 --- a/test/test_npu/test_sum.py +++ b/test/test_npu/test_sum.py @@ -1,62 +1,62 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestSum(TestCase): - def cpu_op_exec(self, input1): - output = input1.sum() - output = output.numpy() - return output - - def npu_op_exec(self, input1): - output = input1.sum() - output = output.to("cpu") - output = output.numpy() - return output - - def cpu_op_exec_dim(self, input1, dim, dtype): - output = torch.sum(input1, dim, keepdim=True, dtype=dtype) - output = output.numpy() - return output - - def npu_op_exec_dim(self, input1, dim, dtype): - output = torch.sum(input1, dim, keepdim=True, dtype=dtype) - output = output.to("cpu") - output = output.numpy() - return output - - def test_sum_shape_format(self, device): - shape_format = [ - [[np.float32, 0, [256]], [0]], - [[np.float32, 0, [256, 1000]], [0]], - [[np.int32, 0, [5, 256]], [0]], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 2, 100) - cpu_output = self.cpu_op_exec_dim(cpu_input1, item[1], cpu_input1.dtype) - npu_output = self.npu_op_exec_dim(npu_input1, item[1], cpu_input1.dtype) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestSum, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestSum(TestCase): + def cpu_op_exec(self, input1): + output = input1.sum() + output = output.numpy() + return output + + def npu_op_exec(self, input1): + output = input1.sum() + output = output.to("cpu") + output = output.numpy() + return output + + def cpu_op_exec_dim(self, input1, dim, dtype): + output = torch.sum(input1, dim, keepdim=True, dtype=dtype) + output = output.numpy() + return output + + def npu_op_exec_dim(self, input1, dim, dtype): + output = torch.sum(input1, dim, keepdim=True, dtype=dtype) + output = output.to("cpu") + output = output.numpy() + return output + + def test_sum_shape_format(self, device): + shape_format = [ + [[np.float32, 0, [256]], [0]], + [[np.float32, 0, [256, 1000]], [0]], + [[np.int32, 0, [5, 256]], [0]], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 2, 100) + cpu_output = self.cpu_op_exec_dim(cpu_input1, item[1], cpu_input1.dtype) + npu_output = self.npu_op_exec_dim(npu_input1, item[1], cpu_input1.dtype) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestSum, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_topk.py b/test/test_npu/test_topk.py index 6713a511ac053c183df14f138d6b13d38ee38339..8055ac38ef3f3ccccf06774af3174cd5dee8655b 100644 --- a/test/test_npu/test_topk.py +++ b/test/test_npu/test_topk.py @@ -1,97 +1,97 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import time - -class TestTopk(TestCase): - def cpu_op_exec(self, input1, k, dim, largest, sorted1): - output, index = torch.topk(input1, k, dim, largest, sorted1) - output = output.numpy() - index = index.numpy() - return output, index - - def npu_op_exec(self, input1, k, dim, largest, sorted1): - output, index = torch.topk(input1, k, dim, largest, sorted1) - output = output.to("cpu") - index = index.to("cpu") - output = output.numpy() - index = index.numpy() - return output, index - - def test_topk_shape_format(self, device): - np.random.seed(0) - shape_format = [ - # [k, dim, [input_type, input_format, input_shape, min, max], largest, sorted] - # dim - [3, 0, [np.float32, 0, [8, 10], 0, 100], True, True], - [3, 1, [np.float32, 0, [8, 10], 0, 100], True, True], - [5, 2, [np.float32, 0, [8, 9, 10], 0, 1000], True, True], - [5, 3, [np.float32, 0, [8, 9, 10, 11], 0, 1000], True, True], - # dtype - [3, 0, [np.int32, 0, [8, 10], 0, 100], True, True], - [5, 2, [np.int32, 0, [8, 9, 10], 0, 1000], True, True], - # random - [5, 0, [np.float32, 0, [31, 47], 0, 100], True, True], - [5, 1, [np.float32, 0, [42, 53, 7], 0, 100], True, True], - # largest - [3, 0, [np.float32, 0, [8, 10], 0, 100], False, True], - ] - - cnt = 0 - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[2], item[2][3], item[2][4]) - cpu_output, cpu_index = self.cpu_op_exec(cpu_input, item[0], item[1], item[3], item[4]) - npu_output, npu_index = self.npu_op_exec(npu_input, item[0], item[1], item[3], item[4]) - # 目前只支持fp16,fp32降低阈值判断 - self.assertRtolEqual(cpu_output, npu_output, prec=1.e-1) - #self.assertRtolEqual(cpu_index, npu_index) - cnt += 1 - - def test_topk_float16_shape_format(self, device): - def cpu_op_exec_fp16(input1, k, dim, largest, sorted1): - input1 = input1.to(torch.float32) - output, index = torch.topk(input1, k, dim, largest, sorted1) - output = output.numpy().astype(np.float16) - index = index.numpy().astype(np.int32) - return output, index - - np.random.seed(0) - shape_format = [ - # [k, dim, [input_type, input_format, input_shape, min, max], largest, sorted] - # dim - [3, 0, [np.float16, 0, [8, 10], 0, 100], True, True], - [3, 1, [np.float16, 0, [8, 10], 0, 100], True, True], - [5, 2, [np.float16, 0, [8, 9, 10], 0, 1000], True, True], - [5, 3, [np.float16, 0, [8, 9, 10, 11], 0, 1000], True, True], - ] - - cnt = 0 - for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[2], item[2][3], item[2][4]) - cpu_output, cpu_index = cpu_op_exec_fp16(cpu_input, item[0], item[1], item[3], item[4]) - npu_output, npu_index = self.npu_op_exec(npu_input, item[0], item[1], item[3], item[4]) - cpu_index = cpu_index.astype(npu_index.dtype) - self.assertRtolEqual(cpu_output, npu_output) - #self.assertRtolEqual(cpu_index, npu_index) - cnt += 1 - -instantiate_device_type_tests(TestTopk, globals(), except_for="cpu") -if __name__ == "__main__": +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor +import time + +class TestTopk(TestCase): + def cpu_op_exec(self, input1, k, dim, largest, sorted1): + output, index = torch.topk(input1, k, dim, largest, sorted1) + output = output.numpy() + index = index.numpy() + return output, index + + def npu_op_exec(self, input1, k, dim, largest, sorted1): + output, index = torch.topk(input1, k, dim, largest, sorted1) + output = output.to("cpu") + index = index.to("cpu") + output = output.numpy() + index = index.numpy() + return output, index + + def test_topk_shape_format(self, device): + np.random.seed(0) + shape_format = [ + # [k, dim, [input_type, input_format, input_shape, min, max], largest, sorted] + # dim + [3, 0, [np.float32, 0, [8, 10], 0, 100], True, True], + [3, 1, [np.float32, 0, [8, 10], 0, 100], True, True], + [5, 2, [np.float32, 0, [8, 9, 10], 0, 1000], True, True], + [5, 3, [np.float32, 0, [8, 9, 10, 11], 0, 1000], True, True], + # dtype + [3, 0, [np.int32, 0, [8, 10], 0, 100], True, True], + [5, 2, [np.int32, 0, [8, 9, 10], 0, 1000], True, True], + # random + [5, 0, [np.float32, 0, [31, 47], 0, 100], True, True], + [5, 1, [np.float32, 0, [42, 53, 7], 0, 100], True, True], + # largest + [3, 0, [np.float32, 0, [8, 10], 0, 100], False, True], + ] + + cnt = 0 + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[2], item[2][3], item[2][4]) + cpu_output, cpu_index = self.cpu_op_exec(cpu_input, item[0], item[1], item[3], item[4]) + npu_output, npu_index = self.npu_op_exec(npu_input, item[0], item[1], item[3], item[4]) + # 目前只支持fp16,fp32降低阈值判断 + self.assertRtolEqual(cpu_output, npu_output, prec=1.e-1) + #self.assertRtolEqual(cpu_index, npu_index) + cnt += 1 + + def test_topk_float16_shape_format(self, device): + def cpu_op_exec_fp16(input1, k, dim, largest, sorted1): + input1 = input1.to(torch.float32) + output, index = torch.topk(input1, k, dim, largest, sorted1) + output = output.numpy().astype(np.float16) + index = index.numpy().astype(np.int32) + return output, index + + np.random.seed(0) + shape_format = [ + # [k, dim, [input_type, input_format, input_shape, min, max], largest, sorted] + # dim + [3, 0, [np.float16, 0, [8, 10], 0, 100], True, True], + [3, 1, [np.float16, 0, [8, 10], 0, 100], True, True], + [5, 2, [np.float16, 0, [8, 9, 10], 0, 1000], True, True], + [5, 3, [np.float16, 0, [8, 9, 10, 11], 0, 1000], True, True], + ] + + cnt = 0 + for item in shape_format: + cpu_input, npu_input = create_common_tensor(item[2], item[2][3], item[2][4]) + cpu_output, cpu_index = cpu_op_exec_fp16(cpu_input, item[0], item[1], item[3], item[4]) + npu_output, npu_index = self.npu_op_exec(npu_input, item[0], item[1], item[3], item[4]) + cpu_index = cpu_index.astype(npu_index.dtype) + self.assertRtolEqual(cpu_output, npu_output) + #self.assertRtolEqual(cpu_index, npu_index) + cnt += 1 + +instantiate_device_type_tests(TestTopk, globals(), except_for="cpu") +if __name__ == "__main__": run_tests() \ No newline at end of file diff --git a/test/test_npu/test_triangular_solve.py b/test/test_npu/test_triangular_solve.py index 19c29815db5e30f915c77de92c8ca4fd10a94afa..1cb5e4b8f8219a53d189090ee902cae8d4f28c6d 100644 --- a/test/test_npu/test_triangular_solve.py +++ b/test/test_npu/test_triangular_solve.py @@ -1,84 +1,84 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestTriangularSolve(TestCase): - def generate_data(self, min, max, shape, dtype): - input1 = np.random.uniform(min, max, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - return npu_input1 - - def cpu_op_exec(self, input1, input2, input3, input4, input5): - output = input1.triangular_solve(input2,upper=input3,transpose=input4,unitriangular=input5) - return output - - def cpu_op_exec_float16(self, input1, input2, input3, input4, input5): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - output = input1.triangular_solve(input2,upper=input3,transpose=input4,unitriangular=input5) - return output - - def npu_op_exec(self, input1, input2, input3, input4, input5): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = input1.triangular_solve(input2,upper=input3,transpose=input4,unitriangular=input5) - output = output.to("cpu") - return output - - def test_triangular_solve_float32(self, device): - npu_input1 = self.generate_data(0, 100, (2,3) , np.float32) - npu_input2 = self.generate_data(0, 100, (2,2) , np.float32) - npu_true = True - npu_false = False - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_false) - #npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_false) - #self.assertRtolEqual(cpu_output, npu_output) - - def test_triangular_solve_float32_zhuanzhi(self, device): - npu_input1 = self.generate_data(0, 100, (2,3) , np.float32) - npu_input2 = self.generate_data(0, 100, (2,2) , np.float32) - npu_true = True - npu_false = False - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, npu_true, npu_true, npu_false) - #npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_true, npu_true, npu_false) - #self.assertRtolEqual(cpu_output, npu_output) - - def test_triangular_solve_float32_danwei(self, device): - npu_input1 = self.generate_data(0, 100, (2,3) , np.float32) - npu_input2 = self.generate_data(0, 100, (2,2) , np.float32) - npu_true = True - npu_false = False - cpu_output = self.cpu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_true) - #npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_true) - #self.assertRtolEqual(cpu_output, npu_output) - - def test_triangular_solve_float16(self, device): - npu_input1 = self.generate_data(0, 100, (2,3) , np.float16) - npu_input2 = self.generate_data(0, 100, (2,2) , np.float16) - npu_true = True - npu_false = False - cpu_output = self.cpu_op_exec_float16(npu_input1, npu_input2, npu_true, npu_false, npu_true) - #npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_true) - #self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestTriangularSolve, globals(), except_for='cpu') -if __name__ == '__main__': - torch.npu.set_device("npu:2") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestTriangularSolve(TestCase): + def generate_data(self, min, max, shape, dtype): + input1 = np.random.uniform(min, max, shape).astype(dtype) + npu_input1 = torch.from_numpy(input1) + return npu_input1 + + def cpu_op_exec(self, input1, input2, input3, input4, input5): + output = input1.triangular_solve(input2,upper=input3,transpose=input4,unitriangular=input5) + return output + + def cpu_op_exec_float16(self, input1, input2, input3, input4, input5): + input1 = input1.to(torch.float32) + input2 = input2.to(torch.float32) + output = input1.triangular_solve(input2,upper=input3,transpose=input4,unitriangular=input5) + return output + + def npu_op_exec(self, input1, input2, input3, input4, input5): + input1 = input1.to("npu") + input2 = input2.to("npu") + output = input1.triangular_solve(input2,upper=input3,transpose=input4,unitriangular=input5) + output = output.to("cpu") + return output + + def test_triangular_solve_float32(self, device): + npu_input1 = self.generate_data(0, 100, (2,3) , np.float32) + npu_input2 = self.generate_data(0, 100, (2,2) , np.float32) + npu_true = True + npu_false = False + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_false) + #npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_false) + #self.assertRtolEqual(cpu_output, npu_output) + + def test_triangular_solve_float32_zhuanzhi(self, device): + npu_input1 = self.generate_data(0, 100, (2,3) , np.float32) + npu_input2 = self.generate_data(0, 100, (2,2) , np.float32) + npu_true = True + npu_false = False + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, npu_true, npu_true, npu_false) + #npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_true, npu_true, npu_false) + #self.assertRtolEqual(cpu_output, npu_output) + + def test_triangular_solve_float32_danwei(self, device): + npu_input1 = self.generate_data(0, 100, (2,3) , np.float32) + npu_input2 = self.generate_data(0, 100, (2,2) , np.float32) + npu_true = True + npu_false = False + cpu_output = self.cpu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_true) + #npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_true) + #self.assertRtolEqual(cpu_output, npu_output) + + def test_triangular_solve_float16(self, device): + npu_input1 = self.generate_data(0, 100, (2,3) , np.float16) + npu_input2 = self.generate_data(0, 100, (2,2) , np.float16) + npu_true = True + npu_false = False + cpu_output = self.cpu_op_exec_float16(npu_input1, npu_input2, npu_true, npu_false, npu_true) + #npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_true, npu_false, npu_true) + #self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestTriangularSolve, globals(), except_for='cpu') +if __name__ == '__main__': + torch.npu.set_device("npu:2") + run_tests() diff --git a/test/test_npu/test_type_as.py b/test/test_npu/test_type_as.py index 67f9ad01c81909650ca6a753d2270778e84b15db..65e990117d38a071f9ddd0ab6c24148a62556724 100644 --- a/test/test_npu/test_type_as.py +++ b/test/test_npu/test_type_as.py @@ -1,70 +1,70 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestTypeAs(TestCase): - def cpu_op_exec(self, input1, input2): - tensor1 = input1 - tensor2 = input2 - output = tensor1.type_as(tensor2) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2): - tensor1 = input1 - tensor2 = input2 - output = tensor1.type_as(tensor2) - output = output.to("cpu") - output = output.numpy() - return output - - def test_type_as_int32_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (4, 3)], [np.int32, -1, (4, 3)]], - [[np.float32, -1, (4, 3, 1)], [np.int32, -1, (4, 3, 1)]], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - def test_type_as_float32_shape_format(self, device): - shape_format = [ - [[np.int32, -1, (8, 5)], [np.float32, -1, (8, 5)]], - [[np.int32, -1, (9, 4, 2)], [np.float32, -1, (9, 4, 2)]], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) - npu_output = self.npu_op_exec(npu_input1, npu_input2) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestTypeAs, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:5") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import sys +import copy +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + + +class TestTypeAs(TestCase): + def cpu_op_exec(self, input1, input2): + tensor1 = input1 + tensor2 = input2 + output = tensor1.type_as(tensor2) + output = output.numpy() + return output + + def npu_op_exec(self, input1, input2): + tensor1 = input1 + tensor2 = input2 + output = tensor1.type_as(tensor2) + output = output.to("cpu") + output = output.numpy() + return output + + def test_type_as_int32_shape_format(self, device): + shape_format = [ + [[np.float32, -1, (4, 3)], [np.int32, -1, (4, 3)]], + [[np.float32, -1, (4, 3, 1)], [np.int32, -1, (4, 3, 1)]], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + def test_type_as_float32_shape_format(self, device): + shape_format = [ + [[np.int32, -1, (8, 5)], [np.float32, -1, (8, 5)]], + [[np.int32, -1, (9, 4, 2)], [np.float32, -1, (9, 4, 2)]], + ] + + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2) + npu_output = self.npu_op_exec(npu_input1, npu_input2) + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestTypeAs, globals(), except_for='cpu') +if __name__ == "__main__": + torch.npu.set_device("npu:5") + run_tests() diff --git a/test/test_npu/test_unbind.py b/test/test_npu/test_unbind.py index a843bec1dada8fc2e57b6d016e18cbd2051f6d94..19b5e7977da576c7c96cda166427736ba76ab7eb 100644 --- a/test/test_npu/test_unbind.py +++ b/test/test_npu/test_unbind.py @@ -1,84 +1,84 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: utf-8 - -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestUnbind(TestCase): - - def cpu_op_exec(self, input1, dim): - output_tuple= torch.unbind(input1, dim=dim) - listtuple1 = [] - for i in range(len(output_tuple)): - listtuple1 += list(output_tuple[i].contiguous().view(-1)) - output = torch.tensor(listtuple1) - output = output.numpy() - return output - - def npu_op_exec(self, input1, dim): - output_tuple = torch.unbind(input1, dim=dim) - listtuple1 = [] - for i in range(len(output_tuple)): - listtuple1 += list(output_tuple[i].contiguous().view(-1)) - output = torch.tensor(listtuple1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_unbind_common_shape_format(self, device): - shape_format = [ - [[np.float32, 0 , (1, 4, 2, 3)], 1], - [[np.float32, 0, (1, 3, 2, 3)], 2], - [[np.float32, 0, (3, 2, 3)], 2], - [[np.float32, 0, ( 2, 3)], 0], - [[np.float16, 0 , (1, 4, 2, 3)], 1], - [[np.float16, 0, (1, 3, 2, 3)], 3], - [[np.float16, 0, (3, 2, 3)], 2], - [[np.float16, 0, ( 2, 3)], 0], - [[np.int32, 0 , (1, 4, 2, 3)], 1], - [[np.int32, 0, (1, 3, 2, 3)], 3], - [[np.int32, 0, (3, 2, 3)], 2], - [[np.int32, 0, ( 2, 3)], 0], - [[np.int16, 0 , (1, 4, 2, 3)], 1], - [[np.int16, 0, (1, 3, 2, 3)], 3], - [[np.int16, 0, (3, 2, 3)], 2], - [[np.int16, 0, ( 2, 3)], 0], - [[np.int8, 0 , (1, 4, 2, 3)], 1], - [[np.int8, 0, (1, 3, 2, 3)], 3], - [[np.int8, 0, (3, 2, 3)], 2], - [[np.int8, 0, ( 2, 3)], 0], - [[np.uint8, 0 , (1, 4, 2, 3)], 1], - [[np.uint8, 0, (1, 3, 2, 3)], 3], - [[np.uint8, 0, (3, 2, 3)], 2], - [[np.uint8, 0, ( 2, 3)], 0], - [[np.int64, 0 , (1, 4, 2, 3)], 1], - [[np.int64, 0, (1, 3, 2, 3)], 3], - [[np.int64, 0, (3, 2, 3)], 2], - [[np.int64, 0, ( 2, 3)], 0] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestUnbind, globals(), except_for="cpu") -if __name__ == "__main__": - torch.npu.set_device("npu:6") - run_tests() +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +import torch +import numpy as np +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestUnbind(TestCase): + + def cpu_op_exec(self, input1, dim): + output_tuple= torch.unbind(input1, dim=dim) + listtuple1 = [] + for i in range(len(output_tuple)): + listtuple1 += list(output_tuple[i].contiguous().view(-1)) + output = torch.tensor(listtuple1) + output = output.numpy() + return output + + def npu_op_exec(self, input1, dim): + output_tuple = torch.unbind(input1, dim=dim) + listtuple1 = [] + for i in range(len(output_tuple)): + listtuple1 += list(output_tuple[i].contiguous().view(-1)) + output = torch.tensor(listtuple1) + output = output.to("cpu") + output = output.numpy() + return output + + def test_unbind_common_shape_format(self, device): + shape_format = [ + [[np.float32, 0 , (1, 4, 2, 3)], 1], + [[np.float32, 0, (1, 3, 2, 3)], 2], + [[np.float32, 0, (3, 2, 3)], 2], + [[np.float32, 0, ( 2, 3)], 0], + [[np.float16, 0 , (1, 4, 2, 3)], 1], + [[np.float16, 0, (1, 3, 2, 3)], 3], + [[np.float16, 0, (3, 2, 3)], 2], + [[np.float16, 0, ( 2, 3)], 0], + [[np.int32, 0 , (1, 4, 2, 3)], 1], + [[np.int32, 0, (1, 3, 2, 3)], 3], + [[np.int32, 0, (3, 2, 3)], 2], + [[np.int32, 0, ( 2, 3)], 0], + [[np.int16, 0 , (1, 4, 2, 3)], 1], + [[np.int16, 0, (1, 3, 2, 3)], 3], + [[np.int16, 0, (3, 2, 3)], 2], + [[np.int16, 0, ( 2, 3)], 0], + [[np.int8, 0 , (1, 4, 2, 3)], 1], + [[np.int8, 0, (1, 3, 2, 3)], 3], + [[np.int8, 0, (3, 2, 3)], 2], + [[np.int8, 0, ( 2, 3)], 0], + [[np.uint8, 0 , (1, 4, 2, 3)], 1], + [[np.uint8, 0, (1, 3, 2, 3)], 3], + [[np.uint8, 0, (3, 2, 3)], 2], + [[np.uint8, 0, ( 2, 3)], 0], + [[np.int64, 0 , (1, 4, 2, 3)], 1], + [[np.int64, 0, (1, 3, 2, 3)], 3], + [[np.int64, 0, (3, 2, 3)], 2], + [[np.int64, 0, ( 2, 3)], 0] + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) + cpu_output = self.cpu_op_exec(cpu_input1, item[1]) + npu_output = self.npu_op_exec(npu_input1, item[1]) + self.assertRtolEqual(cpu_output, npu_output) + +instantiate_device_type_tests(TestUnbind, globals(), except_for="cpu") +if __name__ == "__main__": + torch.npu.set_device("npu:6") + run_tests()