From 29e161ad7a7980b2ff2fffba6b4d1996b2995b55 Mon Sep 17 00:00:00 2001 From: stormkingz Date: Fri, 7 Mar 2025 06:09:19 +0000 Subject: [PATCH 1/2] =?UTF-8?q?update=20mx=5Fdriving/csrc/SubmSparseCov3dG?= =?UTF-8?q?rad.cpp.=201.kernelsum=E6=9C=80=E5=A5=BD=E6=A0=B9=E6=8D=AEkerne?= =?UTF-8?q?l=5Fsize=E6=9D=A5=E8=AE=A1=E7=AE=97=EF=BC=8C=E8=BF=99=E4=B8=8E?= =?UTF-8?q?=E5=85=B6=E4=BB=96=E4=BB=A3=E7=A0=81=E5=A4=84=E4=BF=9D=E6=8C=81?= =?UTF-8?q?=E4=BA=86=E4=B8=80=E8=87=B4=EF=BC=8C=E6=9B=B4=E6=98=93=E9=98=85?= =?UTF-8?q?=E8=AF=BB=EF=BC=9B2.output=5Fsize=E6=9C=80=E5=A5=BD=E7=94=A8?= =?UTF-8?q?=E4=B8=80=E7=BB=B4=E6=95=B0=E7=BB=84=EF=BC=8C=E5=9B=A0=E4=B8=BA?= =?UTF-8?q?npu=E8=AE=BF=E9=97=AE=E6=97=B6=E6=98=AF=E6=8C=89=E7=85=A7?= =?UTF-8?q?=E4=B8=80=E7=BB=B4=E6=95=B0=E7=BB=84=E5=8E=BB=E8=AE=BF=E9=97=AE?= =?UTF-8?q?=E7=9A=84=EF=BC=8C=E6=AD=A4=E5=A4=84=E8=8B=A5=E5=BC=80=E8=BE=9F?= =?UTF-8?q?=E5=A4=9A=E7=BB=B4=E6=95=B0=E7=BB=84=E5=8F=AF=E8=83=BD=E9=9D=A2?= =?UTF-8?q?=E4=B8=B4=E6=BD=9C=E5=9C=A8=E7=9A=84=E5=86=85=E5=AD=98=E4=B8=8D?= =?UTF-8?q?=E8=BF=9E=E7=BB=AD=E7=9A=84=E9=97=AE=E9=A2=98=EF=BC=9B3.inchane?= =?UTF-8?q?l=E8=AE=A1=E7=AE=97=E6=9C=89=E8=AF=AF=EF=BC=8C=E4=BF=AE?= =?UTF-8?q?=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: stormkingz --- mx_driving/csrc/SubmSparseCov3dGrad.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mx_driving/csrc/SubmSparseCov3dGrad.cpp b/mx_driving/csrc/SubmSparseCov3dGrad.cpp index d85993ad..3fcf5f3f 100644 --- a/mx_driving/csrc/SubmSparseCov3dGrad.cpp +++ b/mx_driving/csrc/SubmSparseCov3dGrad.cpp @@ -23,12 +23,13 @@ at::Tensor npu_subm_sparse_conv3d_grad(const at::Tensor& ouidx_offset, const at: { auto weight_size = weight.sizes(); int64_t kernelsum = 1; - for (int32_t i = 0; i < weight_size.size() - 2; i++) { - kernelsum *= weight_size[i]; + for (int32_t i = 0; i < kernel_size.size(); i++) { + kernelsum *= kernel_size[i]; } - c10::SmallVector output_size = {indices_number, kernelsum, weight_size[4]}; + int outchannel = weight_size[4]; + int inchannel = weight_size[3]; + c10::SmallVector output_size = {indices_number * kernelsum * outchannel}; at::Tensor out = at::empty(output_size, weight.options()).fill_(0); - int32_t inchannel = kernel_size[3]; EXEC_NPU_CMD(aclnnSubmSparseConv3dGrad, ouidx_offset, valid_indices, grad, kernel_size, inchannel, out); return out; } -- Gitee From 3a59456391f37bc8f76ae149787263e2bd2e17ee Mon Sep 17 00:00:00 2001 From: stormkingz Date: Fri, 7 Mar 2025 06:13:01 +0000 Subject: [PATCH 2/2] =?UTF-8?q?update=20kernels/op=5Fkernel/subm=5Fsparse?= =?UTF-8?q?=5Fconv3d=5Fgrad.cpp.=201.=E4=BF=AE=E6=AD=A3outputGm=E7=9A=84?= =?UTF-8?q?=E7=BB=B4=E5=BA=A6=EF=BC=9B2.=E5=8E=BB=E6=8E=89=E6=9C=AA?= =?UTF-8?q?=E4=BD=BF=E7=94=A8=E7=9A=84copyParams=5Fout=E7=9A=84=E5=AE=9A?= =?UTF-8?q?=E4=B9=89;?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: stormkingz --- kernels/op_kernel/subm_sparse_conv3d_grad.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernels/op_kernel/subm_sparse_conv3d_grad.cpp b/kernels/op_kernel/subm_sparse_conv3d_grad.cpp index 1ac7871f..6f94b8fc 100644 --- a/kernels/op_kernel/subm_sparse_conv3d_grad.cpp +++ b/kernels/op_kernel/subm_sparse_conv3d_grad.cpp @@ -65,7 +65,7 @@ public: validIndicesGm.SetGlobalBuffer((__gm__ DTYPE_OUTIDX_OFFSET*)valid_indices, this->valid_number); GradOutGm.SetGlobalBuffer((__gm__ DTYPE_GRAD_OUT_FEATURES*)grad_out_features, this->indices_number * this->outchannel); outputGm.SetGlobalBuffer( - (__gm__ DTYPE_GRAD_OUT_FEATURES*)grad_out_features_iml2col, this->indices_number * total_kernel_size * this->inchannel); + (__gm__ DTYPE_GRAD_OUT_FEATURES*)grad_out_features_iml2col, this->indices_number * total_kernel_size * this->outchannel); pipe->InitBuffer(inQueueOffset, 1, this->available_ub_size * sizeof(DTYPE_OUTIDX_OFFSET)); pipe->InitBuffer(inQueueValid, 1, this->available_ub_size * sizeof(DTYPE_OUTIDX_OFFSET)); pipe->InitBuffer(inQueueGrad, 1, this->outchannel * total_kernel_size * sizeof(DTYPE_GRAD_OUT_FEATURES)); @@ -111,8 +111,6 @@ private: copyParams_valid = {1, (uint16_t)(tensor_size * sizeof(DTYPE_OUTIDX_OFFSET)), 0, 0}; auto outchannel_ailgn_32b = AlignUp(this->outchannel, 8); DataCopyPadParams gradpadParams = {true, 0, (uint8_t)(outchannel_ailgn_32b-this->outchannel), 0}; - DataCopyParams copyParams_out = {(uint16_t)(total_kernel_size), - (uint16_t)(this->outchannel * sizeof(DTYPE_OUTIDX_OFFSET)), 0, 0}; DataCopyPad(indices_ub, validIndicesGm[address], copyParams_valid, padParams); DataCopyPad(offset_ub, outidxOffsetGm[address], copyParams_valid, padParams); PipeBarrier(); -- Gitee