diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000000000000000000000000000000000000..379eefb8e9e8cbef43f5441cb6337a1d9f353f02
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,15 @@
+BasedOnStyle: Google
+ColumnLimit: 120
+IndentWidth: 4
+SortIncludes: false
+
+# poniter aligment
+DerivePointerAlignment: false
+PointerAlignment: Left
+
+# single line layout
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
diff --git a/.gitignore b/.gitignore
index 658649e038829e59312962f906c0013e1bf654b2..3d91b143a2f6b6a59385579c9dc1c14eed6ea0f9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,2 @@
 __pycache__/
-.DS_Store
\ No newline at end of file
+.DS_Store
diff --git a/ads/common/ops/bev_pool.py b/ads/common/ops/bev_pool.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a1cd2924ce14116fec3e02e275ac486da389a55
--- /dev/null
+++ b/ads/common/ops/bev_pool.py
@@ -0,0 +1,60 @@
+import ads_c
+import torch
+
+
+class BEVPool(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, feats, geom_feats, ranks, B, D, H, W):
+        kept = torch.ones(feats.shape[0], device=feats.device, dtype=torch.bool)
+        kept[1:] = ranks[1:] != ranks[:-1]
+        interval_starts = torch.where(kept)[0].int()
+        interval_lengths = torch.zeros_like(interval_starts, dtype=torch.int64)
+        interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
+        interval_lengths[-1] = feats.shape[0] - interval_starts[-1]
+        geom_feats = geom_feats.int()
+
+        out = ads_c.npu_bev_pool(
+            feats,
+            geom_feats,
+            interval_lengths,
+            interval_starts,
+            B,
+            D,
+            H,
+            W,
+        )
+
+        ctx.save_for_backward(interval_starts, interval_lengths, geom_feats)
+        ctx.saved_shapes = B, D, H, W
+        return out
+
+    @staticmethod
+    def backward(ctx, grad_out):
+        interval_starts, interval_lengths, geom_feats = ctx.saved_tensors
+        B, D, H, W = ctx.saved_shapes
+
+        grad_out = grad_out.contiguous()
+        grad_feats = ads_c.npu_bev_pool_backward(
+            grad_out,
+            geom_feats,
+            interval_lengths,
+            interval_starts,
+            B,
+            D,
+            H,
+            W,
+        )
+
+        return grad_feats, None, None, None, None, None, None
+
+
+def bev_pool(feats, geom_feats, B, D, H, W):
+    assert feats.shape[0] == geom_feats.shape[0]
+
+    ranks = geom_feats[:, 0] * (W * D * B) + geom_feats[:, 1] * (D * B) + geom_feats[:, 2] * B + geom_feats[:, 3]
+    indices = ranks.argsort()
+    feats, geom_feats, ranks = feats[indices], geom_feats[indices], ranks[indices]
+
+    out = BEVPool.apply(feats, geom_feats, ranks, B, D, H, W)
+    out = out.permute(0, 4, 1, 2, 3).contiguous()
+    return out
diff --git a/ads/common/ops/csrc/BEVPoolBackwardKernelNpu.cpp b/ads/common/ops/csrc/BEVPoolBackwardKernelNpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..78537c4019164d291c4ae7c05d65dc4b1d7e5b36
--- /dev/null
+++ b/ads/common/ops/csrc/BEVPoolBackwardKernelNpu.cpp
@@ -0,0 +1,55 @@
+// Copyright (c) 2023 Huawei Technologies Co., Ltd
+// Copyright (c) 2019, Facebook CORPORATION.
+// All rights reserved.
+//
+// Licensed under the BSD 3-Clause License  (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "torch_npu/csrc/framework/OpCommand.h"
+#include "torch_npu/csrc/framework/utils/OpPreparation.h"
+#include "torch_npu/csrc/framework/utils/NpuUtils.h"
+#include "torch_npu/csrc/aten/NPUNativeFunctions.h"
+#include "torch_npu/csrc/aten/CustomFunctions.h"
+#include "functions.h"
+#include "common.h"
+#include "OpApiCommon.h"
+
+namespace {
+constexpr int64_t N_IDX = 0;
+constexpr int64_t C_IDX = 4;
+constexpr int64_t N_INTERVAL_IDX = 0;
+}  // namespace
+/*
+ * Function: pilar pooing, bev_pool_backward
+ * Args:
+ * grad_out: input grad, 5D tensor(b, d, h, w, c)
+ * geom_feats: input coords, 2D tensor(n, 4)
+ * interval_lengths: the number of points in each interval, 1D tensor(n_interval)
+ * interval_starts: starting position for pooled point, 1D tensor(n_interval)
+ * Return:
+ * grad_feats: output grad, 5D tensor(b, d, h, w, c)
+ */
+at::Tensor npu_bev_pool_backward(const at::Tensor grad_out, const at::Tensor geom_feats,
+                                 const at::Tensor interval_lengths, const at::Tensor interval_starts, int64_t b,
+                                 int64_t d, int64_t h, int64_t w) {
+    TORCH_CHECK(grad_out.dim() == 5, "grad_out must be 5D tensor(b, d, h, w, c)");
+    TORCH_CHECK(geom_feats.dim() == 2, "coords must be 2D tensor(n, 4)");
+    auto n = geom_feats.size(N_IDX);
+    auto c = grad_out.size(C_IDX);
+    auto n_interval = interval_lengths.size(N_INTERVAL_IDX);
+    TORCH_CHECK(interval_starts.size(N_INTERVAL_IDX) == n_interval,
+                "interval_starts and interval_lengths must have same size");
+
+    auto grad_feats = at::zeros({n, c}, grad_out.options());
+    EXEC_NPU_CMD(aclnnBEVPoolGrad, grad_out, geom_feats, interval_lengths, interval_starts, b, d, h, w, n, c,
+                 grad_feats);
+}
diff --git a/ads/common/ops/csrc/functions.h b/ads/common/ops/csrc/functions.h
index 1384a1538dd30108b58583a791c79ad0bf2690b1..e06051a56822471aca5d89ed631123435f068a15 100644
--- a/ads/common/ops/csrc/functions.h
+++ b/ads/common/ops/csrc/functions.h
@@ -14,124 +14,70 @@
 #ifndef __FUNCTIONS_H__
 #define __FUNCTIONS_H__
 
-#include <ATen/Tensor.h>
 #include <ATen/ATen.h>
+#include <ATen/Tensor.h>
 #include <ATen/core/Scalar.h>
+#include <pybind11/numpy.h>
 #include <torch/extension.h>
 #include <torch/library.h>
-#include <pybind11/numpy.h>
 
-void init_common(pybind11::module &m);
+void init_common(pybind11::module& m);
 
-std::tuple<at::Tensor, at::Tensor> npu_scatter_max(const at::Tensor& updates, const at::Tensor& indices, c10::optional<at::Tensor> out);
+std::tuple<at::Tensor, at::Tensor> npu_scatter_max(const at::Tensor& updates, const at::Tensor& indices,
+                                                   c10::optional<at::Tensor> out);
 at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segment_ids, const at::Tensor& num_segments);
 
-at::Tensor npu_rotated_box_decode(const at::Tensor &self, const at::Tensor &deltas, const at::Tensor &weight);
-at::Tensor npu_rotated_box_encode(
-    const at::Tensor& self,
-    const at::Tensor& gtBox,
-    const at::Tensor& weight);
-at::Tensor npu_rotated_iou(
-    const at::Tensor& boxes,
-    const at::Tensor& query_boxes,
-    bool trans,
-    int64_t mode,
-    bool is_cross,
-    double v_threshold,
-    double e_threshold);
-at::Tensor npu_rotated_overlaps(
-    const at::Tensor& self,
-    const at::Tensor& query_boxes,
-    bool trans);
+at::Tensor npu_rotated_box_decode(const at::Tensor& self, const at::Tensor& deltas, const at::Tensor& weight);
+at::Tensor npu_rotated_box_encode(const at::Tensor& self, const at::Tensor& gtBox, const at::Tensor& weight);
+at::Tensor npu_rotated_iou(const at::Tensor& boxes, const at::Tensor& query_boxes, bool trans, int64_t mode,
+                           bool is_cross, double v_threshold, double e_threshold);
+at::Tensor npu_rotated_overlaps(const at::Tensor& self, const at::Tensor& query_boxes, bool trans);
 at::Tensor npu_scatter(const at::Tensor& self, const at::Tensor& indices, const at::Tensor& updates, int64_t dim);
 at::Tensor npu_sign_bits_pack(const at::Tensor& self, int64_t size);
 at::Tensor npu_sign_bits_unpack(py::args args);
-at::Tensor npu_softmax_cross_entropy_with_logits(const at::Tensor &self, const at::Tensor &lables);
+at::Tensor npu_softmax_cross_entropy_with_logits(const at::Tensor& self, const at::Tensor& lables);
 at::Tensor npu_stride_add(py::args args);
-at::Tensor npu_transpose(const at::Tensor &self, at::IntArrayRef perm, bool require_contiguous);
-at::Tensor npu_yolo_boxes_encode(
-    const at::Tensor& anchor_boxes,
-    const at::Tensor& gt_bboxes,
-    const at::Tensor& stride,
-    bool performance_mode);
+at::Tensor npu_transpose(const at::Tensor& self, at::IntArrayRef perm, bool require_contiguous);
+at::Tensor npu_yolo_boxes_encode(const at::Tensor& anchor_boxes, const at::Tensor& gt_bboxes, const at::Tensor& stride,
+                                 bool performance_mode);
 at::Tensor npu_scatter(const at::Tensor& self, const at::Tensor& indices, const at::Tensor& updates, int64_t dim);
-at::Tensor npu_rotary_mul(const at::Tensor &self, const at::Tensor &r1, const at::Tensor &r2);
+at::Tensor npu_rotary_mul(const at::Tensor& self, const at::Tensor& r1, const at::Tensor& r2);
 at::Tensor npu_silu(const at::Tensor& self);
 at::Tensor& npu_silu_(at::Tensor& self);
 at::Tensor npu_abs(const at::Tensor& self);
 at::Tensor npu_fast_gelu_backward(const at::Tensor& grad, const at::Tensor& self);
 at::Tensor npu_fast_gelu(const at::Tensor& self);
-at::Tensor npu_anchor_response_flags(const at::Tensor& self, at::IntArrayRef featmap_size, at::IntArrayRef stride, int64_t num_base_anchors);
-at::Tensor npu_bounding_box_decode(
-    const at::Tensor& rois,
-    const at::Tensor& deltas,
-    double means0,
-    double means1,
-    double means2,
-    double means3,
-    double stds0,
-    double stds1,
-    double stds2,
-    double stds3,
-    at::IntArrayRef max_shape,
-    double wh_ratio_clip);
-at::Tensor npu_bounding_box_encode(
-    const at::Tensor& anchor_box,
-    const at::Tensor& ground_truth_box,
-    double means0,
-    double means1,
-    double means2,
-    double means3,
-    double stds0,
-    double stds1,
-    double stds2,
-    double stds3);
+at::Tensor npu_anchor_response_flags(const at::Tensor& self, at::IntArrayRef featmap_size, at::IntArrayRef stride,
+                                     int64_t num_base_anchors);
+at::Tensor npu_bounding_box_decode(const at::Tensor& rois, const at::Tensor& deltas, double means0, double means1,
+                                   double means2, double means3, double stds0, double stds1, double stds2, double stds3,
+                                   at::IntArrayRef max_shape, double wh_ratio_clip);
+at::Tensor npu_bounding_box_encode(const at::Tensor& anchor_box, const at::Tensor& ground_truth_box, double means0,
+                                   double means1, double means2, double means3, double stds0, double stds1,
+                                   double stds2, double stds3);
 std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor> npu_batch_nms(
-    const at::Tensor& self,
-    const at::Tensor& scores,
-    double score_threshold,
-    double iou_threshold,
-    int64_t max_size_per_class,
-    int64_t max_total_size,
-    bool change_coordinate_frame,
-    bool transpose_box);
-at::Tensor npu_confusion_transpose(
-    const at::Tensor& self,
-    at::IntArrayRef perm,
-    at::IntArrayRef shape,
-    bool transpose_first);
-at::Tensor npu_confusion_transpose_backward(
-    const at::Tensor& grad,
-    at::IntArrayRef perm,
-    at::IntArrayRef shape,
-    bool transpose_first);
-at::Tensor npu_conv_transpose2d(
-    const at::Tensor& input,
-    const at::Tensor& weight,
-    const c10::optional<at::Tensor>& bias_opt,
-    at::IntArrayRef padding,
-    at::IntArrayRef output_padding,
-    at::IntArrayRef stride,
-    at::IntArrayRef dilation,
-    int64_t groups);
+    const at::Tensor& self, const at::Tensor& scores, double score_threshold, double iou_threshold,
+    int64_t max_size_per_class, int64_t max_total_size, bool change_coordinate_frame, bool transpose_box);
+at::Tensor npu_confusion_transpose(const at::Tensor& self, at::IntArrayRef perm, at::IntArrayRef shape,
+                                   bool transpose_first);
+at::Tensor npu_confusion_transpose_backward(const at::Tensor& grad, at::IntArrayRef perm, at::IntArrayRef shape,
+                                            bool transpose_first);
+at::Tensor npu_conv_transpose2d(const at::Tensor& input, const at::Tensor& weight,
+                                const c10::optional<at::Tensor>& bias_opt, at::IntArrayRef padding,
+                                at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation,
+                                int64_t groups);
 at::Tensor npu_broadcast(const at::Tensor& self, at::IntArrayRef size);
 at::Tensor& npu_broadcast_out(const at::Tensor& self, at::IntArrayRef size, at::Tensor& result);
-at::Tensor npu_moe_tutel(
-    const at::Tensor &self,
-    const at::Tensor &gates,
-    const at::Tensor &indices,
-    const at::Tensor &locations,
-    int64_t capacity);
-at::Tensor npu_moe_tutel_data_backward(
-    const at::Tensor &y_grad,
-    const at::Tensor &gates,
-    const at::Tensor &indices,
-    const at::Tensor &locations);
-at::Tensor npu_moe_tutel_gate_backward(
-    const at::Tensor &self,
-    const at::Tensor &y_grad,
-    const at::Tensor &indices,
-    const at::Tensor &locations);
+at::Tensor npu_moe_tutel(const at::Tensor& self, const at::Tensor& gates, const at::Tensor& indices,
+                         const at::Tensor& locations, int64_t capacity);
+at::Tensor npu_moe_tutel_data_backward(const at::Tensor& y_grad, const at::Tensor& gates, const at::Tensor& indices,
+                                       const at::Tensor& locations);
+at::Tensor npu_moe_tutel_gate_backward(const at::Tensor& self, const at::Tensor& y_grad, const at::Tensor& indices,
+                                       const at::Tensor& locations);
 
-at::Tensor npu_ads_add(const at::Tensor &tensor1, const at::Tensor &tensor2);
-#endif // __FUNCTIONS_H__
+at::Tensor npu_ads_add(const at::Tensor& tensor1, const at::Tensor& tensor2);
+at::Tensor npu_bev_pool(const at::Tensor feats, const at::Tensor geom_feats, const at::Tensor interval_lengths,
+                        const at::Tensor interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
+at::Tensor npu_bev_pool_backward(const at::Tensor grad_out, const at::Tensor geom_feats, const at::Tensor interval_lengths,
+                                 const at::Tensor interval_starts, int64_t b, int64_t d, int64_t h, int64_t w);
+#endif  // __FUNCTIONS_H__
diff --git a/ads/common/ops/csrc/pybind.cpp b/ads/common/ops/csrc/pybind.cpp
index 5638ac6b09c986dba4e68e9fe99862475dbaa323..630f1a958cedd882c025cb570e19ca3cd97e752a 100644
--- a/ads/common/ops/csrc/pybind.cpp
+++ b/ads/common/ops/csrc/pybind.cpp
@@ -73,4 +73,8 @@ void init_common(pybind11::module &m)
     m.def("npu_moe_tutel_gate_backward", &npu_moe_tutel_gate_backward, "npu_moe_tutel_gate_backward NPU version");
     // ads_add
     m.def("npu_ads_add", &npu_ads_add);
+    
+    // bev_pool
+    m.def("npu_bev_pool", &npu_bev_pool, "npu_bev_pool NPU version");
+    m.def("npu_bev_pool_backward", &npu_bev_pool_backward, "npu_bev_pool_backward NPU version");
 }
diff --git a/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt b/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt
index 40dd51cfac524b0a9607b7d8b2813edd2210c509..b467eef89d52edbb521326ba3ad01afe5afacb47 100644
--- a/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt
+++ b/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt
@@ -1,4 +1,3 @@
-
 aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} ops_srcs)
 
 opbuild(OPS_SRC ${ops_srcs}
@@ -28,6 +27,8 @@ target_link_libraries(cust_op_proto PRIVATE
 set_target_properties(cust_op_proto PROPERTIES OUTPUT_NAME
                       cust_opsproto_rt2.0
 )
+
+# tiling
 add_library(cust_optiling SHARED ${ops_srcs})
 target_compile_definitions(cust_optiling PRIVATE OP_TILING_LIB)
 target_compile_options(cust_optiling PRIVATE
diff --git a/ads/common/ops/kernels/ads_op/op_host/add_custom.cpp b/ads/common/ops/kernels/ads_op/op_host/add_custom.cpp
index 3a578d4ed44e8102b5285948093a22d88d88fa4e..9924452155aa14feaf3d5bcdd411eddae31365e4 100644
--- a/ads/common/ops/kernels/ads_op/op_host/add_custom.cpp
+++ b/ads/common/ops/kernels/ads_op/op_host/add_custom.cpp
@@ -1,4 +1,3 @@
-
 #include "add_custom_tiling.h"
 #include "register/op_def_registry.h"
 
diff --git a/ads/common/ops/kernels/ads_op/op_host/bev_pool.cpp b/ads/common/ops/kernels/ads_op/op_host/bev_pool.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d872093af06d708738b19b210f7d5c1d1eba9681
--- /dev/null
+++ b/ads/common/ops/kernels/ads_op/op_host/bev_pool.cpp
@@ -0,0 +1,133 @@
+#include <graph/types.h>
+#include <register/op_def.h>
+#include <cstdint>
+#include "bev_pool_tiling.h"
+#include "register/op_def_registry.h"
+#include "tiling/platform/platform_ascendc.h"
+
+namespace {
+constexpr size_t INTERVAL_START_IDX = 2;
+constexpr size_t B_IDX = 0;
+constexpr size_t D_IDX = 1;
+constexpr size_t H_IDX = 2;
+constexpr size_t W_IDX = 3;
+constexpr size_t N_IDX = 4;
+constexpr size_t C_IDX = 5;
+}  // namespace
+namespace optiling {
+static ge::graphStatus TilingForBEVPoolGrad(gert::TilingContext* context) {
+    BEVPoolTilingData tiling;
+    auto platform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
+    auto coreNum = platform.GetCoreNum();
+
+    auto intervalShape = context->GetInputShape(INTERVAL_START_IDX);
+    int32_t nInterval = intervalShape->GetStorageShape().GetDim(0);
+
+    int32_t usedCoreNum = std::min(int32_t(coreNum), nInterval);
+    tiling.set_usedCoreNum(usedCoreNum);
+    auto avgTaskNum = nInterval / usedCoreNum;
+    auto tailTaskNum = nInterval % usedCoreNum;
+    tiling.set_avgTaskNum(avgTaskNum);
+    tiling.set_tailTaskNum(tailTaskNum);
+
+    auto attrs = context->GetAttrs();
+    // TODO: check if attrs is null
+    auto getAttr = [attrs](size_t idx) -> int32_t {
+        auto ptr = attrs->GetInt(idx);
+        // TODO: check if ptr is null
+        return int32_t(*ptr);
+    };
+    auto b = getAttr(B_IDX);
+    auto d = getAttr(D_IDX);
+    auto h = getAttr(H_IDX);
+    auto w = getAttr(W_IDX);
+    auto n = getAttr(N_IDX);
+    auto c = getAttr(C_IDX);
+    tiling.set_c(c);
+    tiling.set_stride0(w * c);
+    tiling.set_stride1(c);
+    tiling.set_stride2(h * w * c);
+    tiling.set_stride3(d * h * w * c);
+
+    tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity());
+    context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
+    return ge::GRAPH_SUCCESS;
+}
+}  // namespace optiling
+
+namespace ge {
+static ge::graphStatus InferShapeForBEVPoolGrad(gert::InferShapeContext* context) {
+    const gert::Shape* x1_shape = context->GetInputShape(0);
+    gert::Shape* y_shape = context->GetOutputShape(0);
+    *y_shape = *x1_shape;
+    return GRAPH_SUCCESS;
+}
+}  // namespace ge
+
+namespace ops {
+
+/**
+ * @brief: BEVPoolGrad, the backward of bev_pool
+ * @par Inputs:
+ * grad_out: input grad, 5D tensor(b, d, h, w, c), dtype: float32, format: NDHWC, ND
+ * geom_feats: input coords, 2D tensor(n, 4), dtype: int32, format: ND
+ * interval_starts: starting position for pooled point, 1D tensor(n_interval), dtype: int32, format: ND
+ * interval_lengths: the number of points in each interval, 1D tensor(n_interval), dtype: int32, format: ND
+ * @par Outputs:
+ * grad_feats: output grad, 2D tensor(n, c), dtype: float32, format: ND
+ * @par Attributes:
+ * b: batch size, type: int
+ * d: depth, type: int
+ * w: width, type: int
+ * h: height, type: int
+ * n: number of points, type: int
+ * c: channels, type: int
+ **/
+class BEVPoolGrad : public OpDef {
+   public:
+    explicit BEVPoolGrad(const char* name) : OpDef(name) {
+        this->Input("grad_out")
+            .ParamType(REQUIRED)
+            .DataType({ge::DT_FLOAT})
+            .Format({ge::FORMAT_NDHWC, ge::FORMAT_ND})
+            .UnknownShapeFormat({ge::FORMAT_ND});
+        this->Input("geom_feats")
+            .ParamType(REQUIRED)
+            .DataType({ge::DT_INT64})
+            .Format({ge::FORMAT_ND})
+            .UnknownShapeFormat({ge::FORMAT_ND});
+        this->Input("interval_starts")
+            .ParamType(REQUIRED)
+            .DataType({ge::DT_INT64})
+            .Format({ge::FORMAT_ND})
+            .UnknownShapeFormat({ge::FORMAT_ND});
+        this->Input("interval_lengths")
+            .ParamType(REQUIRED)
+            .DataType({ge::DT_INT64})
+            .Format({ge::FORMAT_ND})
+            .UnknownShapeFormat({ge::FORMAT_ND});
+
+        this->Output("grad_feats")
+            .ParamType(REQUIRED)
+            .DataType({ge::DT_FLOAT})
+            .Format({ge::FORMAT_ND})
+            .UnknownShapeFormat({ge::FORMAT_ND});
+
+        this->Attr("b").AttrType(REQUIRED).Int();
+        this->Attr("d").AttrType(REQUIRED).Int();
+        this->Attr("w").AttrType(REQUIRED).Int();
+        this->Attr("h").AttrType(REQUIRED).Int();
+        this->Attr("n").AttrType(REQUIRED).Int();
+        this->Attr("c").AttrType(REQUIRED).Int();
+
+        this->SetInferShape(ge::InferShapeForBEVPoolGrad);
+
+        this->AICore().SetTiling(optiling::TilingForBEVPoolGrad);
+        this->AICore().AddConfig("ascend910b");
+        this->AICore().AddConfig("ascend910");
+        this->AICore().AddConfig("ascend310p");
+    }
+};
+
+OP_ADD(BEVPoolGrad);
+}  // namespace ops
diff --git a/ads/common/ops/kernels/ads_op/op_host/bev_pool_tiling.h b/ads/common/ops/kernels/ads_op/op_host/bev_pool_tiling.h
new file mode 100644
index 0000000000000000000000000000000000000000..3ba28f6888f7ca486a7f28638ef6d82ea86c6722
--- /dev/null
+++ b/ads/common/ops/kernels/ads_op/op_host/bev_pool_tiling.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved.
+ */
+#ifndef BEVPool_TILING_H
+#define BEVPool_TILING_H
+#include "register/tilingdata_base.h"
+
+namespace optiling {
+BEGIN_TILING_DATA_DEF(BEVPoolTilingData)
+TILING_DATA_FIELD_DEF(int32_t, usedCoreNum)
+TILING_DATA_FIELD_DEF(int32_t, avgTaskNum)
+TILING_DATA_FIELD_DEF(int32_t, tailTaskNum)
+TILING_DATA_FIELD_DEF(int32_t, c)
+TILING_DATA_FIELD_DEF(int32_t, stride0)
+TILING_DATA_FIELD_DEF(int32_t, stride1)
+TILING_DATA_FIELD_DEF(int32_t, stride2)
+TILING_DATA_FIELD_DEF(int32_t, stride3)
+END_TILING_DATA_DEF
+
+REGISTER_TILING_DATA_CLASS(BEVPool, BEVPoolTilingData)
+REGISTER_TILING_DATA_CLASS(BEVPoolGrad, BEVPoolTilingData)
+
+}  // namespace optiling
+#endif  // BEVPool_TILING_H
diff --git a/ads/common/ops/kernels/ads_op/op_kernel/bev_pool_grad.cpp b/ads/common/ops/kernels/ads_op/op_kernel/bev_pool_grad.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..113aa8d76da9f68b0fbfdac7e236944d735d66a3
--- /dev/null
+++ b/ads/common/ops/kernels/ads_op/op_kernel/bev_pool_grad.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved.
+ *
+ */
+using namespace AscendC;
+constexpr int32_t BUFFER_NUM = 2;  // tensor num for each queue
+
+tmplate<int32_t depth> class TConjungateQue : public TQueBind<Tposition::LCM, Tposition::LCM, depth, 0> {};
+
+class KernelBEVPoolGrad {
+   public:
+    __aicore__ inline KernelBEVPoolGrad() = delete;
+
+    __aicore__ inline KernelBEVPoolGrad(int32_t blkIdx, GM_ADDR gradOut, GM_ADDR geomFeats, GM_ADDR intervalStarts,
+                                        GM_ADDR intervalLengths, GM_ADDR gradFeats,
+                                        const BEVPoolTilingData& bevPoolTiling) {
+        blkIdx_ = blkIdx;
+
+        stride0_ = bevPoolTiling.get_stride0();
+        stride1_ = bevPoolTiling.get_stride1();
+        stride2_ = bevPoolTiling.get_stride2();
+        stride3_ = bevPoolTiling.get_stride3();
+        c_ = bevPoolTiling.get_c();
+        int32_t usedCoreNum = bevPoolTiling.get_usedCoreNum();
+        int32_t avgTaskNum = bevPoolTiling.get_avgTaskNum();
+        int32_t tailTaskNum = bevPoolTiling.get_tailTaskNum();
+        it_ = TaskIterator(blkIdx_, usedCoreNum, avgTaskNum, tailTaskNum);
+
+        cpOneParams_.blockLen = 1;
+        cpGradParams_.blockLen = DivCeil(c_, B32_BYTE_SIZE);
+
+        sGm_.setGlobalBuffer((__gm__ float*)intervalStarts);
+        lGm_.setGlobalBuffer((__gm__ float*)intervalLengths);
+        oGm_.setGlobalBuffer((__gm__ float*)gradOut);
+        gGm_.setGlobalBuffer((__gm__ float*)gradFeats);
+        fGm_.setGlobalBuffer((__gm__ float*)geomFeats);
+
+        pipe.InitBuffer(que_, BUFFER_NUM, AlignUp(c_, B32_BYTE_SIZE) * sizeof(float));
+        pipe.InitBuffer(sQue, BUFFER_NUM, ONE_BLK_SIZE);
+        pipe.InitBuffer(lQue, BUFFER_NUM, ONE_BLK_SIZE);
+        pipe.InitBuffer(gQue, BUFFER_NUM, ONE_BLK_SIZE);
+
+        evt1_ = pipe_.AllocEventID<HardEvent::S_MTE2>();
+        evt2_ = pipe_.AllocEventID<HardEvent::S_MTE2>();
+    }
+
+    __aicore__ inline void Process() {
+        while (it_.hasNext()) {
+            int32_t idx = it_.next();
+            int32_t start, length, offset;
+            CopyInInterval(idx);
+            GetInterval(idx, start, length);
+            CopyInGeom(start);
+            GetOffset(geomT, offset);
+            CopyInGrad(offset);
+            for (int32_t i = 0; i < l; ++l) {
+                DataCopy(gradOutGm_[i * stride0_ + offset], que_[i - start], c_);
+            }
+        }
+    }
+
+   private:
+    class TaskIterator {
+       public:
+        __aicore__ inline TaskIterator(int32_t blkIdx, int32_t blkDim, int32_t avgTaskNum, int32_t tailTaskNum) {
+            blkIdx_ = blkIdx;
+            nextIdx_ = blkIdx * avgTaskNum + (blkIdx < tailTaskNum ? blkIdx : tailTaskNum);
+            endIdx_ = nextIdx_ + avgTaskNum + (blkIdx < tailTaskNum ? 1 : 0);
+        }
+
+        __aicore__ inline bool HasNext() const {
+            return nextIdx_ < endIdx_;
+        }
+
+        __aicore__ inline int32_t Next() {
+            return nextIdx_++;
+        }
+
+       private:
+        int32_t blkIdx_, blkDim_;
+        int32_t nextIdx_, endIdx_;
+    };
+
+   private:
+    TPipe pipe_;
+    TConjungateQue<BUFFER_NUM> que_;
+    TQue<QuePosition::VECIN, BUFFER_NUM> sQue_, lQue_, gQue_;
+
+    GlobalTensor<float> oGm_;
+    GlobalTensor<float> fGm_;
+    GlobalTensor<int32_t> sGm_, lGm_, gGm_;
+
+    int32_t stride0_, stride1_, stride2_, stride3_;
+    int32_t c_;
+    TaskIterator it_;
+    DataCopyParams cpOneParams_, cpGradParams_;
+
+    TEventID evt1_, evt2_;
+
+   private:
+    __aicore__ inline void CopyInInterval(int32_t idx) {
+        auto startT = sQue_.AllocTensor<int32_t>();
+        auto lengthT = lQue_.AllocTensor<int32_t>();
+        auto geomT = gQue_.AllocTensor<int32_t>();
+
+        DataCopy(startT, sGm_[idx], cpOneParams_);
+        sQue_.EnQue(startT);
+        DataCopy(lengthT, lGm_[idx], cpOneParams_);
+        lQue_.EnQue(lengthT);
+        DataCopy(geomT, gGm_[idx], cpOneParams_);
+        gQue_.EnQue(geomT);
+    }
+
+    __aicore__ inline void GetInterval(int32_t idx, int32_t& start, int32_t& length) {
+        LocalTensor<int32_t> startT = sQue_.DeQue<int32_t>();
+        LocalTensor<int32_t> lengthT = lQue_.DeQue<int32_t>();
+        start = startT[0];
+        length = lengthT[0];
+
+        SetFlag<HardEvent::S_MTE2>(evt1_);
+    }
+
+    __aicore__ inline void CopyInGeom(int32_t start) {
+        WaitFlag<HardEvent::S_MTE2>(evt1_);
+
+        auto geomT = gQue_.AllocTensor<int32_t>();
+        DataCopy(geomT, gGm_[start], cpOneParams_);
+        gQue_.EnQue(geomT);
+    }
+    __aicore__ inline void GetOffset(int32_t idx, int32_t& offset) {
+        LocalTensor<int32_t> geomT = gQue_.DeQue<int32_t>();
+        offset = geomT[0] * stride0_ + geomT[1] * stride1_ + geomT[2] * stride2_ + geomT[3] * stride3_;
+
+        SetFlag<HardEvent::S_MTE2>(evt2_);
+    }
+
+    __aicore__ inline void CopyInGrad(int32_t offset) {
+        WaitFlag<HardEvent::S_MTE2>(evt2_);
+
+        auto gradOutT = que_.AllocTensor<float>();
+        DataCopy(gradOutT, oGm_[offset], cpGradParams_);
+        que_.EnQue(gradOutT);
+    }
+};
+
+extern "C" __global__ __aicore__ void bev_pool_grad(GM_ADDR gradOut, GM_ADDR geomFeats, GM_ADDR intervalStarts,
+                                                    GM_ADDR intervalLengths, GM_ADDR gradFeats, GM_ADDR workspace,
+                                                    GM_ADDR tiling) {
+    GET_TILING_DATA(bevPoolTiling, tiling);
+    if (!workspace) {
+        return;
+    }
+    SetSysWorkspace(workspace);
+
+    int32_t blkIdx = GetBlockIdx();
+    if (blkIdx >= bevPoolTiling.get_usedCoreNum()) {
+        return;
+    }
+
+    KernelBEVPoolGrad op(blkIdx, gradOut, geomFeats, intervalStarts, intervalLengths, gradFeats, bevPoolTiling);
+    op.Process();
+}
diff --git a/ads/include/op_log.h b/ads/include/op_log.h
new file mode 100644
index 0000000000000000000000000000000000000000..ba0e9ff8fc09b0fd6b73fcde6f868b3d6c2ed7f6
--- /dev/null
+++ b/ads/include/op_log.h
@@ -0,0 +1,304 @@
+#ifndef OPS_COMMON_INC_OP_LOG_H_
+#define OPS_COMMON_INC_OP_LOG_H_
+
+#include <string>
+#include <type_traits>
+#include "graph/operator.h"
+#include "graph/node.h"
+#include "common/util/error_manager/error_manager.h"
+
+#if !defined( __ANDROID__) && !defined(ANDROID)
+#include "toolchain/slog.h"
+#else
+#include <utils/Log.h>
+#endif
+
+#ifdef __GNUC__
+#include <unistd.h>
+#include <sys/syscall.h>
+#else
+#include "mmpa/mmpa_api.h"
+#endif
+
+#define OPPROTO_SUBMOD_NAME "OP_PROTO"
+
+class OpLog {
+ public:
+  static uint64_t GetTid() {
+#ifdef __GNUC__
+    const uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid));
+#else
+    const uint64_t tid = static_cast<uint64_t>(GetCurrentThreadId());
+#endif
+    return tid;
+  }
+};
+
+inline const char* get_cstr(const std::string& str) {
+  return str.c_str();
+}
+
+inline const char* get_cstr(const char* str) {
+  return str;
+}
+
+inline const std::string& get_op_info(const std::string& str) {
+  return str;
+}
+
+inline const char* get_op_info(const char* str) {
+  return str;
+}
+
+inline std::string get_op_info(const ge::NodePtr& node) {
+  return node != nullptr ? node->GetType() + ":" + node->GetName() : "nil";
+}
+
+inline std::string get_op_info(const ge::OpDescPtr& node) {
+  return node != nullptr ? node->GetType() + ":" + node->GetName() : "nil";
+}
+
+template <class T>
+constexpr bool is_ge_operator_type() {
+  return std::is_base_of<ge::Operator, typename std::decay<T>::type>::value;
+}
+
+template <class T>
+typename std::enable_if<is_ge_operator_type<T>(), std::string>::type get_op_info(const T& op) {
+  ge::AscendString name;
+  ge::AscendString type;
+  auto get_name_ret = op.GetName(name);
+  auto get_type_ret = op.GetOpType(type);
+  std::string op_info = get_type_ret == ge::GRAPH_SUCCESS ? type.GetString() : "nil";
+  op_info += ":";
+  op_info += get_name_ret == ge::GRAPH_SUCCESS ? name.GetString() : "nil";
+  return op_info;
+}
+
+template <class T>
+constexpr bool is_context_type() {
+  return !std::is_base_of<ge::Operator, typename std::decay<T>::type>::value &&
+      !std::is_same<const char*, typename std::decay<T>::type>::value &&
+      !std::is_same<char*, typename std::decay<T>::type>::value &&
+      !std::is_same<std::string, typename std::decay<T>::type>::value &&
+      !std::is_same<ge::NodePtr, typename std::decay<T>::type>::value &&
+      !std::is_same<ge::OpDescPtr, typename std::decay<T>::type>::value;
+}
+
+template <class T>
+typename std::enable_if<is_context_type<T>(), std::string>::type get_op_info(T context) {
+  if (context == nullptr) {
+    return "nil:nil";
+  }
+  std::string op_info = context->GetNodeType() != nullptr ? context->GetNodeType() : "nil";
+  op_info += ":";
+  op_info += context->GetNodeName() != nullptr ? context->GetNodeName() : "nil";
+  return op_info;
+}
+
+template <typename T>
+std::string TbeGetName(const T& op) {
+  ge::AscendString op_ascend_name;
+  ge::graphStatus ret = op.GetName(op_ascend_name);
+  if (ret != ge::GRAPH_SUCCESS) {
+    std::string op_name = "None";
+    return op_name;
+  }
+  return op_ascend_name.GetString();
+}
+
+template <typename T>
+std::string TbeGetOpType(const T& op) {
+  ge::AscendString op_ascend_name;
+  ge::graphStatus ret = op.GetOpType(op_ascend_name);
+  if (ret != ge::GRAPH_SUCCESS) {
+    std::string op_name = "None";
+    return op_name;
+  }
+  return op_ascend_name.GetString();
+}
+
+#define CHECK_DIVISOR_ZERO(divisor) \
+  if (divisor == 0) {\
+    return;\
+  }
+
+#define CHECK_DIVISOR_ZERO_RET(divisor, ret) \
+  if (divisor == 0) {                        \
+    return ret;                              \
+  }
+
+#define OP_CHECK(cond, log_func, return_expr) \
+  if (cond) {                                 \
+    log_func;                                 \
+    return_expr;                              \
+  }
+
+#if !defined( __ANDROID__) && !defined(ANDROID)
+#define AICPU_OP_LOGI(opname, ...) AICPU_D_OP_LOGI(get_op_info(opname), __VA_ARGS__)
+#define AICPU_OP_LOGW(opname, ...) AICPU_D_OP_LOGW(get_op_info(opname), __VA_ARGS__)
+#define AICPU_OP_LOGD(opname, ...) AICPU_D_OP_LOGD(get_op_info(opname), __VA_ARGS__)
+#define AICPU_OP_LOGE_WITHOUT_REPORT(opname, ...) AICPU_D_OP_LOGE(get_op_info(opname), __VA_ARGS__)
+#define AICPU_OP_LOGE(op_name, ...)                       \
+  do {                                              \
+    AICPU_OP_LOGE_WITHOUT_REPORT(op_name, ##__VA_ARGS__); \
+    REPORT_INNER_ERROR("EZ9999", ##__VA_ARGS__);    \
+  } while (0)
+
+#define OP_LOGI(opname, ...) D_OP_LOGI(get_op_info(opname), __VA_ARGS__)
+#define OP_LOGW(opname, ...) D_OP_LOGW(get_op_info(opname), __VA_ARGS__)
+
+#define OP_LOGE_WITHOUT_REPORT(opname, ...) D_OP_LOGE(get_op_info(opname), __VA_ARGS__)
+#define OP_LOGE(op_name, ...)                       \
+  do {                                              \
+    OP_LOGE_WITHOUT_REPORT(op_name, ##__VA_ARGS__); \
+    REPORT_INNER_ERROR("EZ9999", ##__VA_ARGS__);    \
+  } while (0)
+
+#define OP_LOGD(opname, ...) D_OP_LOGD(get_op_info(opname), __VA_ARGS__)
+#define OP_EVENT(opname, ...) D_OP_EVENT(get_op_info(opname), __VA_ARGS__)
+#define GE_OP_LOGI(opname, ...) GE_D_OP_LOGI(get_op_info(opname), __VA_ARGS__)
+#define GE_OP_LOGW(opname, ...) GE_D_OP_LOGW(get_op_info(opname), __VA_ARGS__)
+#define GE_OP_LOGE(opname, ...) GE_D_OP_LOGE(get_op_info(opname), __VA_ARGS__)
+#define GE_OP_LOGD(opname, ...) GE_D_OP_LOGD(get_op_info(opname), __VA_ARGS__)
+#define GE_OP_LOGE_WITH_REPORT(opname, format, ...)                                                   \
+  do {                                                                                                \
+    GE_OP_LOGE(opname, format, ##__VA_ARGS__);                                                        \
+    REPORT_INNER_ERROR("E19999", "[OpName:%s]" format, get_cstr(get_op_info(opname)), ##__VA_ARGS__); \
+  } while (0)
+
+#define FUSION_PASS_LOGI(...) D_FUSION_PASS_LOGI(__VA_ARGS__)
+#define FUSION_PASS_LOGW(...) D_FUSION_PASS_LOGW(__VA_ARGS__)
+#define FUSION_PASS_LOGE(...) D_FUSION_PASS_LOGE(__VA_ARGS__)
+#define FUSION_PASS_LOGD(...) D_FUSION_PASS_LOGD(__VA_ARGS__)
+#else
+#define AICPU_OP_LOGI(opname, ...)
+#define AICPU_OP_LOGW(opname, ...)
+#define AICPU_OP_LOGE(opname, ...)
+#define AICPU_OP_LOGD(opname, ...)
+#define AICPU_OP_LOGE_WITHOUT_REPORT(opname, ...)
+#define OP_LOGI(opname, ...)
+#define OP_LOGW(opname, ...)
+#define OP_LOGE_WITHOUT_REPORT(opname, ...)
+#define OP_LOGE(opname, ...)
+#define OP_LOGD(opname, ...)
+#define OP_EVENT(opname, ...)
+#define FUSION_PASS_LOGI(...)
+#define FUSION_PASS_LOGW(...)
+#define FUSION_PASS_LOGE(...)
+#define FUSION_PASS_LOGD(...)
+#endif
+
+#define OpLogSub(moduleId, level, op_info, fmt, ...)                                                                   \
+  DlogSub(static_cast<int>(moduleId), OPPROTO_SUBMOD_NAME, level, "[%s][%" PRIu64 "] OpName:[%s] " #fmt, __FUNCTION__, \
+          OpLog::GetTid(), get_cstr(op_info), ##__VA_ARGS__)
+
+#if !defined( __ANDROID__) && !defined(ANDROID)
+#define AICPU_D_OP_LOGI(opname, fmt, ...) OpLogSub(AICPU, DLOG_INFO, opname, fmt, ##__VA_ARGS__)
+#define AICPU_D_OP_LOGW(opname, fmt, ...) OpLogSub(AICPU, DLOG_WARN, opname, fmt, ##__VA_ARGS__)
+#define AICPU_D_OP_LOGE(opname, fmt, ...) OpLogSub(AICPU, DLOG_ERROR, opname, fmt, ##__VA_ARGS__)
+#define AICPU_D_OP_LOGD(opname, fmt, ...) OpLogSub(AICPU, DLOG_DEBUG, opname, fmt, ##__VA_ARGS__)
+#define D_OP_LOGI(opname, fmt, ...) OpLogSub(OP, DLOG_INFO, opname, fmt, ##__VA_ARGS__)
+#define D_OP_LOGW(opname, fmt, ...) OpLogSub(OP, DLOG_WARN, opname, fmt, ##__VA_ARGS__)
+#define D_OP_LOGE(opname, fmt, ...) OpLogSub(OP, DLOG_ERROR, opname, fmt, ##__VA_ARGS__)
+#define D_OP_LOGD(opname, fmt, ...) OpLogSub(OP, DLOG_DEBUG, opname, fmt, ##__VA_ARGS__)
+#define D_OP_EVENT(opname, fmt, ...) OpLogSub(OP, DLOG_EVENT, opname, fmt, ##__VA_ARGS__)
+#define GE_D_OP_LOGI(opname, fmt, ...) OpLogSub(GE, DLOG_INFO, opname, fmt, ##__VA_ARGS__)
+#define GE_D_OP_LOGW(opname, fmt, ...) OpLogSub(GE, DLOG_WARN, opname, fmt, ##__VA_ARGS__)
+#define GE_D_OP_LOGE(opname, fmt, ...) OpLogSub(GE, DLOG_ERROR, opname, fmt, ##__VA_ARGS__)
+#define GE_D_OP_LOGD(opname, fmt, ...) OpLogSub(GE, DLOG_DEBUG, opname, fmt, ##__VA_ARGS__)
+#define D_FUSION_PASS_LOGI(fmt, ...) \
+  DlogSub(FE, OPPROTO_SUBMOD_NAME, DLOG_INFO, " %s:%d " #fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__)
+#define D_FUSION_PASS_LOGW(fmt, ...) \
+  DlogSub(FE, OPPROTO_SUBMOD_NAME, DLOG_WARN, " %s:%d " #fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__)
+#define D_FUSION_PASS_LOGE(fmt, ...) \
+  DlogSub(FE, OPPROTO_SUBMOD_NAME, DLOG_ERROR, " %s:%d " #fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__)
+#define D_FUSION_PASS_LOGD(fmt, ...) \
+  DlogSub(FE, OPPROTO_SUBMOD_NAME, DLOG_DEBUG, " %s:%d " #fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__)
+#else
+#define AICPU_D_OP_LOGI(opname, fmt, ...)
+#define AICPU_D_OP_LOGW(opname, fmt, ...)
+#define AICPU_D_OP_LOGE(opname, fmt, ...)
+#define AICPU_D_OP_LOGD(opname, fmt, ...)
+#define D_OP_LOGI(opname, fmt, ...)
+#define D_OP_LOGW(opname, fmt, ...)
+#define D_OP_LOGE(opname, fmt, ...)
+#define D_OP_LOGD(opname, fmt, ...)
+#define D_OP_EVENT(opname, fmt, ...)
+#define D_FUSION_PASS_LOGI(fmt, ...)
+#define D_FUSION_PASS_LOGW(fmt, ...)
+#define D_FUSION_PASS_LOGE(fmt, ...)
+#define D_FUSION_PASS_LOGD(fmt, ...)
+#endif
+
+#define unlikely(x) __builtin_expect((x), 0)
+#define likely(x) __builtin_expect((x), 1)
+
+#define OP_LOGE_IF(condition, return_value, op_name, fmt, ...)                                                 \
+  static_assert(std::is_same<bool, std::decay<decltype(condition)>::type>::value, "condition should be bool"); \
+  do {                                                                                                         \
+    if (unlikely(condition)) {                                                                                 \
+      OP_LOGE(op_name, fmt, ##__VA_ARGS__);                                                                    \
+      return return_value;                                                                                     \
+    }                                                                                                          \
+  } while (0)
+
+#define OP_LOGW_IF(condition, op_name, fmt, ...)                                                               \
+  static_assert(std::is_same<bool, std::decay<decltype(condition)>::type>::value, "condition should be bool"); \
+  do {                                                                                                         \
+    if (unlikely(condition)) {                                                                                 \
+      OP_LOGW(op_name, fmt, ##__VA_ARGS__);                                                                    \
+    }                                                                                                          \
+  } while (0)
+
+#define OP_LOGI_IF_RETURN(condition, return_value, op_name, fmt, ...)                                          \
+  static_assert(std::is_same<bool, std::decay<decltype(condition)>::type>::value, "condition should be bool"); \
+  do {                                                                                                         \
+    if (unlikely(condition)) {                                                                                 \
+      OP_LOGI(op_name, fmt, ##__VA_ARGS__);                                                                    \
+      return return_value;                                                                                     \
+    }                                                                                                          \
+  } while (0)
+
+constexpr const int OP_MAX_LOG_SIZE = 16000;
+constexpr const int OP_MSG_HEADER_LEN = 200;
+// print very long log. long line will be split to multipile lines
+#define OP_LOG_FULL(level, opname, format, ...)                              \
+do {                                                                         \
+  if (0 == CheckLogLevel(OP, level)) {                                       \
+    break;                                                                   \
+  }                                                                          \
+  char msgbufxyz[OP_MAX_LOG_SIZE];                                           \
+  size_t msgmaxlen = (MSG_LENGTH - OP_MSG_HEADER_LEN);                       \
+  int rettmp = snprintf_s(msgbufxyz, sizeof(msgbufxyz), sizeof(msgbufxyz) - 1, format, ##__VA_ARGS__); \
+  if (rettmp == -1) {                                                        \
+    msgbufxyz[sizeof(msgbufxyz) - 1] = '\0';                                 \
+  }                                                                          \
+  size_t msglength = std::strlen(msgbufxyz);                                 \
+  if (msglength < msgmaxlen) {                                               \
+    OpLogSub(OP, level, opname, "%s", msgbufxyz);                            \
+    break;                                                                   \
+  }                                                                          \
+  char *msgchunkbegin = msgbufxyz;                                           \
+  char *msgchunkend = nullptr;                                               \
+  while (msgchunkbegin < msgbufxyz + msglength) {                            \
+    if (msgchunkbegin[0] == '\n') {                                          \
+      OpLogSub(OP, level, opname, "");                                       \
+      msgchunkbegin += 1;                                                    \
+      continue;                                                              \
+    }                                                                        \
+    msgchunkend = std::strchr(msgchunkbegin, '\n');                          \
+    if (msgchunkend == nullptr) {                                            \
+      msgchunkend = msgchunkbegin + std::strlen(msgchunkbegin);              \
+    }                                                                        \
+    while (msgchunkend > msgchunkbegin) {                                    \
+      std::string msgchunk(msgchunkbegin, std::min(msgmaxlen, static_cast<size_t>(msgchunkend - msgchunkbegin))); \
+      OpLogSub(OP, level, opname, "%s", msgchunk.c_str());                   \
+      msgchunkbegin += msgchunk.size();                                      \
+    }                                                                        \
+    msgchunkbegin += 1;                                                      \
+  }                                                                          \
+} while (0)
+
+#define OP_LOGD_FULL(opname, ...) OP_LOG_FULL(DLOG_DEBUG, get_op_info(opname), __VA_ARGS__)
+#endif  // OPS_COMMON_INC_OP_LOG_H_