From d012dfff2921c4674f6b0585e4867f1b2068287c Mon Sep 17 00:00:00 2001 From: chenmingkai Date: Mon, 19 Feb 2024 09:46:56 +0800 Subject: [PATCH] ADS upgrade --- .gitignore | 4 +- CMakeLists.txt | 206 ++++++++++ .../CMakePresets.json => CMakePresets.json | 12 +- MANIFEST.in | 2 +- ads/common/CMakeLists.txt | 7 + ads/common/ops/csrc/AbsOpApi.cpp | 2 +- ads/common/ops/csrc/AdsAddKernelNpu.cpp | 2 +- .../ops/csrc/AnchorResponseFlagsKernelNpu.cpp | 2 +- ads/common/ops/csrc/BatchNms.cpp | 2 +- .../ops/csrc/BoundingBoxDecodeKernelNpu.cpp | 2 +- .../ops/csrc/BoundingBoxEncodeKernelNpu.cpp | 2 +- .../ops/csrc/ConfusionTransposeKernelNpu.cpp | 2 +- .../ops/csrc/DynamicScatterKernelNpuOpApi.cpp | 4 +- .../ops/csrc/DynamicVoxelizationKernelNpu.cpp | 2 +- ...FurthestPointSamplingWithDistKernelNpu.cpp | 2 +- ads/common/ops/csrc/MoeTutelOpApi.cpp | 4 +- ...tiScaleDeformableAttnFunctionKernelNpu.cpp | 2 +- ads/common/ops/csrc/Nms3dNormal.cpp | 2 +- ads/common/ops/csrc/NpuSilu.cpp | 2 +- ads/common/ops/csrc/PointsInBoxKernelNpu.cpp | 2 +- ads/common/ops/csrc/RotaryMulKernelNpu.cpp | 2 +- ads/common/ops/csrc/ScatterMaxKernelNpu.cpp | 28 +- .../ops/csrc/SignBitsUnpackKernelNpu.cpp | 2 +- ...SoftmaxCrossEntropyWithLogitsKernelNpu.cpp | 2 +- ads/common/ops/csrc/StrideAddKernelNpu.cpp | 22 +- ads/common/ops/csrc/TransposeKernelNpu.cpp | 2 +- .../ops/csrc/YoloBoxesEncodeKernelNpu.cpp | 2 +- ads/common/ops/csrc/common.h | 33 -- ads/common/ops/csrc/functions.h | 9 +- ads/common/ops/csrc/pybind.cpp | 1 + ads/common/ops/kernels/CMakeLists.txt | 10 + ads/common/ops/kernels/README.md | 15 +- ads/common/ops/kernels/ads_op/CMakeLists.txt | 69 ---- ads/common/ops/kernels/ads_op/README.md | 13 - ads/common/ops/kernels/ads_op/build.sh | 51 --- .../ops/kernels/ads_op/cmake/func.cmake | 228 ----------- .../ops/kernels/ads_op/cmake/makeself.cmake | 17 - .../kernels/ads_op/framework/CMakeLists.txt | 11 - .../ops/kernels/ads_op/op_host/CMakeLists.txt | 82 ---- .../kernels/ads_op/op_kernel/CMakeLists.txt | 61 --- ads/common/ops/kernels/op_host/CMakeLists.txt | 16 + .../{ads_op => }/op_host/add_custom.cpp | 0 .../{ads_op => }/op_host/add_custom_tiling.h | 0 .../{ads_op => }/op_host/dynamic_scatter.cpp | 0 .../op_host/dynamic_scatter_tiling.h | 0 .../op_host/dynamic_voxelization.cpp | 0 .../op_host/dynamic_voxelization_tiling.h | 0 .../furthest_point_sampling_with_dist.cpp | 0 ...furthest_point_sampling_with_dist_tiling.h | 0 .../op_host/gather_nms3d_mask_tiling.cpp | 0 .../op_host/gather_nms3d_mask_tiling.h | 0 .../multi_scale_deformable_attention_grad.cpp | 0 .../multi_scale_deformable_attention_grad.h | 0 ...ulti_scale_deformable_attn_function_v2.cpp | 0 .../multi_scale_deformable_attn_function_v2.h | 0 .../op_host/nms3d_normal_tiling.cpp | 0 .../op_host/nms3d_normal_tiling.h | 0 .../{ads_op => }/op_host/points_in_box.cpp | 1 - .../op_host/points_in_box_tiling.h | 0 .../ops/kernels/op_kernel/CMakeLists.txt | 4 + .../{ads_op => }/op_kernel/add_custom.cpp | 0 .../op_kernel/dynamic_scatter.cpp | 0 .../op_kernel/dynamic_scatter_base.h | 0 .../op_kernel/dynamic_scatter_max.h | 0 .../op_kernel/dynamic_scatter_sum.h | 0 .../op_kernel/dynamic_voxelization.cpp | 0 .../furthest_point_sampling_with_dist.cpp | 0 .../op_kernel/gather_nms3d_mask.cpp | 0 .../multi_scale_deformable_attention_grad.cpp | 0 .../multi_scale_deformable_attn_function.cpp} | 0 ...ulti_scale_deformable_attn_function_v2.cpp | 359 ++++++++++++++++++ .../{ads_op => }/op_kernel/nms3d_normal.cpp | 0 .../{ads_op => }/op_kernel/points_in_box.cpp | 0 .../ops/{CMakeLists.txt => 
onnx/__init__.py} | 0 ads/common/ops/onnx/plugin/CMakeLists.txt | 5 + ads/motion/CMakeLists.txt | 3 + ads/motion/ops/csrc/pybind.cpp | 5 + ads/motion/ops/kernels/CMakeLists.txt | 12 + .../{ => kernels/framework}/CMakeLists.txt | 0 ads/motion/ops/kernels/op_host/CMakeLists.txt | 16 + .../ops/kernels/op_kernel}/CMakeLists.txt | 0 ads/motion/ops/pybind.cpp | 0 ads/perception/CMakeLists.txt | 9 + ads/perception/fused/ops/csrc/pybind.cpp | 5 + .../fused/ops/kernels/CMakeLists.txt | 9 + .../fused/ops/kernels/op_host/CMakeLists.txt | 17 + ads/perception/fused/ops/pybind.cpp | 0 ads/perception/point/ops/CMakeLists.txt | 0 ads/perception/point/ops/csrc/pybind.cpp | 6 + .../point/ops/kernels/CMakeLists.txt | 12 + .../point/ops/kernels/op_host/CMakeLists.txt | 17 + ads/perception/point/ops/pybind.cpp | 0 ads/perception/vision/ops/CMakeLists.txt | 0 ads/perception/vision/ops/csrc/pybind.cpp | 5 + .../vision/ops/kernels/CMakeLists.txt | 12 + .../vision/ops/kernels/op_host/CMakeLists.txt | 17 + ads/perception/vision/ops/pybind.cpp | 0 bind/pybind.cpp | 2 +- ci/access_control_test.py | 63 +-- ci/build.sh | 36 +- .../ads_op/cmake => cmake}/config.cmake | 44 ++- cmake/func.cmake | 233 ++++++++++++ .../kernels/ads_op/cmake => cmake}/intf.cmake | 43 ++- cmake/makeself.cmake | 19 + .../ads_op/cmake => cmake}/util/__init__.py | 0 .../util/ascendc_bin_param_build.py | 0 .../util/ascendc_impl_build.py | 0 .../util/ascendc_ops_config.py | 0 .../util/ascendc_replay_build.py | 0 .../util/batch_replay_impl.temp | 0 .../util/code_channel_infer.py | 0 .../ads_op/cmake => cmake}/util/const_var.py | 0 .../util/gen_impl_and_mrege_json.sh | 0 .../cmake => cmake}/util/gen_ops_filter.sh | 0 .../cmake => cmake}/util/gen_version_info.sh | 0 .../cmake => cmake}/util/insert_op_info.py | 0 .../util/insert_simplified_keys.py | 0 .../cmake => cmake}/util/kernel_entry.py | 0 .../cmake => cmake}/util/kernel_impl.temp | 0 .../cmake => cmake}/util/makeself/COPYING | 0 .../cmake => cmake}/util/makeself/README.md | 0 .../cmake => cmake}/util/makeself/VERSION | 0 .../util/makeself/make-release.sh | 0 .../util/makeself/makeself-header.sh | 0 .../cmake => cmake}/util/makeself/makeself.1 | 0 .../util/makeself/makeself.lsm | 0 .../cmake => cmake}/util/makeself/makeself.sh | 0 .../util/makeself/run-tests.sh | 0 .../util/merge_aicpu_info_json.sh | 0 .../cmake => cmake}/util/opdesc_parser.py | 0 .../cmake => cmake}/util/parse_ini_to_json.py | 0 .../cmake => cmake}/util/preset_parse.py | 0 .../cmake => cmake}/util/replay_codegen.py | 0 .../cmake => cmake}/util/replay_impl.temp | 0 .../util/tiling_data_def_build.py | 0 .../common/ops => include}/csrc/OpApiCommon.h | 199 ++++------ .../csrc/common.cpp => include/csrc/common.h | 88 +++-- include/csrc/pybind.h | 9 + include/onnx/common.h | 4 + scripts/build_kernel.sh | 32 ++ .../install.sh => scripts/install_kernel.sh | 0 .../upgrade.sh => scripts/upgrade_kernel.sh | 0 setup.py | 49 ++- tests/{ => torch}/run_test.py | 5 +- tests/{ => torch}/test_abs.py | 0 tests/{ => torch}/test_batch_nms.py | 0 tests/{ => torch}/test_fast_gelu.py | 0 tests/{ => torch}/test_fast_gelu_backward.py | 0 .../test_furthest_point_sample_with_dist.py | 0 ...t_multi_scale_deformable_attention_grad.py | 0 .../test_npu_anchor_response_flags.py | 0 .../test_npu_bounding_box_decode.py | 0 .../test_npu_bounding_box_encode.py | 0 tests/{ => torch}/test_npu_broadcast.py | 0 .../{ => torch}/test_npu_dyn_voxelization.py | 0 tests/{ => torch}/test_npu_dynamic_scatter.py | 0 tests/{ => torch}/test_npu_moe_tutel.py | 0 
.../test_npu_moe_tutel_backward.py | 0 ...npu_multi_scale_deformable_attn_funtion.py | 0 tests/{ => torch}/test_npu_nms3d_normal.py | 84 ++-- tests/{ => torch}/test_npu_rotary_mul.py | 0 tests/{ => torch}/test_npu_scatter.py | 0 tests/{ => torch}/test_npu_silu.py | 0 ...t_npu_softmax_cross_entropy_with_logits.py | 0 tests/{ => torch}/test_npu_stride_add.py | 0 tests/{ => torch}/test_npu_transpose.py | 0 tests/{ => torch}/test_points_in_box.py | 0 tests/{ => torch}/test_rotated_box.py | 0 tests/{ => torch}/test_rotated_iou.py | 0 tests/{ => torch}/test_rotated_overlaps.py | 0 tests/{ => torch}/test_scatter_max.py | 0 tests/{ => torch}/test_sign_bits_pack.py | 0 tests/{ => torch}/test_sign_bits_unpack.py | 0 tests/{ => torch}/test_yolo_boxes_encode.py | 0 third_party/README.md | 2 - 175 files changed, 1375 insertions(+), 992 deletions(-) create mode 100644 CMakeLists.txt rename ads/common/ops/kernels/ads_op/CMakePresets.json => CMakePresets.json (80%) create mode 100644 ads/common/CMakeLists.txt delete mode 100644 ads/common/ops/csrc/common.h create mode 100644 ads/common/ops/kernels/CMakeLists.txt delete mode 100644 ads/common/ops/kernels/ads_op/CMakeLists.txt delete mode 100644 ads/common/ops/kernels/ads_op/README.md delete mode 100644 ads/common/ops/kernels/ads_op/build.sh delete mode 100644 ads/common/ops/kernels/ads_op/cmake/func.cmake delete mode 100644 ads/common/ops/kernels/ads_op/cmake/makeself.cmake delete mode 100644 ads/common/ops/kernels/ads_op/framework/CMakeLists.txt delete mode 100644 ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt delete mode 100644 ads/common/ops/kernels/ads_op/op_kernel/CMakeLists.txt create mode 100644 ads/common/ops/kernels/op_host/CMakeLists.txt rename ads/common/ops/kernels/{ads_op => }/op_host/add_custom.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/add_custom_tiling.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/dynamic_scatter.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/dynamic_scatter_tiling.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/dynamic_voxelization.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/dynamic_voxelization_tiling.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/furthest_point_sampling_with_dist.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/furthest_point_sampling_with_dist_tiling.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/gather_nms3d_mask_tiling.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/gather_nms3d_mask_tiling.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/multi_scale_deformable_attention_grad.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/multi_scale_deformable_attention_grad.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/multi_scale_deformable_attn_function_v2.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/multi_scale_deformable_attn_function_v2.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/nms3d_normal_tiling.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/nms3d_normal_tiling.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_host/points_in_box.cpp (99%) rename ads/common/ops/kernels/{ads_op => }/op_host/points_in_box_tiling.h (100%) create mode 100644 ads/common/ops/kernels/op_kernel/CMakeLists.txt rename ads/common/ops/kernels/{ads_op => }/op_kernel/add_custom.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_kernel/dynamic_scatter.cpp (100%) rename ads/common/ops/kernels/{ads_op => 
}/op_kernel/dynamic_scatter_base.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_kernel/dynamic_scatter_max.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_kernel/dynamic_scatter_sum.h (100%) rename ads/common/ops/kernels/{ads_op => }/op_kernel/dynamic_voxelization.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_kernel/furthest_point_sampling_with_dist.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_kernel/gather_nms3d_mask.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_kernel/multi_scale_deformable_attention_grad.cpp (100%) rename ads/common/ops/kernels/{ads_op/op_kernel/multi_scale_deformable_attn_function_v2.cpp => op_kernel/multi_scale_deformable_attn_function.cpp} (100%) create mode 100644 ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function_v2.cpp rename ads/common/ops/kernels/{ads_op => }/op_kernel/nms3d_normal.cpp (100%) rename ads/common/ops/kernels/{ads_op => }/op_kernel/points_in_box.cpp (100%) rename ads/common/ops/{CMakeLists.txt => onnx/__init__.py} (100%) create mode 100644 ads/common/ops/onnx/plugin/CMakeLists.txt create mode 100644 ads/motion/CMakeLists.txt create mode 100644 ads/motion/ops/csrc/pybind.cpp create mode 100644 ads/motion/ops/kernels/CMakeLists.txt rename ads/motion/ops/{ => kernels/framework}/CMakeLists.txt (100%) create mode 100644 ads/motion/ops/kernels/op_host/CMakeLists.txt rename ads/{perception/fused/ops => motion/ops/kernels/op_kernel}/CMakeLists.txt (100%) delete mode 100644 ads/motion/ops/pybind.cpp create mode 100644 ads/perception/CMakeLists.txt create mode 100644 ads/perception/fused/ops/csrc/pybind.cpp create mode 100644 ads/perception/fused/ops/kernels/CMakeLists.txt create mode 100644 ads/perception/fused/ops/kernels/op_host/CMakeLists.txt delete mode 100644 ads/perception/fused/ops/pybind.cpp delete mode 100644 ads/perception/point/ops/CMakeLists.txt create mode 100644 ads/perception/point/ops/csrc/pybind.cpp create mode 100644 ads/perception/point/ops/kernels/CMakeLists.txt create mode 100644 ads/perception/point/ops/kernels/op_host/CMakeLists.txt delete mode 100644 ads/perception/point/ops/pybind.cpp delete mode 100644 ads/perception/vision/ops/CMakeLists.txt create mode 100644 ads/perception/vision/ops/csrc/pybind.cpp create mode 100644 ads/perception/vision/ops/kernels/CMakeLists.txt create mode 100644 ads/perception/vision/ops/kernels/op_host/CMakeLists.txt delete mode 100644 ads/perception/vision/ops/pybind.cpp rename {ads/common/ops/kernels/ads_op/cmake => cmake}/config.cmake (38%) create mode 100644 cmake/func.cmake rename {ads/common/ops/kernels/ads_op/cmake => cmake}/intf.cmake (33%) create mode 100644 cmake/makeself.cmake rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/__init__.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/ascendc_bin_param_build.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/ascendc_impl_build.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/ascendc_ops_config.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/ascendc_replay_build.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/batch_replay_impl.temp (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/code_channel_infer.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/const_var.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/gen_impl_and_mrege_json.sh (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/gen_ops_filter.sh 
(100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/gen_version_info.sh (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/insert_op_info.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/insert_simplified_keys.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/kernel_entry.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/kernel_impl.temp (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/makeself/COPYING (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/makeself/README.md (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/makeself/VERSION (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/makeself/make-release.sh (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/makeself/makeself-header.sh (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/makeself/makeself.1 (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/makeself/makeself.lsm (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/makeself/makeself.sh (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/makeself/run-tests.sh (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/merge_aicpu_info_json.sh (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/opdesc_parser.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/parse_ini_to_json.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/preset_parse.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/replay_codegen.py (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/replay_impl.temp (100%) rename {ads/common/ops/kernels/ads_op/cmake => cmake}/util/tiling_data_def_build.py (100%) rename {ads/common/ops => include}/csrc/OpApiCommon.h (74%) rename ads/common/ops/csrc/common.cpp => include/csrc/common.h (58%) create mode 100644 include/csrc/pybind.h create mode 100644 include/onnx/common.h create mode 100644 scripts/build_kernel.sh rename ads/common/ops/kernels/ads_op/scripts/install.sh => scripts/install_kernel.sh (100%) rename ads/common/ops/kernels/ads_op/scripts/upgrade.sh => scripts/upgrade_kernel.sh (100%) rename tests/{ => torch}/run_test.py (96%) rename tests/{ => torch}/test_abs.py (100%) rename tests/{ => torch}/test_batch_nms.py (100%) rename tests/{ => torch}/test_fast_gelu.py (100%) rename tests/{ => torch}/test_fast_gelu_backward.py (100%) rename tests/{ => torch}/test_furthest_point_sample_with_dist.py (100%) rename tests/{ => torch}/test_multi_scale_deformable_attention_grad.py (100%) rename tests/{ => torch}/test_npu_anchor_response_flags.py (100%) rename tests/{ => torch}/test_npu_bounding_box_decode.py (100%) rename tests/{ => torch}/test_npu_bounding_box_encode.py (100%) rename tests/{ => torch}/test_npu_broadcast.py (100%) rename tests/{ => torch}/test_npu_dyn_voxelization.py (100%) rename tests/{ => torch}/test_npu_dynamic_scatter.py (100%) rename tests/{ => torch}/test_npu_moe_tutel.py (100%) rename tests/{ => torch}/test_npu_moe_tutel_backward.py (100%) rename tests/{ => torch}/test_npu_multi_scale_deformable_attn_funtion.py (100%) rename tests/{ => torch}/test_npu_nms3d_normal.py (97%) rename tests/{ => torch}/test_npu_rotary_mul.py (100%) rename tests/{ => torch}/test_npu_scatter.py (100%) rename tests/{ => torch}/test_npu_silu.py (100%) rename tests/{ => torch}/test_npu_softmax_cross_entropy_with_logits.py (100%) rename tests/{ => 
torch}/test_npu_stride_add.py (100%) rename tests/{ => torch}/test_npu_transpose.py (100%) rename tests/{ => torch}/test_points_in_box.py (100%) rename tests/{ => torch}/test_rotated_box.py (100%) rename tests/{ => torch}/test_rotated_iou.py (100%) rename tests/{ => torch}/test_rotated_overlaps.py (100%) rename tests/{ => torch}/test_scatter_max.py (100%) rename tests/{ => torch}/test_sign_bits_pack.py (100%) rename tests/{ => torch}/test_sign_bits_unpack.py (100%) rename tests/{ => torch}/test_yolo_boxes_encode.py (100%) delete mode 100644 third_party/README.md diff --git a/.gitignore b/.gitignore index 658649e0..9009958c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ __pycache__/ -.DS_Store \ No newline at end of file +.DS_Store +.idea +cmake-build-debug \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..801d7d0b --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,206 @@ +cmake_minimum_required(VERSION 3.16.0) +project(opp) +set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) + +include(cmake/config.cmake) +include(cmake/func.cmake) +include(cmake/intf.cmake) + +set(ADS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ads) +add_subdirectory(${ADS_DIR}/common) +add_subdirectory(${ADS_DIR}/motion) +add_subdirectory(${ADS_DIR}/perception) + +opbuild(OPS_SRC ${ASCEND_HOST_SRC} OUT_DIR ${ASCEND_AUTOGEN_PATH}) + +add_library(cust_op_proto SHARED ${ASCEND_HOST_SRC} + ${ASCEND_AUTOGEN_PATH}/op_proto.cc) +target_compile_definitions(cust_op_proto PRIVATE OP_PROTO_LIB) +target_compile_options(cust_op_proto PRIVATE -fvisibility=hidden) +target_link_libraries( + cust_op_proto + PRIVATE intf_pub + exe_graph + register + tiling_api + -Wl,--whole-archive + rt2_registry + -Wl,--no-whole-archive) +set_target_properties(cust_op_proto PROPERTIES OUTPUT_NAME cust_opsproto_rt2.0) +install_target( + TRG cust_op_proto DST + packages/vendors/${vendor_name}/op_proto/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) +install_file(TRG cust_op_proto SRC ${ASCEND_AUTOGEN_PATH}/op_proto.h DST + packages/vendors/${vendor_name}/op_proto/inc) + +add_library(cust_optiling SHARED ${ASCEND_HOST_SRC}) +target_compile_definitions(cust_optiling PRIVATE OP_TILING_LIB) +target_compile_options(cust_optiling PRIVATE -fvisibility=hidden) +target_link_libraries( + cust_optiling + PRIVATE intf_pub + exe_graph + register + tiling_api + -Wl,--whole-archive + rt2_registry + -Wl,--no-whole-archive) +set_target_properties(cust_optiling PROPERTIES OUTPUT_NAME cust_opmaster_rt2.0) +install_target( + TRG + cust_optiling + DST + packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/lib/linux/${CMAKE_SYSTEM_PROCESSOR} +) +# create liboptiling.so link +add_custom_command( + TARGET cust_optiling + POST_BUILD + COMMAND + ${CMAKE_COMMAND} -E chdir + ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling + ${CMAKE_COMMAND} -E create_symlink + lib/linux/${CMAKE_SYSTEM_PROCESSOR}/$ + liboptiling.so) +install( + FILES + ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/liboptiling.so + DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling) + +if(${ENABLE_ONNX}) + protobuf_generate(PROTO_FILE ${ASCEND_CANN_PACKAGE_PATH}/include/proto/ge_onnx.proto + OUT_DIR ${ASCEND_AUTOGEN_PATH}) + add_library(cust_onnx_parsers SHARED ${ASCEND_ONNX_SRC}) + target_compile_options( + cust_onnx_parsers + PRIVATE + -O2 + -Werror + -Wno-deprecated-declarations + -Dgoogle=ascend_private + "-fno-common" + "-fno-strict-aliasing") + target_link_libraries(cust_onnx_parsers PRIVATE 
intf_pub) + target_include_directories(cust_onnx_parsers + PRIVATE ${PROJECT_SOURCE_DIR}/include ${ASCEND_AUTOGEN_PATH}) + + install_target(TRG cust_onnx_parsers DST + packages/vendors/${vendor_name}/framework/onnx/) +endif() + +# ===================Build ACLNN=================== +file(GLOB ACLNN_SRC_GEN ${ASCEND_AUTOGEN_PATH}/aclnn_*.cpp) +file(GLOB ACLNN_INC_GEN ${ASCEND_AUTOGEN_PATH}/aclnn_*.h) +set(ACLNN_SRC ${ACLNN_SRC_GEN} ${ACLNN_SRC_CUSTOM}) +set(ACLNN_INC ${ACLNN_INC_GEN} ${ACLNN_INC_CUSTOM}) +add_library(cust_opapi SHARED ${ACLNN_SRC}) +target_link_libraries(cust_opapi PRIVATE intf_pub ascendcl nnopbase) +install_target(TRG cust_opapi DST packages/vendors/${vendor_name}/op_api/lib) +install_file(TRG cust_opapi SRC ${ACLNN_INC} DST + packages/vendors/${vendor_name}/op_api/include) + +# ===================Build Kernel=================== +# set custom compile options +if("${CMAKE_BUILD_TYPE}x" STREQUAL "Debugx") + add_ops_compile_options(ALL OPTIONS -g -O0) +endif() + +file(COPY ${ASCEND_KERNEL_SRC} DESTINATION ${ASCEND_KERNEL_PATH}) + +foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) + # generate aic-${compute_unit}-ops-info.json + add_ops_info_target( + TARGET + ops_info_gen_${compute_unit} + OUTPUT + ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit}/aic-${compute_unit}-ops-info.json + OPS_INFO + ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + INSTALL_DIR + packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit}) + + # generate ascendc impl py once + if(NOT TARGET ascendc_impl_gen) + add_ops_impl_target( + TARGET + ascendc_impl_gen + OPS_INFO + ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + IMPL_DIR + ${ASCEND_KERNEL_PATH} + OUT_DIR + ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl + ) + endif() + + # dynamic shape binary compile + if(${ENABLE_BINARY_PACKAGE}) + add_bin_compile_target( + TARGET + ascendc_bin_${compute_unit} + OPS_INFO + ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + IMPL_DIR + ${ASCEND_KERNEL_PATH} + ADP_DIR + ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic + OUT_DIR + ${CMAKE_CURRENT_BINARY_DIR}/binary/${compute_unit} + KERNEL_DIR + ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel + INSTALL_DIR + packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel + COMPUTE_UNIT + ${compute_unit}) + add_dependencies(ascendc_bin_${compute_unit} ascendc_impl_gen) + endif() +endforeach() + +# generate npu_supported_ops.json +add_npu_support_target( + TARGET + npu_supported_ops + OPS_INFO_DIR + ${ASCEND_AUTOGEN_PATH} + OUT_DIR + ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_info_cfg/ai_core + INSTALL_DIR + packages/vendors/${vendor_name}/framework/${ASCEND_FRAMEWORK_TYPE}) + +# ===================Build test=================== +# WARN: WIP +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() + +get_system_info(SYSTEM_INFO) + +# gen version.info +add_custom_target( + gen_version_info ALL + COMMAND + bash ${CMAKE_CURRENT_SOURCE_DIR}/cmake/util/gen_version_info.sh + ${ASCEND_CANN_PACKAGE_PATH} + ${ADS_PATH}/packages/vendors/${vendor_name}) + +install( + FILES ${ADS_PATH}/packages/vendors/${vendor_name}/version.info + DESTINATION packages/vendors/${vendor_name}) + +if(COMPILE_OPP_PACKAGE) + # CPack config + set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME}) + set(CPACK_PACKAGE_VERSION ${CMAKE_PROJECT_VERSION}) + 
set(CPACK_PACKAGE_DESCRIPTION "CPack opp project") + set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CPack opp project") + set(CPACK_PACKAGE_DIRECTORY ${CMAKE_INSTALL_PREFIX}) + set(CPACK_PACKAGE_FILE_NAME "custom_opp_${SYSTEM_INFO}.run") + set(CPACK_GENERATOR External) + set(CPACK_CMAKE_GENERATOR "Unix Makefiles") + set(CPACK_EXTERNAL_ENABLE_STAGING TRUE) + set(CPACK_EXTERNAL_PACKAGE_SCRIPT ${CMAKE_SOURCE_DIR}/cmake/makeself.cmake) + set(CPACK_EXTERNAL_BUILT_PACKAGES + ${CPACK_PACKAGE_DIRECTORY}/_CPack_Packages/Linux/External/${CPACK_PACKAGE_FILE_NAME}/${CPACK_PACKAGE_FILE_NAME} + ) + include(CPack) +endif() diff --git a/ads/common/ops/kernels/ads_op/CMakePresets.json b/CMakePresets.json similarity index 80% rename from ads/common/ops/kernels/ads_op/CMakePresets.json rename to CMakePresets.json index a23c07b8..ab25547b 100644 --- a/ads/common/ops/kernels/ads_op/CMakePresets.json +++ b/CMakePresets.json @@ -17,17 +17,13 @@ "type": "STRING", "value": "Release" }, - "ENABLE_SOURCE_PACKAGE": { - "type": "BOOL", - "value": "True" - }, "ENABLE_BINARY_PACKAGE": { "type": "BOOL", "value": "True" }, "ASCEND_COMPUTE_UNIT": { "type": "STRING", - "value": "ascend910b" + "value": "ascend910b;ascend910;ascend310p" }, "ENABLE_TEST": { "type": "BOOL", @@ -49,13 +45,9 @@ "type": "PATH", "value": "${sourceDir}/build_out" }, - "ENABLE_CROSS_COMPILE": { + "ENABLE_ONNX": { "type": "BOOL", "value": "False" - }, - "CMAKE_CROSS_PLATFORM_COMPILER": { - "type": "PATH", - "value": "/usr/bin/aarch64-linux-gnu-g++" } } } diff --git a/MANIFEST.in b/MANIFEST.in index 49ea6f5c..cd191c56 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -recursive-include ads/common/ops/kernels/ads_op_kernel/packages/ * \ No newline at end of file +recursive-include ads/packages/ * diff --git a/ads/common/CMakeLists.txt b/ads/common/CMakeLists.txt new file mode 100644 index 00000000..3f1ac043 --- /dev/null +++ b/ads/common/CMakeLists.txt @@ -0,0 +1,7 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/kernels) + add_subdirectory(ops/kernels) +endif() + +if (${ENABLE_ONNX} AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/onnx) + add_subdirectory(ops/onnx/plugin) +endif() diff --git a/ads/common/ops/csrc/AbsOpApi.cpp b/ads/common/ops/csrc/AbsOpApi.cpp index 824b60ce..4332c993 100644 --- a/ads/common/ops/csrc/AbsOpApi.cpp +++ b/ads/common/ops/csrc/AbsOpApi.cpp @@ -1,5 +1,5 @@ #include -#include "OpApiCommon.h" +#include "csrc/OpApiCommon.h" #include "functions.h" at::Tensor npu_abs(const at::Tensor& self) diff --git a/ads/common/ops/csrc/AdsAddKernelNpu.cpp b/ads/common/ops/csrc/AdsAddKernelNpu.cpp index c766e0e2..b4471aa8 100644 --- a/ads/common/ops/csrc/AdsAddKernelNpu.cpp +++ b/ads/common/ops/csrc/AdsAddKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. #include -#include "OpApiCommon.h" +#include "csrc/OpApiCommon.h" #include "functions.h" at::Tensor npu_ads_add(const at::Tensor &tensor1, const at::Tensor &tensor2) diff --git a/ads/common/ops/csrc/AnchorResponseFlagsKernelNpu.cpp b/ads/common/ops/csrc/AnchorResponseFlagsKernelNpu.cpp index f414633c..f816e881 100644 --- a/ads/common/ops/csrc/AnchorResponseFlagsKernelNpu.cpp +++ b/ads/common/ops/csrc/AnchorResponseFlagsKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. 
#include "torch_npu/csrc/framework/OpCommand.h" -#include "common.h" +#include "csrc/common.h" namespace { c10::SmallVector infersize_npu_anchor_response_flags( diff --git a/ads/common/ops/csrc/BatchNms.cpp b/ads/common/ops/csrc/BatchNms.cpp index a7051437..ae903769 100644 --- a/ads/common/ops/csrc/BatchNms.cpp +++ b/ads/common/ops/csrc/BatchNms.cpp @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "torch_npu/csrc/framework/OpCommand.h" -#include "common.h" +#include "csrc/common.h" std::tuple npu_batch_nms( const at::Tensor& self, diff --git a/ads/common/ops/csrc/BoundingBoxDecodeKernelNpu.cpp b/ads/common/ops/csrc/BoundingBoxDecodeKernelNpu.cpp index 85fc0764..2bddf962 100644 --- a/ads/common/ops/csrc/BoundingBoxDecodeKernelNpu.cpp +++ b/ads/common/ops/csrc/BoundingBoxDecodeKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. #include "torch_npu/csrc/framework/OpCommand.h" -#include "common.h" +#include "csrc/common.h" at::Tensor npu_bounding_box_decode( const at::Tensor& rois, diff --git a/ads/common/ops/csrc/BoundingBoxEncodeKernelNpu.cpp b/ads/common/ops/csrc/BoundingBoxEncodeKernelNpu.cpp index aa5bad77..e3b12bb1 100644 --- a/ads/common/ops/csrc/BoundingBoxEncodeKernelNpu.cpp +++ b/ads/common/ops/csrc/BoundingBoxEncodeKernelNpu.cpp @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "torch_npu/csrc/framework/OpCommand.h" -#include "common.h" +#include "csrc/common.h" at::Tensor npu_bounding_box_encode( const at::Tensor& anchor_box, diff --git a/ads/common/ops/csrc/ConfusionTransposeKernelNpu.cpp b/ads/common/ops/csrc/ConfusionTransposeKernelNpu.cpp index a12d1d7c..6c045377 100644 --- a/ads/common/ops/csrc/ConfusionTransposeKernelNpu.cpp +++ b/ads/common/ops/csrc/ConfusionTransposeKernelNpu.cpp @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "torch_npu/csrc/framework/OpCommand.h" -#include "common.h" +#include "csrc/common.h" at::Tensor npu_confusion_transpose( const at::Tensor& self, diff --git a/ads/common/ops/csrc/DynamicScatterKernelNpuOpApi.cpp b/ads/common/ops/csrc/DynamicScatterKernelNpuOpApi.cpp index c2ccb7a2..64fc359b 100644 --- a/ads/common/ops/csrc/DynamicScatterKernelNpuOpApi.cpp +++ b/ads/common/ops/csrc/DynamicScatterKernelNpuOpApi.cpp @@ -6,8 +6,8 @@ #include "torch_npu/csrc/aten/NPUNativeFunctions.h" #include "torch_npu/csrc/aten/CustomFunctions.h" #include "functions.h" -#include "common.h" -#include "OpApiCommon.h" +#include "csrc/common.h" +#include "csrc/OpApiCommon.h" using npu_preparation = at_npu::native::OpPreparation; using torch::autograd::Function; diff --git a/ads/common/ops/csrc/DynamicVoxelizationKernelNpu.cpp b/ads/common/ops/csrc/DynamicVoxelizationKernelNpu.cpp index 3c89c5ab..ab0f52eb 100644 --- a/ads/common/ops/csrc/DynamicVoxelizationKernelNpu.cpp +++ b/ads/common/ops/csrc/DynamicVoxelizationKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. 
#include -#include "OpApiCommon.h" +#include "csrc/OpApiCommon.h" #include "functions.h" at::Tensor DynamicVoxelization( diff --git a/ads/common/ops/csrc/FurthestPointSamplingWithDistKernelNpu.cpp b/ads/common/ops/csrc/FurthestPointSamplingWithDistKernelNpu.cpp index fea2f948..a1873781 100644 --- a/ads/common/ops/csrc/FurthestPointSamplingWithDistKernelNpu.cpp +++ b/ads/common/ops/csrc/FurthestPointSamplingWithDistKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. #include -#include "OpApiCommon.h" +#include "csrc/OpApiCommon.h" #include "functions.h" at::Tensor furthest_point_sampling_with_dist(const at::Tensor &points_dist, const at::Tensor &nearest_temp, const int32_t num_points) diff --git a/ads/common/ops/csrc/MoeTutelOpApi.cpp b/ads/common/ops/csrc/MoeTutelOpApi.cpp index 6ec9fa35..b0e5017b 100644 --- a/ads/common/ops/csrc/MoeTutelOpApi.cpp +++ b/ads/common/ops/csrc/MoeTutelOpApi.cpp @@ -22,8 +22,8 @@ #include "torch_npu/csrc/aten/NPUNativeFunctions.h" #include "torch_npu/csrc/aten/CustomFunctions.h" #include "functions.h" -#include "common.h" -#include "OpApiCommon.h" +#include "csrc/common.h" +#include "csrc/OpApiCommon.h" using npu_preparation = at_npu::native::OpPreparation; using torch::autograd::Function; diff --git a/ads/common/ops/csrc/MultiScaleDeformableAttnFunctionKernelNpu.cpp b/ads/common/ops/csrc/MultiScaleDeformableAttnFunctionKernelNpu.cpp index dfa52b6b..e1e74560 100644 --- a/ads/common/ops/csrc/MultiScaleDeformableAttnFunctionKernelNpu.cpp +++ b/ads/common/ops/csrc/MultiScaleDeformableAttnFunctionKernelNpu.cpp @@ -1,5 +1,5 @@ #include -#include "OpApiCommon.h" +#include "csrc/OpApiCommon.h" #include "functions.h" at::Tensor npu_multi_scale_deformable_attn_function(const at::Tensor& value, diff --git a/ads/common/ops/csrc/Nms3dNormal.cpp b/ads/common/ops/csrc/Nms3dNormal.cpp index 493ef907..939ef64d 100644 --- a/ads/common/ops/csrc/Nms3dNormal.cpp +++ b/ads/common/ops/csrc/Nms3dNormal.cpp @@ -15,7 +15,7 @@ // limitations under the License. #include -#include "OpApiCommon.h" +#include "csrc/OpApiCommon.h" #include "functions.h" std::tuple nms3d_normal(const at::Tensor &boxes, double nms_overlap_thresh) diff --git a/ads/common/ops/csrc/NpuSilu.cpp b/ads/common/ops/csrc/NpuSilu.cpp index 4f62cad5..3fa747bc 100644 --- a/ads/common/ops/csrc/NpuSilu.cpp +++ b/ads/common/ops/csrc/NpuSilu.cpp @@ -1,6 +1,6 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "common.h" +#include "csrc/common.h" at::Tensor &silu_out_npu_nocheck(at::Tensor &result, const at::Tensor &self) { diff --git a/ads/common/ops/csrc/PointsInBoxKernelNpu.cpp b/ads/common/ops/csrc/PointsInBoxKernelNpu.cpp index f887c21c..65421f0c 100644 --- a/ads/common/ops/csrc/PointsInBoxKernelNpu.cpp +++ b/ads/common/ops/csrc/PointsInBoxKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. 
#include -#include "OpApiCommon.h" +#include "csrc/OpApiCommon.h" #include "functions.h" at::Tensor npu_points_in_box(const at::Tensor &boxes, const at::Tensor &pts) diff --git a/ads/common/ops/csrc/RotaryMulKernelNpu.cpp b/ads/common/ops/csrc/RotaryMulKernelNpu.cpp index 05569309..7a676699 100644 --- a/ads/common/ops/csrc/RotaryMulKernelNpu.cpp +++ b/ads/common/ops/csrc/RotaryMulKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "common.h" +#include "csrc/common.h" using tensor_tuple = std::tuple; diff --git a/ads/common/ops/csrc/ScatterMaxKernelNpu.cpp b/ads/common/ops/csrc/ScatterMaxKernelNpu.cpp index f3b11664..b71c8a06 100644 --- a/ads/common/ops/csrc/ScatterMaxKernelNpu.cpp +++ b/ads/common/ops/csrc/ScatterMaxKernelNpu.cpp @@ -1,14 +1,12 @@ #include "torch_npu/csrc/framework/OpCommand.h" -#include "common.h" +#include "csrc/common.h" using namespace std; -std::tuple npu_scatter_max( - const at::Tensor& updates, - const at::Tensor& indices, - c10::optional out) +std::tuple npu_scatter_max(const at::Tensor& updates, const at::Tensor& indices, + c10::optional out) { - auto sizes = updates.sizes().vec(); + auto sizes = updates.sizes().vec(); sizes[0] = indices.max().item().toLong() + 1; @@ -16,18 +14,13 @@ std::tuple npu_scatter_max( at::Tensor argmax = at::empty(result.sizes(), result.options().dtype(at::kInt)); at_npu::native::OpCommand cmd; - cmd.Name("ScatterMaxWithArgmax") - .Input(result) - .Input(indices) - .Input(updates) - .Output(result) - .Output(argmax) - .Run(); + cmd.Name("ScatterMaxWithArgmax").Input(result).Input(indices).Input(updates).Output(result).Output(argmax).Run(); return std::tie(result, argmax); } -at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segment_ids, const at::Tensor& num_segments) +at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segment_ids, + const at::Tensor& num_segments) { c10::SmallVector output_size; @@ -41,11 +34,6 @@ at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segme at::Tensor out = at::empty(output_size, x.options()); at_npu::native::OpCommand cmd; - cmd.Name("UnsortedSegmentSum") - .Input(x) - .Input(segment_ids) - .Input(num_segments) - .Output(out) - .Run(); + cmd.Name("UnsortedSegmentSum").Input(x).Input(segment_ids).Input(num_segments).Output(out).Run(); return out; } diff --git a/ads/common/ops/csrc/SignBitsUnpackKernelNpu.cpp b/ads/common/ops/csrc/SignBitsUnpackKernelNpu.cpp index 27ae440b..a0b6d10c 100644 --- a/ads/common/ops/csrc/SignBitsUnpackKernelNpu.cpp +++ b/ads/common/ops/csrc/SignBitsUnpackKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "common.h" +#include "csrc/common.h" at::Tensor npu_sign_bits_unpack_compute( diff --git a/ads/common/ops/csrc/SoftmaxCrossEntropyWithLogitsKernelNpu.cpp b/ads/common/ops/csrc/SoftmaxCrossEntropyWithLogitsKernelNpu.cpp index cc8f95df..f1364956 100644 --- a/ads/common/ops/csrc/SoftmaxCrossEntropyWithLogitsKernelNpu.cpp +++ b/ads/common/ops/csrc/SoftmaxCrossEntropyWithLogitsKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "common.h" +#include "csrc/common.h" namespace { std::tuple softmax_cross_entropy_with_logits_out_nocheck( diff --git a/ads/common/ops/csrc/StrideAddKernelNpu.cpp b/ads/common/ops/csrc/StrideAddKernelNpu.cpp index 47922f62..57f65c48 100644 --- a/ads/common/ops/csrc/StrideAddKernelNpu.cpp +++ 
b/ads/common/ops/csrc/StrideAddKernelNpu.cpp @@ -16,17 +16,11 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "common.h" - +#include "csrc/common.h" namespace { -at::Tensor &stride_add_out_npu_nocheck( - at::Tensor &result, - const at::Tensor &self, - const at::Tensor &other, - c10::Scalar offset1, - c10::Scalar offset2, - c10::Scalar c1_len) +at::Tensor& stride_add_out_npu_nocheck(at::Tensor& result, const at::Tensor& self, const at::Tensor& other, + c10::Scalar offset1, c10::Scalar offset2, c10::Scalar c1_len) { at_npu::native::OpCommand cmd; cmd.Name("StrideAdd") @@ -39,14 +33,10 @@ at::Tensor &stride_add_out_npu_nocheck( .Run(); return result; } -} // namespace +} // namespace -at::Tensor npu_stride_add_compute( - const at::Tensor &self, - const at::Tensor &other, - const c10::Scalar &offset1, - const c10::Scalar &offset2, - const c10::Scalar &c1_len) +at::Tensor npu_stride_add_compute(const at::Tensor& self, const at::Tensor& other, const c10::Scalar& offset1, + const c10::Scalar& offset2, const c10::Scalar& c1_len) { auto output_size = infersize_stride_add(self.sizes(), other.sizes()); output_size[1] = c1_len.toInt() * 16; diff --git a/ads/common/ops/csrc/TransposeKernelNpu.cpp b/ads/common/ops/csrc/TransposeKernelNpu.cpp index 2e8705c2..6c391747 100644 --- a/ads/common/ops/csrc/TransposeKernelNpu.cpp +++ b/ads/common/ops/csrc/TransposeKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "common.h" +#include "csrc/common.h" namespace { at::Tensor &npu_transpose_out_nocheck( diff --git a/ads/common/ops/csrc/YoloBoxesEncodeKernelNpu.cpp b/ads/common/ops/csrc/YoloBoxesEncodeKernelNpu.cpp index df02a325..310d2857 100644 --- a/ads/common/ops/csrc/YoloBoxesEncodeKernelNpu.cpp +++ b/ads/common/ops/csrc/YoloBoxesEncodeKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "common.h" +#include "csrc/common.h" namespace { diff --git a/ads/common/ops/csrc/common.h b/ads/common/ops/csrc/common.h deleted file mode 100644 index 95c2b5a1..00000000 --- a/ads/common/ops/csrc/common.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef __COMMON_H__ -#define __COMMON_H__ -#include -#include -#include -#include -#include -#include "torch_npu/csrc/core/npu/NPUMacros.h" -#include "torch_npu/csrc/framework/utils/NPUDefinition.h" -#include "third_party/acl/inc/acl/acl_base.h" - -const int N = 32; -const int SIZE = 8; - -using tuple_vector = std::tuple, c10::SmallVector>; -aclDataType ConvertToAclDataType(const at::ScalarType &data_type); -c10::SmallVector array_to_small_vector(c10::IntArrayRef shape); -c10::SmallVector conv_transpose2d_npu_output_size(const at::Tensor &input, const at::Tensor &weight, - const at::Tensor &bias, c10::IntArrayRef padding, - c10::IntArrayRef output_padding, - c10::IntArrayRef stride, c10::IntArrayRef dilation, - int64_t groups); - -std::pair trans_torch_type_to_scalar(const std::string &type); -tuple_vector softmax_cross_entropy_with_logits_impl_npu_output_size(const at::Tensor& self); -int64_t make_warp_dim(int64_t dim, int64_t dim_post_expr); -c10::SmallVector convert_array_to_vector(c10::IntArrayRef intArray); -c10::SmallVector infersize_stride_add(c10::IntArrayRef shape1_, c10::IntArrayRef shape2_); -c10::SmallVector transpose_npu_output_size(const at::Tensor &self, c10::IntArrayRef perm); -bool check_match(const at::Tensor &self); -void format_fresh_view(at::Tensor &x, const at::Tensor &y); - -#endif // __COMMON_H__ diff --git 
a/ads/common/ops/csrc/functions.h b/ads/common/ops/csrc/functions.h index 8f5bf2bc..cf901d9c 100644 --- a/ads/common/ops/csrc/functions.h +++ b/ads/common/ops/csrc/functions.h @@ -11,17 +11,14 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef __FUNCTIONS_H__ -#define __FUNCTIONS_H__ +#ifndef COMMON_OPS_CSRC_FUNCTIONS_H_ +#define COMMON_OPS_CSRC_FUNCTIONS_H_ #include #include #include #include #include -#include - -void init_common(pybind11::module &m); std::tuple npu_scatter_max(const at::Tensor& updates, const at::Tensor& indices, c10::optional out); at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segment_ids, const at::Tensor& num_segments); @@ -163,4 +160,4 @@ at::Tensor DynamicVoxelization( const double coorsMinZ); std::tuple nms3d_normal(const at::Tensor &boxes, double nms_overlap_thresh); -#endif // __FUNCTIONS_H__ +#endif // COMMON_OPS_CSRC_FUNCTIONS_H_ diff --git a/ads/common/ops/csrc/pybind.cpp b/ads/common/ops/csrc/pybind.cpp index 35b230ae..3d2c805c 100644 --- a/ads/common/ops/csrc/pybind.cpp +++ b/ads/common/ops/csrc/pybind.cpp @@ -1,5 +1,6 @@ #include #include "functions.h" +#include "csrc/pybind.h" void init_common(pybind11::module &m) { diff --git a/ads/common/ops/kernels/CMakeLists.txt b/ads/common/ops/kernels/CMakeLists.txt new file mode 100644 index 00000000..3b1f8543 --- /dev/null +++ b/ads/common/ops/kernels/CMakeLists.txt @@ -0,0 +1,10 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) + add_subdirectory(op_host) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) + add_subdirectory(op_kernel) +endif() +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() + diff --git a/ads/common/ops/kernels/README.md b/ads/common/ops/kernels/README.md index 214fb0a6..1e664555 100644 --- a/ads/common/ops/kernels/README.md +++ b/ads/common/ops/kernels/README.md @@ -1,2 +1,13 @@ -## Description -+ The folder contains some ascend-kernel source files, which are like cuda-kernels and supply some ops that can be run on ascend device. \ No newline at end of file +## 算子原型 + + + + + + + + + + + +
+| 算子类型(OpType) | Add |  |  |  |
+| 算子输入 | name | shape | data type | format |
+|  | x | - | float | ND |
+|  | y | - | float | ND |
+| 算子输出 | z | - | float | ND |
+| 核函数名 | add_custom |  |  |  |
\ No newline at end of file diff --git a/ads/common/ops/kernels/ads_op/CMakeLists.txt b/ads/common/ops/kernels/ads_op/CMakeLists.txt deleted file mode 100644 index 584132d8..00000000 --- a/ads/common/ops/kernels/ads_op/CMakeLists.txt +++ /dev/null @@ -1,69 +0,0 @@ -cmake_minimum_required(VERSION 3.16.0) -project(opp) -if(ENABLE_CROSS_COMPILE) - if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64) - set(CROSS_COMPILE_PLATFORM aarch64) - else() - set(CROSS_COMPILE_PLATFORM x86_64) - endif() - set(PLATFORM ${CMAKE_SYSTEM_PROCESSOR}) - set(CMAKE_COMPILE_COMPILER_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/linux/${CROSS_COMPILE_PLATFORM}/) - set(CMAKE_COMPILE_RUNTIME_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/${CROSS_COMPILE_PLATFORM}/) - set(CMAKE_SYSTEM_PROCESSOR ${CROSS_COMPILE_PLATFORM}) - set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) - set(CMAKE_CXX_COMPILER ${CMAKE_CROSS_PLATFORM_COMPILER}) -else() - set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) -endif() - -include(cmake/config.cmake) -include(cmake/func.cmake) -include(cmake/intf.cmake) - -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) - add_subdirectory(framework) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) - add_subdirectory(op_host) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) - add_subdirectory(op_kernel) -endif() -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() - -# modify vendor_name in install.sh and upgrade.sh -add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh - COMMAND mkdir -p ${CMAKE_BINARY_DIR}/scripts - COMMAND cp -r ${CMAKE_SOURCE_DIR}/scripts/* ${CMAKE_BINARY_DIR}/scripts/ - COMMAND sed -i "s/vendor_name=customize/vendor_name=${vendor_name}/g" ${CMAKE_BINARY_DIR}/scripts/* -) -add_custom_target(modify_vendor ALL DEPENDS ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh) -install(DIRECTORY ${CMAKE_BINARY_DIR}/scripts/ DESTINATION . FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_READ) - -install(FILES ${CMAKE_SOURCE_DIR}/custom.proto DESTINATION packages OPTIONAL) - -get_system_info(SYSTEM_INFO) - -# gen version.info -add_custom_target(gen_version_info ALL - COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/cmake/util/gen_version_info.sh ${ASCEND_CANN_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR} -) - -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.info - DESTINATION packages/vendors/${vendor_name}/) - -# CPack config -set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME}) -set(CPACK_PACKAGE_VERSION ${CMAKE_PROJECT_VERSION}) -set(CPACK_PACKAGE_DESCRIPTION "CPack opp project") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CPack opp project") -set(CPACK_PACKAGE_DIRECTORY ${CMAKE_INSTALL_PREFIX}) -set(CPACK_PACKAGE_FILE_NAME "custom_opp_${SYSTEM_INFO}.run") -set(CPACK_GENERATOR External) -set(CPACK_CMAKE_GENERATOR "Unix Makefiles") -set(CPACK_EXTERNAL_ENABLE_STAGING TRUE) -set(CPACK_EXTERNAL_PACKAGE_SCRIPT ${CMAKE_SOURCE_DIR}/cmake/makeself.cmake) -set(CPACK_EXTERNAL_BUILT_PACKAGES ${CPACK_PACKAGE_DIRECTORY}/_CPack_Packages/Linux/External/${CPACK_PACKAGE_FILE_NAME}/${CPACK_PACKAGE_FILE_NAME}) -include(CPack) diff --git a/ads/common/ops/kernels/ads_op/README.md b/ads/common/ops/kernels/ads_op/README.md deleted file mode 100644 index 1e664555..00000000 --- a/ads/common/ops/kernels/ads_op/README.md +++ /dev/null @@ -1,13 +0,0 @@ -## 算子原型 - - - - - - - - - - - -
-| 算子类型(OpType) | Add |  |  |  |
-| 算子输入 | name | shape | data type | format |
-|  | x | - | float | ND |
-|  | y | - | float | ND |
-| 算子输出 | z | - | float | ND |
-| 核函数名 | add_custom |  |  |  |
\ No newline at end of file diff --git a/ads/common/ops/kernels/ads_op/build.sh b/ads/common/ops/kernels/ads_op/build.sh deleted file mode 100644 index b71f67a6..00000000 --- a/ads/common/ops/kernels/ads_op/build.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash -script_path=$(realpath $(dirname $0)) - - -mkdir -p build_out -rm -rf build_out/* -cd build_out - -if [ $ASCEND_AICPU_PATH ]; then - jq --arg field "configurePresets" --arg value "$ASCEND_AICPU_PATH" '.[$field][0].cacheVariables.ASCEND_CANN_PACKAGE_PATH.value = $value' $script_path/CMakePresets.json > $script_path/CMakePresets_bat.json - - if [ $? -eq 0 ]; then - mv $script_path/CMakePresets_bat.json $script_path/CMakePresets.json -f - else - echo "Error: please install jq with yum or apt-get" - exit 1 - fi -else - echo "Error: please source env.sh" - exit 1 -fi - -cmake_version=$(cmake --version | grep "cmake version" | awk '{print $3}') -if [ "$cmake_version" \< "3.19.0" ] ; then - opts=$(python3 $script_path/cmake/util/preset_parse.py $script_path/CMakePresets.json) - echo $opts - cmake .. $opts -else - cmake .. --preset=default -fi -target=package -if [ "$1"x != ""x ]; then target=$1; fi - -cmake --build . --target $target -j16 -if [ $? -ne 0 ]; then exit 1; fi - -if [ $target = "package" ]; then - if test -d ./op_kernel/binary ; then - ./cust*.run - if [ $? -ne 0 ]; then exit 1; fi - cmake --build . --target binary -j16 - if [ $? -ne 0 ]; then exit 1; fi - cmake --build . --target $target -j16 - fi -fi - -# for debug -# cd build_out -# make -# cpack -# verbose append -v diff --git a/ads/common/ops/kernels/ads_op/cmake/func.cmake b/ads/common/ops/kernels/ads_op/cmake/func.cmake deleted file mode 100644 index ad187e7d..00000000 --- a/ads/common/ops/kernels/ads_op/cmake/func.cmake +++ /dev/null @@ -1,228 +0,0 @@ - -function(get_system_info SYSTEM_INFO) - if (UNIX) - execute_process(COMMAND grep -i ^id= /etc/os-release OUTPUT_VARIABLE TEMP) - string(REGEX REPLACE "\n|id=|ID=|\"" "" SYSTEM_NAME ${TEMP}) - set(${SYSTEM_INFO} ${SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR} PARENT_SCOPE) - elseif (WIN32) - message(STATUS "System is Windows. 
Only for pre-build.") - else () - message(FATAL_ERROR "${CMAKE_SYSTEM_NAME} not support.") - endif () -endfunction() - -function(opbuild) - message(STATUS "Opbuild generating sources") - cmake_parse_arguments(OPBUILD "" "OUT_DIR;PROJECT_NAME;ACCESS_PREFIX" "OPS_SRC" ${ARGN}) - execute_process(COMMAND ${CMAKE_COMPILE} -g -fPIC -shared -std=c++11 ${OPBUILD_OPS_SRC} -D_GLIBCXX_USE_CXX11_ABI=0 - -I ${ASCEND_CANN_PACKAGE_PATH}/include -L ${ASCEND_CANN_PACKAGE_PATH}/lib64 -lexe_graph -lregister -ltiling_api - -o ${OPBUILD_OUT_DIR}/libascend_all_ops.so - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR - ) - if (${EXEC_RESULT}) - message("build ops lib info: ${EXEC_INFO}") - message("build ops lib error: ${EXEC_ERROR}") - message(FATAL_ERROR "opbuild run failed!") - endif() - set(proj_env "") - set(prefix_env "") - if (NOT "${OPBUILD_PROJECT_NAME}x" STREQUAL "x") - set(proj_env "OPS_PROJECT_NAME=${OPBUILD_PROJECT_NAME}") - endif() - if (NOT "${OPBUILD_ACCESS_PREFIX}x" STREQUAL "x") - set(prefix_env "OPS_DIRECT_ACCESS_PREFIX=${OPBUILD_ACCESS_PREFIX}") - endif() - execute_process(COMMAND ${proj_env} ${prefix_env} ${ASCEND_CANN_PACKAGE_PATH}/toolkit/tools/opbuild/op_build - ${OPBUILD_OUT_DIR}/libascend_all_ops.so ${OPBUILD_OUT_DIR} - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR - ) - if (${EXEC_RESULT}) - message("opbuild ops info: ${EXEC_INFO}") - message("opbuild ops error: ${EXEC_ERROR}") - endif() - message(STATUS "Opbuild generating sources - done") -endfunction() - -function(add_ops_info_target) - cmake_parse_arguments(OPINFO "" "TARGET;OPS_INFO;OUTPUT;INSTALL_DIR" "" ${ARGN}) - get_filename_component(opinfo_file_path "${OPINFO_OUTPUT}" DIRECTORY) - add_custom_command(OUTPUT ${OPINFO_OUTPUT} - COMMAND mkdir -p ${opinfo_file_path} - COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/parse_ini_to_json.py - ${OPINFO_OPS_INFO} ${OPINFO_OUTPUT} - ) - add_custom_target(${OPINFO_TARGET} ALL - DEPENDS ${OPINFO_OUTPUT} - ) - install(FILES ${OPINFO_OUTPUT} - DESTINATION ${OPINFO_INSTALL_DIR} - ) -endfunction() - -function(add_ops_compile_options OP_TYPE) - cmake_parse_arguments(OP_COMPILE "" "OP_TYPE" "COMPUTE_UNIT;OPTIONS" ${ARGN}) - file(APPEND ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} - "${OP_TYPE},${OP_COMPILE_COMPUTE_UNIT},${OP_COMPILE_OPTIONS}\n") -endfunction() - -function(add_ops_impl_target) - cmake_parse_arguments(OPIMPL "" "TARGET;OPS_INFO;IMPL_DIR;OUT_DIR;INSTALL_DIR" "OPS_BATCH;OPS_ITERATE" ${ARGN}) - add_custom_command(OUTPUT ${OPIMPL_OUT_DIR}/.impl_timestamp - COMMAND mkdir -m 700 -p ${OPIMPL_OUT_DIR}/dynamic - COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py - ${OPIMPL_OPS_INFO} - \"${OPIMPL_OPS_BATCH}\" \"${OPIMPL_OPS_ITERATE}\" - ${OPIMPL_IMPL_DIR} - ${OPIMPL_OUT_DIR}/dynamic - ${ASCEND_AUTOGEN_PATH} - - COMMAND rm -rf ${OPIMPL_OUT_DIR}/.impl_timestamp - COMMAND touch ${OPIMPL_OUT_DIR}/.impl_timestamp - DEPENDS ${OPIMPL_OPS_INFO} - ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py - ) - add_custom_target(${OPIMPL_TARGET} ALL - DEPENDS ${OPIMPL_OUT_DIR}/.impl_timestamp) - if (${ENABLE_SOURCE_PACKAGE}) - install(DIRECTORY ${OPIMPL_OUT_DIR}/dynamic - DESTINATION ${OPIMPL_INSTALL_DIR} - ) - endif() -endfunction() - -function(add_ops_replay_targets) - cmake_parse_arguments(OPREPLAY "" "OPS_INFO;COMPUTE_UNIT;IMPL_DIR;OUT_DIR;INSTALL_DIR" "OPS_BATCH;OPS_ITERATE" ${ARGN}) - # ccec compile options - set(ccec_base_opts -c -O2 --cce-aicore-only -mllvm 
-cce-aicore-function-stack-size=16000 - -mllvm -cce-aicore-record-overflow=false -std=c++17) - set(ccec_extopts_ascend310p --cce-aicore-arch=dav-m200 -mllvm -cce-aicore-fp-ceiling=2) - set(ccec_extopts_ascend910 --cce-aicore-arch=dav-c100) - set(ccec_extopts_ascend910b --cce-aicore-arch=dav-c220-cube) - file(MAKE_DIRECTORY ${OPREPLAY_OUT_DIR}) - execute_process(COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_replay_build.py - ${OPREPLAY_OPS_INFO} - "${OPREPLAY_OPS_BATCH}" "${OPREPLAY_OPS_ITERATE}" - ${OPREPLAY_IMPL_DIR} - ${OPREPLAY_OUT_DIR} - ${OPREPLAY_COMPUTE_UNIT} - ) - file(GLOB replay_kernel_entries ${OPREPLAY_OUT_DIR}/*.cce) - if (NOT "${replay_kernel_entries}x" STREQUAL "x") - foreach(replay_kernel_file ${replay_kernel_entries}) - get_filename_component(replay_kernel_file_name "${replay_kernel_file}" NAME) - string(REPLACE "_entry.cce" "" op_kerne_name ${replay_kernel_file_name}) - file(GLOB replay_lib_src ${OPREPLAY_OUT_DIR}/${op_kerne_name}*.cpp) - set(OP_TILING_DATA_H_PATH ${OPREPLAY_OUT_DIR}/${op_kerne_name}_tiling_data.h) - add_library(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} SHARED ${replay_lib_src}) - if(EXISTS ${OP_TILING_DATA_H_PATH}) - target_compile_options(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE - -include ${OP_TILING_DATA_H_PATH} - ) - endif() - target_compile_definitions(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE - ${op_kerne_name}=${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} - ) - target_compile_options(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE - -D__ASCENDC_REPLAY__ - ) - target_link_libraries(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE intf_pub - tikreplaylib::${OPREPLAY_COMPUTE_UNIT} - register - ) - add_custom_command(OUTPUT ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o - COMMAND ccec ${ccec_base_opts} ${ccec_extopts_${OPREPLAY_COMPUTE_UNIT}} ${replay_kernel_file} - -o ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o - DEPENDS ${replay_kernel_file} - ) - add_custom_target(replay_kernel_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} ALL - DEPENDS ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o - ) - install(TARGETS replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_replay - ) - install(FILES ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o - DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_replay - ) - endforeach() - endif() -endfunction() - -function(add_npu_support_target) - cmake_parse_arguments(NPUSUP "" "TARGET;OPS_INFO_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) - get_filename_component(npu_sup_file_path "${NPUSUP_OUT_DIR}" DIRECTORY) - add_custom_command(OUTPUT ${NPUSUP_OUT_DIR}/npu_supported_ops.json - COMMAND mkdir -p ${NPUSUP_OUT_DIR} - COMMAND ${CMAKE_SOURCE_DIR}/cmake/util/gen_ops_filter.sh - ${NPUSUP_OPS_INFO_DIR} - ${NPUSUP_OUT_DIR} - ) - add_custom_target(npu_supported_ops ALL - DEPENDS ${NPUSUP_OUT_DIR}/npu_supported_ops.json - ) - install(FILES ${NPUSUP_OUT_DIR}/npu_supported_ops.json - DESTINATION ${NPUSUP_INSTALL_DIR} - ) -endfunction() - -function(add_bin_compile_target) - cmake_parse_arguments(BINCMP "" "TARGET;OPS_INFO;COMPUTE_UNIT;IMPL_DIR;ADP_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/src) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/gen) - execute_process(COMMAND ${ASCEND_PYTHON_EXECUTABLE} 
${CMAKE_SOURCE_DIR}/cmake/util/ascendc_bin_param_build.py - ${BINCMP_OPS_INFO} ${BINCMP_OUT_DIR}/gen ${BINCMP_COMPUTE_UNIT} - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR - ) - if (${EXEC_RESULT}) - message("ops binary compile scripts gen info: ${EXEC_INFO}") - message("ops binary compile scripts gen error: ${EXEC_ERROR}") - message(FATAL_ERROR "ops binary compile scripts gen failed!") - endif() - if (NOT TARGET binary) - add_custom_target(binary) - endif() - add_custom_target(${BINCMP_TARGET} - COMMAND cp -r ${BINCMP_IMPL_DIR}/*.* ${BINCMP_OUT_DIR}/src - ) - add_custom_target(${BINCMP_TARGET}_gen_ops_config - COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/insert_simplified_keys.py -p ${BINCMP_OUT_DIR}/bin - COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_ops_config.py -p ${BINCMP_OUT_DIR}/bin - -s ${BINCMP_COMPUTE_UNIT} - ) - add_dependencies(binary ${BINCMP_TARGET}_gen_ops_config) - file(GLOB bin_scripts ${BINCMP_OUT_DIR}/gen/*.sh) - foreach(bin_script ${bin_scripts}) - get_filename_component(bin_file ${bin_script} NAME_WE) - string(REPLACE "-" ";" bin_sep ${bin_file}) - list(GET bin_sep 0 op_type) - list(GET bin_sep 1 op_file) - list(GET bin_sep 2 op_index) - if (NOT TARGET ${BINCMP_TARGET}_${op_file}_copy) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file}) - add_custom_target(${BINCMP_TARGET}_${op_file}_copy - COMMAND cp ${BINCMP_ADP_DIR}/${op_file}.py ${BINCMP_OUT_DIR}/src/${op_type}.py - ) - install(DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file} - DESTINATION ${BINCMP_INSTALL_DIR}/${BINCMP_COMPUTE_UNIT} OPTIONAL - ) - install(FILES ${BINCMP_OUT_DIR}/bin/${op_file}.json - DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT}/ OPTIONAL - ) - endif() - add_custom_target(${BINCMP_TARGET}_${op_file}_${op_index} - COMMAND export HI_PYTHON=${ASCEND_PYTHON_EXECUTABLE} && bash ${bin_script} ${BINCMP_OUT_DIR}/src/${op_type}.py ${BINCMP_OUT_DIR}/bin/${op_file} - WORKING_DIRECTORY ${BINCMP_OUT_DIR} - ) - add_dependencies(${BINCMP_TARGET}_${op_file}_${op_index} ${BINCMP_TARGET} ${BINCMP_TARGET}_${op_file}_copy) - add_dependencies(${BINCMP_TARGET}_gen_ops_config ${BINCMP_TARGET}_${op_file}_${op_index}) - endforeach() - install(FILES ${BINCMP_OUT_DIR}/bin/binary_info_config.json - DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT} OPTIONAL - ) -endfunction() diff --git a/ads/common/ops/kernels/ads_op/cmake/makeself.cmake b/ads/common/ops/kernels/ads_op/cmake/makeself.cmake deleted file mode 100644 index 48c565bf..00000000 --- a/ads/common/ops/kernels/ads_op/cmake/makeself.cmake +++ /dev/null @@ -1,17 +0,0 @@ -execute_process(COMMAND chmod +x ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh) -execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh - --header ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself-header.sh - --help-header ./help.info - --gzip --complevel 4 --nomd5 --sha256 - ./ ${CPACK_PACKAGE_FILE_NAME} "version:1.0" ./install.sh - WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} - RESULT_VARIABLE EXEC_RESULT - ERROR_VARIABLE EXEC_ERROR -) -if (NOT "${EXEC_RESULT}x" STREQUAL "0x") - message(FATAL_ERROR "CPack Command error: ${EXEC_RESULT}\n${EXEC_ERROR}") -endif() -execute_process(COMMAND cp ${CPACK_EXTERNAL_BUILT_PACKAGES} ${CPACK_PACKAGE_DIRECTORY}/ - COMMAND echo "Copy ${CPACK_EXTERNAL_BUILT_PACKAGES} to ${CPACK_PACKAGE_DIRECTORY}/" - WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} -) diff --git a/ads/common/ops/kernels/ads_op/framework/CMakeLists.txt 
b/ads/common/ops/kernels/ads_op/framework/CMakeLists.txt deleted file mode 100644 index b6be9b49..00000000 --- a/ads/common/ops/kernels/ads_op/framework/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mindspore") - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/caffe_plugin") - add_subdirectory(caffe_plugin) - endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tf_plugin") - add_subdirectory(tf_plugin) - endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/onnx_plugin") - add_subdirectory(onnx_plugin) - endif() -endif() diff --git a/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt b/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt deleted file mode 100644 index 40dd51cf..00000000 --- a/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt +++ /dev/null @@ -1,82 +0,0 @@ - -aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} ops_srcs) - -opbuild(OPS_SRC ${ops_srcs} - OUT_DIR ${ASCEND_AUTOGEN_PATH} -) - -add_library(cust_op_proto SHARED ${ops_srcs} ${ASCEND_AUTOGEN_PATH}/op_proto.cc) -target_compile_definitions(cust_op_proto PRIVATE OP_PROTO_LIB) -target_compile_options(cust_op_proto PRIVATE - -fvisibility=hidden -) -if(ENABLE_CROSS_COMPILE) - target_link_directories(cust_op_proto PRIVATE - ${CMAKE_COMPILE_COMPILER_LIBRARY} - ${CMAKE_COMPILE_RUNTIME_LIBRARY} - ) -endif() -target_link_libraries(cust_op_proto PRIVATE - intf_pub - exe_graph - register - tiling_api - -Wl,--whole-archive - rt2_registry - -Wl,--no-whole-archive -) -set_target_properties(cust_op_proto PROPERTIES OUTPUT_NAME - cust_opsproto_rt2.0 -) -add_library(cust_optiling SHARED ${ops_srcs}) -target_compile_definitions(cust_optiling PRIVATE OP_TILING_LIB) -target_compile_options(cust_optiling PRIVATE - -fvisibility=hidden -) -if(ENABLE_CROSS_COMPILE) - target_link_directories(cust_optiling PRIVATE - ${CMAKE_COMPILE_COMPILER_LIBRARY} - ${CMAKE_COMPILE_RUNTIME_LIBRARY} - ) -endif() -target_link_libraries(cust_optiling PRIVATE - intf_pub - exe_graph - register - tiling_api - -Wl,--whole-archive - rt2_registry - -Wl,--no-whole-archive -) -set_target_properties(cust_optiling PROPERTIES OUTPUT_NAME - cust_opmaster_rt2.0 -) - -file(GLOB aclnn_src ${ASCEND_AUTOGEN_PATH}/aclnn_*.cpp) -file(GLOB aclnn_inc ${ASCEND_AUTOGEN_PATH}/aclnn_*.h) -add_library(cust_opapi SHARED ${aclnn_src}) -if(ENABLE_CROSS_COMPILE) - target_link_directories(cust_opapi PRIVATE - ${CMAKE_COMPILE_COMPILER_LIBRARY} - ${CMAKE_COMPILE_RUNTIME_LIBRARY} - ) -endif() -target_link_libraries(cust_opapi PRIVATE intf_pub ascendcl nnopbase) - -add_custom_target(optiling_compat ALL - COMMAND ln -sf lib/linux/${CMAKE_SYSTEM_PROCESSOR}/$ - ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so -) - -install(TARGETS cust_op_proto - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_proto/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) -install(FILES ${ASCEND_AUTOGEN_PATH}/op_proto.h - DESTINATION packages/vendors/${vendor_name}/op_proto/inc) -install(TARGETS cust_optiling - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so - DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling) -install(TARGETS cust_opapi - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_api/lib) -install(FILES ${aclnn_inc} - DESTINATION packages/vendors/${vendor_name}/op_api/include) diff --git a/ads/common/ops/kernels/ads_op/op_kernel/CMakeLists.txt b/ads/common/ops/kernels/ads_op/op_kernel/CMakeLists.txt deleted file mode 100644 index 
0d31a444..00000000 --- a/ads/common/ops/kernels/ads_op/op_kernel/CMakeLists.txt +++ /dev/null @@ -1,61 +0,0 @@ -# set custom compile options -if ("${CMAKE_BUILD_TYPE}x" STREQUAL "Debugx") - add_ops_compile_options(ALL OPTIONS -g -O0) -endif() - -foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) - - # generate aic-${compute_unit}-ops-info.json - add_ops_info_target(TARGET ops_info_gen_${compute_unit} - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core/${compute_unit}/aic-${compute_unit}-ops-info.json - OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit} - ) - - # generate ascendc impl py once - if (NOT TARGET ascendc_impl_gen) - add_ops_impl_target(TARGET ascendc_impl_gen - OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} - OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe - INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl - ) - endif() - - # dynamic shape binary compile - if (${ENABLE_BINARY_PACKAGE}) - add_bin_compile_target(TARGET ascendc_bin_${compute_unit} - OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} - ADP_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/dynamic - OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/binary/${compute_unit} - INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel - COMPUTE_UNIT ${compute_unit} - ) - add_dependencies(ascendc_bin_${compute_unit} ascendc_impl_gen) - endif() - -endforeach() - -# generate npu_supported_ops.json -add_npu_support_target(TARGET npu_supported_ops - OPS_INFO_DIR ${ASCEND_AUTOGEN_PATH} - OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core - INSTALL_DIR packages/vendors/${vendor_name}/framework/${ASCEND_FRAMEWORK_TYPE} -) - -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() - -# install kernel file -if (${ENABLE_SOURCE_PACKAGE}) - file(GLOB KERNEL_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/*.h - ${CMAKE_CURRENT_SOURCE_DIR}/*.py - ) - install(FILES ${KERNEL_FILES} - DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic - ) -endif() diff --git a/ads/common/ops/kernels/op_host/CMakeLists.txt b/ads/common/ops/kernels/op_host/CMakeLists.txt new file mode 100644 index 00000000..c44b2b01 --- /dev/null +++ b/ads/common/ops/kernels/op_host/CMakeLists.txt @@ -0,0 +1,16 @@ +file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +set(ASCEND_HOST_SRC + ${ASCEND_HOST_SRC} ${HOST_SRC} + CACHE INTERNAL "") +# add the exclude files for aclnn +set(aclop_exclude + ${aclop_exclude} "" + CACHE INTERNAL "") +file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) +file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) +set(ACLNN_SRC_CUSTOM + ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} + CACHE INTERNAL "") +set(ACLNN_INC_CUSTOM + ${ACLNN_INC_CUSTOM} ${ACLNN_INC} + CACHE INTERNAL "") diff --git a/ads/common/ops/kernels/ads_op/op_host/add_custom.cpp b/ads/common/ops/kernels/op_host/add_custom.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/add_custom.cpp rename to ads/common/ops/kernels/op_host/add_custom.cpp diff --git a/ads/common/ops/kernels/ads_op/op_host/add_custom_tiling.h b/ads/common/ops/kernels/op_host/add_custom_tiling.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/add_custom_tiling.h rename to 
ads/common/ops/kernels/op_host/add_custom_tiling.h diff --git a/ads/common/ops/kernels/ads_op/op_host/dynamic_scatter.cpp b/ads/common/ops/kernels/op_host/dynamic_scatter.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/dynamic_scatter.cpp rename to ads/common/ops/kernels/op_host/dynamic_scatter.cpp diff --git a/ads/common/ops/kernels/ads_op/op_host/dynamic_scatter_tiling.h b/ads/common/ops/kernels/op_host/dynamic_scatter_tiling.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/dynamic_scatter_tiling.h rename to ads/common/ops/kernels/op_host/dynamic_scatter_tiling.h diff --git a/ads/common/ops/kernels/ads_op/op_host/dynamic_voxelization.cpp b/ads/common/ops/kernels/op_host/dynamic_voxelization.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/dynamic_voxelization.cpp rename to ads/common/ops/kernels/op_host/dynamic_voxelization.cpp diff --git a/ads/common/ops/kernels/ads_op/op_host/dynamic_voxelization_tiling.h b/ads/common/ops/kernels/op_host/dynamic_voxelization_tiling.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/dynamic_voxelization_tiling.h rename to ads/common/ops/kernels/op_host/dynamic_voxelization_tiling.h diff --git a/ads/common/ops/kernels/ads_op/op_host/furthest_point_sampling_with_dist.cpp b/ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/furthest_point_sampling_with_dist.cpp rename to ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp diff --git a/ads/common/ops/kernels/ads_op/op_host/furthest_point_sampling_with_dist_tiling.h b/ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/furthest_point_sampling_with_dist_tiling.h rename to ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h diff --git a/ads/common/ops/kernels/ads_op/op_host/gather_nms3d_mask_tiling.cpp b/ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/gather_nms3d_mask_tiling.cpp rename to ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp diff --git a/ads/common/ops/kernels/ads_op/op_host/gather_nms3d_mask_tiling.h b/ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/gather_nms3d_mask_tiling.h rename to ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h diff --git a/ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attention_grad.cpp b/ads/common/ops/kernels/op_host/multi_scale_deformable_attention_grad.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attention_grad.cpp rename to ads/common/ops/kernels/op_host/multi_scale_deformable_attention_grad.cpp diff --git a/ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attention_grad.h b/ads/common/ops/kernels/op_host/multi_scale_deformable_attention_grad.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attention_grad.h rename to ads/common/ops/kernels/op_host/multi_scale_deformable_attention_grad.h diff --git a/ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attn_function_v2.cpp b/ads/common/ops/kernels/op_host/multi_scale_deformable_attn_function_v2.cpp similarity index 100% rename from 
ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attn_function_v2.cpp rename to ads/common/ops/kernels/op_host/multi_scale_deformable_attn_function_v2.cpp diff --git a/ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attn_function_v2.h b/ads/common/ops/kernels/op_host/multi_scale_deformable_attn_function_v2.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attn_function_v2.h rename to ads/common/ops/kernels/op_host/multi_scale_deformable_attn_function_v2.h diff --git a/ads/common/ops/kernels/ads_op/op_host/nms3d_normal_tiling.cpp b/ads/common/ops/kernels/op_host/nms3d_normal_tiling.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/nms3d_normal_tiling.cpp rename to ads/common/ops/kernels/op_host/nms3d_normal_tiling.cpp diff --git a/ads/common/ops/kernels/ads_op/op_host/nms3d_normal_tiling.h b/ads/common/ops/kernels/op_host/nms3d_normal_tiling.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/nms3d_normal_tiling.h rename to ads/common/ops/kernels/op_host/nms3d_normal_tiling.h diff --git a/ads/common/ops/kernels/ads_op/op_host/points_in_box.cpp b/ads/common/ops/kernels/op_host/points_in_box.cpp similarity index 99% rename from ads/common/ops/kernels/ads_op/op_host/points_in_box.cpp rename to ads/common/ops/kernels/op_host/points_in_box.cpp index f7fd2820..acac1cbd 100644 --- a/ads/common/ops/kernels/ads_op/op_host/points_in_box.cpp +++ b/ads/common/ops/kernels/op_host/points_in_box.cpp @@ -1,4 +1,3 @@ - #include "points_in_box_tiling.h" #include "register/op_def_registry.h" #include "tiling/tiling_api.h" diff --git a/ads/common/ops/kernels/ads_op/op_host/points_in_box_tiling.h b/ads/common/ops/kernels/op_host/points_in_box_tiling.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_host/points_in_box_tiling.h rename to ads/common/ops/kernels/op_host/points_in_box_tiling.h diff --git a/ads/common/ops/kernels/op_kernel/CMakeLists.txt b/ads/common/ops/kernels/op_kernel/CMakeLists.txt new file mode 100644 index 00000000..0cf50214 --- /dev/null +++ b/ads/common/ops/kernels/op_kernel/CMakeLists.txt @@ -0,0 +1,4 @@ +file(GLOB KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +set(ASCEND_KERNEL_SRC + ${ASCEND_KERNEL_SRC} ${KERNEL_SRC} + CACHE INTERNAL "") diff --git a/ads/common/ops/kernels/ads_op/op_kernel/add_custom.cpp b/ads/common/ops/kernels/op_kernel/add_custom.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/add_custom.cpp rename to ads/common/ops/kernels/op_kernel/add_custom.cpp diff --git a/ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter.cpp b/ads/common/ops/kernels/op_kernel/dynamic_scatter.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter.cpp rename to ads/common/ops/kernels/op_kernel/dynamic_scatter.cpp diff --git a/ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_base.h b/ads/common/ops/kernels/op_kernel/dynamic_scatter_base.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_base.h rename to ads/common/ops/kernels/op_kernel/dynamic_scatter_base.h diff --git a/ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_max.h b/ads/common/ops/kernels/op_kernel/dynamic_scatter_max.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_max.h rename to ads/common/ops/kernels/op_kernel/dynamic_scatter_max.h diff --git 
a/ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_sum.h b/ads/common/ops/kernels/op_kernel/dynamic_scatter_sum.h similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_sum.h rename to ads/common/ops/kernels/op_kernel/dynamic_scatter_sum.h diff --git a/ads/common/ops/kernels/ads_op/op_kernel/dynamic_voxelization.cpp b/ads/common/ops/kernels/op_kernel/dynamic_voxelization.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/dynamic_voxelization.cpp rename to ads/common/ops/kernels/op_kernel/dynamic_voxelization.cpp diff --git a/ads/common/ops/kernels/ads_op/op_kernel/furthest_point_sampling_with_dist.cpp b/ads/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/furthest_point_sampling_with_dist.cpp rename to ads/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp diff --git a/ads/common/ops/kernels/ads_op/op_kernel/gather_nms3d_mask.cpp b/ads/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/gather_nms3d_mask.cpp rename to ads/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp diff --git a/ads/common/ops/kernels/ads_op/op_kernel/multi_scale_deformable_attention_grad.cpp b/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attention_grad.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/multi_scale_deformable_attention_grad.cpp rename to ads/common/ops/kernels/op_kernel/multi_scale_deformable_attention_grad.cpp diff --git a/ads/common/ops/kernels/ads_op/op_kernel/multi_scale_deformable_attn_function_v2.cpp b/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/multi_scale_deformable_attn_function_v2.cpp rename to ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function.cpp diff --git a/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function_v2.cpp b/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function_v2.cpp new file mode 100644 index 00000000..c59529fa --- /dev/null +++ b/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function_v2.cpp @@ -0,0 +1,359 @@ + +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. + * + * This kernel implements the MultiScaleDeformableAttnFunctionV2 (multi-scale deformable attention forward) operator on the Ascend platform.
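+ * For each (batch, query) pair assigned to the current core, the kernel loops over heads, levels and sampling points, + * bilinearly interpolates the value feature map at every sampling location (neighbours outside the map contribute zero), + * scales the result by the corresponding attention weight, and atomically accumulates the weighted sum into the output tensor.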
+ */ +#include "kernel_operator.h" +using namespace AscendC; +constexpr int32_t BUFFER_NUM = 2; + +class KernelMultiScaleDeformableAttnFunctionV2 +{ +public: + __aicore__ inline KernelMultiScaleDeformableAttnFunctionV2() {} + __aicore__ inline void Init(GM_ADDR value, + GM_ADDR value_spatial_shapes, + GM_ADDR value_level_start_index, + GM_ADDR sampling_locations, + GM_ADDR attention_weights, + GM_ADDR output, MultiScaleDeformableAttnFunctionV2TilingData *tiling_data) + { + ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); + dataAlign = blockNum / sizeof(DTYPE_VALUE); + batchSize = tiling_data->batchSize; + numKeys = tiling_data->numKeys; + numHeads = tiling_data->numHeads; + embedDims = tiling_data->embedDims; + + numLevels = tiling_data->numLevels; + numQueries = tiling_data->numQueries; + numPoints = tiling_data->numPoints; + coreNum = tiling_data->coreNum; + + taskNum = batchSize * numQueries; + taskNumPerCore = DivCeil(taskNum, coreNum); + + embedDimsAlign = AlignUp(embedDims, dataAlign); + numPointsAlign = AlignUp(numPoints, dataAlign); + numLevelsAlign = AlignUp(numLevels, dataAlign); + + curBlockIdx = GetBlockIdx(); + startOffset = curBlockIdx * taskNumPerCore; + endOffset = (curBlockIdx + 1) * taskNumPerCore; + if (endOffset > taskNum) + { + endOffset = taskNum; + } + + valueGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE *>(value), batchSize * numKeys * numHeads * embedDims); + locationGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE *>(sampling_locations), batchSize * numQueries * numHeads * numLevels * numPoints * 2); + attentionWeightsGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE *>(attention_weights), batchSize * numQueries * numHeads * numLevels * numPoints); + outputGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE *>(output), batchSize * numQueries * numHeads * embedDims); + + valueSpatialShapesGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE_SPATIAL_SHAPES *>(value_spatial_shapes), numLevels * 2); + valueLevelStartIndexGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE_SPATIAL_SHAPES *>(value_level_start_index), numLevels); + + pipe.InitBuffer(shapeQueue, BUFFER_NUM, AlignUp(numLevels * 2, dataAlign) * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(offsetQueue, BUFFER_NUM, numLevelsAlign * sizeof(DTYPE_VALUE)); + + pipe.InitBuffer(locationQueue, BUFFER_NUM, AlignUp(numLevels * numPoints * 2, dataAlign) * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(attentionWeightsUb, BUFFER_NUM, AlignUp(numLevels * numPoints, dataAlign) * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(outputQueue, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); + + pipe.InitBuffer(tmpUb1, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(tmpUb2, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(tmpUb3, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(tmpUb4, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + + pipe.InitBuffer(tmpResUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(tmpResUb2, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); + + pipe.InitBuffer(intOneUb, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); + pipe.InitBuffer(floatOneUb, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + + pipe.InitBuffer(tmpXUb, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(tmpYUb, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(tmpParam0Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(tmpParam1Ub, BUFFER_NUM, 
numPointsAlign * sizeof(DTYPE_VALUE)); + + pipe.InitBuffer(tmpIntX0Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); + pipe.InitBuffer(tmpIntY0Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); + pipe.InitBuffer(tmpIntX1Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); + pipe.InitBuffer(tmpIntY1Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); + + pipe.InitBuffer(leftTopWieightQueue, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(leftBottomWieightQueue, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(rightTopWieightQueue, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(rightBottomWieightQueue, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); + + pipe.InitBuffer(leftTopValueUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(leftBottomValueUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(rightTopValueUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); + pipe.InitBuffer(rightBottomValueUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); + } + + __aicore__ inline void Process() + { + for (uint32_t taskIdx = startOffset; taskIdx < endOffset; taskIdx++) + { + batch = taskIdx / numQueries; + query = taskIdx % numQueries; + pipe_barrier(PIPE_ALL); + Compute(batch, query); + } + } + +private: + __aicore__ inline bool isInRange(DTYPE_VALUE_SPATIAL_SHAPES x, DTYPE_VALUE_SPATIAL_SHAPES upper) + { + return 0 <= x && x < upper; + } + + __aicore__ inline void Compute(uint32_t batch, uint32_t query) + { + LocalTensor tmpResLocal = tmpResUb.Get(); + LocalTensor tmpResLocal2 = tmpResUb2.Get(); + + LocalTensor leftTopValueLocal = leftTopValueUb.Get(); + LocalTensor leftBottomValueUbLocal = leftBottomValueUb.Get(); + LocalTensor rightTopValueUbLocal = rightTopValueUb.Get(); + LocalTensor rightBottomValueUbLocal = rightBottomValueUb.Get(); + + LocalTensor leftTopWeiightLocal = leftTopWieightQueue.Get(); + LocalTensor leftBottomWeightLocal = leftBottomWieightQueue.Get(); + LocalTensor rightTopWeiightLocal = rightTopWieightQueue.Get(); + LocalTensor rightBottomWeightLocal = rightBottomWieightQueue.Get(); + + LocalTensor shapesLocal = shapeQueue.AllocTensor(); + LocalTensor offsetLocal = offsetQueue.AllocTensor(); + + LocalTensor locationLocal = locationQueue.AllocTensor(); + LocalTensor attentionWeightLocal = attentionWeightsUb.AllocTensor(); + + LocalTensor resLocal = outputQueue.AllocTensor(); + + LocalTensor xLocal = tmpXUb.Get(); + LocalTensor yLocal = tmpYUb.Get(); + + LocalTensor param0Local = tmpParam0Ub.Get(); + LocalTensor param1Local = tmpParam1Ub.Get(); + + LocalTensor x1Local = tmpIntX1Ub.Get(); + LocalTensor y1Local = tmpIntY1Ub.Get(); + + LocalTensor x0Local = tmpIntX0Ub.Get(); + LocalTensor y0Local = tmpIntY0Ub.Get(); + + LocalTensor tmpLocal1 = tmpUb1.Get(); + LocalTensor tmpLocal2 = tmpUb2.Get(); + LocalTensor tmpLocal3 = tmpUb3.Get(); + LocalTensor tmpLocal4 = tmpUb4.Get(); + + LocalTensor intOneLocal = intOneUb.Get(); + LocalTensor floatOneLocal = floatOneUb.Get(); + + Duplicate(intOneLocal, (DTYPE_VALUE_SPATIAL_SHAPES)1, numPointsAlign); + Duplicate(floatOneLocal, (DTYPE_VALUE)1, numPointsAlign); + DataCopyParams copyParams{1, (uint16_t)(embedDims * sizeof(DTYPE_VALUE)), 0, 0}; + + DataCopy(shapesLocal, valueSpatialShapesGm, AlignUp(numLevels * 2, dataAlign)); + DataCopy(offsetLocal, valueLevelStartIndexGm, numLevelsAlign); + Duplicate(resLocal, DTYPE_VALUE(0), embedDimsAlign); + moveOffset = batch 
* numQueries * numHeads * embedDims + query * numHeads * embedDims; + pipe_barrier(PIPE_ALL); + + for (uint32_t head = 0; head < numHeads; head++) + { + DataCopyPad(outputGm[moveOffset + head * embedDims], resLocal, copyParams); + } + pipe_barrier(PIPE_ALL); + + for (uint32_t head = 0; head < numHeads; head++) + { + weightOffset = (batch * numQueries * numHeads * numLevels + query * numHeads * numLevels + head * numLevels) * numPoints; + + pipe_barrier(PIPE_ALL); + + DataCopy(locationLocal, locationGm[weightOffset * 2], AlignUp(numLevels * numPoints * 2, dataAlign)); + DataCopy(attentionWeightLocal, attentionWeightsGm[weightOffset], AlignUp(numLevels * numPoints, dataAlign)); + + pipe_barrier(PIPE_ALL); + for (uint32_t level = 0; level < numLevels; level++) + { + h = shapesLocal.GetValue(level * 2); + w = shapesLocal.GetValue(level * 2 + 1); + for (uint32_t point = 0; point < numPoints; point++) + { + locationOffset = (level * numPoints + point) * 2; + xLocal.SetValue(point, locationLocal.GetValue(locationOffset)); + yLocal.SetValue(point, locationLocal.GetValue(locationOffset + 1)); + } + + pipe_barrier(PIPE_ALL); + + Muls(tmpLocal1, xLocal, (DTYPE_VALUE)w, numPointsAlign); + Muls(tmpLocal2, yLocal, (DTYPE_VALUE)h, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Adds(param0Local, tmpLocal1, (DTYPE_VALUE)0.5, numPointsAlign); + Adds(param1Local, tmpLocal2, (DTYPE_VALUE)0.5, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Cast(x1Local, param0Local, RoundMode::CAST_FLOOR, numPointsAlign); + Cast(y1Local, param1Local, RoundMode::CAST_FLOOR, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Adds(tmpLocal3, param0Local, (DTYPE_VALUE)-1, numPointsAlign); + Adds(tmpLocal4, param1Local, (DTYPE_VALUE)-1, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Sub(x0Local, x1Local, intOneLocal, numPointsAlign); + Sub(y0Local, y1Local, intOneLocal, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Cast(xLocal, x0Local, RoundMode::CAST_NONE, numPointsAlign); + Cast(yLocal, y0Local, RoundMode::CAST_NONE, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Sub(tmpLocal1, tmpLocal3, xLocal, numPointsAlign); + Sub(tmpLocal2, tmpLocal4, yLocal, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Abs(param0Local, tmpLocal1, numPointsAlign); + Abs(param1Local, tmpLocal2, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Sub(xLocal, floatOneLocal, param0Local, numPointsAlign); + Sub(yLocal, floatOneLocal, param1Local, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Mul(leftTopWeiightLocal, xLocal, yLocal, numPointsAlign); + Mul(leftBottomWeightLocal, xLocal, param1Local, numPointsAlign); + Mul(rightTopWeiightLocal, param0Local, yLocal, numPointsAlign); + Mul(rightBottomWeightLocal, param0Local, param1Local, numPointsAlign); + pipe_barrier(PIPE_ALL); + + Duplicate(resLocal, DTYPE_VALUE(0), embedDimsAlign); + + for (uint32_t point = 0; point < numPoints; point++) + { + Duplicate(leftTopValueLocal, DTYPE_VALUE(0), embedDimsAlign); + Duplicate(leftBottomValueUbLocal, DTYPE_VALUE(0), embedDimsAlign); + Duplicate(rightTopValueUbLocal, DTYPE_VALUE(0), embedDimsAlign); + Duplicate(rightBottomValueUbLocal, DTYPE_VALUE(0), embedDimsAlign); + + x0 = x0Local.GetValue(point); + y0 = y0Local.GetValue(point); + x1 = x1Local.GetValue(point); + y1 = y1Local.GetValue(point); + + valueOffset = batch * numKeys * numHeads + offsetLocal.GetValue(level) * numHeads + head; + pipe_barrier(PIPE_ALL); + + if (isInRange(x0, w)) + { + if (isInRange(y0, h)) + { + DataCopy(leftTopValueLocal, valueGm[(valueOffset + (y0 * w + x0) * numHeads) * embedDims], embedDimsAlign); 
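+ // value vector at the left-top neighbour (y0, x0); the remaining three neighbours are loaded below under the same range checks, and neighbours outside the feature map keep their zero-filled buffers (zero padding)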
+ } + if (isInRange(y1, h)) + { + DataCopy(leftBottomValueUbLocal, valueGm[(valueOffset + (y1 * w + x0) * numHeads) * embedDims], embedDimsAlign); + } + } + if (isInRange(x1, w)) + { + if (isInRange(y0, h)) + { + DataCopy(rightTopValueUbLocal, valueGm[(valueOffset + (y0 * w + x1) * numHeads) * embedDims], embedDimsAlign); + } + if (isInRange(y1, h)) + { + DataCopy(rightBottomValueUbLocal, valueGm[(valueOffset + (y1 * w + x1) * numHeads) * embedDims], embedDimsAlign); + } + } + pipe_barrier(PIPE_ALL); + + Muls(leftTopValueLocal, leftTopValueLocal, leftTopWeiightLocal.GetValue(point), embedDimsAlign); + Muls(rightTopValueUbLocal, rightTopValueUbLocal, rightTopWeiightLocal.GetValue(point), embedDimsAlign); + Muls(leftBottomValueUbLocal, leftBottomValueUbLocal, leftBottomWeightLocal.GetValue(point), embedDimsAlign); + Muls(rightBottomValueUbLocal, rightBottomValueUbLocal, rightBottomWeightLocal.GetValue(point), embedDimsAlign); + pipe_barrier(PIPE_ALL); + Add(tmpResLocal, leftTopValueLocal, rightTopValueUbLocal, embedDimsAlign); + Add(tmpResLocal2, leftBottomValueUbLocal, rightBottomValueUbLocal, embedDimsAlign); + pipe_barrier(PIPE_ALL); + Add(tmpResLocal, tmpResLocal, tmpResLocal2, embedDimsAlign); + pipe_barrier(PIPE_ALL); + Muls(tmpResLocal, tmpResLocal, attentionWeightLocal.GetValue(level * numPoints + point), embedDimsAlign); + pipe_barrier(PIPE_ALL); + Add(resLocal, resLocal, tmpResLocal, embedDimsAlign); + } + pipe_barrier(PIPE_ALL); + + SetAtomicAdd(); + DataCopyPad(outputGm[moveOffset + head * embedDims], resLocal, copyParams); + SetAtomicNone(); + } + } + locationQueue.FreeTensor(locationLocal); + attentionWeightsUb.FreeTensor(attentionWeightLocal); + outputQueue.FreeTensor(resLocal); + shapeQueue.FreeTensor(shapesLocal); + offsetQueue.FreeTensor(offsetLocal); + } + +private: + TPipe pipe; + GlobalTensor valueGm, locationGm, attentionWeightsGm, outputGm; + GlobalTensor valueSpatialShapesGm, valueLevelStartIndexGm; + + TQue locationQueue, attentionWeightsUb, shapeQueue, offsetQueue; + TQue outputQueue; + + TBuf tmpResUb, tmpResUb2, tmpXUb, tmpYUb, tmpParam0Ub, tmpParam1Ub, tmpIntX0Ub, tmpIntY0Ub, tmpIntX1Ub, tmpIntY1Ub, tmpUb1, tmpUb2, tmpUb3, tmpUb4; + TBuf intOneUb, floatOneUb, leftTopValueUb, leftBottomValueUb, rightTopValueUb, rightBottomValueUb; + TBuf leftTopWieightQueue, leftBottomWieightQueue, rightTopWieightQueue, rightBottomWieightQueue; + + uint32_t batchSize; + uint32_t numKeys; + uint32_t numHeads; + uint32_t embedDims; + + uint32_t numLevels; + uint32_t numQueries; + uint32_t numPoints; + uint32_t coreNum; + + uint32_t embedDimsAlign; + uint32_t numPointsAlign; + uint32_t numLevelsAlign; + + uint32_t batch; + uint32_t query; + uint32_t head; + + uint32_t taskNum; + uint32_t taskNumPerCore; + uint32_t curBlockIdx; + uint32_t startOffset; + uint32_t endOffset; + uint32_t dataAlign; + uint32_t blockNum = 32; + + DTYPE_VALUE_SPATIAL_SHAPES h, w, x0, y0, x1, y1, valueOffset, weightOffset, locationOffset, moveOffset; +}; + +extern "C" __global__ __aicore__ void multi_scale_deformable_attn_function_v2(GM_ADDR value, + GM_ADDR value_spatial_shapes, + GM_ADDR value_level_start_index, + GM_ADDR sampling_locations, + GM_ADDR attention_weights, + GM_ADDR output, GM_ADDR workspace, GM_ADDR tiling) +{ + GET_TILING_DATA(tiling_data, tiling); + KernelMultiScaleDeformableAttnFunctionV2 op; + op.Init(value, value_spatial_shapes, value_level_start_index, + sampling_locations, attention_weights, output, &tiling_data); + op.Process(); +} diff --git 
a/ads/common/ops/kernels/ads_op/op_kernel/nms3d_normal.cpp b/ads/common/ops/kernels/op_kernel/nms3d_normal.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/nms3d_normal.cpp rename to ads/common/ops/kernels/op_kernel/nms3d_normal.cpp diff --git a/ads/common/ops/kernels/ads_op/op_kernel/points_in_box.cpp b/ads/common/ops/kernels/op_kernel/points_in_box.cpp similarity index 100% rename from ads/common/ops/kernels/ads_op/op_kernel/points_in_box.cpp rename to ads/common/ops/kernels/op_kernel/points_in_box.cpp diff --git a/ads/common/ops/CMakeLists.txt b/ads/common/ops/onnx/__init__.py similarity index 100% rename from ads/common/ops/CMakeLists.txt rename to ads/common/ops/onnx/__init__.py diff --git a/ads/common/ops/onnx/plugin/CMakeLists.txt b/ads/common/ops/onnx/plugin/CMakeLists.txt new file mode 100644 index 00000000..cc6034bd --- /dev/null +++ b/ads/common/ops/onnx/plugin/CMakeLists.txt @@ -0,0 +1,5 @@ +file(GLOB ONNX_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +set(ASCEND_ONNX_SRC + ${ASCEND_ONNX_SRC} ${ONNX_SRC} + CACHE INTERNAL "") diff --git a/ads/motion/CMakeLists.txt b/ads/motion/CMakeLists.txt new file mode 100644 index 00000000..621d1fa9 --- /dev/null +++ b/ads/motion/CMakeLists.txt @@ -0,0 +1,3 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/kernels) + add_subdirectory(ops/kernels) +endif() diff --git a/ads/motion/ops/csrc/pybind.cpp b/ads/motion/ops/csrc/pybind.cpp new file mode 100644 index 00000000..7b362419 --- /dev/null +++ b/ads/motion/ops/csrc/pybind.cpp @@ -0,0 +1,5 @@ +#include +#include "csrc/pybind.h" + +void init_motion(pybind11::module& m) { +} diff --git a/ads/motion/ops/kernels/CMakeLists.txt b/ads/motion/ops/kernels/CMakeLists.txt new file mode 100644 index 00000000..179d9da2 --- /dev/null +++ b/ads/motion/ops/kernels/CMakeLists.txt @@ -0,0 +1,12 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) + add_subdirectory(framework) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) + add_subdirectory(op_host) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) + add_subdirectory(op_kernel) +endif() +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() diff --git a/ads/motion/ops/CMakeLists.txt b/ads/motion/ops/kernels/framework/CMakeLists.txt similarity index 100% rename from ads/motion/ops/CMakeLists.txt rename to ads/motion/ops/kernels/framework/CMakeLists.txt diff --git a/ads/motion/ops/kernels/op_host/CMakeLists.txt b/ads/motion/ops/kernels/op_host/CMakeLists.txt new file mode 100644 index 00000000..c44b2b01 --- /dev/null +++ b/ads/motion/ops/kernels/op_host/CMakeLists.txt @@ -0,0 +1,16 @@ +file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +set(ASCEND_HOST_SRC + ${ASCEND_HOST_SRC} ${HOST_SRC} + CACHE INTERNAL "") +# add the exclude files for aclnn +set(aclop_exclude + ${aclop_exclude} "" + CACHE INTERNAL "") +file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) +file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) +set(ACLNN_SRC_CUSTOM + ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} + CACHE INTERNAL "") +set(ACLNN_INC_CUSTOM + ${ACLNN_INC_CUSTOM} ${ACLNN_INC} + CACHE INTERNAL "") diff --git a/ads/perception/fused/ops/CMakeLists.txt b/ads/motion/ops/kernels/op_kernel/CMakeLists.txt similarity index 100% rename from ads/perception/fused/ops/CMakeLists.txt rename to ads/motion/ops/kernels/op_kernel/CMakeLists.txt diff --git a/ads/motion/ops/pybind.cpp b/ads/motion/ops/pybind.cpp deleted file mode 100644 index 
e69de29b..00000000 diff --git a/ads/perception/CMakeLists.txt b/ads/perception/CMakeLists.txt new file mode 100644 index 00000000..c8777acb --- /dev/null +++ b/ads/perception/CMakeLists.txt @@ -0,0 +1,9 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/fused/ops/kernels) + add_subdirectory(fused/ops/kernels) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/point/ops/kernels) + add_subdirectory(point/ops/kernels) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vision/ops/kernels) + add_subdirectory(vision/ops/kernels) +endif() diff --git a/ads/perception/fused/ops/csrc/pybind.cpp b/ads/perception/fused/ops/csrc/pybind.cpp new file mode 100644 index 00000000..f37707a3 --- /dev/null +++ b/ads/perception/fused/ops/csrc/pybind.cpp @@ -0,0 +1,5 @@ +#include +#include "csrc/pybind.h" + +void init_perception_fused(pybind11::module& m) { +} diff --git a/ads/perception/fused/ops/kernels/CMakeLists.txt b/ads/perception/fused/ops/kernels/CMakeLists.txt new file mode 100644 index 00000000..b77ac594 --- /dev/null +++ b/ads/perception/fused/ops/kernels/CMakeLists.txt @@ -0,0 +1,9 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) + add_subdirectory(op_host) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) + add_subdirectory(op_kernel) +endif() +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() diff --git a/ads/perception/fused/ops/kernels/op_host/CMakeLists.txt b/ads/perception/fused/ops/kernels/op_host/CMakeLists.txt new file mode 100644 index 00000000..75a458e0 --- /dev/null +++ b/ads/perception/fused/ops/kernels/op_host/CMakeLists.txt @@ -0,0 +1,17 @@ +file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +set(ASCEND_HOST_SRC + ${ASCEND_HOST_SRC} ${HOST_SRC} + CACHE INTERNAL "") + +# add the exclude files for aclnn +set(aclop_exclude + ${aclop_exclude} "" + CACHE INTERNAL "") +file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) +file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) +set(ACLNN_SRC_CUSTOM + ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} + CACHE INTERNAL "") +set(ACLNN_INC_CUSTOM + ${ACLNN_INC_CUSTOM} ${ACLNN_INC} + CACHE INTERNAL "") diff --git a/ads/perception/fused/ops/pybind.cpp b/ads/perception/fused/ops/pybind.cpp deleted file mode 100644 index e69de29b..00000000 diff --git a/ads/perception/point/ops/CMakeLists.txt b/ads/perception/point/ops/CMakeLists.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/ads/perception/point/ops/csrc/pybind.cpp b/ads/perception/point/ops/csrc/pybind.cpp new file mode 100644 index 00000000..35d19d8e --- /dev/null +++ b/ads/perception/point/ops/csrc/pybind.cpp @@ -0,0 +1,6 @@ +#include +#include "csrc/pybind.h" + +void init_perception_point(pybind11::module& m) { +} + diff --git a/ads/perception/point/ops/kernels/CMakeLists.txt b/ads/perception/point/ops/kernels/CMakeLists.txt new file mode 100644 index 00000000..179d9da2 --- /dev/null +++ b/ads/perception/point/ops/kernels/CMakeLists.txt @@ -0,0 +1,12 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) + add_subdirectory(framework) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) + add_subdirectory(op_host) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) + add_subdirectory(op_kernel) +endif() +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() diff --git a/ads/perception/point/ops/kernels/op_host/CMakeLists.txt b/ads/perception/point/ops/kernels/op_host/CMakeLists.txt new file mode 100644 index 00000000..75a458e0 --- /dev/null +++
b/ads/perception/point/ops/kernels/op_host/CMakeLists.txt @@ -0,0 +1,17 @@ +file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +set(ASCEND_HOST_SRC + ${ASCEND_HOST_SRC} ${HOST_SRC} + CACHE INTERNAL "") + +# add the exclude files for aclnn +set(aclop_exclude + ${aclop_exclude} "" + CACHE INTERNAL "") +file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) +file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) +set(ACLNN_SRC_CUSTOM + ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} + CACHE INTERNAL "") +set(ACLNN_INC_CUSTOM + ${ACLNN_INC_CUSTOM} ${ACLNN_INC} + CACHE INTERNAL "") diff --git a/ads/perception/point/ops/pybind.cpp b/ads/perception/point/ops/pybind.cpp deleted file mode 100644 index e69de29b..00000000 diff --git a/ads/perception/vision/ops/CMakeLists.txt b/ads/perception/vision/ops/CMakeLists.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/ads/perception/vision/ops/csrc/pybind.cpp b/ads/perception/vision/ops/csrc/pybind.cpp new file mode 100644 index 00000000..59057f96 --- /dev/null +++ b/ads/perception/vision/ops/csrc/pybind.cpp @@ -0,0 +1,5 @@ +#include +#include "csrc/pybind.h" + +void init_perception_vision(pybind11::module& m) { +} diff --git a/ads/perception/vision/ops/kernels/CMakeLists.txt b/ads/perception/vision/ops/kernels/CMakeLists.txt new file mode 100644 index 00000000..179d9da2 --- /dev/null +++ b/ads/perception/vision/ops/kernels/CMakeLists.txt @@ -0,0 +1,12 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) + add_subdirectory(framework) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) + add_subdirectory(op_host) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) + add_subdirectory(op_kernel) +endif() +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() diff --git a/ads/perception/vision/ops/kernels/op_host/CMakeLists.txt b/ads/perception/vision/ops/kernels/op_host/CMakeLists.txt new file mode 100644 index 00000000..75a458e0 --- /dev/null +++ b/ads/perception/vision/ops/kernels/op_host/CMakeLists.txt @@ -0,0 +1,17 @@ +file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +set(ASCEND_HOST_SRC + ${ASCEND_HOST_SRC} ${HOST_SRC} + CACHE INTERNAL "") + +# add the exclude files for aclnn +set(aclop_exclude + ${aclop_exclude} "" + CACHE INTERNAL "") +file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) +file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) +set(ACLNN_SRC_CUSTOM + ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} + CACHE INTERNAL "") +set(ACLNN_INC_CUSTOM + ${ACLNN_INC_CUSTOM} ${ACLNN_INC} + CACHE INTERNAL "") diff --git a/ads/perception/vision/ops/pybind.cpp b/ads/perception/vision/ops/pybind.cpp deleted file mode 100644 index e69de29b..00000000 diff --git a/bind/pybind.cpp b/bind/pybind.cpp index 351c837e..d0d5eba4 100644 --- a/bind/pybind.cpp +++ b/bind/pybind.cpp @@ -1,5 +1,5 @@ #include -#include "../ads/common/ops/csrc/functions.h" +#include "csrc/pybind.h" PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { init_common(m); diff --git a/ci/access_control_test.py b/ci/access_control_test.py index b5caff67..5eb8dc7a 100644 --- a/ci/access_control_test.py +++ b/ci/access_control_test.py @@ -24,8 +24,8 @@ from abc import ABCMeta, abstractmethod from pathlib import Path import warnings -BASE_DIR = Path(__file__).absolute().parent.parent -TEST_DIR = BASE_DIR / 'tests' +BASE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +TEST_DIR = os.path.join(BASE_DIR, "tests", "torch") def check_path_owner_consistent(path: str): @@ -117,21 +117,7
@@ class DirectoryStrategy(AccurateTest): def identify(self, modify_file): is_test_file = str(Path(modify_file).parts[0]) == "tests" \ and re.match("test_(.+).py", Path(modify_file).name) - return [(str(BASE_DIR / modify_file))] if is_test_file else [] - - -class CoreTestStrategy(AccurateTest): - """ - Determine whether the core tests should be runned - """ - block_list = ['test', 'docs'] - core_test_cases = [str(i) for i in (BASE_DIR / 'test/test_npu').rglob('test_*.py')] - - def identify(self, modify_file): - modified_module = str(Path(modify_file).parts[0]) - if modified_module not in self.block_list: - return self.core_test_cases - return [] + return [str(os.path.join(BASE_DIR, modify_file))] if is_test_file else [] class CopyOptStrategy(AccurateTest): @@ -146,44 +132,6 @@ class CopyOptStrategy(AccurateTest): return [] -class DirectoryMappingStrategy(AccurateTest): - """ - Map the modified files to the corresponding test cases - """ - mapping_list = { - 'contrib': 'test/test_contrib', - 'cpp_extension': 'test/test_cpp_extension', - 'distributed': 'test/test_distributed', - 'fx': 'test/test_fx', - 'hooks': 'test/test_hooks', - 'optim': 'test/test_optim', - 'profiler': 'test/test_profiler', - 'onnx': 'test/test_onnx', - 'utils': 'test/test_utils', - 'testing': 'test/test_testing.py', - } - - def identify(self, modify_file): - current_all_ut_path = [] - if str(Path(modify_file).parts[0]) == 'torch_npu': - mapped_ut_path = [] - module_name = str(Path(modify_file).parts[1]) - if module_name == 'csrc': - module_name = str(Path(modify_file).parts[2]) - if module_name in self.mapping_list: - mapped_ut_path.append(self.mapping_list[module_name]) - file_name = str(Path(modify_file).stem) - if file_name in self.mapping_list: - mapped_ut_path.append(self.mapping_list[file_name]) - - for mapped_path in mapped_ut_path: - if Path.is_file(BASE_DIR / mapped_path): - current_all_ut_path.append(str(BASE_DIR / mapped_path)) - else: - current_all_ut_path += [str(i) for i in (BASE_DIR / mapped_path).rglob('test_*.py')] - return current_all_ut_path - - class TestMgr(): def __init__(self): self.modify_files = [] @@ -210,9 +158,6 @@ class TestMgr(): self.test_files['ut_files'] += DirectoryStrategy().identify(modify_file) self.test_files['ut_files'] += CopyOptStrategy().identify(modify_file) self.test_files['ut_files'] += OpStrategy().identify(modify_file) - # self.test_files['op_ut_files'] += OpStrategy().identify(modify_file) - # self.test_files['ut_files'] += DirectoryMappingStrategy().identify(modify_file) - self.test_files['ut_files'] += CoreTestStrategy().identify(modify_file) unique_files = sorted(set(self.test_files['ut_files'])) exist_ut_file = [ @@ -326,7 +271,7 @@ def exec_ut(files): if __name__ == "__main__": - cur_modify_files = str(BASE_DIR / 'modify_files.txt') + cur_modify_files = str(os.path.join(BASE_DIR, 'modify_files.txt')) test_mgr = TestMgr() test_mgr.load(cur_modify_files) test_mgr.analyze() diff --git a/ci/build.sh b/ci/build.sh index 4d8a2a3a..27cc6447 100644 --- a/ci/build.sh +++ b/ci/build.sh @@ -1,5 +1,7 @@ # Copyright 2023 Huawei Technologies Co., Ltd CUR_DIR=$(dirname $(readlink -f $0)) +SCRIPTS_DIR=${CUR_DIR}/../scripts +BUILD_PACKAGES_DIR=${CUR_DIR}/../build_out/packages SUPPORTED_PY_VERSION=(3.7 3.8 3.9 3.10) PY_VERSION='3.7' DEFAULT_SCRIPT_ARGS_NUM=1 @@ -83,35 +85,16 @@ function main() else echo "ASCEND_OPP_PATH = $ASCEND_OPP_PATH" fi - chmod -R 777 ${CUR_DIR}/.. 
- cd ${CUR_DIR}/../ads/common/ops/kernels/ads_op - bash build.sh - cd ${CUR_DIR}/../ads/common/ops/kernels/ads_op/build_out - cp custom_opp_*.run ${CUR_DIR}/../ads/common/ops/kernels - cd .. - rm -rf build_out - cd .. - - - ./custom_opp_*.run --extract=./ads_op_kernel - rm -rf custom_opp_*.run - if [ -d "ads_op_kernel/" ]; then - echo "kernel compile success" - else - echo "kernel did not compile success" - exit 1 - fi - cd ${CUR_DIR}/../ads/common/ops/kernels/ads_op_kernel - rm -rf install.sh - rm -rf upgrade.sh - + chmod -R 777 ${SCRIPTS_DIR} + bash ${SCRIPTS_DIR}/build_kernel.sh + cd ${CUR_DIR}/.. rm -rf build - if [ -d "ads.egg-info" ]; then - echo "ads.egg-info exist" - rm -rf ads.egg-info + if [ -d "ads_accelerator.egg-info" ]; then + echo "ads_accelerator.egg-info exist" + rm -rf ads_accelerator.egg-info else - echo "ads.egg-info not exist" + echo "ads_accelerator.egg-info not exist" fi if ! parse_script_args "$@"; then @@ -121,7 +104,6 @@ function main() check_python_version - cd ${CUR_DIR}/.. python"${PY_VERSION}" setup.py build bdist_wheel if [ $? != 0 ]; then echo "Failed to compile the wheel file. Please check the source code by yourself." diff --git a/ads/common/ops/kernels/ads_op/cmake/config.cmake b/cmake/config.cmake similarity index 38% rename from ads/common/ops/kernels/ads_op/cmake/config.cmake rename to cmake/config.cmake index 886119da..be7299fb 100644 --- a/ads/common/ops/kernels/ads_op/cmake/config.cmake +++ b/cmake/config.cmake @@ -1,25 +1,51 @@ - set(CMAKE_CXX_FLAGS_DEBUG "") set(CMAKE_CXX_FLAGS_RELEASE "") -if (NOT DEFINED vendor_name) - set(vendor_name customize CACHE STRING "") +if(NOT DEFINED vendor_name) + set(vendor_name + customize + CACHE STRING "") endif() -if (NOT DEFINED ASCEND_CANN_PACKAGE_PATH) - set(ASCEND_CANN_PACKAGE_PATH /usr/local/Ascend/latest CACHE PATH "") +if(NOT DEFINED ASCEND_CANN_PACKAGE_PATH) + set(ASCEND_CANN_PACKAGE_PATH + /usr/local/Ascend/latest + CACHE PATH "") endif() -if (NOT DEFINED ASCEND_PYTHON_EXECUTABLE) - set(ASCEND_PYTHON_EXECUTABLE python3 CACHE STRING "") +if(NOT DEFINED ASCEND_PYTHON_EXECUTABLE) + set(ASCEND_PYTHON_EXECUTABLE + python3 + CACHE STRING "") endif() -if (NOT DEFINED ASCEND_COMPUTE_UNIT) - message(FATAL_ERROR "ASCEND_COMPUTE_UNIT not set in CMakePreset.json ! +if(NOT DEFINED ASCEND_COMPUTE_UNIT) + message(FATAL_ERROR "ASCEND_COMPUTE_UNIT not set in CMakePreset.json ! 
") endif() set(ASCEND_TENSOR_COMPILER_PATH ${ASCEND_CANN_PACKAGE_PATH}/compiler) set(ASCEND_CCEC_COMPILER_PATH ${ASCEND_TENSOR_COMPILER_PATH}/ccec_compiler/bin) set(ASCEND_AUTOGEN_PATH ${CMAKE_BINARY_DIR}/autogen) +set(ASCEND_KERNEL_PATH ${CMAKE_BINARY_DIR}/kernels) +set(ADS_PATH ${PROJECT_SOURCE_DIR}/ads) +set(ASCEND_HOST_SRC + "" + CACHE STRING "host source files") +set(ASCEND_KERNEL_SRC + "" + CACHE STRING "kernel source files") +set(ACLNN_SRC_CUSTOM + "" + CACHE STRING "aclnn source files") +set(ACLNN_INC_CUSTOM + "" + CACHE STRING "aclnn include files") +set(aclop_exclude + "" + CACHE STRING "aclop exclude files") +set(ASCEND_ONNX_SRC + "" + CACHE STRING "onnx source files") set(ASCEND_FRAMEWORK_TYPE tensorflow) file(MAKE_DIRECTORY ${ASCEND_AUTOGEN_PATH}) +file(MAKE_DIRECTORY ${ASCEND_KERNEL_PATH}) set(CUSTOM_COMPILE_OPTIONS "custom_compile_options.ini") execute_process(COMMAND rm -rf ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} COMMAND touch ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS}) diff --git a/cmake/func.cmake b/cmake/func.cmake new file mode 100644 index 00000000..3bd39155 --- /dev/null +++ b/cmake/func.cmake @@ -0,0 +1,233 @@ +function(install_target) + cmake_parse_arguments(INSTALL_TARGET "" "DST;TRG" "" ${ARGN}) + set_target_properties( + ${INSTALL_TARGET_TRG} + PROPERTIES LIBRARY_OUTPUT_DIRECTORY + ${ADS_PATH}/${INSTALL_TARGET_DST}) + install(TARGETS ${INSTALL_TARGET_TRG} + LIBRARY DESTINATION ${INSTALL_TARGET_DST}) +endfunction() + +function(install_file) + cmake_parse_arguments(INSTALL_TARGET "" "DST;SRC;TRG" "" ${ARGN}) + file(MAKE_DIRECTORY ${ADS_PATH}/${INSTALL_TARGET_DST}) + foreach(SOURCE_FILE ${INSTALL_TARGET_SRC}) + add_custom_command( + TARGET ${INSTALL_TARGET_TRG} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${SOURCE_FILE} + ${ADS_PATH}/${INSTALL_TARGET_DST}) + endforeach() + install(FILES ${INSTALL_TARGET_SRC} DESTINATION ${INSTALL_TARGET_DST}) +endfunction() + +function(get_system_info SYSTEM_INFO) + if(UNIX) + execute_process(COMMAND grep -i ^id= /etc/os-release OUTPUT_VARIABLE TEMP) + string(REGEX REPLACE "\n|id=|ID=|\"" "" SYSTEM_NAME ${TEMP}) + set(${SYSTEM_INFO} + ${SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR} + PARENT_SCOPE) + elseif(WIN32) + message(STATUS "System is Windows. 
Only for pre-build.") + else() + message(FATAL_ERROR "${CMAKE_SYSTEM_NAME} not support.") + endif() +endfunction() + +function(opbuild) + message(STATUS "Opbuild generating sources") + cmake_parse_arguments(OPBUILD "" "OUT_DIR;PROJECT_NAME;ACCESS_PREFIX" + "OPS_SRC" ${ARGN}) + execute_process( + COMMAND + ${CMAKE_COMPILE} -g -fPIC -shared -std=c++11 ${OPBUILD_OPS_SRC} + -D_GLIBCXX_USE_CXX11_ABI=0 -I ${ASCEND_CANN_PACKAGE_PATH}/include -L + ${ASCEND_CANN_PACKAGE_PATH}/lib64 -lexe_graph -lregister -ltiling_api -o + ${OPBUILD_OUT_DIR}/libascend_all_ops.so + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR) + if(${EXEC_RESULT}) + message("build ops lib info: ${EXEC_INFO}") + message("build ops lib error: ${EXEC_ERROR}") + message(FATAL_ERROR "opbuild run failed!") + endif() + set(proj_env "") + set(prefix_env "") + if(NOT "${OPBUILD_PROJECT_NAME}x" STREQUAL "x") + set(proj_env "OPS_PROJECT_NAME=${OPBUILD_PROJECT_NAME}") + endif() + if(NOT "${OPBUILD_ACCESS_PREFIX}x" STREQUAL "x") + set(prefix_env "OPS_DIRECT_ACCESS_PREFIX=${OPBUILD_ACCESS_PREFIX}") + endif() + execute_process( + COMMAND + ${proj_env} ${prefix_env} + ${ASCEND_CANN_PACKAGE_PATH}/toolkit/tools/opbuild/op_build + ${OPBUILD_OUT_DIR}/libascend_all_ops.so ${OPBUILD_OUT_DIR} + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR) + if(${EXEC_RESULT}) + message("opbuild ops info: ${EXEC_INFO}") + message("opbuild ops error: ${EXEC_ERROR}") + endif() + message(STATUS "Opbuild generating sources - done") +endfunction() + +function(add_ops_info_target) + cmake_parse_arguments(OPINFO "" "TARGET;OPS_INFO;OUTPUT;INSTALL_DIR" "" + ${ARGN}) + get_filename_component(opinfo_file_path "${OPINFO_OUTPUT}" DIRECTORY) + add_custom_command( + OUTPUT ${OPINFO_OUTPUT} + COMMAND mkdir -p ${opinfo_file_path} + COMMAND + ${ASCEND_PYTHON_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/cmake/util/parse_ini_to_json.py ${OPINFO_OPS_INFO} + ${OPINFO_OUTPUT}) + add_custom_target(${OPINFO_TARGET} ALL DEPENDS ${OPINFO_OUTPUT}) + install(FILES ${OPINFO_OUTPUT} DESTINATION ${OPINFO_INSTALL_DIR}) +endfunction() + +function(add_ops_compile_options OP_TYPE) + cmake_parse_arguments(OP_COMPILE "" "OP_TYPE" "COMPUTE_UNIT;OPTIONS" ${ARGN}) + file(APPEND ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} + "${OP_TYPE},${OP_COMPILE_COMPUTE_UNIT},${OP_COMPILE_OPTIONS}\n") +endfunction() + +function(add_ops_impl_target) + cmake_parse_arguments(OPIMPL "" "TARGET;OPS_INFO;IMPL_DIR;OUT_DIR" + "OPS_BATCH;OPS_ITERATE" ${ARGN}) + add_custom_command( + OUTPUT ${OPIMPL_OUT_DIR}/.impl_timestamp + COMMAND mkdir -m 700 -p ${OPIMPL_OUT_DIR}/dynamic + COMMAND + ${ASCEND_PYTHON_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py ${OPIMPL_OPS_INFO} + \"${OPIMPL_OPS_BATCH}\" \"${OPIMPL_OPS_ITERATE}\" ${OPIMPL_IMPL_DIR} + ${OPIMPL_OUT_DIR}/dynamic ${ASCEND_AUTOGEN_PATH} + COMMAND rm -rf ${OPIMPL_OUT_DIR}/.impl_timestamp + COMMAND touch ${OPIMPL_OUT_DIR}/.impl_timestamp + DEPENDS ${OPIMPL_OPS_INFO} + ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py) + add_custom_target(${OPIMPL_TARGET} ALL + DEPENDS ${OPIMPL_OUT_DIR}/.impl_timestamp) +endfunction() + +function(add_npu_support_target) + cmake_parse_arguments(NPUSUP "" "TARGET;OPS_INFO_DIR;OUT_DIR;INSTALL_DIR" "" + ${ARGN}) + get_filename_component(npu_sup_file_path "${NPUSUP_OUT_DIR}" DIRECTORY) + add_custom_command( + OUTPUT ${NPUSUP_OUT_DIR}/npu_supported_ops.json + COMMAND mkdir -p ${NPUSUP_OUT_DIR} + COMMAND bash ${CMAKE_SOURCE_DIR}/cmake/util/gen_ops_filter.sh + 
${NPUSUP_OPS_INFO_DIR} ${NPUSUP_OUT_DIR}) + add_custom_target(npu_supported_ops ALL + DEPENDS ${NPUSUP_OUT_DIR}/npu_supported_ops.json) + install(FILES ${NPUSUP_OUT_DIR}/npu_supported_ops.json + DESTINATION ${NPUSUP_INSTALL_DIR}) +endfunction() + +function(add_bin_compile_target) + cmake_parse_arguments( + BINCMP + "" + "TARGET;OPS_INFO;COMPUTE_UNIT;IMPL_DIR;ADP_DIR;OUT_DIR;INSTALL_DIR;KERNEL_DIR" + "" + ${ARGN}) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/src) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/gen) + file(MAKE_DIRECTORY ${BINCMP_KERNEL_DIR}/config/${BINCMP_COMPUTE_UNIT}) + execute_process( + COMMAND + ${ASCEND_PYTHON_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_bin_param_build.py + ${BINCMP_OPS_INFO} ${BINCMP_OUT_DIR}/gen ${BINCMP_COMPUTE_UNIT} + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR) + if(${EXEC_RESULT}) + message("ops binary compile scripts gen info: ${EXEC_INFO}") + message("ops binary compile scripts gen error: ${EXEC_ERROR}") + message(FATAL_ERROR "ops binary compile scripts gen failed!") + endif() + add_custom_target(${BINCMP_TARGET} COMMAND cp -r ${BINCMP_IMPL_DIR}/*.* + ${BINCMP_OUT_DIR}/src) + add_custom_target( + ${BINCMP_TARGET}_gen_ops_config ALL + COMMAND + ${ASCEND_PYTHON_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/cmake/util/insert_simplified_keys.py -p + ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT} + COMMAND + ${ASCEND_PYTHON_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_ops_config.py -p + ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT} -s ${BINCMP_COMPUTE_UNIT}) + file(GLOB bin_scripts ${BINCMP_OUT_DIR}/gen/*.sh) + foreach(bin_script ${bin_scripts}) + get_filename_component(bin_file ${bin_script} NAME_WE) + string(REPLACE "-" ";" bin_sep ${bin_file}) + list(GET bin_sep 0 op_type) + list(GET bin_sep 1 op_file) + list(GET bin_sep 2 op_index) + if(NOT TARGET ${BINCMP_TARGET}_${op_file}_copy) + add_custom_target( + ${BINCMP_TARGET}_${op_file}_copy + COMMAND cp ${BINCMP_ADP_DIR}/${op_file}.py + ${BINCMP_OUT_DIR}/src/${op_type}.py + DEPENDS ascendc_impl_gen) + install( + DIRECTORY ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT}/${op_file} + DESTINATION ${BINCMP_INSTALL_DIR}/${BINCMP_COMPUTE_UNIT} + OPTIONAL) + install( + FILES ${BINCMP_KERNEL_DIR}/config/${BINCMP_COMPUTE_UNIT}/${op_file}.json + DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT} + OPTIONAL) + endif() + add_custom_target( + ${BINCMP_TARGET}_${op_file}_${op_index} + COMMAND + export HI_PYTHON=${ASCEND_PYTHON_EXECUTABLE} && export + ASCEND_CUSTOM_OPP_PATH=${ADS_PATH}/packages/vendors/${vendor_name} + && bash ${bin_script} ${BINCMP_OUT_DIR}/src/${op_type}.py + ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT}/${op_file} + WORKING_DIRECTORY ${BINCMP_OUT_DIR}) + add_dependencies(${BINCMP_TARGET}_${op_file}_${op_index} ${BINCMP_TARGET} + ${BINCMP_TARGET}_${op_file}_copy) + add_dependencies(${BINCMP_TARGET}_gen_ops_config + ${BINCMP_TARGET}_${op_file}_${op_index}) + endforeach() + add_custom_command( + TARGET ${BINCMP_TARGET}_gen_ops_config + POST_BUILD + COMMAND mv ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT}/*.json + ${BINCMP_KERNEL_DIR}/config/${BINCMP_COMPUTE_UNIT}) + install( + FILES + ${BINCMP_KERNEL_DIR}/config/${BINCMP_COMPUTE_UNIT}/binary_info_config.json + DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT} + OPTIONAL) +endfunction() + +function(protobuf_generate) + cmake_parse_arguments(PROTOBUF_GEN "" "PROTO_FILE;OUT_DIR" "" ${ARGN}) + set(OUT_DIR ${PROTOBUF_GEN_OUT_DIR}/proto/onnx) + file(MAKE_DIRECTORY ${OUT_DIR}) + 
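+  # derive the proto file's base name and directory, then run protoc to emit the C++ sources into ${OUT_DIR} +  # example call (paths illustrative, not taken from this repo): protobuf_generate(PROTO_FILE ${CMAKE_SOURCE_DIR}/onnx/your_model.proto OUT_DIR ${CMAKE_BINARY_DIR})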
get_filename_component(file_name ${PROTOBUF_GEN_PROTO_FILE} NAME_WE) + get_filename_component(file_dir ${PROTOBUF_GEN_PROTO_FILE} PATH) + execute_process( + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMAND protoc -I${file_dir} --cpp_out=${OUT_DIR} ${PROTOBUF_GEN_PROTO_FILE} + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR) + if(${EXEC_RESULT}) + message("protobuf gen info: ${EXEC_INFO}") + message("protobuf gen error: ${EXEC_ERROR}") + message(FATAL_ERROR "protobuf gen failed!") + endif() +endfunction() diff --git a/ads/common/ops/kernels/ads_op/cmake/intf.cmake b/cmake/intf.cmake similarity index 33% rename from ads/common/ops/kernels/ads_op/cmake/intf.cmake rename to cmake/intf.cmake index 2f362c39..416ab14a 100644 --- a/ads/common/ops/kernels/ads_op/cmake/intf.cmake +++ b/cmake/intf.cmake @@ -1,26 +1,35 @@ - add_library(intf_pub INTERFACE) -target_compile_options(intf_pub INTERFACE +target_compile_options( + intf_pub + INTERFACE -fPIC -fvisibility=hidden -fvisibility-inlines-hidden $<$:-O2> - $<$:-O0 -g> + $<$:-O0 + -g> $<$:-std=c++11> - $<$,$>:-ftrapv -fstack-check> - $<$:-pthread -Wfloat-equal -Wshadow -Wformat=2 -Wno-deprecated -Wextra> + $<$,$>:-ftrapv + -fstack-check> + $<$:-pthread + -Wfloat-equal + -Wshadow + -Wformat=2 + -Wno-deprecated + -Wextra> $,-fstack-protector-strong,-fstack-protector-all> ) -target_compile_definitions(intf_pub INTERFACE - _GLIBCXX_USE_CXX11_ABI=0 - $<$:_FORTIFY_SOURCE=2> -) -target_include_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/include) -target_link_options(intf_pub INTERFACE - $<$,EXECUTABLE>:-pie> - $<$:-s> - -Wl,-z,relro - -Wl,-z,now - -Wl,-z,noexecstack -) +target_compile_definitions( + intf_pub INTERFACE _GLIBCXX_USE_CXX11_ABI=0 + $<$:_FORTIFY_SOURCE=2>) +target_include_directories(intf_pub + INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/include) +target_link_options( + intf_pub + INTERFACE + $<$,EXECUTABLE>:-pie> + $<$:-s> + -Wl,-z,relro + -Wl,-z,now + -Wl,-z,noexecstack) target_link_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/lib64) diff --git a/cmake/makeself.cmake b/cmake/makeself.cmake new file mode 100644 index 00000000..1f4fa76c --- /dev/null +++ b/cmake/makeself.cmake @@ -0,0 +1,19 @@ +execute_process(COMMAND chmod +x + ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh) +execute_process( + COMMAND + ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh --header + ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself-header.sh --help-header + ./help.info --gzip --complevel 4 --nomd5 --sha256 ./ + ${CPACK_PACKAGE_FILE_NAME} "version:1.0" ./install.sh + WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} + RESULT_VARIABLE EXEC_RESULT + ERROR_VARIABLE EXEC_ERROR) +if(NOT "${EXEC_RESULT}x" STREQUAL "0x") + message(FATAL_ERROR "CPack Command error: ${EXEC_RESULT}\n${EXEC_ERROR}") +endif() +execute_process( + COMMAND cp ${CPACK_EXTERNAL_BUILT_PACKAGES} ${CPACK_PACKAGE_DIRECTORY}/ + COMMAND echo + "Copy ${CPACK_EXTERNAL_BUILT_PACKAGES} to ${CPACK_PACKAGE_DIRECTORY}/" + WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY}) diff --git a/ads/common/ops/kernels/ads_op/cmake/util/__init__.py b/cmake/util/__init__.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/__init__.py rename to cmake/util/__init__.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/ascendc_bin_param_build.py b/cmake/util/ascendc_bin_param_build.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/ascendc_bin_param_build.py rename to cmake/util/ascendc_bin_param_build.py diff 
--git a/ads/common/ops/kernels/ads_op/cmake/util/ascendc_impl_build.py b/cmake/util/ascendc_impl_build.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/ascendc_impl_build.py rename to cmake/util/ascendc_impl_build.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/ascendc_ops_config.py b/cmake/util/ascendc_ops_config.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/ascendc_ops_config.py rename to cmake/util/ascendc_ops_config.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/ascendc_replay_build.py b/cmake/util/ascendc_replay_build.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/ascendc_replay_build.py rename to cmake/util/ascendc_replay_build.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/batch_replay_impl.temp b/cmake/util/batch_replay_impl.temp similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/batch_replay_impl.temp rename to cmake/util/batch_replay_impl.temp diff --git a/ads/common/ops/kernels/ads_op/cmake/util/code_channel_infer.py b/cmake/util/code_channel_infer.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/code_channel_infer.py rename to cmake/util/code_channel_infer.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/const_var.py b/cmake/util/const_var.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/const_var.py rename to cmake/util/const_var.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/gen_impl_and_mrege_json.sh b/cmake/util/gen_impl_and_mrege_json.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/gen_impl_and_mrege_json.sh rename to cmake/util/gen_impl_and_mrege_json.sh diff --git a/ads/common/ops/kernels/ads_op/cmake/util/gen_ops_filter.sh b/cmake/util/gen_ops_filter.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/gen_ops_filter.sh rename to cmake/util/gen_ops_filter.sh diff --git a/ads/common/ops/kernels/ads_op/cmake/util/gen_version_info.sh b/cmake/util/gen_version_info.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/gen_version_info.sh rename to cmake/util/gen_version_info.sh diff --git a/ads/common/ops/kernels/ads_op/cmake/util/insert_op_info.py b/cmake/util/insert_op_info.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/insert_op_info.py rename to cmake/util/insert_op_info.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/insert_simplified_keys.py b/cmake/util/insert_simplified_keys.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/insert_simplified_keys.py rename to cmake/util/insert_simplified_keys.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/kernel_entry.py b/cmake/util/kernel_entry.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/kernel_entry.py rename to cmake/util/kernel_entry.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/kernel_impl.temp b/cmake/util/kernel_impl.temp similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/kernel_impl.temp rename to cmake/util/kernel_impl.temp diff --git a/ads/common/ops/kernels/ads_op/cmake/util/makeself/COPYING b/cmake/util/makeself/COPYING similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/makeself/COPYING rename to cmake/util/makeself/COPYING diff --git a/ads/common/ops/kernels/ads_op/cmake/util/makeself/README.md b/cmake/util/makeself/README.md similarity index 100% rename 
from ads/common/ops/kernels/ads_op/cmake/util/makeself/README.md rename to cmake/util/makeself/README.md diff --git a/ads/common/ops/kernels/ads_op/cmake/util/makeself/VERSION b/cmake/util/makeself/VERSION similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/makeself/VERSION rename to cmake/util/makeself/VERSION diff --git a/ads/common/ops/kernels/ads_op/cmake/util/makeself/make-release.sh b/cmake/util/makeself/make-release.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/makeself/make-release.sh rename to cmake/util/makeself/make-release.sh diff --git a/ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself-header.sh b/cmake/util/makeself/makeself-header.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself-header.sh rename to cmake/util/makeself/makeself-header.sh diff --git a/ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.1 b/cmake/util/makeself/makeself.1 similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.1 rename to cmake/util/makeself/makeself.1 diff --git a/ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.lsm b/cmake/util/makeself/makeself.lsm similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.lsm rename to cmake/util/makeself/makeself.lsm diff --git a/ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.sh b/cmake/util/makeself/makeself.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.sh rename to cmake/util/makeself/makeself.sh diff --git a/ads/common/ops/kernels/ads_op/cmake/util/makeself/run-tests.sh b/cmake/util/makeself/run-tests.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/makeself/run-tests.sh rename to cmake/util/makeself/run-tests.sh diff --git a/ads/common/ops/kernels/ads_op/cmake/util/merge_aicpu_info_json.sh b/cmake/util/merge_aicpu_info_json.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/merge_aicpu_info_json.sh rename to cmake/util/merge_aicpu_info_json.sh diff --git a/ads/common/ops/kernels/ads_op/cmake/util/opdesc_parser.py b/cmake/util/opdesc_parser.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/opdesc_parser.py rename to cmake/util/opdesc_parser.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/parse_ini_to_json.py b/cmake/util/parse_ini_to_json.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/parse_ini_to_json.py rename to cmake/util/parse_ini_to_json.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/preset_parse.py b/cmake/util/preset_parse.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/preset_parse.py rename to cmake/util/preset_parse.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/replay_codegen.py b/cmake/util/replay_codegen.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/replay_codegen.py rename to cmake/util/replay_codegen.py diff --git a/ads/common/ops/kernels/ads_op/cmake/util/replay_impl.temp b/cmake/util/replay_impl.temp similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/replay_impl.temp rename to cmake/util/replay_impl.temp diff --git a/ads/common/ops/kernels/ads_op/cmake/util/tiling_data_def_build.py b/cmake/util/tiling_data_def_build.py similarity index 100% rename from ads/common/ops/kernels/ads_op/cmake/util/tiling_data_def_build.py rename to 
cmake/util/tiling_data_def_build.py diff --git a/ads/common/ops/csrc/OpApiCommon.h b/include/csrc/OpApiCommon.h similarity index 74% rename from ads/common/ops/csrc/OpApiCommon.h rename to include/csrc/OpApiCommon.h index 92332df1..70083ab9 100644 --- a/ads/common/ops/csrc/OpApiCommon.h +++ b/include/csrc/OpApiCommon.h @@ -13,25 +13,27 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#ifndef CSRC_OP_API_COMMON_H_ +#define CSRC_OP_API_COMMON_H_ #include -#include -#include #include #include -#include -#include - #include #include #include +#include "third_party/acl/inc/acl/acl_base.h" +#include "third_party/acl/inc/acl/acl_rt.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" #include "torch_npu/csrc/core/npu/NPUStream.h" #include "torch_npu/csrc/framework/OpCommand.h" #include "torch_npu/csrc/framework/interface/EnvVariables.h" #include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" +#include "torch_npu/csrc/framework/utils/OpAdapter.h" #include "torch_npu/csrc/framework/utils/OpPreparation.h" +#include "common.h" + #define NPU_NAME_SPACE at_npu::native #ifdef COMPILE_WITH_XLA @@ -69,34 +71,6 @@ constexpr int kHashBufMaxSize = kHashBufSize + 1024; extern thread_local char g_hashBuf[kHashBufSize]; extern thread_local int g_hashOffset; -#define AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(_) \ - _(at::ScalarType::Byte, ACL_UINT8) \ - _(at::ScalarType::Char, ACL_INT8) \ - _(at::ScalarType::Short, ACL_INT16) \ - _(at::ScalarType::Int, ACL_INT32) \ - _(at::ScalarType::Long, ACL_INT64) \ - _(at::ScalarType::Half, ACL_FLOAT16) \ - _(at::ScalarType::Float, ACL_FLOAT) \ - _(at::ScalarType::Double, ACL_DOUBLE) \ - _(at::ScalarType::ComplexHalf, ACL_DT_UNDEFINED) \ - _(at::ScalarType::ComplexFloat, ACL_COMPLEX64) \ - _(at::ScalarType::ComplexDouble, ACL_COMPLEX128) \ - _(at::ScalarType::Bool, ACL_BOOL) \ - _(at::ScalarType::QInt8, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QUInt8, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QInt32, ACL_DT_UNDEFINED) \ - _(at::ScalarType::BFloat16, ACL_BF16) \ - _(at::ScalarType::QUInt4x2, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QUInt2x4, ACL_DT_UNDEFINED) \ - _(at::ScalarType::Undefined, ACL_DT_UNDEFINED) \ - _(at::ScalarType::NumOptions, ACL_DT_UNDEFINED) - -constexpr aclDataType kATenScalarTypeToAclDataTypeTable[static_cast(at::ScalarType::NumOptions) + 1] = { -#define DEFINE_ENUM(_1, n) n, - AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) -#undef DEFINE_ENUM -}; - #define GET_OP_API_FUNC(apiName) reinterpret_cast<_##apiName>(GetOpApiFuncAddr(#apiName)) #define MEMCPY_TO_BUF(data_expression, size_expression) \ @@ -200,8 +174,7 @@ inline at::Tensor CopyTensorHostToDevice(const at::Tensor &cpu_tensor) { at::Tensor cpuPinMemTensor = cpu_tensor.pin_memory(); int deviceIndex = 0; - return cpuPinMemTensor.to( - c10::Device(DEVICE_TYPE, deviceIndex), cpuPinMemTensor.scalar_type(), true, true); + return cpuPinMemTensor.to(c10::Device(DEVICE_TYPE, deviceIndex), cpuPinMemTensor.scalar_type(), true, true); } inline at::Tensor CopyScalarToDevice(const c10::Scalar &cpu_scalar, at::ScalarType scalar_data_type) @@ -221,8 +194,8 @@ inline aclTensor *ConvertType(const at::Tensor &at_tensor) } at::ScalarType scalar_data_type = at_tensor.scalar_type(); aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK( - acl_data_type != ACL_DT_UNDEFINED, std::string(c10::toString(scalar_data_type)) + " has not 
been supported") + TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, + std::string(c10::toString(scalar_data_type)) + " has not been supported") c10::SmallVector storageDims; // if acl_data_type is ACL_STRING, storageDims is empty. auto itemsize = at_tensor.itemsize(); @@ -253,25 +226,13 @@ inline aclTensor *ConvertType(const at::Tensor &at_tensor) if (at_tensor.unsafeGetTensorImpl()->is_wrapped_number()) { c10::Scalar expScalar = ConvertTensorToScalar(at_tensor); at::Tensor aclInput = CopyScalarToDevice(expScalar, scalar_data_type); - return aclCreateTensor(aclInput.sizes().data(), - aclInput.sizes().size(), - acl_data_type, - aclInput.strides().data(), - aclInput.storage_offset(), - format, - storageDims.data(), - storageDims.size(), + return aclCreateTensor(aclInput.sizes().data(), aclInput.sizes().size(), acl_data_type, + aclInput.strides().data(), aclInput.storage_offset(), format, storageDims.data(), storageDims.size(), const_cast(aclInput.storage().data())); } - auto acl_tensor = aclCreateTensor(at_tensor.sizes().data(), - at_tensor.sizes().size(), - acl_data_type, - at_tensor.strides().data(), - at_tensor.storage_offset(), - format, - storageDims.data(), - storageDims.size(), + auto acl_tensor = aclCreateTensor(at_tensor.sizes().data(), at_tensor.sizes().size(), acl_data_type, + at_tensor.strides().data(), at_tensor.storage_offset(), format, storageDims.data(), storageDims.size(), const_cast(at_tensor.storage().data())); return acl_tensor; } @@ -285,8 +246,8 @@ inline aclScalar *ConvertType(const at::Scalar &at_scalar) at::ScalarType scalar_data_type = at_scalar.type(); aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK( - acl_data_type != ACL_DT_UNDEFINED, std::string(c10::toString(scalar_data_type)) + " has not been supported") + TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, + std::string(c10::toString(scalar_data_type)) + " has not been supported") aclScalar *acl_scalar = nullptr; switch (scalar_data_type) { case at::ScalarType::Double: { @@ -471,7 +432,7 @@ void Release(T value) template void CallRelease(Tuple t, std::index_sequence) { - (void)std::initializer_list{(Release(std::get(t)), 0)...}; + (void)std::initializer_list{ (Release(std::get(t)), 0)... }; } template @@ -536,74 +497,64 @@ typedef int (*InitHugeMemThreadLocal)(void *, bool); typedef void (*UnInitHugeMemThreadLocal)(void *, bool); typedef void (*ReleaseHugeMem)(void *, bool); -#define DO_COMPATIBILITY(aclnn_api, originCallExpression) \ - do { \ - static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ - static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ - if (getWorkspaceSizeFuncAddr == nullptr || opApiFuncAddr == nullptr) { \ - ASCEND_LOGW("%s or %sGetWorkspaceSize not in %s, or %s not found. Will call %s", \ - #aclnn_api, \ - #aclnn_api, \ - GetOpApiLibName(), \ - GetOpApiLibName(), \ - #originCallExpression); \ - return originCallExpression; \ - } \ +#define DO_COMPATIBILITY(aclnn_api, originCallExpression) \ + do { \ + static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ + static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ + if (getWorkspaceSizeFuncAddr == nullptr || opApiFuncAddr == nullptr) { \ + ASCEND_LOGW("%s or %sGetWorkspaceSize not in %s, or %s not found. 
Will call %s", #aclnn_api, #aclnn_api, \ + GetOpApiLibName(), GetOpApiLibName(), #originCallExpression); \ + return originCallExpression; \ + } \ } while (0) -#define EXEC_NPU_CMD(aclnn_api, ...) \ - do { \ - static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ - static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ - static const auto initMemAddr = GetOpApiFuncAddr("InitHugeMemThreadLocal"); \ - static const auto unInitMemAddr = GetOpApiFuncAddr("UnInitHugeMemThreadLocal"); \ - static const auto releaseMemAddr = GetOpApiFuncAddr("ReleaseHugeMem"); \ - TORCH_CHECK(getWorkspaceSizeFuncAddr != nullptr && opApiFuncAddr != nullptr, \ - #aclnn_api, \ - " or ", \ - #aclnn_api "GetWorkspaceSize", \ - " not in ", \ - GetOpApiLibName(), \ - ", or ", \ - GetOpApiLibName(), \ - "not found."); \ - auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \ - uint64_t workspace_size = 0; \ - uint64_t *workspace_size_addr = &workspace_size; \ - aclOpExecutor *executor = nullptr; \ - aclOpExecutor **executor_addr = &executor; \ - InitHugeMemThreadLocal initMemFunc = reinterpret_cast(initMemAddr); \ - UnInitHugeMemThreadLocal unInitMemFunc = reinterpret_cast(unInitMemAddr); \ - if (initMemFunc) { \ - initMemFunc(nullptr, false); \ - } \ - auto converted_params = ConvertTypes(__VA_ARGS__, workspace_size_addr, executor_addr); \ - static auto getWorkspaceSizeFunc = ConvertToOpApiFunc(converted_params, getWorkspaceSizeFuncAddr); \ - auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \ - TORCH_CHECK(workspace_status == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ - void *workspace_addr = nullptr; \ - if (workspace_size != 0) { \ - at::TensorOptions options = at::TensorOptions(torch_npu::utils::get_npu_device_type()); \ - auto workspace_tensor = at::empty({workspace_size}, options.dtype(at::kByte)); \ - workspace_addr = const_cast(workspace_tensor.storage().data()); \ - } \ - auto acl_call = [converted_params, workspace_addr, workspace_size, acl_stream, executor]()->int { \ - typedef int (*OpApiFunc)(void *, uint64_t, aclOpExecutor *, const aclrtStream); \ - OpApiFunc opApiFunc = reinterpret_cast(opApiFuncAddr); \ - auto api_ret = opApiFunc(workspace_addr, workspace_size, executor, acl_stream); \ - TORCH_CHECK(api_ret == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ - ReleaseConvertTypes(converted_params); \ - ReleaseHugeMem releaseMemFunc = reinterpret_cast(releaseMemAddr); \ - if (releaseMemFunc) { \ - releaseMemFunc(nullptr, false); \ - } \ - return api_ret; \ - }; \ - at_npu::native::OpCommand cmd; \ - cmd.Name(#aclnn_api); \ - cmd.SetCustomHandler(acl_call); \ - cmd.Run(); \ - if (unInitMemFunc) { \ - unInitMemFunc(nullptr, false); \ - } \ +#define EXEC_NPU_CMD(aclnn_api, ...) 
\ + do { \ + static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ + static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ + static const auto initMemAddr = GetOpApiFuncAddr("InitHugeMemThreadLocal"); \ + static const auto unInitMemAddr = GetOpApiFuncAddr("UnInitHugeMemThreadLocal"); \ + static const auto releaseMemAddr = GetOpApiFuncAddr("ReleaseHugeMem"); \ + TORCH_CHECK(getWorkspaceSizeFuncAddr != nullptr && opApiFuncAddr != nullptr, #aclnn_api, " or ", \ + #aclnn_api "GetWorkspaceSize", " not in ", GetOpApiLibName(), ", or ", GetOpApiLibName(), "not found."); \ + auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \ + uint64_t workspace_size = 0; \ + uint64_t *workspace_size_addr = &workspace_size; \ + aclOpExecutor *executor = nullptr; \ + aclOpExecutor **executor_addr = &executor; \ + InitHugeMemThreadLocal initMemFunc = reinterpret_cast(initMemAddr); \ + UnInitHugeMemThreadLocal unInitMemFunc = reinterpret_cast(unInitMemAddr); \ + if (initMemFunc) { \ + initMemFunc(nullptr, false); \ + } \ + auto converted_params = ConvertTypes(__VA_ARGS__, workspace_size_addr, executor_addr); \ + static auto getWorkspaceSizeFunc = ConvertToOpApiFunc(converted_params, getWorkspaceSizeFuncAddr); \ + auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \ + TORCH_CHECK(workspace_status == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ + void *workspace_addr = nullptr; \ + if (workspace_size != 0) { \ + at::TensorOptions options = at::TensorOptions(torch_npu::utils::get_npu_device_type()); \ + auto workspace_tensor = at::empty({ workspace_size }, options.dtype(at::kByte)); \ + workspace_addr = const_cast(workspace_tensor.storage().data()); \ + } \ + auto acl_call = [converted_params, workspace_addr, workspace_size, acl_stream, executor]() -> int { \ + typedef int (*OpApiFunc)(void *, uint64_t, aclOpExecutor *, const aclrtStream); \ + OpApiFunc opApiFunc = reinterpret_cast(opApiFuncAddr); \ + auto api_ret = opApiFunc(workspace_addr, workspace_size, executor, acl_stream); \ + TORCH_CHECK(api_ret == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ + ReleaseConvertTypes(converted_params); \ + ReleaseHugeMem releaseMemFunc = reinterpret_cast(releaseMemAddr); \ + if (releaseMemFunc) { \ + releaseMemFunc(nullptr, false); \ + } \ + return api_ret; \ + }; \ + at_npu::native::OpCommand cmd; \ + cmd.Name(#aclnn_api); \ + cmd.SetCustomHandler(acl_call); \ + cmd.Run(); \ + if (unInitMemFunc) { \ + unInitMemFunc(nullptr, false); \ + } \ } while (false) +#endif // CSRC_OP_API_COMMON_H_ diff --git a/ads/common/ops/csrc/common.cpp b/include/csrc/common.h similarity index 58% rename from ads/common/ops/csrc/common.cpp rename to include/csrc/common.h index f6f9cc49..1370c199 100644 --- a/ads/common/ops/csrc/common.cpp +++ b/include/csrc/common.h @@ -1,9 +1,21 @@ +#ifndef CSRC_COMMON_H_ +#define CSRC_COMMON_H_ #include +#include +#include +#include +#include +#include +#include "third_party/acl/inc/acl/acl_base.h" +#include "torch_npu/csrc/core/npu/NPUMacros.h" +#include "torch_npu/csrc/framework/utils/NPUDefinition.h" #include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" #include "torch_npu/csrc/aten/mirror/NPUMemoryOverlap.h" -#include "third_party/acl/inc/acl/acl_base.h" -#include "common.h" +const int N = 32; +const int SIZE = 8; + +using tuple_vector = std::tuple, c10::SmallVector>; using CalcuOpUtil = at_npu::native::CalcuOpUtil; #define AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(_) \ @@ -28,10 
+40,16 @@ using CalcuOpUtil = at_npu::native::CalcuOpUtil; _(at::ScalarType::Undefined, ACL_DT_UNDEFINED) \ _(at::ScalarType::NumOptions, ACL_DT_UNDEFINED) -static std::unordered_map dTypeTransMap{ - {"torch.float16", at::ScalarType::Half}, {"torch.half", at::ScalarType::Half}, {"torch.float32", at::ScalarType::Float}, {"torch.float", at::ScalarType::Float}, {"torch.float64", at::ScalarType::Double}, {"torch.float", at::ScalarType::Double}, {"torch.int8", at::ScalarType::Char}, {"torch.char", at::ScalarType::Char}, {"torch.int16", at::ScalarType::Short}, {"torch.short", at::ScalarType::Short}, {"torch.int32", at::ScalarType::Int}, {"torch.int32", at::ScalarType::Int}, {"torch.int64", at::ScalarType::Long}, {"torch.long", at::ScalarType::Long}}; +static std::unordered_map dTypeTransMap{ { "torch.float16", at::ScalarType::Half }, + { "torch.half", at::ScalarType::Half }, { "torch.float32", at::ScalarType::Float }, + { "torch.float", at::ScalarType::Float }, { "torch.float64", at::ScalarType::Double }, + { "torch.double", at::ScalarType::Double }, { "torch.int8", at::ScalarType::Char }, + { "torch.char", at::ScalarType::Char }, { "torch.int16", at::ScalarType::Short }, + { "torch.short", at::ScalarType::Short }, { "torch.int32", at::ScalarType::Int }, + { "torch.int", at::ScalarType::Int }, { "torch.int64", at::ScalarType::Long }, + { "torch.long", at::ScalarType::Long } }; -static bool check_inplace_tensor(const std::initializer_list &src_list, const at::Tensor &dst) +inline static bool check_inplace_tensor(const std::initializer_list &src_list, const at::Tensor &dst) { bool is_inplace_tensor = false; // check whether dst is contained in src_list @@ -44,36 +62,32 @@ return is_inplace_tensor; } -static void check_tensor_size(const std::initializer_list &src_list, at::Tensor &dst, - c10::IntArrayRef expect_size) +inline static void check_tensor_size(const std::initializer_list &src_list, at::Tensor &dst, + c10::IntArrayRef expect_size) { bool is_inplace = check_inplace_tensor(src_list, dst); // Preserve legacy resizing behavior of out=... 
arguments if (!dst.sizes().equals(expect_size)) { - TORCH_CHECK(!is_inplace, "output with shape ", dst.sizes(), " doesn't match the broadcast shape ", - expect_size); + TORCH_CHECK(!is_inplace, "output with shape ", dst.sizes(), " doesn't match the broadcast shape ", expect_size); dst.resize_(expect_size); } return; } -constexpr aclDataType kATenScalarTypeToAclDataTypeTable - [static_cast(at::ScalarType::NumOptions) + 1] = { +constexpr aclDataType kATenScalarTypeToAclDataTypeTable[static_cast(at::ScalarType::NumOptions) + 1] = { #define DEFINE_ENUM(_1, n) n, - AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) + AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) #undef DEFINE_ENUM }; -aclDataType ConvertToAclDataType(const at::ScalarType &data_type) +inline aclDataType ConvertToAclDataType(const at::ScalarType &data_type) { - auto acl_dtype = - kATenScalarTypeToAclDataTypeTable[static_cast(data_type)]; - TORCH_CHECK(acl_dtype != ACL_DT_UNDEFINED, - std::string(c10::toString(data_type)) + " has not been supported") + auto acl_dtype = kATenScalarTypeToAclDataTypeTable[static_cast(data_type)]; + TORCH_CHECK(acl_dtype != ACL_DT_UNDEFINED, std::string(c10::toString(data_type)) + " has not been supported") return acl_dtype; } -c10::SmallVector array_to_small_vector(c10::IntArrayRef shape) +inline c10::SmallVector array_to_small_vector(c10::IntArrayRef shape) { c10::SmallVector shape_small_vec; for (uint64_t i = 0; i < shape.size(); i++) { @@ -82,11 +96,9 @@ c10::SmallVector array_to_small_vector(c10::IntArrayRef shape) return shape_small_vec; } -c10::SmallVector conv_transpose2d_npu_output_size(const at::Tensor &input, const at::Tensor &weight, - const at::Tensor &bias, c10::IntArrayRef padding, - c10::IntArrayRef output_padding, - c10::IntArrayRef stride, c10::IntArrayRef dilation, - int64_t groups) +inline c10::SmallVector conv_transpose2d_npu_output_size(const at::Tensor &input, + const at::Tensor &weight, const at::Tensor &bias, c10::IntArrayRef padding, c10::IntArrayRef output_padding, + c10::IntArrayRef stride, c10::IntArrayRef dilation, int64_t groups) { int64_t N = input.size(0); int64_t H = input.size(2); @@ -97,22 +109,20 @@ c10::SmallVector conv_transpose2d_npu_output_size(const at::Tenso int64_t Ho = (H - 1) * stride[0] - 2 * padding[0] + dilation[0] * (kernel_size[0] - 1) + output_padding[0] + 1; int64_t Wo = (W - 1) * stride[1] - 2 * padding[1] + dilation[1] * (kernel_size[1] - 1) + output_padding[1] + 1; - c10::SmallVector outputSize = {N, Co, Ho, Wo}; + c10::SmallVector outputSize = { N, Co, Ho, Wo }; return outputSize; } -// tyf - -std::pair trans_torch_type_to_scalar(const std::string &type) +inline std::pair trans_torch_type_to_scalar(const std::string &type) { if (dTypeTransMap.find(type) != dTypeTransMap.end()) { - return {true, dTypeTransMap[type]}; + return { true, dTypeTransMap[type] }; } - return {false, at::ScalarType::Byte}; + return { false, at::ScalarType::Byte }; } -tuple_vector softmax_cross_entropy_with_logits_impl_npu_output_size(const at::Tensor &self) +inline tuple_vector softmax_cross_entropy_with_logits_impl_npu_output_size(const at::Tensor &self) { c10::SmallVector resultSize = array_to_small_vector(self.size(0)); c10::SmallVector backpropSize = array_to_small_vector(self.sizes()); @@ -120,7 +130,7 @@ tuple_vector softmax_cross_entropy_with_logits_impl_npu_output_size(const at::Te return std::tuple, c10::SmallVector>(resultSize, backpropSize); } -c10::SmallVector convert_array_to_vector(c10::IntArrayRef intArray) +inline c10::SmallVector 
convert_array_to_vector(c10::IntArrayRef intArray) { c10::SmallVector intVec; for (uint64_t i = 0; i < intArray.size(); i++) { @@ -129,10 +139,10 @@ c10::SmallVector convert_array_to_vector(c10::IntArrayRef intArray) return intVec; } -int64_t make_warp_dim(int64_t dim, int64_t dim_post_expr) +inline int64_t make_warp_dim(int64_t dim, int64_t dim_post_expr) { if (dim_post_expr <= 0) { - dim_post_expr = 1; // this will make range [-1, 0] + dim_post_expr = 1; // this will make range [-1, 0] } if (dim < 0) { dim += dim_post_expr; @@ -141,7 +151,7 @@ int64_t make_warp_dim(int64_t dim, int64_t dim_post_expr) } // This logic is specially made for stride_add, and will be removed in future version. -c10::SmallVector infersize_stride_add(c10::IntArrayRef shape1_, c10::IntArrayRef shape2_) +inline c10::SmallVector infersize_stride_add(c10::IntArrayRef shape1_, c10::IntArrayRef shape2_) { auto shape1 = array_to_small_vector(shape1_); auto shape2 = array_to_small_vector(shape2_); @@ -169,7 +179,7 @@ c10::SmallVector infersize_stride_add(c10::IntArrayRef shape1_, c return output_shape; } -c10::SmallVector transpose_npu_output_size(const at::Tensor &self, c10::IntArrayRef perm) +inline c10::SmallVector transpose_npu_output_size(const at::Tensor &self, c10::IntArrayRef perm) { auto sizes = self.sizes(); c10::SmallVector shape; @@ -180,13 +190,15 @@ c10::SmallVector transpose_npu_output_size(const at::Tensor &self return shape; } -bool check_match(const at::Tensor &self) +inline bool check_match(const at::Tensor &self) { - static auto op = c10::Dispatcher::singleton().findSchemaOrThrow("aten::check_match", "").typed(); + static auto op = + c10::Dispatcher::singleton().findSchemaOrThrow("aten::check_match", "").typed(); return op.call(self); } -void format_fresh_view(at::Tensor &x, const at::Tensor &y) +inline void format_fresh_view(at::Tensor &x, const at::Tensor &y) { x.copy_(y); } +#endif // CSRC_COMMON_H_ diff --git a/include/csrc/pybind.h b/include/csrc/pybind.h new file mode 100644 index 00000000..b997451f --- /dev/null +++ b/include/csrc/pybind.h @@ -0,0 +1,9 @@ +#ifndef CSRC_PYBIND_H_ +#define CSRC_PYBIND_H_ +#include +void init_common(pybind11::module& m); +void init_motion(pybind11::module& m); +void init_percention_fused(pybind11::module& m); +void init_perception_point(pybind11::module& m); +void init_perception_vision(pybind11::module& m); +#endif // CSRC_PYBIND_H_ diff --git a/include/onnx/common.h b/include/onnx/common.h new file mode 100644 index 00000000..3c16c44f --- /dev/null +++ b/include/onnx/common.h @@ -0,0 +1,4 @@ +#ifndef ADS_ONNX_H_ +#define ADS_ONNX_H_ +#include "proto/onnx/ge_onnx.pb.h" +#endif // ADS_ONNX_H_ diff --git a/scripts/build_kernel.sh b/scripts/build_kernel.sh new file mode 100644 index 00000000..c08e2eca --- /dev/null +++ b/scripts/build_kernel.sh @@ -0,0 +1,32 @@ +#!/bin/bash +script_path=$(realpath $(dirname $0)) +root_path=$(realpath $script_path/..) +rm -rf build_out +mkdir build_out +cd build_out + +if [ $ASCEND_AICPU_PATH ]; then + jq --arg field "configurePresets" --arg value "$ASCEND_AICPU_PATH" '.[$field][0].cacheVariables.ASCEND_CANN_PACKAGE_PATH.value = $value' $root_path/CMakePresets.json >$root_path/CMakePresets_bat.json + + if [ $? 
-eq 0 ]; then + mv $root_path/CMakePresets_bat.json $root_path/CMakePresets.json -f + else + echo "Error: please install jq with yum or apt-get" + exit 1 + fi +else + echo "Error: please source env.sh" + exit 1 +fi + +cmake_version=$(cmake --version | grep "cmake version" | awk '{print $3}') +if [ "$cmake_version" \< "3.19.0" ]; then + opts=$(python3 $root_path/cmake/util/preset_parse.py $root_path/CMakePresets.json) + echo $opts + cmake .. $opts +else + cmake .. --preset=default +fi + +cmake --build . -j16 +if [ $? -ne 0 ]; then exit 1; fi diff --git a/ads/common/ops/kernels/ads_op/scripts/install.sh b/scripts/install_kernel.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/scripts/install.sh rename to scripts/install_kernel.sh diff --git a/ads/common/ops/kernels/ads_op/scripts/upgrade.sh b/scripts/upgrade_kernel.sh similarity index 100% rename from ads/common/ops/kernels/ads_op/scripts/upgrade.sh rename to scripts/upgrade_kernel.sh diff --git a/setup.py b/setup.py index 01d5a775..3a2ca028 100644 --- a/setup.py +++ b/setup.py @@ -1,34 +1,34 @@ -import os import glob +import os import subprocess from pathlib import Path from typing import Union -import torch -from setuptools import setup, find_packages -from torch.utils.cpp_extension import BuildExtension -from utils import extension -import imp +import torch +from setuptools import find_packages, setup from torch.utils.cpp_extension import BuildExtension -torch_npu_dir = extension.PYTORCH_NPU_INSTALL_PATH -source_file = [] -source_file += glob.glob(os.path.join("./ads/common/ops/csrc/", "*.cpp")) -source_file += glob.glob(os.path.join("./bind/", "*.cpp")) +from utils import extension -include_dirs = [] -include_dirs.append(torch_npu_dir + "/include/third_party/acl/inc/") +BASE_DIR = os.path.dirname(os.path.realpath(__file__)) +VERSION = torch.__version__ +full_components = ["common", "motion", "perception/fused", "perception/point", "perception/vision"] +source_file = glob.glob(os.path.join("./bind/", "*.cpp")) +include_dirs = [os.path.join(BASE_DIR, "include")] +for part in full_components: + source_file += glob.glob(os.path.join(f"./ads/{part}/ops/csrc/", "*.cpp")) exts = [] ext1 = extension.NpuExtension( name="ads_c", sources=source_file, + include_dirs=include_dirs, extra_compile_args=[ - '-D__FILENAME__=\"$$(notdir $$(abspath $$<))\"', - '-I' + imp.find_module('torch_npu')[1] + "/include/third_party/acl/inc", - '-fprofile-arcs', - '-ftest-coverage'], - libraries=['gcov'], + '-D__FILENAME__="$$(notdir $$(abspath $$<))"', + "-fprofile-arcs", + "-ftest-coverage", + ], + libraries=["gcov"], ) exts.append(ext1) @@ -36,15 +36,12 @@ exts.append(ext1) def get_sha(pytorch_root: Union[str, Path]) -> str: try: return ( - subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=pytorch_root) # Compliant - .decode("ascii") - .strip() + subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=pytorch_root).decode("ascii").strip() # Compliant ) except Exception: return "Unknown" -BASE_DIR = os.path.dirname(os.path.realpath(__file__)) -VERSION = torch.__version__[0:6] +VERSION = "1.0_" + torch.__version__[0:6] torch_npu_root = Path(__file__).parent sha = get_sha(torch_npu_root) if not os.getenv("BUILD_WITHOUT_SHA"): @@ -53,11 +50,11 @@ if not os.getenv("BUILD_WITHOUT_SHA"): setup( name="ads_accelerator", version=VERSION, - description='Cpp Extension Include ascend_ads', - keywords='ads', + description="Cpp Extension Include ascend_ads", + keywords="ads", ext_modules=exts, - author='Ascend Contributors', + author="Ascend 
Contributors", cmdclass={"build_ext": BuildExtension}, packages=find_packages(), include_package_data=True, -) \ No newline at end of file +) diff --git a/tests/run_test.py b/tests/torch/run_test.py similarity index 96% rename from tests/run_test.py rename to tests/torch/run_test.py index 5fe2c913..7978d024 100644 --- a/tests/run_test.py +++ b/tests/torch/run_test.py @@ -12,7 +12,7 @@ from torch.testing._internal.common_utils import shell import torch_npu -REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent +TEST_ROOT = pathlib.Path(__file__).resolve().parent # https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python SIGNALS_TO_NAMES_DICT = { @@ -153,7 +153,6 @@ def run_test_module(test: str, test_directory: str, options) -> Optional[str]: def main(): options = parse_args() - test_directory = os.path.join(REPO_ROOT, "tests") selected_tests = get_selected_tests(options) if options.verbose: @@ -163,7 +162,7 @@ def main(): failure_msgs = [] for test in selected_tests: - err_msg = run_test_module(test, test_directory, options) + err_msg = run_test_module(test, TEST_ROOT, options) if err_msg is None: continue diff --git a/tests/test_abs.py b/tests/torch/test_abs.py similarity index 100% rename from tests/test_abs.py rename to tests/torch/test_abs.py diff --git a/tests/test_batch_nms.py b/tests/torch/test_batch_nms.py similarity index 100% rename from tests/test_batch_nms.py rename to tests/torch/test_batch_nms.py diff --git a/tests/test_fast_gelu.py b/tests/torch/test_fast_gelu.py similarity index 100% rename from tests/test_fast_gelu.py rename to tests/torch/test_fast_gelu.py diff --git a/tests/test_fast_gelu_backward.py b/tests/torch/test_fast_gelu_backward.py similarity index 100% rename from tests/test_fast_gelu_backward.py rename to tests/torch/test_fast_gelu_backward.py diff --git a/tests/test_furthest_point_sample_with_dist.py b/tests/torch/test_furthest_point_sample_with_dist.py similarity index 100% rename from tests/test_furthest_point_sample_with_dist.py rename to tests/torch/test_furthest_point_sample_with_dist.py diff --git a/tests/test_multi_scale_deformable_attention_grad.py b/tests/torch/test_multi_scale_deformable_attention_grad.py similarity index 100% rename from tests/test_multi_scale_deformable_attention_grad.py rename to tests/torch/test_multi_scale_deformable_attention_grad.py diff --git a/tests/test_npu_anchor_response_flags.py b/tests/torch/test_npu_anchor_response_flags.py similarity index 100% rename from tests/test_npu_anchor_response_flags.py rename to tests/torch/test_npu_anchor_response_flags.py diff --git a/tests/test_npu_bounding_box_decode.py b/tests/torch/test_npu_bounding_box_decode.py similarity index 100% rename from tests/test_npu_bounding_box_decode.py rename to tests/torch/test_npu_bounding_box_decode.py diff --git a/tests/test_npu_bounding_box_encode.py b/tests/torch/test_npu_bounding_box_encode.py similarity index 100% rename from tests/test_npu_bounding_box_encode.py rename to tests/torch/test_npu_bounding_box_encode.py diff --git a/tests/test_npu_broadcast.py b/tests/torch/test_npu_broadcast.py similarity index 100% rename from tests/test_npu_broadcast.py rename to tests/torch/test_npu_broadcast.py diff --git a/tests/test_npu_dyn_voxelization.py b/tests/torch/test_npu_dyn_voxelization.py similarity index 100% rename from tests/test_npu_dyn_voxelization.py rename to tests/torch/test_npu_dyn_voxelization.py diff --git a/tests/test_npu_dynamic_scatter.py b/tests/torch/test_npu_dynamic_scatter.py similarity index 100% 
rename from tests/test_npu_dynamic_scatter.py rename to tests/torch/test_npu_dynamic_scatter.py diff --git a/tests/test_npu_moe_tutel.py b/tests/torch/test_npu_moe_tutel.py similarity index 100% rename from tests/test_npu_moe_tutel.py rename to tests/torch/test_npu_moe_tutel.py diff --git a/tests/test_npu_moe_tutel_backward.py b/tests/torch/test_npu_moe_tutel_backward.py similarity index 100% rename from tests/test_npu_moe_tutel_backward.py rename to tests/torch/test_npu_moe_tutel_backward.py diff --git a/tests/test_npu_multi_scale_deformable_attn_funtion.py b/tests/torch/test_npu_multi_scale_deformable_attn_funtion.py similarity index 100% rename from tests/test_npu_multi_scale_deformable_attn_funtion.py rename to tests/torch/test_npu_multi_scale_deformable_attn_funtion.py diff --git a/tests/test_npu_nms3d_normal.py b/tests/torch/test_npu_nms3d_normal.py similarity index 97% rename from tests/test_npu_nms3d_normal.py rename to tests/torch/test_npu_nms3d_normal.py index f4370f14..f80c155c 100644 --- a/tests/test_npu_nms3d_normal.py +++ b/tests/torch/test_npu_nms3d_normal.py @@ -1,42 +1,42 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import unittest -import torch -import numpy as np - -import torch_npu -from torch_npu.testing.testcase import TestCase, run_tests -import ads.common - -DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] - - -class TestNms3dNormal(TestCase): - @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `RotaryMul` is only supported on 910B, skip this ut!") - def test_nms3d_normal(self): - # test for 5 boxes - np_boxes = np.asarray([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0], - [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0], - [3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.3], - [3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0], - [3.0, 3.2, 3.2, 3.0, 2.0, 2.0, 0.3]], - dtype=np.float32) - np_scores = np.array([0.6, 0.9, 0.1, 0.2, 0.15], dtype=np.float32) - np_inds = np.array([1, 0, 3]) - boxes = torch.from_numpy(np_boxes) - scores = torch.from_numpy(np_scores) - inds = ads.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3) - - self.assertRtolEqual(inds.cpu().numpy(), np_inds) - - # test for many boxes - np.random.seed(42) - np_boxes = np.random.rand(555, 7).astype(np.float32) - np_scores = np.random.rand(555).astype(np.float32) - boxes = torch.from_numpy(np_boxes) - scores = torch.from_numpy(np_scores) - inds = ads.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3) - - self.assertRtolEqual(len(inds.cpu().numpy()), 148) - -if __name__ == "__main__": - run_tests() +# Copyright (c) OpenMMLab. All rights reserved. 
+import unittest +import torch +import numpy as np + +import torch_npu +from torch_npu.testing.testcase import TestCase, run_tests +import ads.common + +DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] + + +class TestNms3dNormal(TestCase): + @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `Nms3dNormal` is only supported on 910B, skip this ut!") + def test_nms3d_normal(self): + # test for 5 boxes + np_boxes = np.asarray([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0], + [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0], + [3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.3], + [3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0], + [3.0, 3.2, 3.2, 3.0, 2.0, 2.0, 0.3]], + dtype=np.float32) + np_scores = np.array([0.6, 0.9, 0.1, 0.2, 0.15], dtype=np.float32) + np_inds = np.array([1, 0, 3]) + boxes = torch.from_numpy(np_boxes) + scores = torch.from_numpy(np_scores) + inds = ads.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3) + + self.assertRtolEqual(inds.cpu().numpy(), np_inds) + + # test for many boxes + np.random.seed(42) + np_boxes = np.random.rand(555, 7).astype(np.float32) + np_scores = np.random.rand(555).astype(np.float32) + boxes = torch.from_numpy(np_boxes) + scores = torch.from_numpy(np_scores) + inds = ads.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3) + + self.assertRtolEqual(len(inds.cpu().numpy()), 148) + +if __name__ == "__main__": + run_tests() diff --git a/tests/test_npu_rotary_mul.py b/tests/torch/test_npu_rotary_mul.py similarity index 100% rename from tests/test_npu_rotary_mul.py rename to tests/torch/test_npu_rotary_mul.py diff --git a/tests/test_npu_scatter.py b/tests/torch/test_npu_scatter.py similarity index 100% rename from tests/test_npu_scatter.py rename to tests/torch/test_npu_scatter.py diff --git a/tests/test_npu_silu.py b/tests/torch/test_npu_silu.py similarity index 100% rename from tests/test_npu_silu.py rename to tests/torch/test_npu_silu.py diff --git a/tests/test_npu_softmax_cross_entropy_with_logits.py b/tests/torch/test_npu_softmax_cross_entropy_with_logits.py similarity index 100% rename from tests/test_npu_softmax_cross_entropy_with_logits.py rename to tests/torch/test_npu_softmax_cross_entropy_with_logits.py diff --git a/tests/test_npu_stride_add.py b/tests/torch/test_npu_stride_add.py similarity index 100% rename from tests/test_npu_stride_add.py rename to tests/torch/test_npu_stride_add.py diff --git a/tests/test_npu_transpose.py b/tests/torch/test_npu_transpose.py similarity index 100% rename from tests/test_npu_transpose.py rename to tests/torch/test_npu_transpose.py diff --git a/tests/test_points_in_box.py b/tests/torch/test_points_in_box.py similarity index 100% rename from tests/test_points_in_box.py rename to tests/torch/test_points_in_box.py diff --git a/tests/test_rotated_box.py b/tests/torch/test_rotated_box.py similarity index 100% rename from tests/test_rotated_box.py rename to tests/torch/test_rotated_box.py diff --git a/tests/test_rotated_iou.py b/tests/torch/test_rotated_iou.py similarity index 100% rename from tests/test_rotated_iou.py rename to tests/torch/test_rotated_iou.py diff --git a/tests/test_rotated_overlaps.py b/tests/torch/test_rotated_overlaps.py similarity index 100% rename from tests/test_rotated_overlaps.py rename to tests/torch/test_rotated_overlaps.py diff --git a/tests/test_scatter_max.py b/tests/torch/test_scatter_max.py similarity index 100% rename from tests/test_scatter_max.py rename to tests/torch/test_scatter_max.py diff --git a/tests/test_sign_bits_pack.py b/tests/torch/test_sign_bits_pack.py similarity index 100% rename from 
tests/test_sign_bits_pack.py rename to tests/torch/test_sign_bits_pack.py diff --git a/tests/test_sign_bits_unpack.py b/tests/torch/test_sign_bits_unpack.py similarity index 100% rename from tests/test_sign_bits_unpack.py rename to tests/torch/test_sign_bits_unpack.py diff --git a/tests/test_yolo_boxes_encode.py b/tests/torch/test_yolo_boxes_encode.py similarity index 100% rename from tests/test_yolo_boxes_encode.py rename to tests/torch/test_yolo_boxes_encode.py diff --git a/third_party/README.md b/third_party/README.md deleted file mode 100644 index 72a48a33..00000000 --- a/third_party/README.md +++ /dev/null @@ -1,2 +0,0 @@ -## Description -+ The folder contains some third_party libraries. \ No newline at end of file -- Gitee