diff --git a/.clang-format b/.clang-format deleted file mode 100644 index a4a3ecd2cd9d324367d09f81beaff4947b56c279..0000000000000000000000000000000000000000 --- a/.clang-format +++ /dev/null @@ -1,115 +0,0 @@ ---- -BasedOnStyle: LLVM ---- -Language: Cpp -AccessModifierOffset: -4 -AlignAfterOpenBracket: DontAlign -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlines: Left -AlignOperands: false -AlignTrailingComments: true -AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortEnumsOnASingleLine: false -AllowShortFunctionsOnASingleLine: None -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: true -BinPackArguments: true -BinPackParameters: true -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: true - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - AfterExternBlock: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false - SplitEmptyFunction: true - SplitEmptyRecord: true - SplitEmptyNamespace: true -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Custom -BreakBeforeInheritanceComma: false -BreakBeforeTernaryOperators: false -BreakConstructorInitializers: BeforeColon -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: true -ColumnLimit: 120 -CommentPragmas: '^ IWYU pragma:' -CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: false -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: false -DerivePointerAlignment: false -DisableFormat: false -ExperimentalAutoDetectBinPacking: false -FixNamespaceComments: false -ForEachMacros: - - foreach - - Q_FOREACH - - BOOST_FOREACH -IncludeBlocks: Preserve -IncludeCategories: - - Regex: '^' - Priority: 2 - - Regex: '^<.*\.h>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IncludeIsMainRegex: '(Test)?$' -IndentCaseLabels: true -IndentPPDirectives: None -IndentWidth: 4 -IndentWrappedFunctionNames: false -JavaScriptQuotes: Leave -JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: true -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBlockIndentWidth: 4 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: true -PackConstructorInitializers: Never -PenaltyBreakAssignment: 60 -PenaltyBreakBeforeFirstCallParameter: 19 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakOpenParenthesis: 7 -PenaltyBreakString: 1000 -PenaltyExcessCharacter: 1000000 -PenaltyIndentedWhitespace: 1 -PenaltyReturnTypeOnItsOwnLine: 60 -PointerAlignment: Right -ReflowComments: true -SortIncludes: false -SortUsingDeclarations: false -SpaceAfterCStyleCast: false -SpaceAfterTemplateKeyword: true -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 2 -SpacesInAngles: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Cpp11 -TabWidth: 4 -UseTab: Never diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 801d7d0b6699b1ecd4e7160e8a595e9732d0af6b..0000000000000000000000000000000000000000 --- 
a/CMakeLists.txt +++ /dev/null @@ -1,206 +0,0 @@ -cmake_minimum_required(VERSION 3.16.0) -project(opp) -set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) - -include(cmake/config.cmake) -include(cmake/func.cmake) -include(cmake/intf.cmake) - -set(ADS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ads) -add_subdirectory(${ADS_DIR}/common) -add_subdirectory(${ADS_DIR}/motion) -add_subdirectory(${ADS_DIR}/perception) - -opbuild(OPS_SRC ${ASCEND_HOST_SRC} OUT_DIR ${ASCEND_AUTOGEN_PATH}) - -add_library(cust_op_proto SHARED ${ASCEND_HOST_SRC} - ${ASCEND_AUTOGEN_PATH}/op_proto.cc) -target_compile_definitions(cust_op_proto PRIVATE OP_PROTO_LIB) -target_compile_options(cust_op_proto PRIVATE -fvisibility=hidden) -target_link_libraries( - cust_op_proto - PRIVATE intf_pub - exe_graph - register - tiling_api - -Wl,--whole-archive - rt2_registry - -Wl,--no-whole-archive) -set_target_properties(cust_op_proto PROPERTIES OUTPUT_NAME cust_opsproto_rt2.0) -install_target( - TRG cust_op_proto DST - packages/vendors/${vendor_name}/op_proto/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) -install_file(TRG cust_op_proto SRC ${ASCEND_AUTOGEN_PATH}/op_proto.h DST - packages/vendors/${vendor_name}/op_proto/inc) - -add_library(cust_optiling SHARED ${ASCEND_HOST_SRC}) -target_compile_definitions(cust_optiling PRIVATE OP_TILING_LIB) -target_compile_options(cust_optiling PRIVATE -fvisibility=hidden) -target_link_libraries( - cust_optiling - PRIVATE intf_pub - exe_graph - register - tiling_api - -Wl,--whole-archive - rt2_registry - -Wl,--no-whole-archive) -set_target_properties(cust_optiling PROPERTIES OUTPUT_NAME cust_opmaster_rt2.0) -install_target( - TRG - cust_optiling - DST - packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/lib/linux/${CMAKE_SYSTEM_PROCESSOR} -) -# create liboptiling.so link -add_custom_command( - TARGET cust_optiling - POST_BUILD - COMMAND - ${CMAKE_COMMAND} -E chdir - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling - ${CMAKE_COMMAND} -E create_symlink - lib/linux/${CMAKE_SYSTEM_PROCESSOR}/$ - liboptiling.so) -install( - FILES - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/liboptiling.so - DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling) - -if(${ENABLE_ONNX}) - protobuf_generate(PROTO_FILE ${ASCEND_CANN_PACKAGE_PATH}/include/proto/ge_onnx.proto - OUT_DIR ${ASCEND_AUTOGEN_PATH}) - add_library(cust_onnx_parsers SHARED ${ASCEND_ONNX_SRC}) - target_compile_options( - cust_onnx_parsers - PRIVATE - -O2 - -Werror - -Wno-deprecated-declarations - -Dgoogle=ascend_private - "-fno-common" - "-fno-strict-aliasing") - target_link_libraries(cust_onnx_parsers PRIVATE intf_pub) - target_include_directories(cust_onnx_parsers - PRIVATE ${PROJECT_SOURCE_DIR}/include ${ASCEND_AUTOGEN_PATH}) - - install_target(TRG cust_onnx_parsers DST - packages/vendors/${vendor_name}/framework/onnx/) -endif() - -# ===================Build ACLNN=================== -file(GLOB ACLNN_SRC_GEN ${ASCEND_AUTOGEN_PATH}/aclnn_*.cpp) -file(GLOB ACLNN_INC_GEN ${ASCEND_AUTOGEN_PATH}/aclnn_*.h) -set(ACLNN_SRC ${ACLNN_SRC_GEN} ${ACLNN_SRC_CUSTOM}) -set(ACLNN_INC ${ACLNN_INC_GEN} ${ACLNN_INC_CUSTOM}) -add_library(cust_opapi SHARED ${ACLNN_SRC}) -target_link_libraries(cust_opapi PRIVATE intf_pub ascendcl nnopbase) -install_target(TRG cust_opapi DST packages/vendors/${vendor_name}/op_api/lib) -install_file(TRG cust_opapi SRC ${ACLNN_INC} DST - packages/vendors/${vendor_name}/op_api/include) - -# ===================Build Kernel=================== -# set custom compile options 
-if("${CMAKE_BUILD_TYPE}x" STREQUAL "Debugx") - add_ops_compile_options(ALL OPTIONS -g -O0) -endif() - -file(COPY ${ASCEND_KERNEL_SRC} DESTINATION ${ASCEND_KERNEL_PATH}) - -foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) - # generate aic-${compute_unit}-ops-info.json - add_ops_info_target( - TARGET - ops_info_gen_${compute_unit} - OUTPUT - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit}/aic-${compute_unit}-ops-info.json - OPS_INFO - ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - INSTALL_DIR - packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit}) - - # generate ascendc impl py once - if(NOT TARGET ascendc_impl_gen) - add_ops_impl_target( - TARGET - ascendc_impl_gen - OPS_INFO - ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - IMPL_DIR - ${ASCEND_KERNEL_PATH} - OUT_DIR - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl - ) - endif() - - # dynamic shape binary compile - if(${ENABLE_BINARY_PACKAGE}) - add_bin_compile_target( - TARGET - ascendc_bin_${compute_unit} - OPS_INFO - ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - IMPL_DIR - ${ASCEND_KERNEL_PATH} - ADP_DIR - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic - OUT_DIR - ${CMAKE_CURRENT_BINARY_DIR}/binary/${compute_unit} - KERNEL_DIR - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel - INSTALL_DIR - packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel - COMPUTE_UNIT - ${compute_unit}) - add_dependencies(ascendc_bin_${compute_unit} ascendc_impl_gen) - endif() -endforeach() - -# generate npu_supported_ops.json -add_npu_support_target( - TARGET - npu_supported_ops - OPS_INFO_DIR - ${ASCEND_AUTOGEN_PATH} - OUT_DIR - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_info_cfg/ai_core - INSTALL_DIR - packages/vendors/${vendor_name}/framework/${ASCEND_FRAMEWORK_TYPE}) - -# ===================Build test=================== -# WARN: WIP -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() - -get_system_info(SYSTEM_INFO) - -# gen version.info -add_custom_target( - gen_version_info ALL - COMMAND - bash ${CMAKE_CURRENT_SOURCE_DIR}/cmake/util/gen_version_info.sh - ${ASCEND_CANN_PACKAGE_PATH} - ${ADS_PATH}/packages/vendors/${vendor_name}) - -install( - FILES ${ADS_PATH}/packages/vendors/${vendor_name}/version.info - DESTINATION packages/vendors/${vendor_name}) - -if(COMPILE_OPP_PACKAGE) - # CPack config - set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME}) - set(CPACK_PACKAGE_VERSION ${CMAKE_PROJECT_VERSION}) - set(CPACK_PACKAGE_DESCRIPTION "CPack opp project") - set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CPack opp project") - set(CPACK_PACKAGE_DIRECTORY ${CMAKE_INSTALL_PREFIX}) - set(CPACK_PACKAGE_FILE_NAME "custom_opp_${SYSTEM_INFO}.run") - set(CPACK_GENERATOR External) - set(CPACK_CMAKE_GENERATOR "Unix Makefiles") - set(CPACK_EXTERNAL_ENABLE_STAGING TRUE) - set(CPACK_EXTERNAL_PACKAGE_SCRIPT ${CMAKE_SOURCE_DIR}/cmake/makeself.cmake) - set(CPACK_EXTERNAL_BUILT_PACKAGES - ${CPACK_PACKAGE_DIRECTORY}/_CPack_Packages/Linux/External/${CPACK_PACKAGE_FILE_NAME}/${CPACK_PACKAGE_FILE_NAME} - ) - include(CPack) -endif() diff --git a/MANIFEST.in b/MANIFEST.in index cd191c56a22f2a99885ef00a1460668260c282a2..49ea6f5c07fe131ba9a5dfdea6e1eeaca638269a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -recursive-include ads/packages/ * +recursive-include ads/common/ops/kernels/ads_op_kernel/packages/ * \ No newline 
at end of file diff --git a/ads/common/CMakeLists.txt b/ads/common/CMakeLists.txt deleted file mode 100644 index 3f1ac043697b735171e85c379344504ae0921ac8..0000000000000000000000000000000000000000 --- a/ads/common/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/kernels) - add_subdirectory(ops/kernels) -endif() - -if (${ENABLE_ONNX} AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/onnx) - add_subdirectory(ops/onnx/plugin) -endif() diff --git a/ads/common/__init__.py b/ads/common/__init__.py index c750aba16ed91e5449806720c0b1abf165a10a86..5885fb07371289624fe0e2883b8f617f31bb2223 100644 --- a/ads/common/__init__.py +++ b/ads/common/__init__.py @@ -27,4 +27,4 @@ from .ops.npu_dynamic_scatter import npu_dynamic_scatter from .ops.ads_add import npu_ads_add from .ops.npu_multi_scale_deformable_attn_function import npu_multi_scale_deformable_attn_function from .ops.dynamic_voxelization import voxelization -from .ops.dynamic_voxelization import Voxelization +from .ops.dynamic_voxelization import Voxelization \ No newline at end of file diff --git a/ads/motion/ops/kernels/framework/CMakeLists.txt b/ads/common/ops/CMakeLists.txt similarity index 100% rename from ads/motion/ops/kernels/framework/CMakeLists.txt rename to ads/common/ops/CMakeLists.txt diff --git a/ads/common/ops/csrc/AbsOpApi.cpp b/ads/common/ops/csrc/AbsOpApi.cpp index 4332c99396f6e6e2551d1f46c79d21791c9e8175..824b60ce4a2454581182effcb6d28c179f72476f 100644 --- a/ads/common/ops/csrc/AbsOpApi.cpp +++ b/ads/common/ops/csrc/AbsOpApi.cpp @@ -1,5 +1,5 @@ #include -#include "csrc/OpApiCommon.h" +#include "OpApiCommon.h" #include "functions.h" at::Tensor npu_abs(const at::Tensor& self) diff --git a/ads/common/ops/csrc/AdsAddKernelNpu.cpp b/ads/common/ops/csrc/AdsAddKernelNpu.cpp index b4471aa8ca168129658fca3d0134b6acc3299ee6..c766e0e27cd90a477213bdd4aecb586153c2b8f3 100644 --- a/ads/common/ops/csrc/AdsAddKernelNpu.cpp +++ b/ads/common/ops/csrc/AdsAddKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. #include -#include "csrc/OpApiCommon.h" +#include "OpApiCommon.h" #include "functions.h" at::Tensor npu_ads_add(const at::Tensor &tensor1, const at::Tensor &tensor2) diff --git a/ads/common/ops/csrc/AnchorResponseFlagsKernelNpu.cpp b/ads/common/ops/csrc/AnchorResponseFlagsKernelNpu.cpp index f816e8815948644928cc2f8236c74746c868c43e..f414633cc6ba78e6a9fdb8cdb80dceb64f23c8e4 100644 --- a/ads/common/ops/csrc/AnchorResponseFlagsKernelNpu.cpp +++ b/ads/common/ops/csrc/AnchorResponseFlagsKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. #include "torch_npu/csrc/framework/OpCommand.h" -#include "csrc/common.h" +#include "common.h" namespace { c10::SmallVector infersize_npu_anchor_response_flags( diff --git a/ads/common/ops/csrc/BatchNms.cpp b/ads/common/ops/csrc/BatchNms.cpp index ae90376934d8fdab7965ebf99b9d01246ff12ed3..a7051437f9fedc0279108ff783e9637499ee6e74 100644 --- a/ads/common/ops/csrc/BatchNms.cpp +++ b/ads/common/ops/csrc/BatchNms.cpp @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "torch_npu/csrc/framework/OpCommand.h" -#include "csrc/common.h" +#include "common.h" std::tuple npu_batch_nms( const at::Tensor& self, diff --git a/ads/common/ops/csrc/BoundingBoxDecodeKernelNpu.cpp b/ads/common/ops/csrc/BoundingBoxDecodeKernelNpu.cpp index 2bddf962886f97e33a32891cf267d19d4b51a8cf..85fc07643d9059a23d6a549f08451f5cd5cbdaac 100644 --- a/ads/common/ops/csrc/BoundingBoxDecodeKernelNpu.cpp +++ b/ads/common/ops/csrc/BoundingBoxDecodeKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. #include "torch_npu/csrc/framework/OpCommand.h" -#include "csrc/common.h" +#include "common.h" at::Tensor npu_bounding_box_decode( const at::Tensor& rois, diff --git a/ads/common/ops/csrc/BoundingBoxEncodeKernelNpu.cpp b/ads/common/ops/csrc/BoundingBoxEncodeKernelNpu.cpp index e3b12bb1b533125cc7d850fb79d9f03fb2304e7c..aa5bad77d2a6b5f8cc3ffc63b917a952b1f97eec 100644 --- a/ads/common/ops/csrc/BoundingBoxEncodeKernelNpu.cpp +++ b/ads/common/ops/csrc/BoundingBoxEncodeKernelNpu.cpp @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "torch_npu/csrc/framework/OpCommand.h" -#include "csrc/common.h" +#include "common.h" at::Tensor npu_bounding_box_encode( const at::Tensor& anchor_box, diff --git a/ads/common/ops/csrc/ConfusionTransposeKernelNpu.cpp b/ads/common/ops/csrc/ConfusionTransposeKernelNpu.cpp index 6c0453778e4719d82a7953910d2a244413924a70..a12d1d7c87d2be696e4e89adbea4dca5362836d9 100644 --- a/ads/common/ops/csrc/ConfusionTransposeKernelNpu.cpp +++ b/ads/common/ops/csrc/ConfusionTransposeKernelNpu.cpp @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "torch_npu/csrc/framework/OpCommand.h" -#include "csrc/common.h" +#include "common.h" at::Tensor npu_confusion_transpose( const at::Tensor& self, diff --git a/ads/common/ops/csrc/DynamicScatterKernelNpuOpApi.cpp b/ads/common/ops/csrc/DynamicScatterKernelNpuOpApi.cpp index 64fc359bff10a2c7944ae3666918943be33dfe23..c2ccb7a2c32693acc1faff686d254ec2141b136d 100644 --- a/ads/common/ops/csrc/DynamicScatterKernelNpuOpApi.cpp +++ b/ads/common/ops/csrc/DynamicScatterKernelNpuOpApi.cpp @@ -6,8 +6,8 @@ #include "torch_npu/csrc/aten/NPUNativeFunctions.h" #include "torch_npu/csrc/aten/CustomFunctions.h" #include "functions.h" -#include "csrc/common.h" -#include "csrc/OpApiCommon.h" +#include "common.h" +#include "OpApiCommon.h" using npu_preparation = at_npu::native::OpPreparation; using torch::autograd::Function; diff --git a/ads/common/ops/csrc/DynamicVoxelizationKernelNpu.cpp b/ads/common/ops/csrc/DynamicVoxelizationKernelNpu.cpp index ab0f52ebc4fcc7856a6764170042c545a1350e4d..3c89c5ab29517b74e71943ca0b831ae27c7354af 100644 --- a/ads/common/ops/csrc/DynamicVoxelizationKernelNpu.cpp +++ b/ads/common/ops/csrc/DynamicVoxelizationKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. #include -#include "csrc/OpApiCommon.h" +#include "OpApiCommon.h" #include "functions.h" at::Tensor DynamicVoxelization( diff --git a/ads/common/ops/csrc/FurthestPointSamplingWithDistKernelNpu.cpp b/ads/common/ops/csrc/FurthestPointSamplingWithDistKernelNpu.cpp index a187378135e65abd651262b1e11bfedda7152739..fea2f9486a311d5cb4742123f1a1d11db6b33b74 100644 --- a/ads/common/ops/csrc/FurthestPointSamplingWithDistKernelNpu.cpp +++ b/ads/common/ops/csrc/FurthestPointSamplingWithDistKernelNpu.cpp @@ -15,7 +15,7 @@ // limitations under the License. 
#include -#include "csrc/OpApiCommon.h" +#include "OpApiCommon.h" #include "functions.h" at::Tensor furthest_point_sampling_with_dist(const at::Tensor &points_dist, const at::Tensor &nearest_temp, const int32_t num_points) diff --git a/ads/common/ops/csrc/MoeTutelOpApi.cpp b/ads/common/ops/csrc/MoeTutelOpApi.cpp index b0e5017bb10172cebf1f86a6605b006974df2567..6ec9fa354b375b96b8cc2f61bba993cbed69a3f0 100644 --- a/ads/common/ops/csrc/MoeTutelOpApi.cpp +++ b/ads/common/ops/csrc/MoeTutelOpApi.cpp @@ -22,8 +22,8 @@ #include "torch_npu/csrc/aten/NPUNativeFunctions.h" #include "torch_npu/csrc/aten/CustomFunctions.h" #include "functions.h" -#include "csrc/common.h" -#include "csrc/OpApiCommon.h" +#include "common.h" +#include "OpApiCommon.h" using npu_preparation = at_npu::native::OpPreparation; using torch::autograd::Function; diff --git a/ads/common/ops/csrc/MultiScaleDeformableAttnFunctionKernelNpu.cpp b/ads/common/ops/csrc/MultiScaleDeformableAttnFunctionKernelNpu.cpp index e1e74560e7c0d1795b84e9948b93e3198d06a4b0..dfa52b6b2fd3625cbf78a7ba8acd3a61e0835934 100644 --- a/ads/common/ops/csrc/MultiScaleDeformableAttnFunctionKernelNpu.cpp +++ b/ads/common/ops/csrc/MultiScaleDeformableAttnFunctionKernelNpu.cpp @@ -1,5 +1,5 @@ #include -#include "csrc/OpApiCommon.h" +#include "OpApiCommon.h" #include "functions.h" at::Tensor npu_multi_scale_deformable_attn_function(const at::Tensor& value, diff --git a/ads/common/ops/csrc/NpuSilu.cpp b/ads/common/ops/csrc/NpuSilu.cpp index 3fa747bc546423eff3e2b6773be24ede777366bf..4f62cad5862989d2a3259b2a6852b749767383a8 100644 --- a/ads/common/ops/csrc/NpuSilu.cpp +++ b/ads/common/ops/csrc/NpuSilu.cpp @@ -1,6 +1,6 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "csrc/common.h" +#include "common.h" at::Tensor &silu_out_npu_nocheck(at::Tensor &result, const at::Tensor &self) { diff --git a/include/csrc/OpApiCommon.h b/ads/common/ops/csrc/OpApiCommon.h similarity index 74% rename from include/csrc/OpApiCommon.h rename to ads/common/ops/csrc/OpApiCommon.h index 70083ab95c48c67beae5cb600450108e0232e008..92332df165283aca764da86d2ae11acd41af0eba 100644 --- a/include/csrc/OpApiCommon.h +++ b/ads/common/ops/csrc/OpApiCommon.h @@ -13,27 +13,25 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef CSRC_OP_API_COMMON_H_ -#define CSRC_OP_API_COMMON_H_ #include +#include +#include #include #include +#include +#include + #include #include #include -#include "third_party/acl/inc/acl/acl_base.h" -#include "third_party/acl/inc/acl/acl_rt.h" #include "torch_npu/csrc/aten/NPUNativeFunctions.h" #include "torch_npu/csrc/core/npu/NPUStream.h" #include "torch_npu/csrc/framework/OpCommand.h" #include "torch_npu/csrc/framework/interface/EnvVariables.h" #include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" -#include "torch_npu/csrc/framework/utils/OpAdapter.h" #include "torch_npu/csrc/framework/utils/OpPreparation.h" -#include "common.h" - #define NPU_NAME_SPACE at_npu::native #ifdef COMPILE_WITH_XLA @@ -71,6 +69,34 @@ constexpr int kHashBufMaxSize = kHashBufSize + 1024; extern thread_local char g_hashBuf[kHashBufSize]; extern thread_local int g_hashOffset; +#define AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(_) \ + _(at::ScalarType::Byte, ACL_UINT8) \ + _(at::ScalarType::Char, ACL_INT8) \ + _(at::ScalarType::Short, ACL_INT16) \ + _(at::ScalarType::Int, ACL_INT32) \ + _(at::ScalarType::Long, ACL_INT64) \ + _(at::ScalarType::Half, ACL_FLOAT16) \ + _(at::ScalarType::Float, ACL_FLOAT) \ + _(at::ScalarType::Double, ACL_DOUBLE) \ + _(at::ScalarType::ComplexHalf, ACL_DT_UNDEFINED) \ + _(at::ScalarType::ComplexFloat, ACL_COMPLEX64) \ + _(at::ScalarType::ComplexDouble, ACL_COMPLEX128) \ + _(at::ScalarType::Bool, ACL_BOOL) \ + _(at::ScalarType::QInt8, ACL_DT_UNDEFINED) \ + _(at::ScalarType::QUInt8, ACL_DT_UNDEFINED) \ + _(at::ScalarType::QInt32, ACL_DT_UNDEFINED) \ + _(at::ScalarType::BFloat16, ACL_BF16) \ + _(at::ScalarType::QUInt4x2, ACL_DT_UNDEFINED) \ + _(at::ScalarType::QUInt2x4, ACL_DT_UNDEFINED) \ + _(at::ScalarType::Undefined, ACL_DT_UNDEFINED) \ + _(at::ScalarType::NumOptions, ACL_DT_UNDEFINED) + +constexpr aclDataType kATenScalarTypeToAclDataTypeTable[static_cast(at::ScalarType::NumOptions) + 1] = { +#define DEFINE_ENUM(_1, n) n, + AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) +#undef DEFINE_ENUM +}; + #define GET_OP_API_FUNC(apiName) reinterpret_cast<_##apiName>(GetOpApiFuncAddr(#apiName)) #define MEMCPY_TO_BUF(data_expression, size_expression) \ @@ -174,7 +200,8 @@ inline at::Tensor CopyTensorHostToDevice(const at::Tensor &cpu_tensor) { at::Tensor cpuPinMemTensor = cpu_tensor.pin_memory(); int deviceIndex = 0; - return cpuPinMemTensor.to(c10::Device(DEVICE_TYPE, deviceIndex), cpuPinMemTensor.scalar_type(), true, true); + return cpuPinMemTensor.to( + c10::Device(DEVICE_TYPE, deviceIndex), cpuPinMemTensor.scalar_type(), true, true); } inline at::Tensor CopyScalarToDevice(const c10::Scalar &cpu_scalar, at::ScalarType scalar_data_type) @@ -194,8 +221,8 @@ inline aclTensor *ConvertType(const at::Tensor &at_tensor) } at::ScalarType scalar_data_type = at_tensor.scalar_type(); aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, - std::string(c10::toString(scalar_data_type)) + " has not been supported") + TORCH_CHECK( + acl_data_type != ACL_DT_UNDEFINED, std::string(c10::toString(scalar_data_type)) + " has not been supported") c10::SmallVector storageDims; // if acl_data_type is ACL_STRING, storageDims is empty. 
auto itemsize = at_tensor.itemsize(); @@ -226,13 +253,25 @@ inline aclTensor *ConvertType(const at::Tensor &at_tensor) if (at_tensor.unsafeGetTensorImpl()->is_wrapped_number()) { c10::Scalar expScalar = ConvertTensorToScalar(at_tensor); at::Tensor aclInput = CopyScalarToDevice(expScalar, scalar_data_type); - return aclCreateTensor(aclInput.sizes().data(), aclInput.sizes().size(), acl_data_type, - aclInput.strides().data(), aclInput.storage_offset(), format, storageDims.data(), storageDims.size(), + return aclCreateTensor(aclInput.sizes().data(), + aclInput.sizes().size(), + acl_data_type, + aclInput.strides().data(), + aclInput.storage_offset(), + format, + storageDims.data(), + storageDims.size(), const_cast(aclInput.storage().data())); } - auto acl_tensor = aclCreateTensor(at_tensor.sizes().data(), at_tensor.sizes().size(), acl_data_type, - at_tensor.strides().data(), at_tensor.storage_offset(), format, storageDims.data(), storageDims.size(), + auto acl_tensor = aclCreateTensor(at_tensor.sizes().data(), + at_tensor.sizes().size(), + acl_data_type, + at_tensor.strides().data(), + at_tensor.storage_offset(), + format, + storageDims.data(), + storageDims.size(), const_cast(at_tensor.storage().data())); return acl_tensor; } @@ -246,8 +285,8 @@ inline aclScalar *ConvertType(const at::Scalar &at_scalar) at::ScalarType scalar_data_type = at_scalar.type(); aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, - std::string(c10::toString(scalar_data_type)) + " has not been supported") + TORCH_CHECK( + acl_data_type != ACL_DT_UNDEFINED, std::string(c10::toString(scalar_data_type)) + " has not been supported") aclScalar *acl_scalar = nullptr; switch (scalar_data_type) { case at::ScalarType::Double: { @@ -432,7 +471,7 @@ void Release(T value) template void CallRelease(Tuple t, std::index_sequence) { - (void)std::initializer_list{ (Release(std::get(t)), 0)... }; + (void)std::initializer_list{(Release(std::get(t)), 0)...}; } template @@ -497,64 +536,74 @@ typedef int (*InitHugeMemThreadLocal)(void *, bool); typedef void (*UnInitHugeMemThreadLocal)(void *, bool); typedef void (*ReleaseHugeMem)(void *, bool); -#define DO_COMPATIBILITY(aclnn_api, originCallExpression) \ - do { \ - static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ - static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ - if (getWorkspaceSizeFuncAddr == nullptr || opApiFuncAddr == nullptr) { \ - ASCEND_LOGW("%s or %sGetWorkspaceSize not in %s, or %s not found. Will call %s", #aclnn_api, #aclnn_api, \ - GetOpApiLibName(), GetOpApiLibName(), #originCallExpression); \ - return originCallExpression; \ - } \ +#define DO_COMPATIBILITY(aclnn_api, originCallExpression) \ + do { \ + static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ + static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ + if (getWorkspaceSizeFuncAddr == nullptr || opApiFuncAddr == nullptr) { \ + ASCEND_LOGW("%s or %sGetWorkspaceSize not in %s, or %s not found. Will call %s", \ + #aclnn_api, \ + #aclnn_api, \ + GetOpApiLibName(), \ + GetOpApiLibName(), \ + #originCallExpression); \ + return originCallExpression; \ + } \ } while (0) -#define EXEC_NPU_CMD(aclnn_api, ...) 
\ - do { \ - static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ - static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ - static const auto initMemAddr = GetOpApiFuncAddr("InitHugeMemThreadLocal"); \ - static const auto unInitMemAddr = GetOpApiFuncAddr("UnInitHugeMemThreadLocal"); \ - static const auto releaseMemAddr = GetOpApiFuncAddr("ReleaseHugeMem"); \ - TORCH_CHECK(getWorkspaceSizeFuncAddr != nullptr && opApiFuncAddr != nullptr, #aclnn_api, " or ", \ - #aclnn_api "GetWorkspaceSize", " not in ", GetOpApiLibName(), ", or ", GetOpApiLibName(), "not found."); \ - auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \ - uint64_t workspace_size = 0; \ - uint64_t *workspace_size_addr = &workspace_size; \ - aclOpExecutor *executor = nullptr; \ - aclOpExecutor **executor_addr = &executor; \ - InitHugeMemThreadLocal initMemFunc = reinterpret_cast(initMemAddr); \ - UnInitHugeMemThreadLocal unInitMemFunc = reinterpret_cast(unInitMemAddr); \ - if (initMemFunc) { \ - initMemFunc(nullptr, false); \ - } \ - auto converted_params = ConvertTypes(__VA_ARGS__, workspace_size_addr, executor_addr); \ - static auto getWorkspaceSizeFunc = ConvertToOpApiFunc(converted_params, getWorkspaceSizeFuncAddr); \ - auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \ - TORCH_CHECK(workspace_status == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ - void *workspace_addr = nullptr; \ - if (workspace_size != 0) { \ - at::TensorOptions options = at::TensorOptions(torch_npu::utils::get_npu_device_type()); \ - auto workspace_tensor = at::empty({ workspace_size }, options.dtype(at::kByte)); \ - workspace_addr = const_cast(workspace_tensor.storage().data()); \ - } \ - auto acl_call = [converted_params, workspace_addr, workspace_size, acl_stream, executor]() -> int { \ - typedef int (*OpApiFunc)(void *, uint64_t, aclOpExecutor *, const aclrtStream); \ - OpApiFunc opApiFunc = reinterpret_cast(opApiFuncAddr); \ - auto api_ret = opApiFunc(workspace_addr, workspace_size, executor, acl_stream); \ - TORCH_CHECK(api_ret == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ - ReleaseConvertTypes(converted_params); \ - ReleaseHugeMem releaseMemFunc = reinterpret_cast(releaseMemAddr); \ - if (releaseMemFunc) { \ - releaseMemFunc(nullptr, false); \ - } \ - return api_ret; \ - }; \ - at_npu::native::OpCommand cmd; \ - cmd.Name(#aclnn_api); \ - cmd.SetCustomHandler(acl_call); \ - cmd.Run(); \ - if (unInitMemFunc) { \ - unInitMemFunc(nullptr, false); \ - } \ +#define EXEC_NPU_CMD(aclnn_api, ...) 
\ + do { \ + static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ + static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ + static const auto initMemAddr = GetOpApiFuncAddr("InitHugeMemThreadLocal"); \ + static const auto unInitMemAddr = GetOpApiFuncAddr("UnInitHugeMemThreadLocal"); \ + static const auto releaseMemAddr = GetOpApiFuncAddr("ReleaseHugeMem"); \ + TORCH_CHECK(getWorkspaceSizeFuncAddr != nullptr && opApiFuncAddr != nullptr, \ + #aclnn_api, \ + " or ", \ + #aclnn_api "GetWorkspaceSize", \ + " not in ", \ + GetOpApiLibName(), \ + ", or ", \ + GetOpApiLibName(), \ + "not found."); \ + auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \ + uint64_t workspace_size = 0; \ + uint64_t *workspace_size_addr = &workspace_size; \ + aclOpExecutor *executor = nullptr; \ + aclOpExecutor **executor_addr = &executor; \ + InitHugeMemThreadLocal initMemFunc = reinterpret_cast(initMemAddr); \ + UnInitHugeMemThreadLocal unInitMemFunc = reinterpret_cast(unInitMemAddr); \ + if (initMemFunc) { \ + initMemFunc(nullptr, false); \ + } \ + auto converted_params = ConvertTypes(__VA_ARGS__, workspace_size_addr, executor_addr); \ + static auto getWorkspaceSizeFunc = ConvertToOpApiFunc(converted_params, getWorkspaceSizeFuncAddr); \ + auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \ + TORCH_CHECK(workspace_status == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ + void *workspace_addr = nullptr; \ + if (workspace_size != 0) { \ + at::TensorOptions options = at::TensorOptions(torch_npu::utils::get_npu_device_type()); \ + auto workspace_tensor = at::empty({workspace_size}, options.dtype(at::kByte)); \ + workspace_addr = const_cast(workspace_tensor.storage().data()); \ + } \ + auto acl_call = [converted_params, workspace_addr, workspace_size, acl_stream, executor]()->int { \ + typedef int (*OpApiFunc)(void *, uint64_t, aclOpExecutor *, const aclrtStream); \ + OpApiFunc opApiFunc = reinterpret_cast(opApiFuncAddr); \ + auto api_ret = opApiFunc(workspace_addr, workspace_size, executor, acl_stream); \ + TORCH_CHECK(api_ret == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ + ReleaseConvertTypes(converted_params); \ + ReleaseHugeMem releaseMemFunc = reinterpret_cast(releaseMemAddr); \ + if (releaseMemFunc) { \ + releaseMemFunc(nullptr, false); \ + } \ + return api_ret; \ + }; \ + at_npu::native::OpCommand cmd; \ + cmd.Name(#aclnn_api); \ + cmd.SetCustomHandler(acl_call); \ + cmd.Run(); \ + if (unInitMemFunc) { \ + unInitMemFunc(nullptr, false); \ + } \ } while (false) -#endif // CSRC_OP_API_COMMON_H_ diff --git a/ads/common/ops/csrc/RotaryMulKernelNpu.cpp b/ads/common/ops/csrc/RotaryMulKernelNpu.cpp index 7a676699a7c8af9d66b7692ce89a44ed87e4f575..055693091b9e88d8c1b66cfe078cee143b8e43fc 100644 --- a/ads/common/ops/csrc/RotaryMulKernelNpu.cpp +++ b/ads/common/ops/csrc/RotaryMulKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "csrc/common.h" +#include "common.h" using tensor_tuple = std::tuple; diff --git a/ads/common/ops/csrc/ScatterMaxKernelNpu.cpp b/ads/common/ops/csrc/ScatterMaxKernelNpu.cpp index b71c8a0668ba96d5bdfa67dcb7503d510a1f8cca..f3b11664cc4fb99fd03dbaa7b7dd4f3b8be6e8c7 100644 --- a/ads/common/ops/csrc/ScatterMaxKernelNpu.cpp +++ b/ads/common/ops/csrc/ScatterMaxKernelNpu.cpp @@ -1,12 +1,14 @@ #include "torch_npu/csrc/framework/OpCommand.h" -#include "csrc/common.h" +#include "common.h" using namespace std; 
-std::tuple npu_scatter_max(const at::Tensor& updates, const at::Tensor& indices, - c10::optional out) +std::tuple npu_scatter_max( + const at::Tensor& updates, + const at::Tensor& indices, + c10::optional out) { - auto sizes = updates.sizes().vec(); + auto sizes = updates.sizes().vec(); sizes[0] = indices.max().item().toLong() + 1; @@ -14,13 +16,18 @@ std::tuple npu_scatter_max(const at::Tensor& updates, co at::Tensor argmax = at::empty(result.sizes(), result.options().dtype(at::kInt)); at_npu::native::OpCommand cmd; - cmd.Name("ScatterMaxWithArgmax").Input(result).Input(indices).Input(updates).Output(result).Output(argmax).Run(); + cmd.Name("ScatterMaxWithArgmax") + .Input(result) + .Input(indices) + .Input(updates) + .Output(result) + .Output(argmax) + .Run(); return std::tie(result, argmax); } -at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segment_ids, - const at::Tensor& num_segments) +at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segment_ids, const at::Tensor& num_segments) { c10::SmallVector output_size; @@ -34,6 +41,11 @@ at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segme at::Tensor out = at::empty(output_size, x.options()); at_npu::native::OpCommand cmd; - cmd.Name("UnsortedSegmentSum").Input(x).Input(segment_ids).Input(num_segments).Output(out).Run(); + cmd.Name("UnsortedSegmentSum") + .Input(x) + .Input(segment_ids) + .Input(num_segments) + .Output(out) + .Run(); return out; } diff --git a/ads/common/ops/csrc/SignBitsUnpackKernelNpu.cpp b/ads/common/ops/csrc/SignBitsUnpackKernelNpu.cpp index a0b6d10ccee8c72ada25d90bb8977aab87631c3e..27ae440bed26bf974991218bc8294ac126d2fea1 100644 --- a/ads/common/ops/csrc/SignBitsUnpackKernelNpu.cpp +++ b/ads/common/ops/csrc/SignBitsUnpackKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "csrc/common.h" +#include "common.h" at::Tensor npu_sign_bits_unpack_compute( diff --git a/ads/common/ops/csrc/SoftmaxCrossEntropyWithLogitsKernelNpu.cpp b/ads/common/ops/csrc/SoftmaxCrossEntropyWithLogitsKernelNpu.cpp index f1364956c3a2bca6e7a626fb658ff18a02446604..cc8f95dff0b0072324b875630059a7fbfa6fbf5c 100644 --- a/ads/common/ops/csrc/SoftmaxCrossEntropyWithLogitsKernelNpu.cpp +++ b/ads/common/ops/csrc/SoftmaxCrossEntropyWithLogitsKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "csrc/common.h" +#include "common.h" namespace { std::tuple softmax_cross_entropy_with_logits_out_nocheck( diff --git a/ads/common/ops/csrc/StrideAddKernelNpu.cpp b/ads/common/ops/csrc/StrideAddKernelNpu.cpp index 57f65c487a5ff64567ac3bcb61903f08f176108e..47922f62715d095724e7721528a7f085dcd498fd 100644 --- a/ads/common/ops/csrc/StrideAddKernelNpu.cpp +++ b/ads/common/ops/csrc/StrideAddKernelNpu.cpp @@ -16,11 +16,17 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "csrc/common.h" +#include "common.h" + namespace { -at::Tensor& stride_add_out_npu_nocheck(at::Tensor& result, const at::Tensor& self, const at::Tensor& other, - c10::Scalar offset1, c10::Scalar offset2, c10::Scalar c1_len) +at::Tensor &stride_add_out_npu_nocheck( + at::Tensor &result, + const at::Tensor &self, + const at::Tensor &other, + c10::Scalar offset1, + c10::Scalar offset2, + c10::Scalar c1_len) { at_npu::native::OpCommand cmd; cmd.Name("StrideAdd") @@ -33,10 +39,14 @@ at::Tensor& stride_add_out_npu_nocheck(at::Tensor& result, const at::Tensor& sel .Run(); return result; 
} -} // namespace +} // namespace -at::Tensor npu_stride_add_compute(const at::Tensor& self, const at::Tensor& other, const c10::Scalar& offset1, - const c10::Scalar& offset2, const c10::Scalar& c1_len) +at::Tensor npu_stride_add_compute( + const at::Tensor &self, + const at::Tensor &other, + const c10::Scalar &offset1, + const c10::Scalar &offset2, + const c10::Scalar &c1_len) { auto output_size = infersize_stride_add(self.sizes(), other.sizes()); output_size[1] = c1_len.toInt() * 16; diff --git a/ads/common/ops/csrc/TransposeKernelNpu.cpp b/ads/common/ops/csrc/TransposeKernelNpu.cpp index 6c3917476fcd59b72a2faf06de493266aafe45ec..2e8705c2a79c10e15c1cfed71cfdaf2c1ce4d754 100644 --- a/ads/common/ops/csrc/TransposeKernelNpu.cpp +++ b/ads/common/ops/csrc/TransposeKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "csrc/common.h" +#include "common.h" namespace { at::Tensor &npu_transpose_out_nocheck( diff --git a/ads/common/ops/csrc/YoloBoxesEncodeKernelNpu.cpp b/ads/common/ops/csrc/YoloBoxesEncodeKernelNpu.cpp index 310d2857cfb324909cc96f02547d4571021a2ffa..df02a325f4cb3fda9ab8bb2b8ad807629be6d7f5 100644 --- a/ads/common/ops/csrc/YoloBoxesEncodeKernelNpu.cpp +++ b/ads/common/ops/csrc/YoloBoxesEncodeKernelNpu.cpp @@ -16,7 +16,7 @@ #include "torch_npu/csrc/framework/OpCommand.h" #include "functions.h" -#include "csrc/common.h" +#include "common.h" namespace { diff --git a/include/csrc/common.h b/ads/common/ops/csrc/common.cpp similarity index 58% rename from include/csrc/common.h rename to ads/common/ops/csrc/common.cpp index 1370c1990889be15fab9384c0c28ac0786ca0a92..f6f9cc495143311ed326cf79f3391bd827f5c1fa 100644 --- a/include/csrc/common.h +++ b/ads/common/ops/csrc/common.cpp @@ -1,21 +1,9 @@ -#ifndef CSRC_COMMON_H_ -#define CSRC_COMMON_H_ #include -#include -#include -#include -#include -#include -#include "third_party/acl/inc/acl/acl_base.h" -#include "torch_npu/csrc/core/npu/NPUMacros.h" -#include "torch_npu/csrc/framework/utils/NPUDefinition.h" #include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" #include "torch_npu/csrc/aten/mirror/NPUMemoryOverlap.h" +#include "third_party/acl/inc/acl/acl_base.h" +#include "common.h" -const int N = 32; -const int SIZE = 8; - -using tuple_vector = std::tuple, c10::SmallVector>; using CalcuOpUtil = at_npu::native::CalcuOpUtil; #define AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(_) \ @@ -40,16 +28,10 @@ using CalcuOpUtil = at_npu::native::CalcuOpUtil; _(at::ScalarType::Undefined, ACL_DT_UNDEFINED) \ _(at::ScalarType::NumOptions, ACL_DT_UNDEFINED) -static std::unordered_map dTypeTransMap{ { "torch.float16", at::ScalarType::Half }, - { "torch.half", at::ScalarType::Half }, { "torch.float32", at::ScalarType::Float }, - { "torch.float", at::ScalarType::Float }, { "torch.float64", at::ScalarType::Double }, - { "torch.float", at::ScalarType::Double }, { "torch.int8", at::ScalarType::Char }, - { "torch.char", at::ScalarType::Char }, { "torch.int16", at::ScalarType::Short }, - { "torch.short", at::ScalarType::Short }, { "torch.int32", at::ScalarType::Int }, - { "torch.int32", at::ScalarType::Int }, { "torch.int64", at::ScalarType::Long }, - { "torch.long", at::ScalarType::Long } }; +static std::unordered_map dTypeTransMap{ + {"torch.float16", at::ScalarType::Half}, {"torch.half", at::ScalarType::Half}, {"torch.float32", at::ScalarType::Float}, {"torch.float", at::ScalarType::Float}, {"torch.float64", at::ScalarType::Double}, {"torch.float", at::ScalarType::Double}, {"torch.int8", at::ScalarType::Char}, 
{"torch.char", at::ScalarType::Char}, {"torch.int16", at::ScalarType::Short}, {"torch.short", at::ScalarType::Short}, {"torch.int32", at::ScalarType::Int}, {"torch.int32", at::ScalarType::Int}, {"torch.int64", at::ScalarType::Long}, {"torch.long", at::ScalarType::Long}}; -inline static bool check_inplace_tensor(const std::initializer_list &src_list, const at::Tensor &dst) +static bool check_inplace_tensor(const std::initializer_list &src_list, const at::Tensor &dst) { bool is_inplace_tensor = false; // check whether dst is contained in src_list @@ -62,32 +44,36 @@ inline static bool check_inplace_tensor(const std::initializer_list return is_inplace_tensor; } -inline static void check_tensor_size(const std::initializer_list &src_list, at::Tensor &dst, - c10::IntArrayRef expect_size) +static void check_tensor_size(const std::initializer_list &src_list, at::Tensor &dst, + c10::IntArrayRef expect_size) { bool is_inplace = check_inplace_tensor(src_list, dst); // Preserve legacy resizing behavior of out=... arguments if (!dst.sizes().equals(expect_size)) { - TORCH_CHECK(!is_inplace, "output with shape ", dst.sizes(), " doesn't match the broadcast shape ", expect_size); + TORCH_CHECK(!is_inplace, "output with shape ", dst.sizes(), " doesn't match the broadcast shape ", + expect_size); dst.resize_(expect_size); } return; } -constexpr aclDataType kATenScalarTypeToAclDataTypeTable[static_cast(at::ScalarType::NumOptions) + 1] = { +constexpr aclDataType kATenScalarTypeToAclDataTypeTable + [static_cast(at::ScalarType::NumOptions) + 1] = { #define DEFINE_ENUM(_1, n) n, - AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) + AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) #undef DEFINE_ENUM }; -inline aclDataType ConvertToAclDataType(const at::ScalarType &data_type) +aclDataType ConvertToAclDataType(const at::ScalarType &data_type) { - auto acl_dtype = kATenScalarTypeToAclDataTypeTable[static_cast(data_type)]; - TORCH_CHECK(acl_dtype != ACL_DT_UNDEFINED, std::string(c10::toString(data_type)) + " has not been supported") + auto acl_dtype = + kATenScalarTypeToAclDataTypeTable[static_cast(data_type)]; + TORCH_CHECK(acl_dtype != ACL_DT_UNDEFINED, + std::string(c10::toString(data_type)) + " has not been supported") return acl_dtype; } -inline c10::SmallVector array_to_small_vector(c10::IntArrayRef shape) +c10::SmallVector array_to_small_vector(c10::IntArrayRef shape) { c10::SmallVector shape_small_vec; for (uint64_t i = 0; i < shape.size(); i++) { @@ -96,9 +82,11 @@ inline c10::SmallVector array_to_small_vector(c10::IntArrayRef sh return shape_small_vec; } -inline c10::SmallVector conv_transpose2d_npu_output_size(const at::Tensor &input, - const at::Tensor &weight, const at::Tensor &bias, c10::IntArrayRef padding, c10::IntArrayRef output_padding, - c10::IntArrayRef stride, c10::IntArrayRef dilation, int64_t groups) +c10::SmallVector conv_transpose2d_npu_output_size(const at::Tensor &input, const at::Tensor &weight, + const at::Tensor &bias, c10::IntArrayRef padding, + c10::IntArrayRef output_padding, + c10::IntArrayRef stride, c10::IntArrayRef dilation, + int64_t groups) { int64_t N = input.size(0); int64_t H = input.size(2); @@ -109,20 +97,22 @@ inline c10::SmallVector conv_transpose2d_npu_output_size(const at int64_t Ho = (H - 1) * stride[0] - 2 * padding[0] + dilation[0] * (kernel_size[0] - 1) + output_padding[0] + 1; int64_t Wo = (W - 1) * stride[1] - 2 * padding[1] + dilation[1] * (kernel_size[1] - 1) + output_padding[1] + 1; - c10::SmallVector outputSize = { N, Co, Ho, Wo }; + c10::SmallVector 
outputSize = {N, Co, Ho, Wo}; return outputSize; } -inline std::pair trans_torch_type_to_scalar(const std::string &type) +// tyf + +std::pair trans_torch_type_to_scalar(const std::string &type) { if (dTypeTransMap.find(type) != dTypeTransMap.end()) { - return { true, dTypeTransMap[type] }; + return {true, dTypeTransMap[type]}; } - return { false, at::ScalarType::Byte }; + return {false, at::ScalarType::Byte}; } -inline tuple_vector softmax_cross_entropy_with_logits_impl_npu_output_size(const at::Tensor &self) +tuple_vector softmax_cross_entropy_with_logits_impl_npu_output_size(const at::Tensor &self) { c10::SmallVector resultSize = array_to_small_vector(self.size(0)); c10::SmallVector backpropSize = array_to_small_vector(self.sizes()); @@ -130,7 +120,7 @@ inline tuple_vector softmax_cross_entropy_with_logits_impl_npu_output_size(const return std::tuple, c10::SmallVector>(resultSize, backpropSize); } -inline c10::SmallVector convert_array_to_vector(c10::IntArrayRef intArray) +c10::SmallVector convert_array_to_vector(c10::IntArrayRef intArray) { c10::SmallVector intVec; for (uint64_t i = 0; i < intArray.size(); i++) { @@ -139,10 +129,10 @@ inline c10::SmallVector convert_array_to_vector(c10::IntArrayRef int return intVec; } -inline int64_t make_warp_dim(int64_t dim, int64_t dim_post_expr) +int64_t make_warp_dim(int64_t dim, int64_t dim_post_expr) { if (dim_post_expr <= 0) { - dim_post_expr = 1; // this will make range [-1, 0] + dim_post_expr = 1; // this will make range [-1, 0] } if (dim < 0) { dim += dim_post_expr; @@ -151,7 +141,7 @@ inline int64_t make_warp_dim(int64_t dim, int64_t dim_post_expr) } // This logic is specially made for stride_add, and will be removed in future version. -inline c10::SmallVector infersize_stride_add(c10::IntArrayRef shape1_, c10::IntArrayRef shape2_) +c10::SmallVector infersize_stride_add(c10::IntArrayRef shape1_, c10::IntArrayRef shape2_) { auto shape1 = array_to_small_vector(shape1_); auto shape2 = array_to_small_vector(shape2_); @@ -179,7 +169,7 @@ inline c10::SmallVector infersize_stride_add(c10::IntArrayRef sha return output_shape; } -inline c10::SmallVector transpose_npu_output_size(const at::Tensor &self, c10::IntArrayRef perm) +c10::SmallVector transpose_npu_output_size(const at::Tensor &self, c10::IntArrayRef perm) { auto sizes = self.sizes(); c10::SmallVector shape; @@ -190,15 +180,13 @@ inline c10::SmallVector transpose_npu_output_size(const at::Tenso return shape; } -inline bool check_match(const at::Tensor &self) +bool check_match(const at::Tensor &self) { - static auto op = - c10::Dispatcher::singleton().findSchemaOrThrow("aten::check_match", "").typed(); + static auto op = c10::Dispatcher::singleton().findSchemaOrThrow("aten::check_match", "").typed(); return op.call(self); } -inline void format_fresh_view(at::Tensor &x, const at::Tensor &y) +void format_fresh_view(at::Tensor &x, const at::Tensor &y) { x.copy_(y); } -#endif // CSRC_COMMON_H_ diff --git a/ads/common/ops/csrc/common.h b/ads/common/ops/csrc/common.h new file mode 100644 index 0000000000000000000000000000000000000000..95c2b5a194029b37893ce7b45651ea3cb9d315d2 --- /dev/null +++ b/ads/common/ops/csrc/common.h @@ -0,0 +1,33 @@ +#ifndef __COMMON_H__ +#define __COMMON_H__ +#include +#include +#include +#include +#include +#include "torch_npu/csrc/core/npu/NPUMacros.h" +#include "torch_npu/csrc/framework/utils/NPUDefinition.h" +#include "third_party/acl/inc/acl/acl_base.h" + +const int N = 32; +const int SIZE = 8; + +using tuple_vector = std::tuple, c10::SmallVector>; +aclDataType 
ConvertToAclDataType(const at::ScalarType &data_type); +c10::SmallVector array_to_small_vector(c10::IntArrayRef shape); +c10::SmallVector conv_transpose2d_npu_output_size(const at::Tensor &input, const at::Tensor &weight, + const at::Tensor &bias, c10::IntArrayRef padding, + c10::IntArrayRef output_padding, + c10::IntArrayRef stride, c10::IntArrayRef dilation, + int64_t groups); + +std::pair trans_torch_type_to_scalar(const std::string &type); +tuple_vector softmax_cross_entropy_with_logits_impl_npu_output_size(const at::Tensor& self); +int64_t make_warp_dim(int64_t dim, int64_t dim_post_expr); +c10::SmallVector convert_array_to_vector(c10::IntArrayRef intArray); +c10::SmallVector infersize_stride_add(c10::IntArrayRef shape1_, c10::IntArrayRef shape2_); +c10::SmallVector transpose_npu_output_size(const at::Tensor &self, c10::IntArrayRef perm); +bool check_match(const at::Tensor &self); +void format_fresh_view(at::Tensor &x, const at::Tensor &y); + +#endif // __COMMON_H__ diff --git a/ads/common/ops/csrc/functions.h b/ads/common/ops/csrc/functions.h index ccad449f214994ec45ab64895283bd382244c08c..68dc31e55896fd232b263ed301a94a91ea0dbee4 100644 --- a/ads/common/ops/csrc/functions.h +++ b/ads/common/ops/csrc/functions.h @@ -11,14 +11,17 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef COMMON_OPS_CSRC_FUNCTIONS_H_ -#define COMMON_OPS_CSRC_FUNCTIONS_H_ +#ifndef __FUNCTIONS_H__ +#define __FUNCTIONS_H__ #include #include #include #include #include +#include + +void init_common(pybind11::module &m); std::tuple npu_scatter_max(const at::Tensor& updates, const at::Tensor& indices, c10::optional out); at::Tensor npu_scatter_max_backward(const at::Tensor& x, const at::Tensor& segment_ids, const at::Tensor& num_segments); @@ -157,4 +160,4 @@ at::Tensor DynamicVoxelization( const double coorsMinX, const double coorsMinY, const double coorsMinZ); -#endif // COMMON_OPS_CSRC_FUNCTIONS_H_ +#endif // __FUNCTIONS_H__ diff --git a/ads/common/ops/csrc/pybind.cpp b/ads/common/ops/csrc/pybind.cpp index 58e1ad1ac554d3a4b9354d5d57168610130daf35..69ade167d038a93df86432278df2ba01192ab0a4 100644 --- a/ads/common/ops/csrc/pybind.cpp +++ b/ads/common/ops/csrc/pybind.cpp @@ -1,6 +1,5 @@ #include #include "functions.h" -#include "csrc/pybind.h" void init_common(pybind11::module &m) { diff --git a/ads/common/ops/kernels/CMakeLists.txt b/ads/common/ops/kernels/CMakeLists.txt deleted file mode 100644 index 3b1f8543b1fd15189db362166f9afad80f056ffd..0000000000000000000000000000000000000000 --- a/ads/common/ops/kernels/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) - add_subdirectory(op_host) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) - add_subdirectory(op_kernel) -endif() -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() - diff --git a/ads/common/ops/kernels/README.md b/ads/common/ops/kernels/README.md index 1e6645553e8d86a84a9833a13610741b59930494..214fb0a6d662e806bd7f6bdd1b8962bc1639026e 100644 --- a/ads/common/ops/kernels/README.md +++ b/ads/common/ops/kernels/README.md @@ -1,13 +1,2 @@ -## 算子原型 - - - - - - - - - - - -
-算子类型(OpType) | Add
-算子输入 | name | shape | data type | format
-         | x | - | float | ND
-         | y | - | float | ND
-算子输出 | z | - | float | ND
-核函数名 | add_custom
\ No newline at end of file +## Description ++ The folder contains some ascend-kernel source files, which are like cuda-kernels and supply some ops that can be run on ascend device. \ No newline at end of file diff --git a/ads/common/ops/kernels/ads_op/CMakeLists.txt b/ads/common/ops/kernels/ads_op/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..584132d80993d309434fb1303de83910a1989aba --- /dev/null +++ b/ads/common/ops/kernels/ads_op/CMakeLists.txt @@ -0,0 +1,69 @@ +cmake_minimum_required(VERSION 3.16.0) +project(opp) +if(ENABLE_CROSS_COMPILE) + if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64) + set(CROSS_COMPILE_PLATFORM aarch64) + else() + set(CROSS_COMPILE_PLATFORM x86_64) + endif() + set(PLATFORM ${CMAKE_SYSTEM_PROCESSOR}) + set(CMAKE_COMPILE_COMPILER_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/linux/${CROSS_COMPILE_PLATFORM}/) + set(CMAKE_COMPILE_RUNTIME_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/${CROSS_COMPILE_PLATFORM}/) + set(CMAKE_SYSTEM_PROCESSOR ${CROSS_COMPILE_PLATFORM}) + set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) + set(CMAKE_CXX_COMPILER ${CMAKE_CROSS_PLATFORM_COMPILER}) +else() + set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) +endif() + +include(cmake/config.cmake) +include(cmake/func.cmake) +include(cmake/intf.cmake) + +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) + add_subdirectory(framework) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) + add_subdirectory(op_host) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) + add_subdirectory(op_kernel) +endif() +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() + +# modify vendor_name in install.sh and upgrade.sh +add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh + COMMAND mkdir -p ${CMAKE_BINARY_DIR}/scripts + COMMAND cp -r ${CMAKE_SOURCE_DIR}/scripts/* ${CMAKE_BINARY_DIR}/scripts/ + COMMAND sed -i "s/vendor_name=customize/vendor_name=${vendor_name}/g" ${CMAKE_BINARY_DIR}/scripts/* +) +add_custom_target(modify_vendor ALL DEPENDS ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh) +install(DIRECTORY ${CMAKE_BINARY_DIR}/scripts/ DESTINATION . 
FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_READ) + +install(FILES ${CMAKE_SOURCE_DIR}/custom.proto DESTINATION packages OPTIONAL) + +get_system_info(SYSTEM_INFO) + +# gen version.info +add_custom_target(gen_version_info ALL + COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/cmake/util/gen_version_info.sh ${ASCEND_CANN_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR} +) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.info + DESTINATION packages/vendors/${vendor_name}/) + +# CPack config +set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME}) +set(CPACK_PACKAGE_VERSION ${CMAKE_PROJECT_VERSION}) +set(CPACK_PACKAGE_DESCRIPTION "CPack opp project") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CPack opp project") +set(CPACK_PACKAGE_DIRECTORY ${CMAKE_INSTALL_PREFIX}) +set(CPACK_PACKAGE_FILE_NAME "custom_opp_${SYSTEM_INFO}.run") +set(CPACK_GENERATOR External) +set(CPACK_CMAKE_GENERATOR "Unix Makefiles") +set(CPACK_EXTERNAL_ENABLE_STAGING TRUE) +set(CPACK_EXTERNAL_PACKAGE_SCRIPT ${CMAKE_SOURCE_DIR}/cmake/makeself.cmake) +set(CPACK_EXTERNAL_BUILT_PACKAGES ${CPACK_PACKAGE_DIRECTORY}/_CPack_Packages/Linux/External/${CPACK_PACKAGE_FILE_NAME}/${CPACK_PACKAGE_FILE_NAME}) +include(CPack) diff --git a/CMakePresets.json b/ads/common/ops/kernels/ads_op/CMakePresets.json similarity index 80% rename from CMakePresets.json rename to ads/common/ops/kernels/ads_op/CMakePresets.json index ab25547b3cf301350d1d951c89fe07daf0471270..a23c07b8bf823cc052ddf980a835408a9e3b918a 100644 --- a/CMakePresets.json +++ b/ads/common/ops/kernels/ads_op/CMakePresets.json @@ -17,13 +17,17 @@ "type": "STRING", "value": "Release" }, + "ENABLE_SOURCE_PACKAGE": { + "type": "BOOL", + "value": "True" + }, "ENABLE_BINARY_PACKAGE": { "type": "BOOL", "value": "True" }, "ASCEND_COMPUTE_UNIT": { "type": "STRING", - "value": "ascend910b;ascend910;ascend310p" + "value": "ascend910b" }, "ENABLE_TEST": { "type": "BOOL", @@ -45,9 +49,13 @@ "type": "PATH", "value": "${sourceDir}/build_out" }, - "ENABLE_ONNX": { + "ENABLE_CROSS_COMPILE": { "type": "BOOL", "value": "False" + }, + "CMAKE_CROSS_PLATFORM_COMPILER": { + "type": "PATH", + "value": "/usr/bin/aarch64-linux-gnu-g++" } } } diff --git a/ads/common/ops/kernels/ads_op/README.md b/ads/common/ops/kernels/ads_op/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e6645553e8d86a84a9833a13610741b59930494 --- /dev/null +++ b/ads/common/ops/kernels/ads_op/README.md @@ -0,0 +1,13 @@ +## 算子原型 + + + + + + + + + + + +
+算子类型(OpType) | Add
+算子输入 | name | shape | data type | format
+         | x | - | float | ND
+         | y | - | float | ND
+算子输出 | z | - | float | ND
+核函数名 | add_custom
\ No newline at end of file diff --git a/ads/common/ops/kernels/ads_op/build.sh b/ads/common/ops/kernels/ads_op/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..b71f67a642dab630e82a607f2799425d530f9c0d --- /dev/null +++ b/ads/common/ops/kernels/ads_op/build.sh @@ -0,0 +1,51 @@ +#!/bin/bash +script_path=$(realpath $(dirname $0)) + + +mkdir -p build_out +rm -rf build_out/* +cd build_out + +if [ $ASCEND_AICPU_PATH ]; then + jq --arg field "configurePresets" --arg value "$ASCEND_AICPU_PATH" '.[$field][0].cacheVariables.ASCEND_CANN_PACKAGE_PATH.value = $value' $script_path/CMakePresets.json > $script_path/CMakePresets_bat.json + + if [ $? -eq 0 ]; then + mv $script_path/CMakePresets_bat.json $script_path/CMakePresets.json -f + else + echo "Error: please install jq with yum or apt-get" + exit 1 + fi +else + echo "Error: please source env.sh" + exit 1 +fi + +cmake_version=$(cmake --version | grep "cmake version" | awk '{print $3}') +if [ "$cmake_version" \< "3.19.0" ] ; then + opts=$(python3 $script_path/cmake/util/preset_parse.py $script_path/CMakePresets.json) + echo $opts + cmake .. $opts +else + cmake .. --preset=default +fi +target=package +if [ "$1"x != ""x ]; then target=$1; fi + +cmake --build . --target $target -j16 +if [ $? -ne 0 ]; then exit 1; fi + +if [ $target = "package" ]; then + if test -d ./op_kernel/binary ; then + ./cust*.run + if [ $? -ne 0 ]; then exit 1; fi + cmake --build . --target binary -j16 + if [ $? -ne 0 ]; then exit 1; fi + cmake --build . --target $target -j16 + fi +fi + +# for debug +# cd build_out +# make +# cpack +# verbose append -v diff --git a/cmake/config.cmake b/ads/common/ops/kernels/ads_op/cmake/config.cmake similarity index 38% rename from cmake/config.cmake rename to ads/common/ops/kernels/ads_op/cmake/config.cmake index be7299fbc13d44cade31dc3d9040ee3ef96b1f7f..886119daadd85495676c07dfb0b629e3deab8ccf 100644 --- a/cmake/config.cmake +++ b/ads/common/ops/kernels/ads_op/cmake/config.cmake @@ -1,51 +1,25 @@ + set(CMAKE_CXX_FLAGS_DEBUG "") set(CMAKE_CXX_FLAGS_RELEASE "") -if(NOT DEFINED vendor_name) - set(vendor_name - customize - CACHE STRING "") +if (NOT DEFINED vendor_name) + set(vendor_name customize CACHE STRING "") endif() -if(NOT DEFINED ASCEND_CANN_PACKAGE_PATH) - set(ASCEND_CANN_PACKAGE_PATH - /usr/local/Ascend/latest - CACHE PATH "") +if (NOT DEFINED ASCEND_CANN_PACKAGE_PATH) + set(ASCEND_CANN_PACKAGE_PATH /usr/local/Ascend/latest CACHE PATH "") endif() -if(NOT DEFINED ASCEND_PYTHON_EXECUTABLE) - set(ASCEND_PYTHON_EXECUTABLE - python3 - CACHE STRING "") +if (NOT DEFINED ASCEND_PYTHON_EXECUTABLE) + set(ASCEND_PYTHON_EXECUTABLE python3 CACHE STRING "") endif() -if(NOT DEFINED ASCEND_COMPUTE_UNIT) - message(FATAL_ERROR "ASCEND_COMPUTE_UNIT not set in CMakePreset.json ! +if (NOT DEFINED ASCEND_COMPUTE_UNIT) + message(FATAL_ERROR "ASCEND_COMPUTE_UNIT not set in CMakePreset.json ! 
") endif() set(ASCEND_TENSOR_COMPILER_PATH ${ASCEND_CANN_PACKAGE_PATH}/compiler) set(ASCEND_CCEC_COMPILER_PATH ${ASCEND_TENSOR_COMPILER_PATH}/ccec_compiler/bin) set(ASCEND_AUTOGEN_PATH ${CMAKE_BINARY_DIR}/autogen) -set(ASCEND_KERNEL_PATH ${CMAKE_BINARY_DIR}/kernels) -set(ADS_PATH ${PROJECT_SOURCE_DIR}/ads) -set(ASCEND_HOST_SRC - "" - CACHE STRING "host source files") -set(ASCEND_KERNEL_SRC - "" - CACHE STRING "kernel source files") -set(ACLNN_SRC_CUSTOM - "" - CACHE STRING "aclnn source files") -set(ACLNN_INC_CUSTOM - "" - CACHE STRING "aclnn include files") -set(aclop_exclude - "" - CACHE STRING "aclop exclude files") -set(ASCEND_ONNX_SRC - "" - CACHE STRING "onnx source files") set(ASCEND_FRAMEWORK_TYPE tensorflow) file(MAKE_DIRECTORY ${ASCEND_AUTOGEN_PATH}) -file(MAKE_DIRECTORY ${ASCEND_KERNEL_PATH}) set(CUSTOM_COMPILE_OPTIONS "custom_compile_options.ini") execute_process(COMMAND rm -rf ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} COMMAND touch ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS}) diff --git a/ads/common/ops/kernels/ads_op/cmake/func.cmake b/ads/common/ops/kernels/ads_op/cmake/func.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ad187e7d6c0a7c801d0d791d3fab38b2e9d4e71f --- /dev/null +++ b/ads/common/ops/kernels/ads_op/cmake/func.cmake @@ -0,0 +1,228 @@ + +function(get_system_info SYSTEM_INFO) + if (UNIX) + execute_process(COMMAND grep -i ^id= /etc/os-release OUTPUT_VARIABLE TEMP) + string(REGEX REPLACE "\n|id=|ID=|\"" "" SYSTEM_NAME ${TEMP}) + set(${SYSTEM_INFO} ${SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR} PARENT_SCOPE) + elseif (WIN32) + message(STATUS "System is Windows. Only for pre-build.") + else () + message(FATAL_ERROR "${CMAKE_SYSTEM_NAME} not support.") + endif () +endfunction() + +function(opbuild) + message(STATUS "Opbuild generating sources") + cmake_parse_arguments(OPBUILD "" "OUT_DIR;PROJECT_NAME;ACCESS_PREFIX" "OPS_SRC" ${ARGN}) + execute_process(COMMAND ${CMAKE_COMPILE} -g -fPIC -shared -std=c++11 ${OPBUILD_OPS_SRC} -D_GLIBCXX_USE_CXX11_ABI=0 + -I ${ASCEND_CANN_PACKAGE_PATH}/include -L ${ASCEND_CANN_PACKAGE_PATH}/lib64 -lexe_graph -lregister -ltiling_api + -o ${OPBUILD_OUT_DIR}/libascend_all_ops.so + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR + ) + if (${EXEC_RESULT}) + message("build ops lib info: ${EXEC_INFO}") + message("build ops lib error: ${EXEC_ERROR}") + message(FATAL_ERROR "opbuild run failed!") + endif() + set(proj_env "") + set(prefix_env "") + if (NOT "${OPBUILD_PROJECT_NAME}x" STREQUAL "x") + set(proj_env "OPS_PROJECT_NAME=${OPBUILD_PROJECT_NAME}") + endif() + if (NOT "${OPBUILD_ACCESS_PREFIX}x" STREQUAL "x") + set(prefix_env "OPS_DIRECT_ACCESS_PREFIX=${OPBUILD_ACCESS_PREFIX}") + endif() + execute_process(COMMAND ${proj_env} ${prefix_env} ${ASCEND_CANN_PACKAGE_PATH}/toolkit/tools/opbuild/op_build + ${OPBUILD_OUT_DIR}/libascend_all_ops.so ${OPBUILD_OUT_DIR} + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR + ) + if (${EXEC_RESULT}) + message("opbuild ops info: ${EXEC_INFO}") + message("opbuild ops error: ${EXEC_ERROR}") + endif() + message(STATUS "Opbuild generating sources - done") +endfunction() + +function(add_ops_info_target) + cmake_parse_arguments(OPINFO "" "TARGET;OPS_INFO;OUTPUT;INSTALL_DIR" "" ${ARGN}) + get_filename_component(opinfo_file_path "${OPINFO_OUTPUT}" DIRECTORY) + add_custom_command(OUTPUT ${OPINFO_OUTPUT} + COMMAND mkdir -p ${opinfo_file_path} + COMMAND ${ASCEND_PYTHON_EXECUTABLE} 
${CMAKE_SOURCE_DIR}/cmake/util/parse_ini_to_json.py + ${OPINFO_OPS_INFO} ${OPINFO_OUTPUT} + ) + add_custom_target(${OPINFO_TARGET} ALL + DEPENDS ${OPINFO_OUTPUT} + ) + install(FILES ${OPINFO_OUTPUT} + DESTINATION ${OPINFO_INSTALL_DIR} + ) +endfunction() + +function(add_ops_compile_options OP_TYPE) + cmake_parse_arguments(OP_COMPILE "" "OP_TYPE" "COMPUTE_UNIT;OPTIONS" ${ARGN}) + file(APPEND ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} + "${OP_TYPE},${OP_COMPILE_COMPUTE_UNIT},${OP_COMPILE_OPTIONS}\n") +endfunction() + +function(add_ops_impl_target) + cmake_parse_arguments(OPIMPL "" "TARGET;OPS_INFO;IMPL_DIR;OUT_DIR;INSTALL_DIR" "OPS_BATCH;OPS_ITERATE" ${ARGN}) + add_custom_command(OUTPUT ${OPIMPL_OUT_DIR}/.impl_timestamp + COMMAND mkdir -m 700 -p ${OPIMPL_OUT_DIR}/dynamic + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py + ${OPIMPL_OPS_INFO} + \"${OPIMPL_OPS_BATCH}\" \"${OPIMPL_OPS_ITERATE}\" + ${OPIMPL_IMPL_DIR} + ${OPIMPL_OUT_DIR}/dynamic + ${ASCEND_AUTOGEN_PATH} + + COMMAND rm -rf ${OPIMPL_OUT_DIR}/.impl_timestamp + COMMAND touch ${OPIMPL_OUT_DIR}/.impl_timestamp + DEPENDS ${OPIMPL_OPS_INFO} + ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py + ) + add_custom_target(${OPIMPL_TARGET} ALL + DEPENDS ${OPIMPL_OUT_DIR}/.impl_timestamp) + if (${ENABLE_SOURCE_PACKAGE}) + install(DIRECTORY ${OPIMPL_OUT_DIR}/dynamic + DESTINATION ${OPIMPL_INSTALL_DIR} + ) + endif() +endfunction() + +function(add_ops_replay_targets) + cmake_parse_arguments(OPREPLAY "" "OPS_INFO;COMPUTE_UNIT;IMPL_DIR;OUT_DIR;INSTALL_DIR" "OPS_BATCH;OPS_ITERATE" ${ARGN}) + # ccec compile options + set(ccec_base_opts -c -O2 --cce-aicore-only -mllvm -cce-aicore-function-stack-size=16000 + -mllvm -cce-aicore-record-overflow=false -std=c++17) + set(ccec_extopts_ascend310p --cce-aicore-arch=dav-m200 -mllvm -cce-aicore-fp-ceiling=2) + set(ccec_extopts_ascend910 --cce-aicore-arch=dav-c100) + set(ccec_extopts_ascend910b --cce-aicore-arch=dav-c220-cube) + file(MAKE_DIRECTORY ${OPREPLAY_OUT_DIR}) + execute_process(COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_replay_build.py + ${OPREPLAY_OPS_INFO} + "${OPREPLAY_OPS_BATCH}" "${OPREPLAY_OPS_ITERATE}" + ${OPREPLAY_IMPL_DIR} + ${OPREPLAY_OUT_DIR} + ${OPREPLAY_COMPUTE_UNIT} + ) + file(GLOB replay_kernel_entries ${OPREPLAY_OUT_DIR}/*.cce) + if (NOT "${replay_kernel_entries}x" STREQUAL "x") + foreach(replay_kernel_file ${replay_kernel_entries}) + get_filename_component(replay_kernel_file_name "${replay_kernel_file}" NAME) + string(REPLACE "_entry.cce" "" op_kerne_name ${replay_kernel_file_name}) + file(GLOB replay_lib_src ${OPREPLAY_OUT_DIR}/${op_kerne_name}*.cpp) + set(OP_TILING_DATA_H_PATH ${OPREPLAY_OUT_DIR}/${op_kerne_name}_tiling_data.h) + add_library(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} SHARED ${replay_lib_src}) + if(EXISTS ${OP_TILING_DATA_H_PATH}) + target_compile_options(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE + -include ${OP_TILING_DATA_H_PATH} + ) + endif() + target_compile_definitions(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE + ${op_kerne_name}=${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} + ) + target_compile_options(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE + -D__ASCENDC_REPLAY__ + ) + target_link_libraries(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE intf_pub + tikreplaylib::${OPREPLAY_COMPUTE_UNIT} + register + ) + add_custom_command(OUTPUT ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o + COMMAND ccec ${ccec_base_opts} 
${ccec_extopts_${OPREPLAY_COMPUTE_UNIT}} ${replay_kernel_file} + -o ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o + DEPENDS ${replay_kernel_file} + ) + add_custom_target(replay_kernel_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} ALL + DEPENDS ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o + ) + install(TARGETS replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_replay + ) + install(FILES ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o + DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_replay + ) + endforeach() + endif() +endfunction() + +function(add_npu_support_target) + cmake_parse_arguments(NPUSUP "" "TARGET;OPS_INFO_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) + get_filename_component(npu_sup_file_path "${NPUSUP_OUT_DIR}" DIRECTORY) + add_custom_command(OUTPUT ${NPUSUP_OUT_DIR}/npu_supported_ops.json + COMMAND mkdir -p ${NPUSUP_OUT_DIR} + COMMAND ${CMAKE_SOURCE_DIR}/cmake/util/gen_ops_filter.sh + ${NPUSUP_OPS_INFO_DIR} + ${NPUSUP_OUT_DIR} + ) + add_custom_target(npu_supported_ops ALL + DEPENDS ${NPUSUP_OUT_DIR}/npu_supported_ops.json + ) + install(FILES ${NPUSUP_OUT_DIR}/npu_supported_ops.json + DESTINATION ${NPUSUP_INSTALL_DIR} + ) +endfunction() + +function(add_bin_compile_target) + cmake_parse_arguments(BINCMP "" "TARGET;OPS_INFO;COMPUTE_UNIT;IMPL_DIR;ADP_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/src) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/gen) + execute_process(COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_bin_param_build.py + ${BINCMP_OPS_INFO} ${BINCMP_OUT_DIR}/gen ${BINCMP_COMPUTE_UNIT} + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR + ) + if (${EXEC_RESULT}) + message("ops binary compile scripts gen info: ${EXEC_INFO}") + message("ops binary compile scripts gen error: ${EXEC_ERROR}") + message(FATAL_ERROR "ops binary compile scripts gen failed!") + endif() + if (NOT TARGET binary) + add_custom_target(binary) + endif() + add_custom_target(${BINCMP_TARGET} + COMMAND cp -r ${BINCMP_IMPL_DIR}/*.* ${BINCMP_OUT_DIR}/src + ) + add_custom_target(${BINCMP_TARGET}_gen_ops_config + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/insert_simplified_keys.py -p ${BINCMP_OUT_DIR}/bin + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_ops_config.py -p ${BINCMP_OUT_DIR}/bin + -s ${BINCMP_COMPUTE_UNIT} + ) + add_dependencies(binary ${BINCMP_TARGET}_gen_ops_config) + file(GLOB bin_scripts ${BINCMP_OUT_DIR}/gen/*.sh) + foreach(bin_script ${bin_scripts}) + get_filename_component(bin_file ${bin_script} NAME_WE) + string(REPLACE "-" ";" bin_sep ${bin_file}) + list(GET bin_sep 0 op_type) + list(GET bin_sep 1 op_file) + list(GET bin_sep 2 op_index) + if (NOT TARGET ${BINCMP_TARGET}_${op_file}_copy) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file}) + add_custom_target(${BINCMP_TARGET}_${op_file}_copy + COMMAND cp ${BINCMP_ADP_DIR}/${op_file}.py ${BINCMP_OUT_DIR}/src/${op_type}.py + ) + install(DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file} + DESTINATION ${BINCMP_INSTALL_DIR}/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + install(FILES ${BINCMP_OUT_DIR}/bin/${op_file}.json + DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT}/ OPTIONAL + ) + endif() + add_custom_target(${BINCMP_TARGET}_${op_file}_${op_index} + COMMAND export HI_PYTHON=${ASCEND_PYTHON_EXECUTABLE} 
&& bash ${bin_script} ${BINCMP_OUT_DIR}/src/${op_type}.py ${BINCMP_OUT_DIR}/bin/${op_file} + WORKING_DIRECTORY ${BINCMP_OUT_DIR} + ) + add_dependencies(${BINCMP_TARGET}_${op_file}_${op_index} ${BINCMP_TARGET} ${BINCMP_TARGET}_${op_file}_copy) + add_dependencies(${BINCMP_TARGET}_gen_ops_config ${BINCMP_TARGET}_${op_file}_${op_index}) + endforeach() + install(FILES ${BINCMP_OUT_DIR}/bin/binary_info_config.json + DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) +endfunction() diff --git a/cmake/intf.cmake b/ads/common/ops/kernels/ads_op/cmake/intf.cmake similarity index 33% rename from cmake/intf.cmake rename to ads/common/ops/kernels/ads_op/cmake/intf.cmake index 416ab14a0e86c4b62f37a4e02316c843f147c356..2f362c396622d66132f80f54492a8cc3204882fb 100644 --- a/cmake/intf.cmake +++ b/ads/common/ops/kernels/ads_op/cmake/intf.cmake @@ -1,35 +1,26 @@ + add_library(intf_pub INTERFACE) -target_compile_options( - intf_pub - INTERFACE +target_compile_options(intf_pub INTERFACE -fPIC -fvisibility=hidden -fvisibility-inlines-hidden $<$:-O2> - $<$:-O0 - -g> + $<$:-O0 -g> $<$:-std=c++11> - $<$,$>:-ftrapv - -fstack-check> - $<$:-pthread - -Wfloat-equal - -Wshadow - -Wformat=2 - -Wno-deprecated - -Wextra> + $<$,$>:-ftrapv -fstack-check> + $<$:-pthread -Wfloat-equal -Wshadow -Wformat=2 -Wno-deprecated -Wextra> $,-fstack-protector-strong,-fstack-protector-all> ) -target_compile_definitions( - intf_pub INTERFACE _GLIBCXX_USE_CXX11_ABI=0 - $<$:_FORTIFY_SOURCE=2>) -target_include_directories(intf_pub - INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/include) -target_link_options( - intf_pub - INTERFACE - $<$,EXECUTABLE>:-pie> - $<$:-s> - -Wl,-z,relro - -Wl,-z,now - -Wl,-z,noexecstack) +target_compile_definitions(intf_pub INTERFACE + _GLIBCXX_USE_CXX11_ABI=0 + $<$:_FORTIFY_SOURCE=2> +) +target_include_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/include) +target_link_options(intf_pub INTERFACE + $<$,EXECUTABLE>:-pie> + $<$:-s> + -Wl,-z,relro + -Wl,-z,now + -Wl,-z,noexecstack +) target_link_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/lib64) diff --git a/ads/common/ops/kernels/ads_op/cmake/makeself.cmake b/ads/common/ops/kernels/ads_op/cmake/makeself.cmake new file mode 100644 index 0000000000000000000000000000000000000000..48c565bfb4f2edc6534a81abaa8565c4cf2dfc30 --- /dev/null +++ b/ads/common/ops/kernels/ads_op/cmake/makeself.cmake @@ -0,0 +1,17 @@ +execute_process(COMMAND chmod +x ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh) +execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh + --header ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself-header.sh + --help-header ./help.info + --gzip --complevel 4 --nomd5 --sha256 + ./ ${CPACK_PACKAGE_FILE_NAME} "version:1.0" ./install.sh + WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} + RESULT_VARIABLE EXEC_RESULT + ERROR_VARIABLE EXEC_ERROR +) +if (NOT "${EXEC_RESULT}x" STREQUAL "0x") + message(FATAL_ERROR "CPack Command error: ${EXEC_RESULT}\n${EXEC_ERROR}") +endif() +execute_process(COMMAND cp ${CPACK_EXTERNAL_BUILT_PACKAGES} ${CPACK_PACKAGE_DIRECTORY}/ + COMMAND echo "Copy ${CPACK_EXTERNAL_BUILT_PACKAGES} to ${CPACK_PACKAGE_DIRECTORY}/" + WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} +) diff --git a/cmake/util/__init__.py b/ads/common/ops/kernels/ads_op/cmake/util/__init__.py similarity index 100% rename from cmake/util/__init__.py rename to ads/common/ops/kernels/ads_op/cmake/util/__init__.py diff --git a/cmake/util/ascendc_bin_param_build.py 
b/ads/common/ops/kernels/ads_op/cmake/util/ascendc_bin_param_build.py similarity index 100% rename from cmake/util/ascendc_bin_param_build.py rename to ads/common/ops/kernels/ads_op/cmake/util/ascendc_bin_param_build.py diff --git a/cmake/util/ascendc_impl_build.py b/ads/common/ops/kernels/ads_op/cmake/util/ascendc_impl_build.py similarity index 100% rename from cmake/util/ascendc_impl_build.py rename to ads/common/ops/kernels/ads_op/cmake/util/ascendc_impl_build.py diff --git a/cmake/util/ascendc_ops_config.py b/ads/common/ops/kernels/ads_op/cmake/util/ascendc_ops_config.py similarity index 100% rename from cmake/util/ascendc_ops_config.py rename to ads/common/ops/kernels/ads_op/cmake/util/ascendc_ops_config.py diff --git a/cmake/util/ascendc_replay_build.py b/ads/common/ops/kernels/ads_op/cmake/util/ascendc_replay_build.py similarity index 100% rename from cmake/util/ascendc_replay_build.py rename to ads/common/ops/kernels/ads_op/cmake/util/ascendc_replay_build.py diff --git a/cmake/util/batch_replay_impl.temp b/ads/common/ops/kernels/ads_op/cmake/util/batch_replay_impl.temp similarity index 100% rename from cmake/util/batch_replay_impl.temp rename to ads/common/ops/kernels/ads_op/cmake/util/batch_replay_impl.temp diff --git a/cmake/util/code_channel_infer.py b/ads/common/ops/kernels/ads_op/cmake/util/code_channel_infer.py similarity index 100% rename from cmake/util/code_channel_infer.py rename to ads/common/ops/kernels/ads_op/cmake/util/code_channel_infer.py diff --git a/cmake/util/const_var.py b/ads/common/ops/kernels/ads_op/cmake/util/const_var.py similarity index 100% rename from cmake/util/const_var.py rename to ads/common/ops/kernels/ads_op/cmake/util/const_var.py diff --git a/cmake/util/gen_impl_and_mrege_json.sh b/ads/common/ops/kernels/ads_op/cmake/util/gen_impl_and_mrege_json.sh similarity index 100% rename from cmake/util/gen_impl_and_mrege_json.sh rename to ads/common/ops/kernels/ads_op/cmake/util/gen_impl_and_mrege_json.sh diff --git a/cmake/util/gen_ops_filter.sh b/ads/common/ops/kernels/ads_op/cmake/util/gen_ops_filter.sh similarity index 100% rename from cmake/util/gen_ops_filter.sh rename to ads/common/ops/kernels/ads_op/cmake/util/gen_ops_filter.sh diff --git a/cmake/util/gen_version_info.sh b/ads/common/ops/kernels/ads_op/cmake/util/gen_version_info.sh similarity index 100% rename from cmake/util/gen_version_info.sh rename to ads/common/ops/kernels/ads_op/cmake/util/gen_version_info.sh diff --git a/cmake/util/insert_op_info.py b/ads/common/ops/kernels/ads_op/cmake/util/insert_op_info.py similarity index 100% rename from cmake/util/insert_op_info.py rename to ads/common/ops/kernels/ads_op/cmake/util/insert_op_info.py diff --git a/cmake/util/insert_simplified_keys.py b/ads/common/ops/kernels/ads_op/cmake/util/insert_simplified_keys.py similarity index 100% rename from cmake/util/insert_simplified_keys.py rename to ads/common/ops/kernels/ads_op/cmake/util/insert_simplified_keys.py diff --git a/cmake/util/kernel_entry.py b/ads/common/ops/kernels/ads_op/cmake/util/kernel_entry.py similarity index 100% rename from cmake/util/kernel_entry.py rename to ads/common/ops/kernels/ads_op/cmake/util/kernel_entry.py diff --git a/cmake/util/kernel_impl.temp b/ads/common/ops/kernels/ads_op/cmake/util/kernel_impl.temp similarity index 100% rename from cmake/util/kernel_impl.temp rename to ads/common/ops/kernels/ads_op/cmake/util/kernel_impl.temp diff --git a/cmake/util/makeself/COPYING b/ads/common/ops/kernels/ads_op/cmake/util/makeself/COPYING similarity index 100% rename from 
cmake/util/makeself/COPYING rename to ads/common/ops/kernels/ads_op/cmake/util/makeself/COPYING diff --git a/cmake/util/makeself/README.md b/ads/common/ops/kernels/ads_op/cmake/util/makeself/README.md similarity index 100% rename from cmake/util/makeself/README.md rename to ads/common/ops/kernels/ads_op/cmake/util/makeself/README.md diff --git a/cmake/util/makeself/VERSION b/ads/common/ops/kernels/ads_op/cmake/util/makeself/VERSION similarity index 100% rename from cmake/util/makeself/VERSION rename to ads/common/ops/kernels/ads_op/cmake/util/makeself/VERSION diff --git a/cmake/util/makeself/make-release.sh b/ads/common/ops/kernels/ads_op/cmake/util/makeself/make-release.sh similarity index 100% rename from cmake/util/makeself/make-release.sh rename to ads/common/ops/kernels/ads_op/cmake/util/makeself/make-release.sh diff --git a/cmake/util/makeself/makeself-header.sh b/ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself-header.sh similarity index 100% rename from cmake/util/makeself/makeself-header.sh rename to ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself-header.sh diff --git a/cmake/util/makeself/makeself.1 b/ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.1 similarity index 100% rename from cmake/util/makeself/makeself.1 rename to ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.1 diff --git a/cmake/util/makeself/makeself.lsm b/ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.lsm similarity index 100% rename from cmake/util/makeself/makeself.lsm rename to ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.lsm diff --git a/cmake/util/makeself/makeself.sh b/ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.sh similarity index 100% rename from cmake/util/makeself/makeself.sh rename to ads/common/ops/kernels/ads_op/cmake/util/makeself/makeself.sh diff --git a/cmake/util/makeself/run-tests.sh b/ads/common/ops/kernels/ads_op/cmake/util/makeself/run-tests.sh similarity index 100% rename from cmake/util/makeself/run-tests.sh rename to ads/common/ops/kernels/ads_op/cmake/util/makeself/run-tests.sh diff --git a/cmake/util/merge_aicpu_info_json.sh b/ads/common/ops/kernels/ads_op/cmake/util/merge_aicpu_info_json.sh similarity index 100% rename from cmake/util/merge_aicpu_info_json.sh rename to ads/common/ops/kernels/ads_op/cmake/util/merge_aicpu_info_json.sh diff --git a/cmake/util/opdesc_parser.py b/ads/common/ops/kernels/ads_op/cmake/util/opdesc_parser.py similarity index 100% rename from cmake/util/opdesc_parser.py rename to ads/common/ops/kernels/ads_op/cmake/util/opdesc_parser.py diff --git a/cmake/util/parse_ini_to_json.py b/ads/common/ops/kernels/ads_op/cmake/util/parse_ini_to_json.py similarity index 100% rename from cmake/util/parse_ini_to_json.py rename to ads/common/ops/kernels/ads_op/cmake/util/parse_ini_to_json.py diff --git a/cmake/util/preset_parse.py b/ads/common/ops/kernels/ads_op/cmake/util/preset_parse.py similarity index 100% rename from cmake/util/preset_parse.py rename to ads/common/ops/kernels/ads_op/cmake/util/preset_parse.py diff --git a/cmake/util/replay_codegen.py b/ads/common/ops/kernels/ads_op/cmake/util/replay_codegen.py similarity index 100% rename from cmake/util/replay_codegen.py rename to ads/common/ops/kernels/ads_op/cmake/util/replay_codegen.py diff --git a/cmake/util/replay_impl.temp b/ads/common/ops/kernels/ads_op/cmake/util/replay_impl.temp similarity index 100% rename from cmake/util/replay_impl.temp rename to ads/common/ops/kernels/ads_op/cmake/util/replay_impl.temp diff --git 
a/cmake/util/tiling_data_def_build.py b/ads/common/ops/kernels/ads_op/cmake/util/tiling_data_def_build.py similarity index 100% rename from cmake/util/tiling_data_def_build.py rename to ads/common/ops/kernels/ads_op/cmake/util/tiling_data_def_build.py diff --git a/ads/common/ops/kernels/ads_op/framework/CMakeLists.txt b/ads/common/ops/kernels/ads_op/framework/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b6be9b492610f4d45b25bb7725648df9aac39a12 --- /dev/null +++ b/ads/common/ops/kernels/ads_op/framework/CMakeLists.txt @@ -0,0 +1,11 @@ +if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mindspore") + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/caffe_plugin") + add_subdirectory(caffe_plugin) + endif() + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tf_plugin") + add_subdirectory(tf_plugin) + endif() + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/onnx_plugin") + add_subdirectory(onnx_plugin) + endif() +endif() diff --git a/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt b/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..40dd51cfac524b0a9607b7d8b2813edd2210c509 --- /dev/null +++ b/ads/common/ops/kernels/ads_op/op_host/CMakeLists.txt @@ -0,0 +1,82 @@ + +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} ops_srcs) + +opbuild(OPS_SRC ${ops_srcs} + OUT_DIR ${ASCEND_AUTOGEN_PATH} +) + +add_library(cust_op_proto SHARED ${ops_srcs} ${ASCEND_AUTOGEN_PATH}/op_proto.cc) +target_compile_definitions(cust_op_proto PRIVATE OP_PROTO_LIB) +target_compile_options(cust_op_proto PRIVATE + -fvisibility=hidden +) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_op_proto PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_op_proto PRIVATE + intf_pub + exe_graph + register + tiling_api + -Wl,--whole-archive + rt2_registry + -Wl,--no-whole-archive +) +set_target_properties(cust_op_proto PROPERTIES OUTPUT_NAME + cust_opsproto_rt2.0 +) +add_library(cust_optiling SHARED ${ops_srcs}) +target_compile_definitions(cust_optiling PRIVATE OP_TILING_LIB) +target_compile_options(cust_optiling PRIVATE + -fvisibility=hidden +) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_optiling PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_optiling PRIVATE + intf_pub + exe_graph + register + tiling_api + -Wl,--whole-archive + rt2_registry + -Wl,--no-whole-archive +) +set_target_properties(cust_optiling PROPERTIES OUTPUT_NAME + cust_opmaster_rt2.0 +) + +file(GLOB aclnn_src ${ASCEND_AUTOGEN_PATH}/aclnn_*.cpp) +file(GLOB aclnn_inc ${ASCEND_AUTOGEN_PATH}/aclnn_*.h) +add_library(cust_opapi SHARED ${aclnn_src}) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_opapi PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_opapi PRIVATE intf_pub ascendcl nnopbase) + +add_custom_target(optiling_compat ALL + COMMAND ln -sf lib/linux/${CMAKE_SYSTEM_PROCESSOR}/$ + ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so +) + +install(TARGETS cust_op_proto + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_proto/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) +install(FILES ${ASCEND_AUTOGEN_PATH}/op_proto.h + DESTINATION packages/vendors/${vendor_name}/op_proto/inc) +install(TARGETS cust_optiling + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) +install(FILES 
${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so + DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling) +install(TARGETS cust_opapi + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_api/lib) +install(FILES ${aclnn_inc} + DESTINATION packages/vendors/${vendor_name}/op_api/include) diff --git a/ads/common/ops/kernels/op_host/add_custom.cpp b/ads/common/ops/kernels/ads_op/op_host/add_custom.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/add_custom.cpp rename to ads/common/ops/kernels/ads_op/op_host/add_custom.cpp diff --git a/ads/common/ops/kernels/op_host/add_custom_tiling.h b/ads/common/ops/kernels/ads_op/op_host/add_custom_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/add_custom_tiling.h rename to ads/common/ops/kernels/ads_op/op_host/add_custom_tiling.h diff --git a/ads/common/ops/kernels/op_host/dynamic_scatter.cpp b/ads/common/ops/kernels/ads_op/op_host/dynamic_scatter.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/dynamic_scatter.cpp rename to ads/common/ops/kernels/ads_op/op_host/dynamic_scatter.cpp diff --git a/ads/common/ops/kernels/op_host/dynamic_scatter_tiling.h b/ads/common/ops/kernels/ads_op/op_host/dynamic_scatter_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/dynamic_scatter_tiling.h rename to ads/common/ops/kernels/ads_op/op_host/dynamic_scatter_tiling.h diff --git a/ads/common/ops/kernels/op_host/dynamic_voxelization.cpp b/ads/common/ops/kernels/ads_op/op_host/dynamic_voxelization.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/dynamic_voxelization.cpp rename to ads/common/ops/kernels/ads_op/op_host/dynamic_voxelization.cpp diff --git a/ads/common/ops/kernels/op_host/dynamic_voxelization_tiling.h b/ads/common/ops/kernels/ads_op/op_host/dynamic_voxelization_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/dynamic_voxelization_tiling.h rename to ads/common/ops/kernels/ads_op/op_host/dynamic_voxelization_tiling.h diff --git a/ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp b/ads/common/ops/kernels/ads_op/op_host/furthest_point_sampling_with_dist.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp rename to ads/common/ops/kernels/ads_op/op_host/furthest_point_sampling_with_dist.cpp diff --git a/ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h b/ads/common/ops/kernels/ads_op/op_host/furthest_point_sampling_with_dist_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h rename to ads/common/ops/kernels/ads_op/op_host/furthest_point_sampling_with_dist_tiling.h diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attention_grad.cpp b/ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attention_grad.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attention_grad.cpp rename to ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attention_grad.cpp diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attention_grad.h b/ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attention_grad.h similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attention_grad.h rename to ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attention_grad.h diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attn_function_v2.cpp 
b/ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attn_function_v2.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attn_function_v2.cpp rename to ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attn_function_v2.cpp diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attn_function_v2.h b/ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attn_function_v2.h similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attn_function_v2.h rename to ads/common/ops/kernels/ads_op/op_host/multi_scale_deformable_attn_function_v2.h diff --git a/ads/common/ops/kernels/ads_op/op_kernel/CMakeLists.txt b/ads/common/ops/kernels/ads_op/op_kernel/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0d31a444cd71b6e455dc206b9b89159dea9f4ce2 --- /dev/null +++ b/ads/common/ops/kernels/ads_op/op_kernel/CMakeLists.txt @@ -0,0 +1,61 @@ +# set custom compile options +if ("${CMAKE_BUILD_TYPE}x" STREQUAL "Debugx") + add_ops_compile_options(ALL OPTIONS -g -O0) +endif() + +foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) + + # generate aic-${compute_unit}-ops-info.json + add_ops_info_target(TARGET ops_info_gen_${compute_unit} + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core/${compute_unit}/aic-${compute_unit}-ops-info.json + OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit} + ) + + # generate ascendc impl py once + if (NOT TARGET ascendc_impl_gen) + add_ops_impl_target(TARGET ascendc_impl_gen + OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} + OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl + ) + endif() + + # dynamic shape binary compile + if (${ENABLE_BINARY_PACKAGE}) + add_bin_compile_target(TARGET ascendc_bin_${compute_unit} + OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} + ADP_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/dynamic + OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/binary/${compute_unit} + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel + COMPUTE_UNIT ${compute_unit} + ) + add_dependencies(ascendc_bin_${compute_unit} ascendc_impl_gen) + endif() + +endforeach() + +# generate npu_supported_ops.json +add_npu_support_target(TARGET npu_supported_ops + OPS_INFO_DIR ${ASCEND_AUTOGEN_PATH} + OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core + INSTALL_DIR packages/vendors/${vendor_name}/framework/${ASCEND_FRAMEWORK_TYPE} +) + +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() + +# install kernel file +if (${ENABLE_SOURCE_PACKAGE}) + file(GLOB KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/*.h + ${CMAKE_CURRENT_SOURCE_DIR}/*.py + ) + install(FILES ${KERNEL_FILES} + DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic + ) +endif() diff --git a/ads/common/ops/kernels/op_kernel/add_custom.cpp b/ads/common/ops/kernels/ads_op/op_kernel/add_custom.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/add_custom.cpp rename to ads/common/ops/kernels/ads_op/op_kernel/add_custom.cpp diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter.cpp b/ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter.cpp similarity 
index 100% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter.cpp rename to ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter.cpp diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_base.h b/ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_base.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_base.h rename to ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_base.h diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_max.h b/ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_max.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_max.h rename to ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_max.h diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_sum.h b/ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_sum.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_sum.h rename to ads/common/ops/kernels/ads_op/op_kernel/dynamic_scatter_sum.h diff --git a/ads/common/ops/kernels/op_kernel/dynamic_voxelization.cpp b/ads/common/ops/kernels/ads_op/op_kernel/dynamic_voxelization.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/dynamic_voxelization.cpp rename to ads/common/ops/kernels/ads_op/op_kernel/dynamic_voxelization.cpp diff --git a/ads/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp b/ads/common/ops/kernels/ads_op/op_kernel/furthest_point_sampling_with_dist.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp rename to ads/common/ops/kernels/ads_op/op_kernel/furthest_point_sampling_with_dist.cpp diff --git a/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attention_grad.cpp b/ads/common/ops/kernels/ads_op/op_kernel/multi_scale_deformable_attention_grad.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/multi_scale_deformable_attention_grad.cpp rename to ads/common/ops/kernels/ads_op/op_kernel/multi_scale_deformable_attention_grad.cpp diff --git a/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function_v2.cpp b/ads/common/ops/kernels/ads_op/op_kernel/multi_scale_deformable_attn_function_v2.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function_v2.cpp rename to ads/common/ops/kernels/ads_op/op_kernel/multi_scale_deformable_attn_function_v2.cpp diff --git a/scripts/install_kernel.sh b/ads/common/ops/kernels/ads_op/scripts/install.sh similarity index 100% rename from scripts/install_kernel.sh rename to ads/common/ops/kernels/ads_op/scripts/install.sh diff --git a/scripts/upgrade_kernel.sh b/ads/common/ops/kernels/ads_op/scripts/upgrade.sh similarity index 100% rename from scripts/upgrade_kernel.sh rename to ads/common/ops/kernels/ads_op/scripts/upgrade.sh diff --git a/ads/common/ops/kernels/op_host/CMakeLists.txt b/ads/common/ops/kernels/op_host/CMakeLists.txt deleted file mode 100644 index c44b2b0174f28f0144a7c03fc6c40cc5b389c14e..0000000000000000000000000000000000000000 --- a/ads/common/ops/kernels/op_host/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) -set(ASCEND_HOST_SRC - ${ASCEND_HOST_SRC} ${HOST_SRC} - CACHE INTERNAL "") -# add the exclude files for aclnn -set(aclop_exclude - ${aclop_exclude} "" - CACHE INTERNAL "") -file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) -file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) 
-set(ACLNN_SRC_CUSTOM - ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} - CACHE INTERNAL "") -set(ACLNN_INC_CUSTOM - ${ACLNN_INC_CUSTOM} ${ACLNN_INC} - CACHE INTERNAL "") diff --git a/ads/common/ops/kernels/op_kernel/CMakeLists.txt b/ads/common/ops/kernels/op_kernel/CMakeLists.txt deleted file mode 100644 index 0cf5021494806dab37271b1c052809e09760f1b8..0000000000000000000000000000000000000000 --- a/ads/common/ops/kernels/op_kernel/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -file(GLOB KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) -set(ASCEND_KERNEL_SRC - ${ASCEND_KERNEL_SRC} ${KERNEL_SRC} - CACHE INTERNAL "") diff --git a/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function.cpp b/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function.cpp deleted file mode 100644 index c59529fa7c78e206cb7b07cfdb8cc2a7cb200af0..0000000000000000000000000000000000000000 --- a/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_function.cpp +++ /dev/null @@ -1,359 +0,0 @@ - -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * - * This sample is a very basic sample that implements vector add on Ascend plaform. - */ -#include "kernel_operator.h" -using namespace AscendC; -constexpr int32_t BUFFER_NUM = 2; - -class KernelMultiScaleDeformableAttnFunctionV2 -{ -public: - __aicore__ inline KernelMultiScaleDeformableAttnFunctionV2() {} - __aicore__ inline void Init(GM_ADDR value, - GM_ADDR value_spatial_shapes, - GM_ADDR value_level_start_index, - GM_ADDR sampling_locations, - GM_ADDR attention_weights, - GM_ADDR output, MultiScaleDeformableAttnFunctionV2TilingData *tiling_data) - { - ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); - dataAlign = blockNum / sizeof(DTYPE_VALUE); - batchSize = tiling_data->batchSize; - numKeys = tiling_data->numKeys; - numHeads = tiling_data->numHeads; - embedDims = tiling_data->embedDims; - - numLevels = tiling_data->numLevels; - numQueries = tiling_data->numQueries; - numPoints = tiling_data->numPoints; - coreNum = tiling_data->coreNum; - - taskNum = batchSize * numQueries; - taskNumPerCore = DivCeil(taskNum, coreNum); - - embedDimsAlign = AlignUp(embedDims, dataAlign); - numPointsAlign = AlignUp(numPoints, dataAlign); - numLevelsAlign = AlignUp(numLevels, dataAlign); - - curBlockIdx = GetBlockIdx(); - startOffset = curBlockIdx * taskNumPerCore; - endOffset = (curBlockIdx + 1) * taskNumPerCore; - if (endOffset > taskNum) - { - endOffset = taskNum; - } - - valueGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE *>(value), batchSize * numKeys * numHeads * embedDims); - locationGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE *>(sampling_locations), batchSize * numQueries * numHeads * numLevels * numPoints * 2); - attentionWeightsGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE *>(attention_weights), batchSize * numQueries * numHeads * numLevels * numPoints); - outputGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE *>(output), batchSize * numQueries * numHeads * embedDims); - - valueSpatialShapesGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE_SPATIAL_SHAPES *>(value_spatial_shapes), numLevels * 2); - valueLevelStartIndexGm.SetGlobalBuffer(reinterpret_cast<__gm__ DTYPE_VALUE_SPATIAL_SHAPES *>(value_level_start_index), numLevels); - - pipe.InitBuffer(shapeQueue, BUFFER_NUM, AlignUp(numLevels * 2, dataAlign) * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(offsetQueue, BUFFER_NUM, numLevelsAlign * sizeof(DTYPE_VALUE)); - - pipe.InitBuffer(locationQueue, BUFFER_NUM, 
AlignUp(numLevels * numPoints * 2, dataAlign) * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(attentionWeightsUb, BUFFER_NUM, AlignUp(numLevels * numPoints, dataAlign) * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(outputQueue, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); - - pipe.InitBuffer(tmpUb1, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(tmpUb2, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(tmpUb3, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(tmpUb4, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - - pipe.InitBuffer(tmpResUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(tmpResUb2, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); - - pipe.InitBuffer(intOneUb, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); - pipe.InitBuffer(floatOneUb, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - - pipe.InitBuffer(tmpXUb, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(tmpYUb, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(tmpParam0Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(tmpParam1Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - - pipe.InitBuffer(tmpIntX0Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); - pipe.InitBuffer(tmpIntY0Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); - pipe.InitBuffer(tmpIntX1Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); - pipe.InitBuffer(tmpIntY1Ub, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE_SPATIAL_SHAPES)); - - pipe.InitBuffer(leftTopWieightQueue, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(leftBottomWieightQueue, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(rightTopWieightQueue, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(rightBottomWieightQueue, BUFFER_NUM, numPointsAlign * sizeof(DTYPE_VALUE)); - - pipe.InitBuffer(leftTopValueUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(leftBottomValueUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(rightTopValueUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); - pipe.InitBuffer(rightBottomValueUb, BUFFER_NUM, embedDimsAlign * sizeof(DTYPE_VALUE)); - } - - __aicore__ inline void Process() - { - for (uint32_t taskIdx = startOffset; taskIdx < endOffset; taskIdx++) - { - batch = taskIdx / numQueries; - query = taskIdx % numQueries; - pipe_barrier(PIPE_ALL); - Compute(batch, query); - } - } - -private: - __aicore__ inline bool isInRange(DTYPE_VALUE_SPATIAL_SHAPES x, DTYPE_VALUE_SPATIAL_SHAPES upper) - { - return 0 <= x && x < upper; - } - - __aicore__ inline void Compute(uint32_t batch, uint32_t query) - { - LocalTensor tmpResLocal = tmpResUb.Get(); - LocalTensor tmpResLocal2 = tmpResUb2.Get(); - - LocalTensor leftTopValueLocal = leftTopValueUb.Get(); - LocalTensor leftBottomValueUbLocal = leftBottomValueUb.Get(); - LocalTensor rightTopValueUbLocal = rightTopValueUb.Get(); - LocalTensor rightBottomValueUbLocal = rightBottomValueUb.Get(); - - LocalTensor leftTopWeiightLocal = leftTopWieightQueue.Get(); - LocalTensor leftBottomWeightLocal = leftBottomWieightQueue.Get(); - LocalTensor rightTopWeiightLocal = rightTopWieightQueue.Get(); - LocalTensor rightBottomWeightLocal = rightBottomWieightQueue.Get(); - - LocalTensor shapesLocal = shapeQueue.AllocTensor(); - LocalTensor offsetLocal = offsetQueue.AllocTensor(); - - LocalTensor locationLocal = 
locationQueue.AllocTensor(); - LocalTensor attentionWeightLocal = attentionWeightsUb.AllocTensor(); - - LocalTensor resLocal = outputQueue.AllocTensor(); - - LocalTensor xLocal = tmpXUb.Get(); - LocalTensor yLocal = tmpYUb.Get(); - - LocalTensor param0Local = tmpParam0Ub.Get(); - LocalTensor param1Local = tmpParam1Ub.Get(); - - LocalTensor x1Local = tmpIntX1Ub.Get(); - LocalTensor y1Local = tmpIntY1Ub.Get(); - - LocalTensor x0Local = tmpIntX0Ub.Get(); - LocalTensor y0Local = tmpIntY0Ub.Get(); - - LocalTensor tmpLocal1 = tmpUb1.Get(); - LocalTensor tmpLocal2 = tmpUb2.Get(); - LocalTensor tmpLocal3 = tmpUb3.Get(); - LocalTensor tmpLocal4 = tmpUb4.Get(); - - LocalTensor intOneLocal = intOneUb.Get(); - LocalTensor floatOneLocal = floatOneUb.Get(); - - Duplicate(intOneLocal, (DTYPE_VALUE_SPATIAL_SHAPES)1, numPointsAlign); - Duplicate(floatOneLocal, (DTYPE_VALUE)1, numPointsAlign); - DataCopyParams copyParams{1, (uint16_t)(embedDims * sizeof(DTYPE_VALUE)), 0, 0}; - - DataCopy(shapesLocal, valueSpatialShapesGm, AlignUp(numLevels * 2, dataAlign)); - DataCopy(offsetLocal, valueLevelStartIndexGm, numLevelsAlign); - Duplicate(resLocal, DTYPE_VALUE(0), embedDimsAlign); - moveOffset = batch * numQueries * numHeads * embedDims + query * numHeads * embedDims; - pipe_barrier(PIPE_ALL); - - for (uint32_t head = 0; head < numHeads; head++) - { - DataCopyPad(outputGm[moveOffset + head * embedDims], resLocal, copyParams); - } - pipe_barrier(PIPE_ALL); - - for (uint32_t head = 0; head < numHeads; head++) - { - weightOffset = (batch * numQueries * numHeads * numLevels + query * numHeads * numLevels + head * numLevels) * numPoints; - - pipe_barrier(PIPE_ALL); - - DataCopy(locationLocal, locationGm[weightOffset * 2], AlignUp(numLevels * numPoints * 2, dataAlign)); - DataCopy(attentionWeightLocal, attentionWeightsGm[weightOffset], AlignUp(numLevels * numPoints, dataAlign)); - - pipe_barrier(PIPE_ALL); - for (uint32_t level = 0; level < numLevels; level++) - { - h = shapesLocal.GetValue(level * 2); - w = shapesLocal.GetValue(level * 2 + 1); - for (uint32_t point = 0; point < numPoints; point++) - { - locationOffset = (level * numPoints + point) * 2; - xLocal.SetValue(point, locationLocal.GetValue(locationOffset)); - yLocal.SetValue(point, locationLocal.GetValue(locationOffset + 1)); - } - - pipe_barrier(PIPE_ALL); - - Muls(tmpLocal1, xLocal, (DTYPE_VALUE)w, numPointsAlign); - Muls(tmpLocal2, yLocal, (DTYPE_VALUE)h, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Adds(param0Local, tmpLocal1, (DTYPE_VALUE)0.5, numPointsAlign); - Adds(param1Local, tmpLocal2, (DTYPE_VALUE)0.5, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Cast(x1Local, param0Local, RoundMode::CAST_FLOOR, numPointsAlign); - Cast(y1Local, param1Local, RoundMode::CAST_FLOOR, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Adds(tmpLocal3, param0Local, (DTYPE_VALUE)-1, numPointsAlign); - Adds(tmpLocal4, param1Local, (DTYPE_VALUE)-1, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Sub(x0Local, x1Local, intOneLocal, numPointsAlign); - Sub(y0Local, y1Local, intOneLocal, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Cast(xLocal, x0Local, RoundMode::CAST_NONE, numPointsAlign); - Cast(yLocal, y0Local, RoundMode::CAST_NONE, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Sub(tmpLocal1, tmpLocal3, xLocal, numPointsAlign); - Sub(tmpLocal2, tmpLocal4, yLocal, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Abs(param0Local, tmpLocal1, numPointsAlign); - Abs(param1Local, tmpLocal2, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Sub(xLocal, floatOneLocal, param0Local, 
numPointsAlign); - Sub(yLocal, floatOneLocal, param1Local, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Mul(leftTopWeiightLocal, xLocal, yLocal, numPointsAlign); - Mul(leftBottomWeightLocal, xLocal, param1Local, numPointsAlign); - Mul(rightTopWeiightLocal, param0Local, yLocal, numPointsAlign); - Mul(rightBottomWeightLocal, param0Local, param1Local, numPointsAlign); - pipe_barrier(PIPE_ALL); - - Duplicate(resLocal, DTYPE_VALUE(0), embedDimsAlign); - - for (uint32_t point = 0; point < numPoints; point++) - { - Duplicate(leftTopValueLocal, DTYPE_VALUE(0), embedDimsAlign); - Duplicate(leftBottomValueUbLocal, DTYPE_VALUE(0), embedDimsAlign); - Duplicate(rightTopValueUbLocal, DTYPE_VALUE(0), embedDimsAlign); - Duplicate(rightBottomValueUbLocal, DTYPE_VALUE(0), embedDimsAlign); - - x0 = x0Local.GetValue(point); - y0 = y0Local.GetValue(point); - x1 = x1Local.GetValue(point); - y1 = y1Local.GetValue(point); - - valueOffset = batch * numKeys * numHeads + offsetLocal.GetValue(level) * numHeads + head; - pipe_barrier(PIPE_ALL); - - if (isInRange(x0, w)) - { - if (isInRange(y0, h)) - { - DataCopy(leftTopValueLocal, valueGm[(valueOffset + (y0 * w + x0) * numHeads) * embedDims], embedDimsAlign); - } - if (isInRange(y1, h)) - { - DataCopy(leftBottomValueUbLocal, valueGm[(valueOffset + (y1 * w + x0) * numHeads) * embedDims], embedDimsAlign); - } - } - if (isInRange(x1, w)) - { - if (isInRange(y0, h)) - { - DataCopy(rightTopValueUbLocal, valueGm[(valueOffset + (y0 * w + x1) * numHeads) * embedDims], embedDimsAlign); - } - if (isInRange(y1, h)) - { - DataCopy(rightBottomValueUbLocal, valueGm[(valueOffset + (y1 * w + x1) * numHeads) * embedDims], embedDimsAlign); - } - } - pipe_barrier(PIPE_ALL); - - Muls(leftTopValueLocal, leftTopValueLocal, leftTopWeiightLocal.GetValue(point), embedDimsAlign); - Muls(rightTopValueUbLocal, rightTopValueUbLocal, rightTopWeiightLocal.GetValue(point), embedDimsAlign); - Muls(leftBottomValueUbLocal, leftBottomValueUbLocal, leftBottomWeightLocal.GetValue(point), embedDimsAlign); - Muls(rightBottomValueUbLocal, rightBottomValueUbLocal, rightBottomWeightLocal.GetValue(point), embedDimsAlign); - pipe_barrier(PIPE_ALL); - Add(tmpResLocal, leftTopValueLocal, rightTopValueUbLocal, embedDimsAlign); - Add(tmpResLocal2, leftBottomValueUbLocal, rightBottomValueUbLocal, embedDimsAlign); - pipe_barrier(PIPE_ALL); - Add(tmpResLocal, tmpResLocal, tmpResLocal2, embedDimsAlign); - pipe_barrier(PIPE_ALL); - Muls(tmpResLocal, tmpResLocal, attentionWeightLocal.GetValue(level * numPoints + point), embedDimsAlign); - pipe_barrier(PIPE_ALL); - Add(resLocal, resLocal, tmpResLocal, embedDimsAlign); - } - pipe_barrier(PIPE_ALL); - - SetAtomicAdd(); - DataCopyPad(outputGm[moveOffset + head * embedDims], resLocal, copyParams); - SetAtomicNone(); - } - } - locationQueue.FreeTensor(locationLocal); - attentionWeightsUb.FreeTensor(attentionWeightLocal); - outputQueue.FreeTensor(resLocal); - shapeQueue.FreeTensor(shapesLocal); - offsetQueue.FreeTensor(offsetLocal); - } - -private: - TPipe pipe; - GlobalTensor valueGm, locationGm, attentionWeightsGm, outputGm; - GlobalTensor valueSpatialShapesGm, valueLevelStartIndexGm; - - TQue locationQueue, attentionWeightsUb, shapeQueue, offsetQueue; - TQue outputQueue; - - TBuf tmpResUb, tmpResUb2, tmpXUb, tmpYUb, tmpParam0Ub, tmpParam1Ub, tmpIntX0Ub, tmpIntY0Ub, tmpIntX1Ub, tmpIntY1Ub, tmpUb1, tmpUb2, tmpUb3, tmpUb4; - TBuf intOneUb, floatOneUb, leftTopValueUb, leftBottomValueUb, rightTopValueUb, rightBottomValueUb; - TBuf leftTopWieightQueue, leftBottomWieightQueue, 
rightTopWieightQueue, rightBottomWieightQueue; - - uint32_t batchSize; - uint32_t numKeys; - uint32_t numHeads; - uint32_t embedDims; - - uint32_t numLevels; - uint32_t numQueries; - uint32_t numPoints; - uint32_t coreNum; - - uint32_t embedDimsAlign; - uint32_t numPointsAlign; - uint32_t numLevelsAlign; - - uint32_t batch; - uint32_t query; - uint32_t head; - - uint32_t taskNum; - uint32_t taskNumPerCore; - uint32_t curBlockIdx; - uint32_t startOffset; - uint32_t endOffset; - uint32_t dataAlign; - uint32_t blockNum = 32; - - DTYPE_VALUE_SPATIAL_SHAPES h, w, x0, y0, x1, y1, valueOffset, weightOffset, locationOffset, moveOffset; -}; - -extern "C" __global__ __aicore__ void multi_scale_deformable_attn_function_v2(GM_ADDR value, - GM_ADDR value_spatial_shapes, - GM_ADDR value_level_start_index, - GM_ADDR sampling_locations, - GM_ADDR attention_weights, - GM_ADDR output, GM_ADDR workspace, GM_ADDR tiling) -{ - GET_TILING_DATA(tiling_data, tiling); - KernelMultiScaleDeformableAttnFunctionV2 op; - op.Init(value, value_spatial_shapes, value_level_start_index, - sampling_locations, attention_weights, output, &tiling_data); - op.Process(); -} diff --git a/ads/common/ops/onnx/plugin/CMakeLists.txt b/ads/common/ops/onnx/plugin/CMakeLists.txt deleted file mode 100644 index cc6034bd1fe09a766aef52f69cf0bb348ceaf2b5..0000000000000000000000000000000000000000 --- a/ads/common/ops/onnx/plugin/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -file(GLOB ONNX_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/*.h) -set(ASCEND_ONNX_SRC - ${ASCEND_ONNX_SRC} ${ONNX_SRC} - CACHE INTERNAL "") diff --git a/ads/motion/CMakeLists.txt b/ads/motion/CMakeLists.txt deleted file mode 100644 index 621d1fa961ab5258fcb066433827098ea7c4029f..0000000000000000000000000000000000000000 --- a/ads/motion/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ops/kernels) - add_subdirectory(ops/kernels) -endif() diff --git a/ads/motion/ops/kernels/op_kernel/CMakeLists.txt b/ads/motion/ops/CMakeLists.txt similarity index 100% rename from ads/motion/ops/kernels/op_kernel/CMakeLists.txt rename to ads/motion/ops/CMakeLists.txt diff --git a/ads/motion/ops/csrc/pybind.cpp b/ads/motion/ops/csrc/pybind.cpp deleted file mode 100644 index 7b362419232490a160ad94fba8eb8b1bb8336c22..0000000000000000000000000000000000000000 --- a/ads/motion/ops/csrc/pybind.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include -#include "csrc/pybind.h" - -void init_motion(pybind11::module& m) { -} diff --git a/ads/motion/ops/kernels/CMakeLists.txt b/ads/motion/ops/kernels/CMakeLists.txt deleted file mode 100644 index 179d9da23345abf75fb87954f266055922527742..0000000000000000000000000000000000000000 --- a/ads/motion/ops/kernels/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) - add_subdirectory(framework) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) - add_subdirectory(op_host) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) - add_subdirectory(op_kernel) -endif() -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() diff --git a/ads/motion/ops/kernels/op_host/CMakeLists.txt b/ads/motion/ops/kernels/op_host/CMakeLists.txt deleted file mode 100644 index c44b2b0174f28f0144a7c03fc6c40cc5b389c14e..0000000000000000000000000000000000000000 --- a/ads/motion/ops/kernels/op_host/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) 
-set(ASCEND_HOST_SRC - ${ASCEND_HOST_SRC} ${HOST_SRC} - CACHE INTERNAL "") -# add the exclude files for aclnn -set(aclop_exclude - ${aclop_exclude} "" - CACHE INTERNAL "") -file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) -file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) -set(ACLNN_SRC_CUSTOM - ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} - CACHE INTERNAL "") -set(ACLNN_INC_CUSTOM - ${ACLNN_INC_CUSTOM} ${ACLNN_INC} - CACHE INTERNAL "") diff --git a/ads/common/ops/onnx/__init__.py b/ads/motion/ops/pybind.cpp similarity index 100% rename from ads/common/ops/onnx/__init__.py rename to ads/motion/ops/pybind.cpp diff --git a/ads/perception/CMakeLists.txt b/ads/perception/CMakeLists.txt deleted file mode 100644 index c8777acb97a385c80e38554342341086c9cc3bdd..0000000000000000000000000000000000000000 --- a/ads/perception/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/fused/ops/kernels) - add_subdirectory(fused/ops/kernels) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/point/ops/kernels) - add_subdirectory(point/ops/kernels) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vision/ops/kernels) - add_subdirectory(vision/ops/kernels) -endif() diff --git a/ads/perception/fused/ops/CMakeLists.txt b/ads/perception/fused/ops/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ads/perception/fused/ops/csrc/pybind.cpp b/ads/perception/fused/ops/csrc/pybind.cpp deleted file mode 100644 index f37707a3d103f9e06d1391660b8ed59650ac9c9b..0000000000000000000000000000000000000000 --- a/ads/perception/fused/ops/csrc/pybind.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include -#include "csrc/pybind.h" - -void init_perception_fused(pybind11::module& m) { -} diff --git a/ads/perception/fused/ops/kernels/CMakeLists.txt b/ads/perception/fused/ops/kernels/CMakeLists.txt deleted file mode 100644 index b77ac594c4df44bf8700a3b2fa1867984111f27a..0000000000000000000000000000000000000000 --- a/ads/perception/fused/ops/kernels/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) - add_subdirectory(op_host) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) - add_subdirectory(op_kernel) -endif() -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() diff --git a/ads/perception/fused/ops/kernels/op_host/CMakeLists.txt b/ads/perception/fused/ops/kernels/op_host/CMakeLists.txt deleted file mode 100644 index 75a458e050d95c942754bc7a65a55ef44c004832..0000000000000000000000000000000000000000 --- a/ads/perception/fused/ops/kernels/op_host/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) -set( - ${ASCEND_HOST_SRC} ${HOST_SRC} - CACHE INTERNAL "") - -# add the exclude files for aclnn -set(aclop_exclude - ${aclop_exclude} "" - CACHE INTERNAL "") -file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) -file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) -set(ACLNN_SRC_CUSTOM - ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} - CACHE INTERNAL "") -set(ACLNN_INC_CUSTOM - ${ACLNN_INC_CUSTOM} ${ACLNN_INC} - CACHE INTERNAL "") diff --git a/ads/perception/fused/ops/pybind.cpp b/ads/perception/fused/ops/pybind.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ads/perception/point/ops/CMakeLists.txt b/ads/perception/point/ops/CMakeLists.txt new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ads/perception/point/ops/csrc/pybind.cpp b/ads/perception/point/ops/csrc/pybind.cpp deleted file mode 100644 index 35d19d8e7077103690c4f8843174fab581f006f2..0000000000000000000000000000000000000000 --- a/ads/perception/point/ops/csrc/pybind.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include -#include "csrc/pybind.h" - -void init_perception_point(pybind11::module& m) { -} - diff --git a/ads/perception/point/ops/kernels/CMakeLists.txt b/ads/perception/point/ops/kernels/CMakeLists.txt deleted file mode 100644 index 179d9da23345abf75fb87954f266055922527742..0000000000000000000000000000000000000000 --- a/ads/perception/point/ops/kernels/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) - add_subdirectory(framework) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) - add_subdirectory(op_host) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) - add_subdirectory(op_kernel) -endif() -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() diff --git a/ads/perception/point/ops/kernels/op_host/CMakeLists.txt b/ads/perception/point/ops/kernels/op_host/CMakeLists.txt deleted file mode 100644 index 75a458e050d95c942754bc7a65a55ef44c004832..0000000000000000000000000000000000000000 --- a/ads/perception/point/ops/kernels/op_host/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) -set( - ${ASCEND_HOST_SRC} ${HOST_SRC} - CACHE INTERNAL "") - -# add the exclude files for aclnn -set(aclop_exclude - ${aclop_exclude} "" - CACHE INTERNAL "") -file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) -file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) -set(ACLNN_SRC_CUSTOM - ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} - CACHE INTERNAL "") -set(ACLNN_INC_CUSTOM - ${ACLNN_INC_CUSTOM} ${ACLNN_INC} - CACHE INTERNAL "") diff --git a/ads/perception/point/ops/pybind.cpp b/ads/perception/point/ops/pybind.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ads/perception/vision/ops/CMakeLists.txt b/ads/perception/vision/ops/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ads/perception/vision/ops/csrc/pybind.cpp b/ads/perception/vision/ops/csrc/pybind.cpp deleted file mode 100644 index 59057f96959cd3baf2e9d7e92196664f9e895d43..0000000000000000000000000000000000000000 --- a/ads/perception/vision/ops/csrc/pybind.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include -#include "csrc/pybind.h" - -void init_perception_vision(pybind11::module& m) { -} diff --git a/ads/perception/vision/ops/kernels/CMakeLists.txt b/ads/perception/vision/ops/kernels/CMakeLists.txt deleted file mode 100644 index 179d9da23345abf75fb87954f266055922527742..0000000000000000000000000000000000000000 --- a/ads/perception/vision/ops/kernels/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) - add_subdirectory(framework) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) - add_subdirectory(op_host) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) - add_subdirectory(op_kernel) -endif() -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() diff --git a/ads/perception/vision/ops/kernels/op_host/CMakeLists.txt 
b/ads/perception/vision/ops/kernels/op_host/CMakeLists.txt deleted file mode 100644 index 75a458e050d95c942754bc7a65a55ef44c004832..0000000000000000000000000000000000000000 --- a/ads/perception/vision/ops/kernels/op_host/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -file(GLOB HOST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h) -set( - ${ASCEND_HOST_SRC} ${HOST_SRC} - CACHE INTERNAL "") - -# add the exclude files for aclnn -set(aclop_exclude - ${aclop_exclude} "" - CACHE INTERNAL "") -file(GLOB ACLNN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.cpp) -file(GLOB ACLNN_INC ${CMAKE_CURRENT_SOURCE_DIR}/aclnn*.h) -set(ACLNN_SRC_CUSTOM - ${ACLNN_SRC_CUSTOM} ${ACLNN_SRC} - CACHE INTERNAL "") -set(ACLNN_INC_CUSTOM - ${ACLNN_INC_CUSTOM} ${ACLNN_INC} - CACHE INTERNAL "") diff --git a/ads/perception/vision/ops/pybind.cpp b/ads/perception/vision/ops/pybind.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bind/pybind.cpp b/bind/pybind.cpp index d0d5eba42e2617241c8092889ace8491d3175d01..351c837ee0e6e8ecd6edb5c2164558389e2f8dee 100644 --- a/bind/pybind.cpp +++ b/bind/pybind.cpp @@ -1,5 +1,5 @@ #include -#include "csrc/pybind.h" +#include "../ads/common/ops/csrc/functions.h" PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { init_common(m); diff --git a/ci/build.sh b/ci/build.sh index 27cc6447103ceeaa31d0c5bde1b6aa5c18249ee6..4d8a2a3a1bc580a0e0d177b28756d19893b47041 100644 --- a/ci/build.sh +++ b/ci/build.sh @@ -1,7 +1,5 @@ # Copyright 2023 Huawei Technologies Co., Ltd CUR_DIR=$(dirname $(readlink -f $0)) -SCRIPTS_DIR=${CUR_DIR}/../scripts -BUILD_PACKAGES_DIR=${CUR_DIR}/../build_out/packages SUPPORTED_PY_VERSION=(3.7 3.8 3.9 3.10) PY_VERSION='3.7' DEFAULT_SCRIPT_ARGS_NUM=1 @@ -85,16 +83,35 @@ function main() else echo "ASCEND_OPP_PATH = $ASCEND_OPP_PATH" fi - chmod -R 777 ${SCRIPTS_DIR} - bash ${SCRIPTS_DIR}/build_kernel.sh - + chmod -R 777 ${CUR_DIR}/.. + cd ${CUR_DIR}/../ads/common/ops/kernels/ads_op + bash build.sh + cd ${CUR_DIR}/../ads/common/ops/kernels/ads_op/build_out + cp custom_opp_*.run ${CUR_DIR}/../ads/common/ops/kernels + cd .. + rm -rf build_out + cd .. + + + ./custom_opp_*.run --extract=./ads_op_kernel + rm -rf custom_opp_*.run + if [ -d "ads_op_kernel/" ]; then + echo "kernel compile success" + else + echo "kernel did not compile success" + exit 1 + fi + cd ${CUR_DIR}/../ads/common/ops/kernels/ads_op_kernel + rm -rf install.sh + rm -rf upgrade.sh + cd ${CUR_DIR}/.. rm -rf build - if [ -d "ads_accelerator.egg-info" ]; then - echo "ads_accelerator.egg-info exist" - rm -rf ads_accelerator.egg-info + if [ -d "ads.egg-info" ]; then + echo "ads.egg-info exist" + rm -rf ads.egg-info else - echo "ads_accelerator.egg-info not exist" + echo "ads.egg-info not exist" fi if ! parse_script_args "$@"; then @@ -104,6 +121,7 @@ function main() check_python_version + cd ${CUR_DIR}/.. python"${PY_VERSION}" setup.py build bdist_wheel if [ $? != 0 ]; then echo "Failed to compile the wheel file. Please check the source code by yourself." 
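The reworked ci/build.sh above drops the old scripts/build_kernel.sh helper and inlines the kernel packaging steps: build the AscendC custom-op project under ads/common/ops/kernels/ads_op, copy the generated custom_opp_*.run self-extracting archive up next to the kernels directory, extract it into ads_op_kernel, remove the installer helpers, and fail the CI run if the extracted directory is missing. The following is a minimal Python sketch of that same sequence, assuming the script would live alongside ci/build.sh and that the ads_op/build.sh entry point and custom_opp_*.run naming from the shell script apply; it is illustrative only, not part of the repository.

```python
#!/usr/bin/env python3
"""Illustrative sketch of the kernel packaging steps performed by ci/build.sh."""
import glob
import shutil
import subprocess
import sys
from pathlib import Path

# Assumes this sketch sits in ci/, like build.sh, so the repo root is one level up.
KERNELS = Path(__file__).resolve().parent.parent / "ads" / "common" / "ops" / "kernels"


def build_kernel_package() -> None:
    ads_op = KERNELS / "ads_op"

    # 1. Build the AscendC custom-op package (produces build_out/custom_opp_*.run).
    subprocess.run(["bash", "build.sh"], cwd=ads_op, check=True)

    # 2. Copy the self-extracting archive next to the kernels directory, drop build_out.
    runs = glob.glob(str(ads_op / "build_out" / "custom_opp_*.run"))
    if not runs:
        sys.exit("kernel did not compile successfully")
    run_pkg = KERNELS / Path(runs[0]).name
    shutil.copy(runs[0], run_pkg)
    shutil.rmtree(ads_op / "build_out", ignore_errors=True)

    # 3. Extract the archive into ads_op_kernel and remove the installer helpers.
    subprocess.run(["bash", run_pkg.name, "--extract=./ads_op_kernel"],
                   cwd=KERNELS, check=True)
    run_pkg.unlink()
    for helper in ("install.sh", "upgrade.sh"):
        helper_path = KERNELS / "ads_op_kernel" / helper
        if helper_path.exists():
            helper_path.unlink()


if __name__ == "__main__":
    build_kernel_package()
```

As in the shell version, a missing ads_op_kernel output is treated as a hard failure so the subsequent wheel build never packages a tree without compiled kernels.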
diff --git a/cmake/func.cmake b/cmake/func.cmake deleted file mode 100644 index 3bd391552ba1f4d0d71e3b18819e247bab293d9a..0000000000000000000000000000000000000000 --- a/cmake/func.cmake +++ /dev/null @@ -1,233 +0,0 @@ -function(install_target) - cmake_parse_arguments(INSTALL_TARGET "" "DST;TRG" "" ${ARGN}) - set_target_properties( - ${INSTALL_TARGET_TRG} - PROPERTIES LIBRARY_OUTPUT_DIRECTORY - ${ADS_PATH}/${INSTALL_TARGET_DST}) - install(TARGETS ${INSTALL_TARGET_TRG} - LIBRARY DESTINATION ${INSTALL_TARGET_DST}) -endfunction() - -function(install_file) - cmake_parse_arguments(INSTALL_TARGET "" "DST;SRC;TRG" "" ${ARGN}) - file(MAKE_DIRECTORY ${ADS_PATH}/${INSTALL_TARGET_DST}) - foreach(SOURCE_FILE ${INSTALL_TARGET_SRC}) - add_custom_command( - TARGET ${INSTALL_TARGET_TRG} - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${SOURCE_FILE} - ${ADS_PATH}/${INSTALL_TARGET_DST}) - endforeach() - install(FILES ${INSTALL_TARGET_SRC} DESTINATION ${INSTALL_TARGET_DST}) -endfunction() - -function(get_system_info SYSTEM_INFO) - if(UNIX) - execute_process(COMMAND grep -i ^id= /etc/os-release OUTPUT_VARIABLE TEMP) - string(REGEX REPLACE "\n|id=|ID=|\"" "" SYSTEM_NAME ${TEMP}) - set(${SYSTEM_INFO} - ${SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR} - PARENT_SCOPE) - elseif(WIN32) - message(STATUS "System is Windows. Only for pre-build.") - else() - message(FATAL_ERROR "${CMAKE_SYSTEM_NAME} not support.") - endif() -endfunction() - -function(opbuild) - message(STATUS "Opbuild generating sources") - cmake_parse_arguments(OPBUILD "" "OUT_DIR;PROJECT_NAME;ACCESS_PREFIX" - "OPS_SRC" ${ARGN}) - execute_process( - COMMAND - ${CMAKE_COMPILE} -g -fPIC -shared -std=c++11 ${OPBUILD_OPS_SRC} - -D_GLIBCXX_USE_CXX11_ABI=0 -I ${ASCEND_CANN_PACKAGE_PATH}/include -L - ${ASCEND_CANN_PACKAGE_PATH}/lib64 -lexe_graph -lregister -ltiling_api -o - ${OPBUILD_OUT_DIR}/libascend_all_ops.so - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR) - if(${EXEC_RESULT}) - message("build ops lib info: ${EXEC_INFO}") - message("build ops lib error: ${EXEC_ERROR}") - message(FATAL_ERROR "opbuild run failed!") - endif() - set(proj_env "") - set(prefix_env "") - if(NOT "${OPBUILD_PROJECT_NAME}x" STREQUAL "x") - set(proj_env "OPS_PROJECT_NAME=${OPBUILD_PROJECT_NAME}") - endif() - if(NOT "${OPBUILD_ACCESS_PREFIX}x" STREQUAL "x") - set(prefix_env "OPS_DIRECT_ACCESS_PREFIX=${OPBUILD_ACCESS_PREFIX}") - endif() - execute_process( - COMMAND - ${proj_env} ${prefix_env} - ${ASCEND_CANN_PACKAGE_PATH}/toolkit/tools/opbuild/op_build - ${OPBUILD_OUT_DIR}/libascend_all_ops.so ${OPBUILD_OUT_DIR} - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR) - if(${EXEC_RESULT}) - message("opbuild ops info: ${EXEC_INFO}") - message("opbuild ops error: ${EXEC_ERROR}") - endif() - message(STATUS "Opbuild generating sources - done") -endfunction() - -function(add_ops_info_target) - cmake_parse_arguments(OPINFO "" "TARGET;OPS_INFO;OUTPUT;INSTALL_DIR" "" - ${ARGN}) - get_filename_component(opinfo_file_path "${OPINFO_OUTPUT}" DIRECTORY) - add_custom_command( - OUTPUT ${OPINFO_OUTPUT} - COMMAND mkdir -p ${opinfo_file_path} - COMMAND - ${ASCEND_PYTHON_EXECUTABLE} - ${CMAKE_SOURCE_DIR}/cmake/util/parse_ini_to_json.py ${OPINFO_OPS_INFO} - ${OPINFO_OUTPUT}) - add_custom_target(${OPINFO_TARGET} ALL DEPENDS ${OPINFO_OUTPUT}) - install(FILES ${OPINFO_OUTPUT} DESTINATION ${OPINFO_INSTALL_DIR}) -endfunction() - -function(add_ops_compile_options OP_TYPE) - cmake_parse_arguments(OP_COMPILE "" "OP_TYPE" "COMPUTE_UNIT;OPTIONS" 
${ARGN}) - file(APPEND ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} - "${OP_TYPE},${OP_COMPILE_COMPUTE_UNIT},${OP_COMPILE_OPTIONS}\n") -endfunction() - -function(add_ops_impl_target) - cmake_parse_arguments(OPIMPL "" "TARGET;OPS_INFO;IMPL_DIR;OUT_DIR" - "OPS_BATCH;OPS_ITERATE" ${ARGN}) - add_custom_command( - OUTPUT ${OPIMPL_OUT_DIR}/.impl_timestamp - COMMAND mkdir -m 700 -p ${OPIMPL_OUT_DIR}/dynamic - COMMAND - ${ASCEND_PYTHON_EXECUTABLE} - ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py ${OPIMPL_OPS_INFO} - \"${OPIMPL_OPS_BATCH}\" \"${OPIMPL_OPS_ITERATE}\" ${OPIMPL_IMPL_DIR} - ${OPIMPL_OUT_DIR}/dynamic ${ASCEND_AUTOGEN_PATH} - COMMAND rm -rf ${OPIMPL_OUT_DIR}/.impl_timestamp - COMMAND touch ${OPIMPL_OUT_DIR}/.impl_timestamp - DEPENDS ${OPIMPL_OPS_INFO} - ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py) - add_custom_target(${OPIMPL_TARGET} ALL - DEPENDS ${OPIMPL_OUT_DIR}/.impl_timestamp) -endfunction() - -function(add_npu_support_target) - cmake_parse_arguments(NPUSUP "" "TARGET;OPS_INFO_DIR;OUT_DIR;INSTALL_DIR" "" - ${ARGN}) - get_filename_component(npu_sup_file_path "${NPUSUP_OUT_DIR}" DIRECTORY) - add_custom_command( - OUTPUT ${NPUSUP_OUT_DIR}/npu_supported_ops.json - COMMAND mkdir -p ${NPUSUP_OUT_DIR} - COMMAND bash ${CMAKE_SOURCE_DIR}/cmake/util/gen_ops_filter.sh - ${NPUSUP_OPS_INFO_DIR} ${NPUSUP_OUT_DIR}) - add_custom_target(npu_supported_ops ALL - DEPENDS ${NPUSUP_OUT_DIR}/npu_supported_ops.json) - install(FILES ${NPUSUP_OUT_DIR}/npu_supported_ops.json - DESTINATION ${NPUSUP_INSTALL_DIR}) -endfunction() - -function(add_bin_compile_target) - cmake_parse_arguments( - BINCMP - "" - "TARGET;OPS_INFO;COMPUTE_UNIT;IMPL_DIR;ADP_DIR;OUT_DIR;INSTALL_DIR;KERNEL_DIR" - "" - ${ARGN}) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/src) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/gen) - file(MAKE_DIRECTORY ${BINCMP_KERNEL_DIR}/config/${BINCMP_COMPUTE_UNIT}) - execute_process( - COMMAND - ${ASCEND_PYTHON_EXECUTABLE} - ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_bin_param_build.py - ${BINCMP_OPS_INFO} ${BINCMP_OUT_DIR}/gen ${BINCMP_COMPUTE_UNIT} - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR) - if(${EXEC_RESULT}) - message("ops binary compile scripts gen info: ${EXEC_INFO}") - message("ops binary compile scripts gen error: ${EXEC_ERROR}") - message(FATAL_ERROR "ops binary compile scripts gen failed!") - endif() - add_custom_target(${BINCMP_TARGET} COMMAND cp -r ${BINCMP_IMPL_DIR}/*.* - ${BINCMP_OUT_DIR}/src) - add_custom_target( - ${BINCMP_TARGET}_gen_ops_config ALL - COMMAND - ${ASCEND_PYTHON_EXECUTABLE} - ${CMAKE_SOURCE_DIR}/cmake/util/insert_simplified_keys.py -p - ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT} - COMMAND - ${ASCEND_PYTHON_EXECUTABLE} - ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_ops_config.py -p - ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT} -s ${BINCMP_COMPUTE_UNIT}) - file(GLOB bin_scripts ${BINCMP_OUT_DIR}/gen/*.sh) - foreach(bin_script ${bin_scripts}) - get_filename_component(bin_file ${bin_script} NAME_WE) - string(REPLACE "-" ";" bin_sep ${bin_file}) - list(GET bin_sep 0 op_type) - list(GET bin_sep 1 op_file) - list(GET bin_sep 2 op_index) - if(NOT TARGET ${BINCMP_TARGET}_${op_file}_copy) - add_custom_target( - ${BINCMP_TARGET}_${op_file}_copy - COMMAND cp ${BINCMP_ADP_DIR}/${op_file}.py - ${BINCMP_OUT_DIR}/src/${op_type}.py - DEPENDS ascendc_impl_gen) - install( - DIRECTORY ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT}/${op_file} - DESTINATION ${BINCMP_INSTALL_DIR}/${BINCMP_COMPUTE_UNIT} - OPTIONAL) - install( - FILES 
${BINCMP_KERNEL_DIR}/config/${BINCMP_COMPUTE_UNIT}/${op_file}.json - DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT} - OPTIONAL) - endif() - add_custom_target( - ${BINCMP_TARGET}_${op_file}_${op_index} - COMMAND - export HI_PYTHON=${ASCEND_PYTHON_EXECUTABLE} && export - ASCEND_CUSTOM_OPP_PATH=${ADS_PATH}/packages/vendors/${vendor_name} - && bash ${bin_script} ${BINCMP_OUT_DIR}/src/${op_type}.py - ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT}/${op_file} - WORKING_DIRECTORY ${BINCMP_OUT_DIR}) - add_dependencies(${BINCMP_TARGET}_${op_file}_${op_index} ${BINCMP_TARGET} - ${BINCMP_TARGET}_${op_file}_copy) - add_dependencies(${BINCMP_TARGET}_gen_ops_config - ${BINCMP_TARGET}_${op_file}_${op_index}) - endforeach() - add_custom_command( - TARGET ${BINCMP_TARGET}_gen_ops_config - POST_BUILD - COMMAND mv ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT}/*.json - ${BINCMP_KERNEL_DIR}/config/${BINCMP_COMPUTE_UNIT}) - install( - FILES - ${BINCMP_KERNEL_DIR}/config/${BINCMP_COMPUTE_UNIT}/binary_info_config.json - DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT} - OPTIONAL) -endfunction() - -function(protobuf_generate) - cmake_parse_arguments(PROTOBUF_GEN "" "PROTO_FILE;OUT_DIR" "" ${ARGN}) - set(OUT_DIR ${PROTOBUF_GEN_OUT_DIR}/proto/onnx) - file(MAKE_DIRECTORY ${OUT_DIR}) - get_filename_component(file_name ${PROTOBUF_GEN_PROTO_FILE} NAME_WE) - get_filename_component(file_dir ${PROTOBUF_GEN_PROTO_FILE} PATH) - execute_process( - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND protoc -I${file_dir} --cpp_out=${OUT_DIR} ${PROTOBUF_GEN_PROTO_FILE} - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR) - if(${EXEC_RESULT}) - message("protobuf gen info: ${EXEC_INFO}") - message("protobuf gen error: ${EXEC_ERROR}") - message(FATAL_ERROR "protobuf gen failed!") - endif() -endfunction() diff --git a/cmake/makeself.cmake b/cmake/makeself.cmake deleted file mode 100644 index 1f4fa76c297ae7650fb3a617f49fd3c615940aa2..0000000000000000000000000000000000000000 --- a/cmake/makeself.cmake +++ /dev/null @@ -1,19 +0,0 @@ -execute_process(COMMAND chmod +x - ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh) -execute_process( - COMMAND - ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh --header - ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself-header.sh --help-header - ./help.info --gzip --complevel 4 --nomd5 --sha256 ./ - ${CPACK_PACKAGE_FILE_NAME} "version:1.0" ./install.sh - WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} - RESULT_VARIABLE EXEC_RESULT - ERROR_VARIABLE EXEC_ERROR) -if(NOT "${EXEC_RESULT}x" STREQUAL "0x") - message(FATAL_ERROR "CPack Command error: ${EXEC_RESULT}\n${EXEC_ERROR}") -endif() -execute_process( - COMMAND cp ${CPACK_EXTERNAL_BUILT_PACKAGES} ${CPACK_PACKAGE_DIRECTORY}/ - COMMAND echo - "Copy ${CPACK_EXTERNAL_BUILT_PACKAGES} to ${CPACK_PACKAGE_DIRECTORY}/" - WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY}) diff --git a/include/csrc/pybind.h b/include/csrc/pybind.h deleted file mode 100644 index b997451f696370aa565addcef1c97a5cd4e2705e..0000000000000000000000000000000000000000 --- a/include/csrc/pybind.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef CSRC_PYBIND_H_ -#define CSRC_PYBIND_H_ -#include -void init_common(pybind11::module& m); -void init_motion(pybind11::module& m); -void init_percention_fused(pybind11::module& m); -void init_perception_point(pybind11::module& m); -void init_perception_vision(pybind11::module& m); -#endif // CSRC_PYBIND_H_ diff --git a/include/onnx/common.h b/include/onnx/common.h deleted file mode 100644 index 
3c16c44f00b967110fa4186c779c8debfe68b1d6..0000000000000000000000000000000000000000 --- a/include/onnx/common.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef ADS_ONNX_H_ -#define ADS_ONNX_H_ -#include "proto/onnx/ge_onnx.pb.h" -#endif // ADS_ONNX_H_ diff --git a/scripts/build_kernel.sh b/scripts/build_kernel.sh deleted file mode 100644 index c08e2ecab485515c585cfd54518f6f2ead1d51e6..0000000000000000000000000000000000000000 --- a/scripts/build_kernel.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -script_path=$(realpath $(dirname $0)) -root_path=$(realpath $script_path/..) -rm -rf build_out -mkdir build_out -cd build_out - -if [ $ASCEND_AICPU_PATH ]; then - jq --arg field "configurePresets" --arg value "$ASCEND_AICPU_PATH" '.[$field][0].cacheVariables.ASCEND_CANN_PACKAGE_PATH.value = $value' $root_path/CMakePresets.json >$root_path/CMakePresets_bat.json - - if [ $? -eq 0 ]; then - mv $root_path/CMakePresets_bat.json $root_path/CMakePresets.json -f - else - echo "Error: please install jq with yum or apt-get" - exit 1 - fi -else - echo "Error: please source env.sh" - exit 1 -fi - -cmake_version=$(cmake --version | grep "cmake version" | awk '{print $3}') -if [ "$cmake_version" \< "3.19.0" ]; then - opts=$(python3 $root_path/cmake/util/preset_parse.py $root_path/CMakePresets.json) - echo $opts - cmake .. $opts -else - cmake .. --preset=default -fi - -cmake --build . -j16 -if [ $? -ne 0 ]; then exit 1; fi diff --git a/setup.py b/setup.py index 5205a506655790061e072658bdfce379442ce53b..6dea124dd06bbcde9d15a17b282b62a00bb6d2f2 100644 --- a/setup.py +++ b/setup.py @@ -1,34 +1,34 @@ -import glob import os +import glob import subprocess from pathlib import Path from typing import Union - import torch -from setuptools import find_packages, setup +from setuptools import setup, find_packages from torch.utils.cpp_extension import BuildExtension - from utils import extension +import imp -BASE_DIR = os.path.dirname(os.path.realpath(__file__)) -VERSION = torch.__version__ -full_components = ["common", "motion", "perception/fused", "perception/point", "perception/vision"] -source_file = glob.glob(os.path.join("./bind/", "*.cpp")) -include_dirs = [os.path.join(BASE_DIR, "include")] -for part in full_components: - source_file += glob.glob(os.path.join(f"./ads/{part}/ops/csrc/", "*.cpp")) +from torch.utils.cpp_extension import BuildExtension +torch_npu_dir = extension.PYTORCH_NPU_INSTALL_PATH + +source_file = [] +source_file += glob.glob(os.path.join("./ads/common/ops/csrc/", "*.cpp")) +source_file += glob.glob(os.path.join("./bind/", "*.cpp")) + +include_dirs = [] +include_dirs.append(torch_npu_dir + "/include/third_party/acl/inc/") exts = [] ext1 = extension.NpuExtension( name="ads_c", sources=source_file, - include_dirs=include_dirs, extra_compile_args=[ - '-D__FILENAME__="$$(notdir $$(abspath $$<))"', - "-fprofile-arcs", - "-ftest-coverage", - ], - libraries=["gcov"], + '-D__FILENAME__=\"$$(notdir $$(abspath $$<))\"', + '-I' + imp.find_module('torch_npu')[1] + "/include/third_party/acl/inc", + '-fprofile-arcs', + '-ftest-coverage'], + libraries=['gcov'], ) exts.append(ext1) @@ -36,11 +36,14 @@ exts.append(ext1) def get_sha(pytorch_root: Union[str, Path]) -> str: try: return ( - subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=pytorch_root).decode("ascii").strip() # Compliant + subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=pytorch_root) # Compliant + .decode("ascii") + .strip() ) except Exception: return "Unknown" +BASE_DIR = os.path.dirname(os.path.realpath(__file__)) VERSION = "1.0-" + 
torch.__version__[0:6] torch_npu_root = Path(__file__).parent sha = get_sha(torch_npu_root) @@ -50,11 +53,11 @@ if not os.getenv("BUILD_WITHOUT_SHA"): setup( name="ads_accelerator", version=VERSION, - description="Cpp Extension Include ascend_ads", - keywords="ads", + description='Cpp Extension Include ascend_ads', + keywords='ads', ext_modules=exts, - author="Ascend Contributors", + author='Ascend Contributors', cmdclass={"build_ext": BuildExtension}, packages=find_packages(), include_package_data=True, -) +) \ No newline at end of file diff --git a/third_party/README.md b/third_party/README.md new file mode 100644 index 0000000000000000000000000000000000000000..72a48a338b8aa3f4966c21dd2829bfab52c76461 --- /dev/null +++ b/third_party/README.md @@ -0,0 +1,2 @@ +## Description ++ The folder contains some third_party libraries. \ No newline at end of file
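For context on the setup.py changes above: the extension is now compiled from ads/common/ops/csrc plus bind/pybind.cpp only, picks up the ACL headers bundled with torch_npu, and is exposed as the ads_c module inside the ads_accelerator wheel. Below is a minimal usage sketch, assuming the wheel has been built and installed as in ci/build.sh and that torch_npu is importable; the final attribute listing is purely illustrative, since the exact symbols exported by ads_c depend on what init_common registers.

```python
# Build and install the wheel first (mirrors the ci/build.sh invocation):
#   python3.8 setup.py build bdist_wheel
#   pip install dist/ads_accelerator-*.whl
import torch       # torch must be importable before the extension loads
import torch_npu   # noqa: F401  assumed runtime dependency; the ACL headers come from this package

import ads_c       # the pybind11 module defined in bind/pybind.cpp

# init_common(m) registers the common operator bindings on the module;
# listing the public names confirms the extension was built and loaded.
print([name for name in dir(ads_c) if not name.startswith("_")])
```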