diff --git a/torch_npu/csrc/profiler/init.cpp b/torch_npu/csrc/profiler/init.cpp
index 94ad31d5a68a3e87af53020549b697740b56747e..e2427f8b8ec4d3d6f9802b1e1ab3315383d75253 100644
--- a/torch_npu/csrc/profiler/init.cpp
+++ b/torch_npu/csrc/profiler/init.cpp
@@ -16,6 +16,7 @@
 #include "torch_npu/csrc/profiler/profiler_python.h"
 #include "torch_npu/csrc/profiler/npu_profiler.h"
 #include "torch_npu/csrc/toolkit/profiler/common/utils.h"
+#include "torch_npu/csrc/toolkit/profiler/inc/mspti_adapter.h"
 #include "torch_npu/csrc/framework/interface/LibAscendHal.h"
 #include "torch_npu/csrc/core/npu/NPUException.h"
 
@@ -86,6 +87,10 @@ PyObject* profiler_initExtension(PyObject* _unused, PyObject *unused) {
     m.def("_get_monotonic", torch_npu::toolkit::profiler::Utils::GetClockMonotonicRawNs);
     m.def("_get_host_uid", torch_npu::toolkit::profiler::Utils::GetHostUid);
 
+    // mspti: native enable/disable switches exposed to Python
+    m.def("_mspti_enable", torch_npu::toolkit::profiler::MsptiEnable);
+    m.def("_mspti_disable", torch_npu::toolkit::profiler::MsptiDisable);
+
     torch_npu::profiler::python_tracer::init();
     Py_RETURN_TRUE;
 }
diff --git a/torch_npu/csrc/toolkit/profiler/CMakeLists.txt b/torch_npu/csrc/toolkit/profiler/CMakeLists.txt
index 0ddc517c6e7434e2c761692b0ae4121079f9e14a..319c27c2327944a3a063664d04b8dcfe000382a1 100644
--- a/torch_npu/csrc/toolkit/profiler/CMakeLists.txt
+++ b/torch_npu/csrc/toolkit/profiler/CMakeLists.txt
@@ -1,5 +1,13 @@
 set(PROFILER_NAME npu_profiler)
 
+# Ascend toolkit root used for the include/ and lib64/ paths below; a non-empty
+# ASCEND_HOME_PATH environment variable overrides the built-in default.
+set(DEFAULT_ASCEND_HOME_PATH "/usr/local/Ascend/ascend_toolkit/latest")
+if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "")
+    set(DEFAULT_ASCEND_HOME_PATH "$ENV{ASCEND_HOME_PATH}")  # quoted: spaces or ';' must not split the path
+endif()
+message(STATUS "Ascend toolkit path: ${DEFAULT_ASCEND_HOME_PATH}")
+
 FILE(GLOB NPU_PROF_SRCS
     common/*.cpp
     src/*.cpp
@@ -15,9 +23,14 @@ add_library(${PROFILER_NAME} SHARED
 
 target_include_directories(${PROFILER_NAME} PRIVATE
     ${NPU_PROF_INC}
+    ${DEFAULT_ASCEND_HOME_PATH}/include
+)
+
+target_link_directories(${PROFILER_NAME} PRIVATE  # presumably where libmspti/libascendcl live — TODO confirm
+    ${DEFAULT_ASCEND_HOME_PATH}/lib64
 )
 
-target_link_libraries(${PROFILER_NAME} PRIVATE torch_cpu pthread)
+target_link_libraries(${PROFILER_NAME} PRIVATE torch_cpu pthread mspti ascendcl)  # NOTE(review): hard link dependency on mspti/ascendcl — confirm both ship with every supported toolkit
 
 target_compile_options(${PROFILER_NAME} PRIVATE
     ${TORCH_CXX_FLAGS}
diff --git a/torch_npu/csrc/toolkit/profiler/inc/mspti_adapter.h b/torch_npu/csrc/toolkit/profiler/inc/mspti_adapter.h
new file mode 100644
index 0000000000000000000000000000000000000000..b4b71d78974bc4947322d287b518f042c233e658
--- /dev/null
+++ b/torch_npu/csrc/toolkit/profiler/inc/mspti_adapter.h
@@ -0,0 +1,12 @@
+#pragma once
+
+namespace torch_npu {
+namespace toolkit {
+namespace profiler {
+
+void MsptiEnable();   // subscribes to MSPTI runtime callbacks and enables marker/API/kernel activity records
+void MsptiDisable();  // disables them again, flushes pending activity buffers and prints per-kind record totals
+
+} // namespace profiler
+} // namespace toolkit
+} // namespace torch_npu
\ No newline at end of file
diff --git a/torch_npu/csrc/toolkit/profiler/src/mspti_adapter.cpp b/torch_npu/csrc/toolkit/profiler/src/mspti_adapter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ade16b285624f51a036d26e9a1356c60379d706
--- /dev/null
+++ b/torch_npu/csrc/toolkit/profiler/src/mspti_adapter.cpp
@@ -0,0 +1,116 @@
+#include "torch_npu/csrc/toolkit/profiler/inc/mspti_adapter.h"
+
+#include "mspti/mspti.h"
+#include "mstx/ms_tools_ext.h"
+#include "acl/acl_rt.h"
+
+#include <atomic>
+#include <iostream>
+#include <stdint.h>
+
+namespace torch_npu {
+namespace toolkit {
+namespace profiler {
+
+// Subscriber handle obtained from msptiSubscribe; nullptr while no subscription is active.
+msptiSubscriberHandle g_client{nullptr};
+// Activity records counted by UserBufferComplete, per kind. They are never reset, so the
+// totals printed by MsptiDisable accumulate across enable/disable cycles.
+std::atomic<uint64_t> g_totalApi{0};
+std::atomic<uint64_t> g_totalKernel{0};
+std::atomic<uint64_t> g_totalMarker{0};
+
+// MSPTI buffer-request callback: supplies a 5 MiB heap buffer for activity records, which
+// UserBufferComplete later frees. If malloc fails it reports a null buffer of size 0.
+// NOTE(review): maxNumRecords = 0 presumably means "no record limit" — confirm in the MSPTI docs.
+void UserBufferRequest(uint8_t **buffer, size_t *size, size_t *maxNumRecords) {
+    constexpr size_t kBufferSize = 5 * 1024 * 1024;
+    *buffer = static_cast<uint8_t*>(malloc(kBufferSize));
+    *size = (*buffer != nullptr) ? kBufferSize : 0;
+    *maxNumRecords = 0;
+}
+
+// MSPTI buffer-complete callback: counts the records in `buffer` by kind, then frees it.
+// `buffer` is the malloc'ed block from UserBufferRequest and nothing else in this file releases
+// it, so it is freed here on every path (free(nullptr) is a no-op). `size` is not needed.
+void UserBufferComplete(uint8_t *buffer, size_t size, size_t validSize) {
+    msptiActivity *pRecord = nullptr;
+    // Any non-success status ends the walk: MSPTI_ERROR_MAX_LIMIT_REACHED is presumably the
+    // normal "no more records" exit, and retrying after any other error could spin forever.
+    while (validSize > 0 && msptiActivityGetNextRecord(buffer, validSize, &pRecord) == MSPTI_SUCCESS) {
+        if (pRecord == nullptr) {
+            break;  // defensive: never dereference a null record
+        }
+        if (pRecord->kind == MSPTI_ACTIVITY_KIND_API) {
+            g_totalApi++;
+        } else if (pRecord->kind == MSPTI_ACTIVITY_KIND_KERNEL) {
+            g_totalKernel++;
+        } else if (pRecord->kind == MSPTI_ACTIVITY_KIND_MARKER) {
+            g_totalMarker++;
+        }
+    }
+    free(buffer);
+}
+
+// MSPTI callback for the subscribed runtime APIs: emits an mstx mark on enter and on exit and
+// spans the call with an mstx range. Calls that carry no callback data are ignored.
+void UserCallback(void* pUserData, msptiCallbackDomain domain, msptiCallbackId id, const msptiCallbackData* pCallbackInfo) {
+    thread_local uint64_t markId = 0;  // range id carried from ENTER to EXIT on this thread
+    if (pCallbackInfo == nullptr) {
+        return;
+    }
+    if (pCallbackInfo->callbackSite == MSPTI_API_ENTER) {
+        mstxMarkA("start", nullptr);
+        markId = mstxRangeStartA("Range start", nullptr);
+    } else if (pCallbackInfo->callbackSite == MSPTI_API_EXIT) {
+        mstxMarkA("end", nullptr);
+        mstxRangeEnd(markId);
+    }
+}
+
+// Subscribes UserCallback, enables the runtime malloc/free/launch/ffts-launch callbacks and turns
+// on marker/API/kernel activity records. No-op if already enabled or if msptiSubscribe fails.
+void MsptiEnable() {
+    if (g_client != nullptr) {
+        return;  // already subscribed; subscribing again could clobber the live handle
+    }
+    if (msptiSubscribe(&g_client, UserCallback, nullptr) != MSPTI_SUCCESS) {
+        g_client = nullptr;
+        printf("msptiSubscribe failed\n");
+        return;
+    }
+    msptiEnableCallback(1, g_client, MSPTI_CB_DOMAIN_RUNTIME, MSPTI_CBID_RUNTIME_MALLOC);
+    msptiEnableCallback(1, g_client, MSPTI_CB_DOMAIN_RUNTIME, MSPTI_CBID_RUNTIME_FREE);
+    msptiEnableCallback(1, g_client, MSPTI_CB_DOMAIN_RUNTIME, MSPTI_CBID_RUNTIME_LAUNCH);
+    msptiEnableCallback(1, g_client, MSPTI_CB_DOMAIN_RUNTIME, MSPTI_CBID_RUNTIME_FFTS_LAUNCH);
+    msptiActivityRegisterCallbacks(UserBufferRequest, UserBufferComplete);
+    msptiActivityEnable(MSPTI_ACTIVITY_KIND_MARKER);
+    msptiActivityEnable(MSPTI_ACTIVITY_KIND_API);
+    msptiActivityEnable(MSPTI_ACTIVITY_KIND_KERNEL);
+}
+
+// Reverses MsptiEnable, flushes pending activity buffers and prints the per-kind record totals.
+// The reported device id stays 0 when aclrtGetDevice fails, because its status is not checked.
+void MsptiDisable() {
+    msptiEnableCallback(0, g_client, MSPTI_CB_DOMAIN_RUNTIME, MSPTI_CBID_RUNTIME_MALLOC);
+    msptiEnableCallback(0, g_client, MSPTI_CB_DOMAIN_RUNTIME, MSPTI_CBID_RUNTIME_FREE);
+    msptiEnableCallback(0, g_client, MSPTI_CB_DOMAIN_RUNTIME, MSPTI_CBID_RUNTIME_LAUNCH);
+    msptiEnableCallback(0, g_client, MSPTI_CB_DOMAIN_RUNTIME, MSPTI_CBID_RUNTIME_FFTS_LAUNCH);
+    msptiActivityDisable(MSPTI_ACTIVITY_KIND_MARKER);
+    msptiActivityDisable(MSPTI_ACTIVITY_KIND_API);
+    msptiActivityDisable(MSPTI_ACTIVITY_KIND_KERNEL);
+    msptiActivityFlushAll(1);
+    msptiUnsubscribe(g_client);
+    // Clear the handle after unsubscribing so a later MsptiEnable subscribes again.
+    g_client = nullptr;
+    int32_t deviceId = 0;
+    aclrtGetDevice(&deviceId);
+    printf("[Total] device: %d, api: %llu, kernel: %llu, marker: %llu\n", deviceId,
+           static_cast<unsigned long long>(g_totalApi.load()),
+           static_cast<unsigned long long>(g_totalKernel.load()),
+           static_cast<unsigned long long>(g_totalMarker.load()));
+}
+
+} // namespace profiler
+} // namespace toolkit
+} // namespace torch_npu
\ No newline at end of file
diff --git a/torch_npu/profiler/__init__.py b/torch_npu/profiler/__init__.py
index d31bd85ebb2cb8bd5c84008cd1c9159aca9aaa51..2d2dd4eab91d89b08b075ff69e80f9081fdf2793 100644
--- a/torch_npu/profiler/__init__.py
+++ b/torch_npu/profiler/__init__.py
@@ -11,7 +11,8 @@ from .experimental_config import _ExperimentalConfig, supported_profiler_level,
     supported_export_type, ProfilerLevel, AiCMetrics, ExportType
 from ._non_intrusive_profile import _NonIntrusiveProfile
+from .mspti import mspti  # "mspti" is listed in __all__ below, so the class must be bound in this package
 
-__all__ = ["profile", "ProfilerActivity", "supported_activities", "tensorboard_trace_handler", "schedule",
+__all__ = ["profile", "mspti", "ProfilerActivity", "supported_activities", "tensorboard_trace_handler", "schedule",
            "ProfilerAction", "_ExperimentalConfig", "supported_profiler_level", "supported_ai_core_metrics",
            "supported_export_type", "ProfilerLevel", "AiCMetrics", "ExportType"]
 
diff --git a/torch_npu/profiler/mspti.py b/torch_npu/profiler/mspti.py
new file mode 100644
index 0000000000000000000000000000000000000000..f32901b4d6381d950c8e030467ac6e5ad3341f79
--- /dev/null
+++ b/torch_npu/profiler/mspti.py
@@ -0,0 +1,13 @@
+from torch_npu._C._profiler import _mspti_enable, _mspti_disable
+
+class mspti:
+    """Thin Python handle for the MSPTI hooks bound in ``torch_npu._C._profiler``.
+    Stateless: ``start()`` / ``stop()`` only forward to the native enable / disable calls."""
+
+    def start(self):
+        """Enable MSPTI collection by calling ``_mspti_enable``."""
+        _mspti_enable()
+
+    def stop(self):
+        """Disable MSPTI collection by calling ``_mspti_disable``."""
+        _mspti_disable()