From 0e05f29ea28d2f2b980afda3d814edf0f12475e5 Mon Sep 17 00:00:00 2001 From: Zhao Hang Date: Mon, 12 Aug 2024 09:43:18 +0800 Subject: [PATCH 1/4] update to papi-6.0.0-16.src.rpm Signed-off-by: Zhao Hang --- 1000-papi-anolis-add-loongarch-support.patch | 90 ------ papi-6.0.0-sw.patch | 74 ----- papi-71eventupdate.patch | 321 +++++++++++++++++++ papi.spec | 32 +- 4 files changed, 326 insertions(+), 191 deletions(-) delete mode 100644 1000-papi-anolis-add-loongarch-support.patch delete mode 100644 papi-6.0.0-sw.patch create mode 100644 papi-71eventupdate.patch diff --git a/1000-papi-anolis-add-loongarch-support.patch b/1000-papi-anolis-add-loongarch-support.patch deleted file mode 100644 index 8f0369c..0000000 --- a/1000-papi-anolis-add-loongarch-support.patch +++ /dev/null @@ -1,90 +0,0 @@ -diff -Nur papi-6.0.0/src/linux-context.h papi-6.0.0.new/src/linux-context.h ---- papi-6.0.0/src/linux-context.h 2020-03-04 23:57:01.000000000 +0800 -+++ papi-6.0.0.new/src/linux-context.h 2023-12-13 10:27:34.048248582 +0800 -@@ -33,6 +33,8 @@ - #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.arm_pc - #elif defined(__aarch64__) - #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc -+#elif defined(__loongarch__) -+#define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.__pc - #elif defined(__mips__) - #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc - #elif defined(__hppa__) -diff -Nur papi-6.0.0/src/linux-lock.h papi-6.0.0.new/src/linux-lock.h ---- papi-6.0.0/src/linux-lock.h 2020-03-04 23:57:01.000000000 +0800 -+++ papi-6.0.0.new/src/linux-lock.h 2023-12-13 10:36:30.035342866 +0800 -@@ -227,6 +227,37 @@ - } - #define _papi_hwd_lock(lck) __raw_spin_lock(&_papi_hwd_lock_data[lck]); - #define _papi_hwd_unlock(lck) __raw_spin_unlock(&_papi_hwd_lock_data[lck]) -+#elif defined(__loongarch__) -+static inline void __raw_spin_lock(volatile unsigned int *lock) -+{ -+ unsigned int tmp; -+ __asm__ __volatile__( -+ "1: ll.w %1, %2 \n" -+ " bnez %1, 1b \n" -+ " li.w %1, 1 \n" -+ " sc.w %1, %0 \n" -+ " beqz %1, 1b \n" -+ " nop \n" -+ : "=m" (*lock), "=&r" (tmp) -+ : "m" (*lock) -+ : "memory"); -+} -+ -+static inline void __raw_spin_unlock(volatile unsigned int *lock) -+{ -+ unsigned int tmp; -+ __asm__ __volatile__( -+ " nop \n" -+ " li.w %1, 0 \n" -+ " st.w %1, %0 \n" -+ : "=m" (*lock), "=&r" (tmp) -+ : "m" (*lock) -+ : "memory"); -+} -+#define _papi_hwd_lock(lck) { rmb(); __raw_spin_lock(&_papi_hwd_lock_data[lck]); rmb(); } -+#define _papi_hwd_unlock(lck) { rmb(); __raw_spin_unlock(&_papi_hwd_lock_data[lck]); rmb(); } -+ -+ - #else - - #error "_papi_hwd_lock/unlock undefined!" -diff -Nur papi-6.0.0/src/linux-timer.c papi-6.0.0.new/src/linux-timer.c ---- papi-6.0.0/src/linux-timer.c 2020-03-04 23:57:01.000000000 +0800 -+++ papi-6.0.0.new/src/linux-timer.c 2023-12-13 10:32:38.681575639 +0800 -@@ -245,6 +245,20 @@ - return ret; - } - -+/****************************/ -+/* loongarch64 get_cycles() */ -+/****************************/ -+#elif defined(__loongarch__) -+static inline long long -+get_cycles(void) -+{ -+ register unsigned long ret = 0; -+ int rID = 0; -+ __asm__ __volatile__ ("ibar 0" ::: "memory"); -+ __asm__ __volatile__ ("rdtime.d %0, %1" :"=r"(ret),"=r"(rID)); -+ return ret; -+} -+ - /************************/ - /* POWER get_cycles() */ - /************************/ -diff -Nur papi-6.0.0/src/mb.h papi-6.0.0.new/src/mb.h ---- papi-6.0.0/src/mb.h 2020-03-04 23:57:01.000000000 +0800 -+++ papi-6.0.0.new/src/mb.h 2023-12-13 10:33:08.705804990 +0800 -@@ -39,6 +39,9 @@ - #elif defined(__aarch64__) - #define rmb() asm volatile("dmb ld" ::: "memory") - -+#elif defined(__loongarch__) -+#define rmb() asm volatile("dbar 0" ::: "memory") -+ - #elif defined(__mips__) - #define rmb() asm volatile( \ - ".set mips2\n\t" \ diff --git a/papi-6.0.0-sw.patch b/papi-6.0.0-sw.patch deleted file mode 100644 index 483a363..0000000 --- a/papi-6.0.0-sw.patch +++ /dev/null @@ -1,74 +0,0 @@ -diff -Nuar papi-6.0.0.org/src/configure papi-6.0.0.sw/src/configure ---- papi-6.0.0.org/src/configure 2023-08-04 14:53:14.229642734 +0800 -+++ papi-6.0.0.sw/src/configure 2023-08-04 15:15:36.257871330 +0800 -@@ -4709,7 +4709,7 @@ - - # First set pthread-mutexes based on arch - case $arch in -- aarch64|arm*|parisc*) -+ sw_64|aarch64|arm*|parisc*) - pthread_mutexes=yes - CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" - echo "forcing use of pthread mutexes... " >&6 -diff -Nuar papi-6.0.0.org/src/configure.in papi-6.0.0.sw/src/configure.in ---- papi-6.0.0.org/src/configure.in 2023-08-04 14:53:14.198641645 +0800 -+++ papi-6.0.0.sw/src/configure.in 2023-08-04 15:07:59.605759288 +0800 -@@ -410,7 +410,7 @@ - - # First set pthread-mutexes based on arch - case $arch in -- aarch64|arm*|parisc*) -+ sw_64|aarch64|arm*|parisc*) - pthread_mutexes=yes - CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" - echo "forcing use of pthread mutexes... " >&6 -diff -Nuar papi-6.0.0.org/src/linux-context.h papi-6.0.0.sw/src/linux-context.h ---- papi-6.0.0.org/src/linux-context.h 2023-08-04 14:53:14.232642839 +0800 -+++ papi-6.0.0.sw/src/linux-context.h 2023-08-04 15:05:06.764673199 +0800 -@@ -35,6 +35,8 @@ - #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc - #elif defined(__loongarch__) - #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.__pc -+#elif defined(__sw_64__) -+#define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.sc_pc - #elif defined(__mips__) - #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc - #elif defined(__hppa__) -diff -Nuar papi-6.0.0.org/src/linux-timer.c papi-6.0.0.sw/src/linux-timer.c ---- papi-6.0.0.org/src/linux-timer.c 2023-08-04 14:53:14.234642909 +0800 -+++ papi-6.0.0.sw/src/linux-timer.c 2023-08-04 15:14:41.349934011 +0800 -@@ -231,6 +231,21 @@ - } - - /************************/ -+/* sw_64 get_cycles() */ -+/************************/ -+ -+#elif defined(__sw_64__) -+static inline long long -+get_cycles( void ) -+{ -+ register unsigned long ret; -+ -+ __asm__ __volatile__ ("rtc %0" : "=r" (ret)); -+ -+ return ret; -+} -+ -+/************************/ - /* aarch64 get_cycles() */ - /************************/ - -diff -Nuar papi-6.0.0.org/src/mb.h papi-6.0.0.sw/src/mb.h ---- papi-6.0.0.org/src/mb.h 2023-08-04 14:53:14.257643717 +0800 -+++ papi-6.0.0.sw/src/mb.h 2023-08-04 15:00:40.723327507 +0800 -@@ -26,6 +26,9 @@ - #elif defined (__alpha__) - #define rmb() asm volatile("mb" ::: "memory") - -+#elif defined (__sw_64__) -+#define rmb() asm volatile("memb" ::: "memory") -+ - #elif defined(__ia64__) - #define rmb() asm volatile ("mf" ::: "memory") - diff --git a/papi-71eventupdate.patch b/papi-71eventupdate.patch new file mode 100644 index 0000000..aeb576d --- /dev/null +++ b/papi-71eventupdate.patch @@ -0,0 +1,321 @@ +commit b969d25f2a87a53365e3e9a040533b093544a05d +Author: John Linford +Date: Mon Apr 3 22:30:14 2023 +0000 + + Update Neoverse V2 events + + Add/remove PAPI events to match available hardware counters + All tests pass on NVIDIA Grace + + Disclaimer: + The PAPI team was not able to verify the functionality included in this + commit. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 549e337c..3089d2d4 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2170,34 +2170,113 @@ PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + CPU,arm_v2 + # + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC ++#NOT_IMPLEMENTED,PAPI_TOT_IIS,Instructions issued + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES +-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES ++PRESET,PAPI_STL_CCY,NOT_DERIVED,STALL ++#NOT_IMPLEMENTED,PAPI_FUL_CCY,Cycles with maximum instructions completed ++#NOT_IMPLEMENTED,PAPI_FUL_ICY,Cycles with maximum instruction issue ++#NOT_IMPLEMENTED,PAPI_FXU_IDL,Cycles integer units are idle ++#NOT_IMPLEMENTED,PAPI_LSU_IDL,Cycles load/store units are idle ++#NOT_IMPLEMENTED,PAPI_MEM_RCY,Cycles Stalled Waiting for memory Reads ++#NOT_IMPLEMENTED,PAPI_MEM_SCY,Cycles Stalled Waiting for memory accesses ++#NOT_IMPLEMENTED,PAPI_MEM_WCY,Cycles Stalled Waiting for memory writes ++#NOT_IMPLEMENTED,PAPI_FP_STAL,Cycles the FP unit(s) are stalled ++#NOT_IMPLEMENTED,PAPI_FPU_IDL,Cycles floating point units are idle ++#NOT_IMPLEMENTED,PAPI_BRU_IDL,Cycles branch units are idle ++PRESET,PAPI_STL_ICY,NOT_DERIVED,STALL ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC ++#NOT_IMPLEMENTED,PAPI_SP_OPS,Floating point operations; optimized to count scaled single precision vector operations ++#NOT_IMPLEMENTED,PAPI_DP_OPS,Floating point operations; optimized to count scaled double precision vector operations ++PRESET,PAPI_FP_INS,DERIVED_ADD,FP_HP_SPEC,FP_SP_SPEC,FP_DP_SPEC ++#NOT_IMPLEMENTED,PAPI_FAD_INS,Floating point add instructions ++#NOT_IMPLEMENTED,PAPI_FDV_INS,Floating point divide instructions ++#NOT_IMPLEMENTED,PAPI_FMA_INS,FMA instructions completed ++#NOT_IMPLEMENTED,PAPI_FML_INS,Floating point multiply instructions ++#NOT_IMPLEMENTED,PAPI_FNV_INS,Floating point inverse instructions ++#NOT_IMPLEMENTED,PAPI_FSQ_INS,Floating point square root instructions + PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC ++#NOT_IMPLEMENTED,PAPI_VEC_DP,Double precision vector/SIMD instructions ++#NOT_IMPLEMENTED,PAPI_VEC_SP,Single precision vector/SIMD instructions + PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED +-PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +-PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED +-PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++#NOT_IMPLEMENTED,PAPI_BR_CN,Conditional branch instructions ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_RETIRED,BR_MIS_PRED_RETIRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED_RETIRED ++#NOT_IMPLEMENTED,PAPI_BR_NTK,Conditional branch instructions not taken ++#NOT_IMPLEMENTED,PAPI_BR_TKN,Conditional branch instructions taken ++#NOT_IMPLEMENTED,PAPI_BR_UCN,Unconditional branch instructions ++#NOT_IMPLEMENTED,PAPI_BTAC_M,Branch target address cache misses + PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC + PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC + PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC + PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD + PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS + PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS +-PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++#NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads ++#NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes ++#NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses ++#NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses ++PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL ++#NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads ++#NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE ++PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE + PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL + PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD + PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL + PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD +-PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND +-PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR ++#NOT_IMPLEMENTED,PAPI_L2_ICA,Level 2 instruction cache accesses ++#NOT_IMPLEMENTED,PAPI_L2_ICH,Level 2 instruction cache hits ++#NOT_IMPLEMENTED,PAPI_L2_ICM,Level 2 instruction cache misses ++#NOT_IMPLEMENTED,PAPI_L2_ICR,Level 2 instruction cache reads ++#NOT_IMPLEMENTED,PAPI_L2_ICW,Level 2 instruction cache writes ++PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL ++PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L3_TCA,NOT_DERIVED,L3D_CACHE ++PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE ++#NOT_IMPLEMENTED,PAPI_L3_DCH,Level 3 data cache hits ++PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL ++#NOT_IMPLEMENTED,PAPI_L3_DCR,Level 3 data cache reads ++#NOT_IMPLEMENTED,PAPI_L3_DCW,Level 3 data cache writes ++#NOT_IMPLEMENTED,PAPI_L3_ICA,Level 3 instruction cache accesses ++#NOT_IMPLEMENTED,PAPI_L3_ICH,Level 3 instruction cache hits ++#NOT_IMPLEMENTED,PAPI_L3_ICM,Level 3 instruction cache misses ++#NOT_IMPLEMENTED,PAPI_L3_ICR,Level 3 instruction cache reads ++#NOT_IMPLEMENTED,PAPI_L3_ICW,Level 3 instruction cache writes ++#NOT_IMPLEMENTED,PAPI_L3_LDM,Level 3 load misses ++#NOT_IMPLEMENTED,PAPI_L3_STM,Level 3 store misses ++#NOT_IMPLEMENTED,PAPI_L3_TCH,Level 3 total cache hits ++#NOT_IMPLEMENTED,PAPI_L3_TCM,Level 3 cache misses ++#NOT_IMPLEMENTED,PAPI_L3_TCR,Level 3 total cache reads ++#NOT_IMPLEMENTED,PAPI_L3_TCW,Level 3 total cache writes + PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC + PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL ++#NOT_IMPLEMENTED,PAPI_TLB_SD,Translation lookaside buffer shootdowns ++PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL ++#NOT_IMPLEMENTED,PAPI_CA_CLN,Requests for exclusive access to clean cache line ++#NOT_IMPLEMENTED,PAPI_CA_INV,Requests for cache line invalidation ++#NOT_IMPLEMENTED,PAPI_CA_ITV,Requests for cache line intervention ++#NOT_IMPLEMENTED,PAPI_CA_SHR,Requests for exclusive access to shared cache line ++#NOT_IMPLEMENTED,PAPI_CA_SNP,Requests for a snoop ++#NOT_IMPLEMENTED,PAPI_CSR_FAL,Failed store conditional instructions ++#NOT_IMPLEMENTED,PAPI_CSR_SUC,Successful store conditional instructions ++#NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions ++#NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses + + # + CPU,mips_74k + +commit 15f32cb3a2e6bdd9e51aa4043842f0130e9dcf24 +Author: Daniel Barry +Date: Wed Jun 7 14:38:39 2023 +0000 + + add branch presets for Zen3 and Zen4 + + These changes include all branching preset events for Zen3 and Zen4, + validated using the Counter Analysis Toolkit. + + For Zen3, PAPI_BR_TKN was modified to exclude unconditional branches + taken, in order to adhere to the preset's meaning. + + These changes have been tested on the AMD Zen3 and Zen4 architectures. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 3089d2d4..319cf82c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -488,8 +488,12 @@ CPU,amd64_fam19h_zen3 + PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT + PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS +-PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_UCN,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_TKN,DERIVED_POSTFIX,N0|N1|-|N2|+|,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS + PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED + PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT + PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K + PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH +@@ -509,6 +513,16 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS + PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS + PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS + PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS ++# ++# ++CPU,amd64_fam19h_zen4 ++PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED + + + CPU,Intel architectural PMU + +commit da93ed4dd1fadb70ccee62a976597ff431c9f58c +Author: Daniel Barry +Date: Mon Jun 12 17:27:59 2023 +0000 + + add flops presets for Zen4 + + These changes include FLOPs presets for Zen4, validated using the + Counter Analysis Toolkit. + + These changes have been tested on the AMD Zen4 architecture. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 319cf82c..f6a40a35 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -523,6 +523,14 @@ PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDI + PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS + PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED + PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FP_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL,RETIRED_FP_OPS_BY_TYPE:SCALAR_ALL ++PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL ++PRESET,PAPI_FMA_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MAC,RETIRED_FP_OPS_BY_TYPE:SCALAR_MAC ++PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS_BY_TYPE:SCALAR_MUL ++PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD ++PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV ++PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT + + + CPU,Intel architectural PMU + +commit a31c3a4e9788e03fee113263a9f94bd638a66721 +Author: Daniel Barry +Date: Wed Jun 21 15:13:47 2023 +0000 + + add cycles and instructions presets for Zen4 + + These changes include the 'total cycles' and 'instructions completed' + presets for Zen4, validated using the Counter Analysis Toolkit. + + These changes have been tested on the AMD Zen4 architecture. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index f6a40a35..86e11fe6 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -531,6 +531,8 @@ PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS + PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD + PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV + PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT ++PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT + + + CPU,Intel architectural PMU + +commit 94303410ce97a84408b0b2d727701a60c6f137aa +Author: Daniel Barry +Date: Sun Jul 23 15:38:36 2023 +0000 + + add various Sapphire Rapids presets + + These changes include cycles, instructions, branching, and FLOPs presets + for Intel Sapphire Rapids, validated using the Counter Analysis Toolkit. + + These changes have been tested on the Intel Sapphire Rapids architecture. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 86e11fe6..eac0855f 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1010,6 +1010,29 @@ PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ + PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE + # End of icx list + ++# Intel Sapphire Rapids events ++CPU,spr ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P ++PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES ++# FLOPs ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE ++PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE ++PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++# Branches ++PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND ++PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN ++PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES ++# End of spr list ++ + # + # Intel MIC / Xeon-Phi / Knights Landing + # Intel Knights Mill + +commit 42b14987ca1a7028b6cf6fdc190a2fa6a0fd8e18 +Author: Daniel Barry +Date: Tue Jul 25 12:16:56 2023 +0000 + + add more Ice Lake FLOPs presets + + Since there are enough counters available to monitor both single- and + double-precision floating-point events, PAPI_FP_OPS, PAPI_FP_INS, and + PAPI_VEC_INS are all defined. + These presets have been validated using the Counter Analysis Toolkit. + + These changes have been tested on the Intel Ice Lake architecture. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index eac0855f..df82ac1c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1006,8 +1006,11 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES + PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE + # PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE + PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE + PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE + PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE + # End of icx list + + # Intel Sapphire Rapids events diff --git a/papi.spec b/papi.spec index 178b326..c4eeb46 100644 --- a/papi.spec +++ b/papi.spec @@ -1,4 +1,3 @@ -%define anolis_release .0.3 # Default to no static libraries %{!?with_static: %global with_static 0} %bcond_with bundled_libpfm @@ -12,7 +11,7 @@ Summary: Performance Application Programming Interface Name: papi Version: 6.0.0 -Release: 15%{anolis_release}%{?dist} +Release: 16%{?dist} License: BSD Requires: papi-libs = %{version}-%{release} URL: http://icl.cs.utk.edu/papi/ @@ -30,11 +29,8 @@ Patch7: papi-rhbz1923967.patch Patch21: papi-arm64fastread.patch Patch31: papi-701eventupdate.patch Patch40: papi-thread_init.patch +Patch41: papi-71eventupdate.patch BuildRequires: make - -Patch1000: 1000-papi-anolis-add-loongarch-support.patch -Patch1001: papi-6.0.0-sw.patch - BuildRequires: autoconf BuildRequires: doxygen BuildRequires: ncurses-devel @@ -42,10 +38,6 @@ BuildRequires: gcc-gfortran BuildRequires: kernel-headers >= 2.6.32 BuildRequires: chrpath BuildRequires: lm_sensors-devel -%ifarch sw_64 -BuildRequires: automake -BuildRequires: autoconf -%endif %if %{without bundled_libpfm} BuildRequires: libpfm-devel >= 4.13.0-1 %if %{with_static} @@ -113,14 +105,7 @@ the PAPI user-space libraries and interfaces. %patch21 -p1 %patch31 -p1 %patch40 -p1 -%patch1000 -p1 - -%ifarch sw_64 -%patch1001 -p1 -pushd src -autoreconf -fiv -popd -%endif +%patch41 -p1 %build @@ -213,15 +198,8 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* %endif %changelog -* Fri Aug 9 2024 Weisson - 6.0.0-15.0.3 -- Remove static lib for sw_64. - -* Tue Mar 26 2024 Weisson - 6.0.0-15.0.2 -- cherry-pick `add sw arch #8df28831ab9ee0af53d8fa394c9f5cd1863d6e7d`. -- cherry-pick `explicitly disable static lib #266b8d83eb0812ae4dca5661e9491329cd0d3111`. - -* Wed Dec 13 2023 - 6.0.0-15.0.1 -- add 1000-papi-anolis-add-loongarch-support.patch +* Fri Nov 17 2023 William Cohen - 6.0.0-16 +- Update papi event presets (RHEL-9333, RHEL-9334, RHEL-9335) * Fri Jun 16 2023 William Cohen - 6.0.0-15 - Address thread initialization order. (RHBZ#2215582) -- Gitee From eb2f299a79250b22a16bc3e447fb6a124467ca67 Mon Sep 17 00:00:00 2001 From: liusinan Date: Fri, 26 Nov 2021 16:49:57 +0800 Subject: [PATCH 2/4] add loongarch support Signed-off-by: liusinan --- 1000-papi-anolis-add-loongarch-support.patch | 90 ++++++++++++++++++++ papi.spec | 10 ++- 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 1000-papi-anolis-add-loongarch-support.patch diff --git a/1000-papi-anolis-add-loongarch-support.patch b/1000-papi-anolis-add-loongarch-support.patch new file mode 100644 index 0000000..8f0369c --- /dev/null +++ b/1000-papi-anolis-add-loongarch-support.patch @@ -0,0 +1,90 @@ +diff -Nur papi-6.0.0/src/linux-context.h papi-6.0.0.new/src/linux-context.h +--- papi-6.0.0/src/linux-context.h 2020-03-04 23:57:01.000000000 +0800 ++++ papi-6.0.0.new/src/linux-context.h 2023-12-13 10:27:34.048248582 +0800 +@@ -33,6 +33,8 @@ + #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.arm_pc + #elif defined(__aarch64__) + #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc ++#elif defined(__loongarch__) ++#define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.__pc + #elif defined(__mips__) + #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc + #elif defined(__hppa__) +diff -Nur papi-6.0.0/src/linux-lock.h papi-6.0.0.new/src/linux-lock.h +--- papi-6.0.0/src/linux-lock.h 2020-03-04 23:57:01.000000000 +0800 ++++ papi-6.0.0.new/src/linux-lock.h 2023-12-13 10:36:30.035342866 +0800 +@@ -227,6 +227,37 @@ + } + #define _papi_hwd_lock(lck) __raw_spin_lock(&_papi_hwd_lock_data[lck]); + #define _papi_hwd_unlock(lck) __raw_spin_unlock(&_papi_hwd_lock_data[lck]) ++#elif defined(__loongarch__) ++static inline void __raw_spin_lock(volatile unsigned int *lock) ++{ ++ unsigned int tmp; ++ __asm__ __volatile__( ++ "1: ll.w %1, %2 \n" ++ " bnez %1, 1b \n" ++ " li.w %1, 1 \n" ++ " sc.w %1, %0 \n" ++ " beqz %1, 1b \n" ++ " nop \n" ++ : "=m" (*lock), "=&r" (tmp) ++ : "m" (*lock) ++ : "memory"); ++} ++ ++static inline void __raw_spin_unlock(volatile unsigned int *lock) ++{ ++ unsigned int tmp; ++ __asm__ __volatile__( ++ " nop \n" ++ " li.w %1, 0 \n" ++ " st.w %1, %0 \n" ++ : "=m" (*lock), "=&r" (tmp) ++ : "m" (*lock) ++ : "memory"); ++} ++#define _papi_hwd_lock(lck) { rmb(); __raw_spin_lock(&_papi_hwd_lock_data[lck]); rmb(); } ++#define _papi_hwd_unlock(lck) { rmb(); __raw_spin_unlock(&_papi_hwd_lock_data[lck]); rmb(); } ++ ++ + #else + + #error "_papi_hwd_lock/unlock undefined!" +diff -Nur papi-6.0.0/src/linux-timer.c papi-6.0.0.new/src/linux-timer.c +--- papi-6.0.0/src/linux-timer.c 2020-03-04 23:57:01.000000000 +0800 ++++ papi-6.0.0.new/src/linux-timer.c 2023-12-13 10:32:38.681575639 +0800 +@@ -245,6 +245,20 @@ + return ret; + } + ++/****************************/ ++/* loongarch64 get_cycles() */ ++/****************************/ ++#elif defined(__loongarch__) ++static inline long long ++get_cycles(void) ++{ ++ register unsigned long ret = 0; ++ int rID = 0; ++ __asm__ __volatile__ ("ibar 0" ::: "memory"); ++ __asm__ __volatile__ ("rdtime.d %0, %1" :"=r"(ret),"=r"(rID)); ++ return ret; ++} ++ + /************************/ + /* POWER get_cycles() */ + /************************/ +diff -Nur papi-6.0.0/src/mb.h papi-6.0.0.new/src/mb.h +--- papi-6.0.0/src/mb.h 2020-03-04 23:57:01.000000000 +0800 ++++ papi-6.0.0.new/src/mb.h 2023-12-13 10:33:08.705804990 +0800 +@@ -39,6 +39,9 @@ + #elif defined(__aarch64__) + #define rmb() asm volatile("dmb ld" ::: "memory") + ++#elif defined(__loongarch__) ++#define rmb() asm volatile("dbar 0" ::: "memory") ++ + #elif defined(__mips__) + #define rmb() asm volatile( \ + ".set mips2\n\t" \ diff --git a/papi.spec b/papi.spec index c4eeb46..ad44fac 100644 --- a/papi.spec +++ b/papi.spec @@ -1,3 +1,4 @@ +%define anolis_release .0.1 # Default to no static libraries %{!?with_static: %global with_static 0} %bcond_with bundled_libpfm @@ -11,7 +12,7 @@ Summary: Performance Application Programming Interface Name: papi Version: 6.0.0 -Release: 16%{?dist} +Release: 16%{anolis_release}%{?dist} License: BSD Requires: papi-libs = %{version}-%{release} URL: http://icl.cs.utk.edu/papi/ @@ -31,6 +32,9 @@ Patch31: papi-701eventupdate.patch Patch40: papi-thread_init.patch Patch41: papi-71eventupdate.patch BuildRequires: make + +Patch1000: 1000-papi-anolis-add-loongarch-support.patch + BuildRequires: autoconf BuildRequires: doxygen BuildRequires: ncurses-devel @@ -106,6 +110,7 @@ the PAPI user-space libraries and interfaces. %patch31 -p1 %patch40 -p1 %patch41 -p1 +%patch1000 -p1 %build @@ -198,6 +203,9 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* %endif %changelog +* Mon Aug 12 2024 - 6.0.0-16.0.1 +- add 1000-papi-anolis-add-loongarch-support.patch + * Fri Nov 17 2023 William Cohen - 6.0.0-16 - Update papi event presets (RHEL-9333, RHEL-9334, RHEL-9335) -- Gitee From 03d6876903ac3de836ebcfff13e571f24dcbd0f5 Mon Sep 17 00:00:00 2001 From: wxiat Date: Mon, 7 Aug 2023 19:12:34 +0800 Subject: [PATCH 3/4] cherry-pick `add sw arch #8df28831ab9ee0af53d8fa394c9f5cd1863d6e7d`. cherry-pick `explicitly disable static lib #266b8d83eb0812ae4dca5661e9491329cd0d3111` Signed-off-by: Weisson --- papi-6.0.0-sw.patch | 74 +++++++++++++++++++++++++++++++++++++++++++++ papi.spec | 7 +++++ 2 files changed, 81 insertions(+) create mode 100644 papi-6.0.0-sw.patch diff --git a/papi-6.0.0-sw.patch b/papi-6.0.0-sw.patch new file mode 100644 index 0000000..483a363 --- /dev/null +++ b/papi-6.0.0-sw.patch @@ -0,0 +1,74 @@ +diff -Nuar papi-6.0.0.org/src/configure papi-6.0.0.sw/src/configure +--- papi-6.0.0.org/src/configure 2023-08-04 14:53:14.229642734 +0800 ++++ papi-6.0.0.sw/src/configure 2023-08-04 15:15:36.257871330 +0800 +@@ -4709,7 +4709,7 @@ + + # First set pthread-mutexes based on arch + case $arch in +- aarch64|arm*|parisc*) ++ sw_64|aarch64|arm*|parisc*) + pthread_mutexes=yes + CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" + echo "forcing use of pthread mutexes... " >&6 +diff -Nuar papi-6.0.0.org/src/configure.in papi-6.0.0.sw/src/configure.in +--- papi-6.0.0.org/src/configure.in 2023-08-04 14:53:14.198641645 +0800 ++++ papi-6.0.0.sw/src/configure.in 2023-08-04 15:07:59.605759288 +0800 +@@ -410,7 +410,7 @@ + + # First set pthread-mutexes based on arch + case $arch in +- aarch64|arm*|parisc*) ++ sw_64|aarch64|arm*|parisc*) + pthread_mutexes=yes + CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" + echo "forcing use of pthread mutexes... " >&6 +diff -Nuar papi-6.0.0.org/src/linux-context.h papi-6.0.0.sw/src/linux-context.h +--- papi-6.0.0.org/src/linux-context.h 2023-08-04 14:53:14.232642839 +0800 ++++ papi-6.0.0.sw/src/linux-context.h 2023-08-04 15:05:06.764673199 +0800 +@@ -35,6 +35,8 @@ + #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc + #elif defined(__loongarch__) + #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.__pc ++#elif defined(__sw_64__) ++#define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.sc_pc + #elif defined(__mips__) + #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc + #elif defined(__hppa__) +diff -Nuar papi-6.0.0.org/src/linux-timer.c papi-6.0.0.sw/src/linux-timer.c +--- papi-6.0.0.org/src/linux-timer.c 2023-08-04 14:53:14.234642909 +0800 ++++ papi-6.0.0.sw/src/linux-timer.c 2023-08-04 15:14:41.349934011 +0800 +@@ -231,6 +231,21 @@ + } + + /************************/ ++/* sw_64 get_cycles() */ ++/************************/ ++ ++#elif defined(__sw_64__) ++static inline long long ++get_cycles( void ) ++{ ++ register unsigned long ret; ++ ++ __asm__ __volatile__ ("rtc %0" : "=r" (ret)); ++ ++ return ret; ++} ++ ++/************************/ + /* aarch64 get_cycles() */ + /************************/ + +diff -Nuar papi-6.0.0.org/src/mb.h papi-6.0.0.sw/src/mb.h +--- papi-6.0.0.org/src/mb.h 2023-08-04 14:53:14.257643717 +0800 ++++ papi-6.0.0.sw/src/mb.h 2023-08-04 15:00:40.723327507 +0800 +@@ -26,6 +26,9 @@ + #elif defined (__alpha__) + #define rmb() asm volatile("mb" ::: "memory") + ++#elif defined (__sw_64__) ++#define rmb() asm volatile("memb" ::: "memory") ++ + #elif defined(__ia64__) + #define rmb() asm volatile ("mf" ::: "memory") + diff --git a/papi.spec b/papi.spec index ad44fac..a2c6f9d 100644 --- a/papi.spec +++ b/papi.spec @@ -1,5 +1,8 @@ %define anolis_release .0.1 # Default to no static libraries +%ifarch sw_64 +%define with_static 1 +%endif %{!?with_static: %global with_static 0} %bcond_with bundled_libpfm # rdma is not available @@ -34,6 +37,7 @@ Patch41: papi-71eventupdate.patch BuildRequires: make Patch1000: 1000-papi-anolis-add-loongarch-support.patch +Patch1001: papi-6.0.0-sw.patch BuildRequires: autoconf BuildRequires: doxygen @@ -111,6 +115,7 @@ the PAPI user-space libraries and interfaces. %patch40 -p1 %patch41 -p1 %patch1000 -p1 +%patch1001 -p1 %build @@ -205,6 +210,8 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* %changelog * Mon Aug 12 2024 - 6.0.0-16.0.1 - add 1000-papi-anolis-add-loongarch-support.patch +- cherry-pick `add sw arch #8df28831ab9ee0af53d8fa394c9f5cd1863d6e7d`. (Weisson@linux.alibaba.com) + cherry-pick `explicitly disable static lib #266b8d83eb0812ae4dca5661e9491329cd0d3111`. * Fri Nov 17 2023 William Cohen - 6.0.0-16 - Update papi event presets (RHEL-9333, RHEL-9334, RHEL-9335) -- Gitee From d8330fcb0249ffcdc11ba2156d21389711476e16 Mon Sep 17 00:00:00 2001 From: Weisson Date: Fri, 9 Aug 2024 17:17:17 +0800 Subject: [PATCH 4/4] Remove static libs for sw_64. Signed-off-by: Weisson --- papi.spec | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/papi.spec b/papi.spec index a2c6f9d..e58bc41 100644 --- a/papi.spec +++ b/papi.spec @@ -1,8 +1,5 @@ %define anolis_release .0.1 # Default to no static libraries -%ifarch sw_64 -%define with_static 1 -%endif %{!?with_static: %global with_static 0} %bcond_with bundled_libpfm # rdma is not available @@ -46,6 +43,10 @@ BuildRequires: gcc-gfortran BuildRequires: kernel-headers >= 2.6.32 BuildRequires: chrpath BuildRequires: lm_sensors-devel +%ifarch sw_64 +BuildRequires: automake +BuildRequires: autoconf +%endif %if %{without bundled_libpfm} BuildRequires: libpfm-devel >= 4.13.0-1 %if %{with_static} @@ -115,7 +116,13 @@ the PAPI user-space libraries and interfaces. %patch40 -p1 %patch41 -p1 %patch1000 -p1 + +%ifarch sw_64 %patch1001 -p1 +pushd src +autoreconf -fiv +popd +%endif %build @@ -212,6 +219,7 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* - add 1000-papi-anolis-add-loongarch-support.patch - cherry-pick `add sw arch #8df28831ab9ee0af53d8fa394c9f5cd1863d6e7d`. (Weisson@linux.alibaba.com) cherry-pick `explicitly disable static lib #266b8d83eb0812ae4dca5661e9491329cd0d3111`. +- Remove static lib for sw_64. (Weisson@linux.alibaba.com) * Fri Nov 17 2023 William Cohen - 6.0.0-16 - Update papi event presets (RHEL-9333, RHEL-9334, RHEL-9335) -- Gitee