diff --git a/arch/arm64/Kconfig.turbo b/arch/arm64/Kconfig.turbo index c4a8e4e889aa4ae81c7c9ed5a739400d30ca8c9c..99bac9292a4890943324bed6d9ff356518349ac3 100644 --- a/arch/arm64/Kconfig.turbo +++ b/arch/arm64/Kconfig.turbo @@ -71,4 +71,16 @@ config ACTLR_XCALL_XINT Use the 0x680 as the offset to the exception vector base address for the Armv8.8 NMI taken from EL0. +config XCALL_SMT_QOS + bool "Xcall SMT QoS Optimization in Offline Mixed Deployment Scenarios" + depends on !QOS_SCHED && !QOS_SCHED_SMART_GRID && SCHED_SMT + depends on FAST_IRQ || ACTLR_XCALL_XINT + default n + help + To reduce the interference of offline tasks on online tasks + in scenarios where SMT (Simultaneous Multithreading) is used + for mixed deployment of offline and online tasks, WFxT instructions + that do not affect SMT performance are executed by injecting PMU + interrupts on the CPU cores running the offline tasks. + endmenu # "Turbo features selection" diff --git a/arch/arm64/include/asm/smt_qos.h b/arch/arm64/include/asm/smt_qos.h new file mode 100644 index 0000000000000000000000000000000000000000..18786de173457371b072164726dd125a4e5b2f53 --- /dev/null +++ b/arch/arm64/include/asm/smt_qos.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_SMT_QOS_H +#define __ASM_SMT_QOS_H + +#include +#include + +#define INST_RETIRED_COUNTER 0 +#define CYCLE_COUNTER 1 + +DECLARE_PER_CPU(bool, pmu_enable); + +extern unsigned int sysctl_delay_cycles; +extern unsigned int sysctl_sample_interval_inst; +extern unsigned int sysctl_sample_interval_cycles; + +// Enable Performance Monitors +static inline void pmu_start(void) +{ + u64 reg_val; + + reg_val = read_sysreg(pmcr_el0); + reg_val |= ARMV8_PMU_PMCR_E; // Enable the PMU counter + write_sysreg(reg_val, pmcr_el0); + isb(); +} + +// Disable the PMU entirely +static inline void pmu_stop(void) +{ + u64 reg_val; + + reg_val = read_sysreg(pmcr_el0); + reg_val &= ~ARMV8_PMU_PMCR_E; // Clear bit 0 (E) to Disable the PMU + write_sysreg(reg_val, pmcr_el0); + isb(); +} + +static inline void write_pmevtypern_el0(int n, u64 val) +{ + u64 and = ARMV8_PMU_INCLUDE_EL2 | ARMV8_PMU_EXCLUDE_EL1; + + switch(n) { + case 0: + write_sysreg(val | and, pmevtyper0_el0); + break; + case 1: + write_sysreg(val | and, pmevtyper1_el0); + break; + default: + break; + } +} + +static inline void write_pmevcntrn_el0(int n, u64 val) +{ + val |= GENMASK_ULL(63, 32); + + switch(n) { + case 0: + write_sysreg(val, pmevcntr0_el0); + break; + case 1: + write_sysreg(val, pmevcntr1_el0); + break; + default: + break; + } +} + +static inline void write_pmintenset_el1(unsigned int counter) +{ + write_sysreg(BIT(counter), pmintenset_el1); +} + +static inline void write_pmcntenset_el0(unsigned int counter) +{ + write_sysreg(BIT(counter), pmcntenset_el0); +} + +static inline void write_pmcntenclr_el0(unsigned int counter) +{ + write_sysreg(BIT(counter), pmcntenclr_el0); +} + +static inline void write_pmintenclr_el1(unsigned int counter) +{ + write_sysreg(BIT(counter), pmintenclr_el1); +} + +static inline void write_pmovsclr_el0(unsigned int counter) +{ + write_sysreg(BIT(counter), pmovsclr_el0); +} + +void setup_pmu_counter(void *info); +void stop_pmu_counter(void *info); +irqreturn_t my_pmu_irq_handler(int irq, void *dev_id); + +#endif /* __ASM_SMT_QOS_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 379d24059f5b140ebaac142054706c1e59f7ee17..ed27011113fb7c6e6784a484b8ae46b4fd9eef66 100644 --- a/arch/arm64/include/asm/thread_info.h +++ 
b/arch/arm64/include/asm/thread_info.h @@ -48,8 +48,13 @@ struct thread_info { #endif u32 cpu; +#ifdef CONFIG_XCALL_SMT_QOS + u64 qos_context1; // save x0 + u64 qos_context2; // save x1 +#else KABI_RESERVE(1) KABI_RESERVE(2) +#endif }; #define thread_saved_pc(tsk) \ diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 0cedfa1cce8a74c722918ac850df32c0f3043205..574bee3604056c94bb9187e7d4048a0187525798 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -12,7 +12,7 @@ */ #define VDSO_LBASE 0x0 -#define __VVAR_PAGES 2 +#define __VVAR_PAGES 3 #ifndef __ASSEMBLY__ @@ -38,6 +38,10 @@ extern char vdso32_start[], vdso32_end[]; extern char vdso_ilp32_start[], vdso_ilp32_end[]; #endif +#ifdef CONFIG_XCALL_SMT_QOS +struct qos_data *arch_get_qos_data(void *vvar_page); +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_H */ diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h index 079139c04b149d8a086a3dad1ae5b2579988bfa7..50a62c4978450d83ae116c51a714ea7db7865e6e 100644 --- a/arch/arm64/include/uapi/asm/unistd.h +++ b/arch/arm64/include/uapi/asm/unistd.h @@ -36,3 +36,10 @@ #define __ARCH_WANT_MEMFD_SECRET #include + +#ifdef CONFIG_XCALL_SMT_QOS +#ifndef __NR_vdso_wfxt_return +#define __NR_vdso_wfxt_return (__NR_arch_specific_syscall + 11) +#endif +__SYSCALL(__NR_vdso_wfxt_return, sys_vdso_wfxt_return) +#endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index d67126570cb2f61fbb17eea16659e017e120b225..9784594112abfe2715047bcae37454dff659c691 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -80,6 +80,7 @@ obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o obj-$(CONFIG_ARM64_ILP32) += vdso-ilp32/ obj-$(CONFIG_FAST_SYSCALL) += xcall/ +obj-$(CONFIG_XCALL_SMT_QOS) += smt_qos.o obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o obj-$(CONFIG_IPI_AS_NMI) += ipi_nmi.o obj-$(CONFIG_HISI_VIRTCCA_GUEST) += virtcca_cvm_guest.o virtcca_cvm_tsi.o diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 1e8171c1efe76a1873849c1c34bb73c577b9cdef..642d3c598d58a927907a2de3bdd29d0f7e1bb869 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -13,6 +13,9 @@ #include #include #include +#ifdef CONFIG_XCALL_SMT_QOS +#include +#endif #include #include @@ -26,6 +29,10 @@ #include #include #include +#ifdef CONFIG_XCALL_SMT_QOS +#include +#include +#endif /* * Handle IRQ/context state management when entering from kernel mode. 
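The write_pmevcntrn_el0() helper above primes a PMU event counter with (0xffffffff - interval), so the overflow interrupt fires after roughly `interval` further events; setup_pmu_counter() in arch/arm64/kernel/smt_qos.c below relies on this to re-arm sampling. A standalone sketch of that arithmetic, plain user-space C and not part of the patch (the interval value is only illustrative):

/* prime_sketch.c - the (0xffffffff - interval) priming idiom, modelled in C */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Value written into PMEVCNTR<n>_EL0 by write_pmevcntrn_el0(). */
static uint32_t prime_counter(uint32_t sample_interval)
{
	return 0xffffffffu - sample_interval;
}

int main(void)
{
	uint32_t interval = 100000000u;	/* default sysctl_sample_interval_* */
	uint32_t start = prime_counter(interval);
	/* A 32-bit counter wraps, and raises its overflow IRQ, past 0xffffffff. */
	uint64_t events_to_overflow = 0x100000000ull - start;

	printf("counter primed to 0x%08" PRIx32 ", overflows after %" PRIu64 " events\n",
	       start, events_to_overflow);
	return 0;
}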
@@ -570,13 +577,35 @@ static __always_inline void __el1_pnmi(struct pt_regs *regs, arm64_exit_nmi(regs); } -#ifdef CONFIG_FAST_IRQ +#ifdef CONFIG_XCALL_SMT_QOS +static void set_return_to_vdso(struct pt_regs *regs, struct qos_data *qos_data, + unsigned long trampoline) +{ + qos_data->delay_cycles = sysctl_delay_cycles; + trace_printk("delay_cycles: %ld, saved pc: 0x%llx, saved x8: 0x%llx\n", + qos_data->delay_cycles, regs->pc, regs->regs[8]); + task_thread_info(current)->qos_context1 = regs->pc; + task_thread_info(current)->qos_context2 = regs->regs[8]; + regs->pc = trampoline; +} +#endif + +#if defined(CONFIG_FAST_IRQ) || defined(CONFIG_ACTLR_XCALL_XINT) static void noinstr el0_xint(struct pt_regs *regs, u64 nmi_flag, void (*handler)(struct pt_regs *), void (*nmi_handler)(struct pt_regs *)) { +#ifdef CONFIG_XCALL_SMT_QOS + struct qos_data *qos_data = NULL; + unsigned long trampoline = 0; + + if (current->mm && current->mm->smt_qos_page) { + qos_data = arch_get_qos_data(page_address(current->mm->smt_qos_page)); + trampoline = (unsigned long)VDSO_SYMBOL(current->mm->context.vdso, smt_qos_trampoline); + } +#endif + fast_enter_from_user_mode(regs); -#ifndef CONFIG_DEBUG_FEATURE_BYPASS /* Is there a NMI to handle? */ if (system_uses_nmi() && (read_sysreg(isr_el1) & nmi_flag)) { /* @@ -589,10 +618,15 @@ static void noinstr el0_xint(struct pt_regs *regs, u64 nmi_flag, do_interrupt_handler(regs, nmi_handler); arm64_exit_nmi(regs); - exit_to_user_mode(regs); - return; - } +#ifdef CONFIG_XCALL_SMT_QOS + if (cpus_have_const_cap(ARM64_HAS_WFXT) && qos_data && + trampoline && sysctl_delay_cycles > 0) { + set_return_to_vdso(regs, qos_data, trampoline); + fast_exit_to_user_mode(regs); + return; + } #endif + } write_sysreg(DAIF_PROCCTX_NOIRQ, daif); @@ -605,10 +639,15 @@ static void noinstr el0_xint(struct pt_regs *regs, u64 nmi_flag, do_interrupt_handler(regs, handler); xint_exit_rcu(); +#ifdef CONFIG_XCALL_SMT_QOS + if (cpus_have_const_cap(ARM64_HAS_WFXT) && qos_data && + trampoline && sysctl_delay_cycles > 0) + set_return_to_vdso(regs, qos_data, trampoline); +#endif + fast_exit_to_user_mode(regs); } - asmlinkage void noinstr el0t_64_fast_irq_handler(struct pt_regs *regs) { el0_xint(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); @@ -1058,7 +1097,7 @@ asmlinkage void noinstr el0t_64_xcall_handler(struct pt_regs *regs) } asmlinkage void noinstr el0t_64_xint_handler(struct pt_regs *regs) { - el0_interrupt(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); + el0_xint(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); } #endif diff --git a/arch/arm64/kernel/smt_qos.c b/arch/arm64/kernel/smt_qos.c new file mode 100644 index 0000000000000000000000000000000000000000..28cc576d64380f9b1bc3a4a4b2d0860ff64e2ad5 --- /dev/null +++ b/arch/arm64/kernel/smt_qos.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) "SMT QoS: " fmt + +#include +#include +#include +#include +#include +#include + +#include + +unsigned int sysctl_delay_cycles = 10000000; +unsigned int sysctl_sample_interval_inst = 100000000; +unsigned int sysctl_sample_interval_cycles = 100000000; + +SYSCALL_DEFINE0(vdso_wfxt_return) +{ + struct pt_regs *regs = current_pt_regs(); + + regs->pc = task_thread_info(current)->qos_context1; + regs->regs[8] = task_thread_info(current)->qos_context2; + trace_printk("restored pc: 0x%llx, restored x8: 0x%llx\n", regs->pc, regs->regs[8]); + + return regs->regs[0]; +} + +void setup_pmu_counter(void *info) +{ + if (unlikely(__this_cpu_read(pmu_enable))) + return; + + 
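+	/* Arm the retired-instruction and cycle counters (when their sample
+	 * intervals are non-zero) so they overflow and raise the PMU interrupt
+	 * after sysctl_sample_interval_inst / _cycles further events.
+	 */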
trace_printk("Enable pmu on CPU %d.\n", smp_processor_id()); + + pmu_start(); + + if (sysctl_sample_interval_inst != 0) { + write_pmevtypern_el0(INST_RETIRED_COUNTER, ARMV8_PMUV3_PERFCTR_INST_RETIRED); + write_pmevcntrn_el0(INST_RETIRED_COUNTER, (0xffffffffUL - sysctl_sample_interval_inst)); + write_pmintenset_el1(INST_RETIRED_COUNTER); + write_pmcntenset_el0(INST_RETIRED_COUNTER); + } + + if (sysctl_sample_interval_cycles != 0) { + write_pmevtypern_el0(CYCLE_COUNTER, ARMV8_PMUV3_PERFCTR_CPU_CYCLES); + write_pmevcntrn_el0(CYCLE_COUNTER, (0xffffffffUL - sysctl_sample_interval_cycles)); + write_pmintenset_el1(CYCLE_COUNTER); + write_pmcntenset_el0(CYCLE_COUNTER); + } + isb(); + + __this_cpu_write(pmu_enable, true); +} + +void stop_pmu_counter(void *info) +{ + if (likely(!__this_cpu_read(pmu_enable))) + return; + + trace_printk("Disable pmu on cpu%d\n", smp_processor_id()); + + if (sysctl_sample_interval_inst != 0) { + write_pmcntenclr_el0(INST_RETIRED_COUNTER); + write_pmintenclr_el1(INST_RETIRED_COUNTER); + write_pmovsclr_el0(INST_RETIRED_COUNTER); + } + + if (sysctl_sample_interval_cycles != 0) { + write_pmcntenclr_el0(CYCLE_COUNTER); + write_pmintenclr_el1(CYCLE_COUNTER); + write_pmovsclr_el0(CYCLE_COUNTER); + } + isb(); + + __this_cpu_write(pmu_enable, false); +} + +irqreturn_t my_pmu_irq_handler(int irq, void *dev_id) +{ + u64 pmovsclr; + + pmovsclr = read_sysreg(pmovsclr_el0); + write_sysreg(pmovsclr, pmovsclr_el0); + + // Check if our specific counter caused the interrupt + if (!(pmovsclr & BIT(INST_RETIRED_COUNTER)) && !(pmovsclr & BIT(CYCLE_COUNTER))) + return IRQ_NONE; + + pmu_stop(); + + if (pmovsclr & BIT(INST_RETIRED_COUNTER)) + write_pmevcntrn_el0(INST_RETIRED_COUNTER, (0xffffffffUL - sysctl_sample_interval_inst)); + + if (pmovsclr & BIT(CYCLE_COUNTER)) + write_pmevcntrn_el0(CYCLE_COUNTER, (0xffffffffUL - sysctl_sample_interval_cycles)); + + pmu_start(); + + return IRQ_HANDLED; +} diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index fff989a09f7534beb5612810dd44e116b18a8614..0ff0b3569802f19f683e44ceb5ce39b21b7f1244 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -40,6 +40,7 @@ enum vdso_abi { enum vvar_pages { VVAR_DATA_PAGE_OFFSET, VVAR_TIMENS_PAGE_OFFSET, + VVAR_SMT_QOS_PAGE_OFFSET, VVAR_NR_PAGES, }; @@ -126,6 +127,13 @@ static int __init __vdso_init(enum vdso_abi abi) return 0; } +#ifdef CONFIG_XCALL_SMT_QOS +struct qos_data *arch_get_qos_data(void *vvar_page) +{ + return (struct qos_data *)(vvar_page); +} +#endif + #ifdef CONFIG_TIME_NS struct vdso_data *arch_get_vdso_data(void *vvar_page) { @@ -161,6 +169,18 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) } #endif +#ifdef CONFIG_XCALL_SMT_QOS +static struct page *find_qos_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->mm->smt_qos_page; + + WARN(1, "smt qos page accessed remotely"); + + return NULL; +} +#endif + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -188,6 +208,11 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, pfn = sym_to_pfn(vdso_data); break; #endif /* CONFIG_TIME_NS */ +#ifdef CONFIG_XCALL_SMT_QOS + case VVAR_SMT_QOS_PAGE_OFFSET: + pfn = page_to_pfn(find_qos_vvar_page(vma)); + break; +#endif default: return VM_FAULT_SIGBUS; } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b46df9f340f99deaacf17b1b5f3014733f3bcf0c..8a023bb821dc6b116c01a390c57533e0c81c268c 
100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -9,7 +9,11 @@ # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile +ifdef CONFIG_XCALL_SMT_QOS +obj-vdso := vgettimeofday.o note.o sigreturn.o smt_qos.o smt_qos_trampoline.o +else obj-vdso := vgettimeofday.o note.o sigreturn.o +endif # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg @@ -40,6 +44,12 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ -Wmissing-prototypes -Wmissing-declarations +ifdef CONFIG_XCALL_SMT_QOS +CFLAGS_REMOVE_smt_qos.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ + -Wmissing-prototypes -Wmissing-declarations +endif KASAN_SANITIZE := n KCSAN_SANITIZE := n UBSAN_SANITIZE := n diff --git a/arch/arm64/kernel/vdso/smt_qos.c b/arch/arm64/kernel/vdso/smt_qos.c new file mode 100644 index 0000000000000000000000000000000000000000..41077726c5c269e119c20c4f479df561045b5244 --- /dev/null +++ b/arch/arm64/kernel/vdso/smt_qos.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 userspace implementations of smt qos trampoline + * + */ +#include +#include + +#define wfit(val) asm volatile("msr s0_3_c1_c0_1, %0" \ + : : "r" (val) : "memory") +#define wfet(val) asm volatile("msr s0_3_c1_c0_0, %0" \ + : : "r" (val) : "memory") + +static __always_inline struct qos_data *__arch_get_qos_vdso_data(void) +{ + return &_qos_data; +} + +static unsigned long long get_cycles(void) +{ + unsigned long long val; + + asm volatile("mrs %0, cntvct_el0" : "=r" (val)); + + return val; +} + +static void delay(unsigned long cycles) +{ + unsigned long long start = get_cycles(); + unsigned long long end = start + cycles; + + wfit(end); + while ((get_cycles() - start) < cycles) + wfet(end); +} + + +void do_smt_qos_trampoline(void) +{ + struct qos_data *qos_data = __arch_get_qos_vdso_data(); + + delay(qos_data->delay_cycles); + return; +} diff --git a/arch/arm64/kernel/vdso/smt_qos_trampoline.S b/arch/arm64/kernel/vdso/smt_qos_trampoline.S new file mode 100644 index 0000000000000000000000000000000000000000..bcbc2593fb5aac3bcc267258bf80c6639fe1da81 --- /dev/null +++ b/arch/arm64/kernel/vdso/smt_qos_trampoline.S @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include +#include +#include + + .text +SYM_CODE_START(__kernel_smt_qos_trampoline) + sub sp, sp, #16 * 16 + stp x0, x1, [sp, #16 * 0] + stp x2, x3, [sp, #16 * 1] + stp x4, x5, [sp, #16 * 2] + stp x6, x7, [sp, #16 * 3] + stp x8, x9, [sp, #16 * 4] + stp x10, x11, [sp, #16 * 5] + stp x12, x13, [sp, #16 * 6] + stp x14, x15, [sp, #16 * 7] + stp x16, x17, [sp, #16 * 8] + stp x18, x19, [sp, #16 * 9] + stp x20, x21, [sp, #16 * 10] + stp x22, x23, [sp, #16 * 11] + stp x24, x25, [sp, #16 * 12] + stp x26, x27, [sp, #16 * 13] + stp x28, x29, [sp, #16 * 14] + str x30, [sp, #16 * 15] + + bl do_smt_qos_trampoline + + ldp x0, x1, [sp, #16 * 0] + ldp x2, x3, [sp, #16 * 1] + ldp x4, x5, [sp, #16 * 2] + ldp x6, x7, [sp, #16 * 3] + ldp x8, x9, [sp, #16 * 4] + ldp x10, x11, [sp, #16 * 5] + ldp x12, x13, [sp, #16 * 6] + ldp x14, x15, [sp, #16 * 7] + ldp x16, x17, [sp, #16 * 8] + ldp x18, x19, [sp, #16 * 9] + ldp x20, x21, [sp, #16 * 10] + ldp x22, x23, [sp, #16 * 11] + ldp x24, x25, [sp, #16 * 12] + ldp x26, x27, [sp, #16 * 13] + ldp x28, x29, [sp, #16 * 14] + ldr x30, [sp, #16 * 15] + add sp, sp, #16 * 16 + + /* + * Call 
vdso_wfxt_return to restore context + */ + mov x8, 255 + svc #0xffff + +SYM_CODE_END(__kernel_smt_qos_trampoline) + diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 2e126ab79ecd8688278da63bb94141cf258930e1..467ae0a6189050765e108d416c9a0cac78294c1a 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -21,6 +21,9 @@ SECTIONS PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); #ifdef CONFIG_TIME_NS PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif +#ifdef CONFIG_XCALL_SMT_QOS + PROVIDE(_qos_data = _vdso_data + PAGE_SIZE); #endif . = VDSO_LBASE + SIZEOF_HEADERS; @@ -103,6 +106,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_smt_qos_trampoline; local: *; }; } @@ -111,3 +115,6 @@ VERSION * Make the sigreturn code visible to the kernel. */ VDSO_sigtramp = __kernel_rt_sigreturn; +#ifdef CONFIG_XCALL_SMT_QOS +VDSO_smt_qos_trampoline = __kernel_smt_qos_trampoline; +#endif diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 849d2e0db4fd129a93e173bb82e0f3f9f68477bb..e6935105a842e81a76cc10594d2aea1447ea5fec 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1008,7 +1008,8 @@ DECLARE_BITMAP(irqnr_xint_map, 1024); static bool can_set_xint(unsigned int hwirq) { if (__get_intid_range(hwirq) == SGI_RANGE || - __get_intid_range(hwirq) == SPI_RANGE) + __get_intid_range(hwirq) == SPI_RANGE || + __get_intid_range(hwirq) == PPI_RANGE) return true; return false; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 9a97651b7afb1eb3aaf4efbd95a8c995ae835fdc..a0f7feb998c9c6c511593d4f86106b4070729433 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -448,7 +448,11 @@ validate_group(struct perf_event *event) return 0; } +#ifdef CONFIG_XCALL_SMT_QOS +static __maybe_unused irqreturn_t armpmu_dispatch_irq(int irq, void *dev) +#else static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) +#endif { struct arm_pmu *armpmu; int ret; @@ -658,10 +662,17 @@ void armpmu_free_irq(int irq, int cpu) per_cpu(cpu_irq_ops, cpu) = NULL; } +#ifdef CONFIG_XCALL_SMT_QOS +extern irqreturn_t my_pmu_irq_handler(int irq, void *dev_id); +#endif int armpmu_request_irq(int irq, int cpu) { int err = 0; +#ifndef CONFIG_XCALL_SMT_QOS const irq_handler_t handler = armpmu_dispatch_irq; +#else + const irq_handler_t handler = my_pmu_irq_handler; +#endif const struct pmu_irq_ops *irq_ops; if (!irq) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 64c38b09e18d5579dd362cc160f68d6535c70428..e9250be17f9361236ecc3f624232d849ac4d0cd3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1016,7 +1016,11 @@ struct mm_struct { #else KABI_RESERVE(1) #endif +#ifdef CONFIG_FAST_SYSCALL + KABI_USE(2, struct page *smt_qos_page) +#else KABI_RESERVE(2) +#endif KABI_RESERVE(3) KABI_RESERVE(4) KABI_RESERVE(5) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 36c592e43d65208f6d1b3099fa6805a24d5961de..5a91f219c8cb6b5283c4b7ed221aace6e86de116 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -948,6 +948,10 @@ asmlinkage long sys_cachestat(unsigned int fd, struct cachestat __user *cstat, unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); +#ifdef CONFIG_XCALL_SMT_QOS +asmlinkage long sys_vdso_wfxt_return(void); +#endif + /* * Architecture-specific system calls */ diff --git a/include/vdso/datapage.h 
b/include/vdso/datapage.h index f28f51c2c8f872cc291abe23153c0a06caa3dadc..04b098153b4ba4d7f6045710bf83506bed15f157 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -115,6 +115,10 @@ struct vdso_data { struct arch_vdso_data arch_data; }; +struct qos_data { + unsigned long delay_cycles; +}; + /* * We use the hidden visibility to prevent the compiler from generating a GOT * relocation. Not only is going through a GOT useless (the entry couldn't and @@ -126,6 +130,7 @@ struct vdso_data { */ extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden"))); extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden"))); +extern struct qos_data _qos_data __attribute__((visibility("hidden"))); /* * The generic vDSO implementation requires that gettimeofday.h diff --git a/kernel/fork.c b/kernel/fork.c index 78663ca681600ff7b78150acb521d115e3f1f1a9..6c3956cb027f8e26eb9e533e669ede3e37043936 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -974,6 +974,13 @@ void __mmdrop(struct mm_struct *mm) mm_destroy_cid(mm); mm_counter_destroy(mm); +#ifdef CONFIG_FAST_SYSCALL + if (mm->smt_qos_page) { + __free_page(mm->smt_qos_page); + mm->smt_qos_page = NULL; + } +#endif + free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -1390,6 +1397,13 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, sp_init_mm(mm); mm->user_ns = get_user_ns(user_ns); lru_gen_init_mm(mm); + +#ifdef CONFIG_FAST_SYSCALL + mm->smt_qos_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!mm->smt_qos_page) + goto fail_cid; +#endif + return mm; fail_cid: diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 47877f3b52f67fc4cb450abd5d0994a2e583727b..f393275fe67474dd54c93c95b5a28bcd80bee379 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10480,7 +10480,7 @@ void ia64_set_curr_task(int cpu, struct task_struct *p) /* task_group_lock serializes the addition/removal of task groups */ static DEFINE_SPINLOCK(task_group_lock); -#ifdef CONFIG_QOS_SCHED +#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_XCALL_SMT_QOS) static inline int alloc_qos_sched_group(struct task_group *tg, struct task_group *parent) { @@ -10488,7 +10488,9 @@ static inline int alloc_qos_sched_group(struct task_group *tg, return 1; } +#endif +#ifdef CONFIG_QOS_SCHED static void sched_change_qos_group(struct task_struct *tsk, struct task_group *tg) { struct sched_attr attr = {0}; @@ -10598,7 +10600,7 @@ struct task_group *sched_create_group(struct task_group *parent) if (!alloc_fair_sched_group(tg, parent)) goto err; -#ifdef CONFIG_QOS_SCHED +#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_XCALL_SMT_QOS) if (!alloc_qos_sched_group(tg, parent)) goto err; #endif @@ -11706,6 +11708,47 @@ static inline s64 cpu_soft_quota_read(struct cgroup_subsys_state *css, } #endif +#ifdef CONFIG_XCALL_SMT_QOS +static int pmu_tg_change_scheduler(struct task_group *tg, void *data) +{ + s64 qos_level = *(s64 *)data; + + tg->qos_level = qos_level; + + return 0; +} + +static int pmu_cpu_qos_write(struct cgroup_subsys_state *css, + struct cftype *cftype, s64 qos_level) +{ + struct task_group *tg = css_tg(css); + + if (!tg->se[0]) + return -EINVAL; + + if (qos_level > QOS_LEVEL_HIGH_EX || qos_level < QOS_LEVEL_OFFLINE_EX) + return -EINVAL; + + if (tg->qos_level == qos_level) + goto done; + + if (tg->qos_level != QOS_LEVEL_ONLINE) + return -EINVAL; + + rcu_read_lock(); + walk_tg_tree_from(tg, pmu_tg_change_scheduler, tg_nop, (void *)(&qos_level)); + rcu_read_unlock(); +done: + return 0; +} + +static 
inline s64 pmu_cpu_qos_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return css_tg(css)->qos_level; +} +#endif + #ifdef CONFIG_BPF_SCHED void sched_settag(struct task_struct *tsk, s64 tag) { @@ -11960,6 +12003,14 @@ static struct cftype cpu_legacy_files[] = { .write_s64 = cpu_soft_quota_write, }, #endif +#ifdef CONFIG_XCALL_SMT_QOS + { + .name = "pmu_qos_level", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = pmu_cpu_qos_read, + .write_s64 = pmu_cpu_qos_write, + }, +#endif #ifdef CONFIG_BPF_SCHED { .name = "tag", diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b22f3c072d20385e755c9d5fc5955b4e91bdd198..4dfb75304f7332c62164fbdcd31a8db5d1fc5e34 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -79,6 +79,10 @@ #include #endif +#ifdef CONFIG_XCALL_SMT_QOS +#include +#endif + /* * The initial- and re-scaling of tunables is configurable * @@ -169,6 +173,11 @@ static bool is_offline_task(struct task_struct *p); static DEFINE_PER_CPU(int, qos_smt_status); #endif +#ifdef CONFIG_XCALL_SMT_QOS +static DEFINE_PER_CPU(int, pmu_smt_status); +DEFINE_PER_CPU(bool, pmu_enable); +#endif + #ifdef CONFIG_QOS_SCHED_PRIO_LB unsigned int sysctl_sched_prio_load_balance_enabled; #endif @@ -9970,6 +9979,183 @@ static bool qos_smt_expelled(int this_cpu) #endif +#ifdef CONFIG_XCALL_SMT_QOS +static bool pmu_smt_update_status(struct task_struct *p) +{ + int status = QOS_LEVEL_ONLINE; + + if (p != NULL && task_group(p)->qos_level < QOS_LEVEL_ONLINE) + status = QOS_LEVEL_OFFLINE; + + if (p != NULL && task_group(p)->qos_level > QOS_LEVEL_ONLINE) + status = QOS_LEVEL_HIGH; + + if (__this_cpu_read(pmu_smt_status) == status) + return false; + + __this_cpu_write(pmu_smt_status, status); + if (status == QOS_LEVEL_OFFLINE) + trace_printk("udpate %s-%d to offline!\n", p->comm, p->pid); + else if (status == QOS_LEVEL_HIGH) + trace_printk("udpate %s-%d to high level online!\n", p->comm, p->pid); + + return true; +} + +static DEFINE_PER_CPU(call_single_data_t, pmu_setup_csd) = + CSD_INIT(setup_pmu_counter, NULL); + +static void send_pmu_setup_ipi(int cpu) +{ + call_single_data_t *csd; + int ret; + + csd = &per_cpu(pmu_setup_csd, cpu); + ret = smp_call_function_single_async(cpu, csd); + if (ret) + trace_printk("Sending IPI failed to CPU %d\n", cpu); +} + +static void pmu_smt_send_ipi_setup_pmu(int this_cpu) +{ + struct rq *rq = NULL; + int cpu; + + /* + * If the cfs.h_nr_running of current cpu is 0 (which means + * current CPU is idle), not send IPI to setup pmu + * for sibling CPU + */ + rq = cpu_rq(this_cpu); + if (rq->cfs.h_nr_running == 0) + return; + + for_each_cpu(cpu, cpu_smt_mask(this_cpu)) { + if (cpu == this_cpu) + continue; + + rq = cpu_rq(cpu); + + /* + * There are two cases where current don't need to send ipi + * to setup PMU: + * a) The pmu_smt_status of siblings cpu is online; + * b) The cfs.h_nr_running of siblings cpu is 0. 
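+	 * Either way the sibling is not running an offline task, so there
+	 * is nothing there to throttle.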
+ */ + if (per_cpu(pmu_smt_status, cpu) >= QOS_LEVEL_ONLINE || + rq->cfs.h_nr_running == 0) + continue; + + if (!per_cpu(pmu_enable, cpu)) { + trace_printk("cpu%d send ipi to cpu%d to setup pmu\n", smp_processor_id(), cpu); + send_pmu_setup_ipi(cpu); + } + } +} + +static DEFINE_PER_CPU(call_single_data_t, pmu_stop_csd) = + CSD_INIT(stop_pmu_counter, NULL); + +static void send_pmu_stop_ipi(int cpu) +{ + call_single_data_t *csd; + int ret; + + csd = &per_cpu(pmu_stop_csd, cpu); + ret = smp_call_function_single_async(cpu, csd); + if (ret) + trace_printk("Sending IPI failed to CPU %d\n", cpu); +} + +static void pmu_smt_send_ipi_stop_pmu(int this_cpu) +{ + struct rq *rq = NULL; + int cpu; + + for_each_cpu(cpu, cpu_smt_mask(this_cpu)) { + if (cpu == this_cpu) + continue; + + rq = cpu_rq(cpu); + + trace_printk("cpu%d send ipi to cpu%d to stop pmu\n", smp_processor_id(), cpu); + send_pmu_stop_ipi(cpu); + } +} + +/* + * If current cpu runs offline task, check whether + * SMT cpu runs online task, if so, enable PMU + * counter on current cpu. + */ +static void setup_pmu_counter_on_cpu(int this_cpu) +{ + struct rq *rq = NULL; + int cpu; + + for_each_cpu(cpu, cpu_smt_mask(this_cpu)) { + if (cpu == this_cpu) + continue; + + rq = cpu_rq(cpu); + + /* + * There are two cases where current don't need to enable PMU counter + * to setup PMU: + * a) The pmu_smt_status of siblings cpu is offline; + * b) The cfs.h_nr_running of siblings cpu is 0. + */ + if (per_cpu(pmu_smt_status, cpu) <= QOS_LEVEL_ONLINE || + rq->cfs.h_nr_running == 0) + continue; + + setup_pmu_counter(NULL); + } +} + +static void pmu_smt_qos_setup(int this_cpu, struct task_struct *p) +{ + int old_status = __this_cpu_read(pmu_smt_status); + + pmu_smt_update_status(p); + + /* + * Offline task has finished, need to stop pmu counter + */ + if (old_status < QOS_LEVEL_ONLINE && __this_cpu_read(pmu_smt_status) >= QOS_LEVEL_ONLINE) + stop_pmu_counter(NULL); + + /* + * Online -> High or offline -> High switch need to check if SMT cpu is + * already running offline task. + * + * If current cpu is to run High task, check whether SMT cpu + * runs offline task, if so, send IPI to enable PMU counter. + */ + if (__this_cpu_read(pmu_smt_status) > QOS_LEVEL_ONLINE) { + pmu_smt_send_ipi_setup_pmu(this_cpu); + return; + } + + /* + * High -> online or High -> offline + * which means high task has finished on this cpu + * we need to stop pmu counter on sibling cpu. + */ + if (old_status > QOS_LEVEL_ONLINE && __this_cpu_read(pmu_smt_status) <= QOS_LEVEL_ONLINE) + pmu_smt_send_ipi_stop_pmu(this_cpu); + + /* + * High -> offline or online -> offline + * If current cpu is to run offline task, check whether SMT cpu + * runs High task, if so, enable PMU counter. 
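+	 * Unlike the IPI paths above, the counter is armed locally, since this
+	 * CPU is the one about to run the offline task.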
+ */ + if (old_status >= QOS_LEVEL_ONLINE && __this_cpu_read(pmu_smt_status) < QOS_LEVEL_ONLINE) + setup_pmu_counter_on_cpu(this_cpu); +} + +#endif + #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER DEFINE_STATIC_KEY_TRUE(qos_smt_expell_switch); @@ -10175,7 +10361,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf struct task_struct *p; int new_tasks; unsigned long time; -#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER +#if defined(CONFIG_QOS_SCHED_SMT_EXPELLER) || defined(CONFIG_XCALL_SMT_QOS) int this_cpu = rq->cpu; #endif @@ -10356,6 +10542,9 @@ done: __maybe_unused; #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER qos_smt_expel(this_cpu, p); #endif +#ifdef CONFIG_XCALL_SMT_QOS + pmu_smt_qos_setup(this_cpu, p); +#endif return p; @@ -10409,6 +10598,9 @@ done: __maybe_unused; #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER qos_smt_expel(this_cpu, NULL); #endif +#ifdef CONFIG_XCALL_SMT_QOS + pmu_smt_qos_setup(this_cpu, p); +#endif return NULL; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0e21ad151ec952c84c393a6bd98e48b1ffc18878..bea584e76c5050911c6adfe531517fea8ce0aad1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -458,7 +458,7 @@ struct task_group { struct cfs_bandwidth cfs_bandwidth; -#ifdef CONFIG_QOS_SCHED +#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_XCALL_SMT_QOS) long qos_level; #endif @@ -1575,8 +1575,8 @@ do { \ flags = _raw_spin_rq_lock_irqsave(rq); \ } while (0) -#ifdef CONFIG_QOS_SCHED -#ifdef CONFIG_QOS_SCHED_MULTILEVEL +#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_XCALL_SMT_QOS) +#if defined(CONFIG_QOS_SCHED_MULTILEVEL) || defined(CONFIG_XCALL_SMT_QOS) enum task_qos_level { QOS_LEVEL_OFFLINE_EX = -2, QOS_LEVEL_OFFLINE = -1, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e84df0818517681529df54f0da315a58c92cd871..dff8309d495609cd5e9b012a7517b6b1e7d550cf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -81,6 +81,10 @@ #include #endif +#ifdef CONFIG_XCALL_SMT_QOS +#include +#endif + /* shared constants to be used in various sysctls */ const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 }; EXPORT_SYMBOL(sysctl_vals); @@ -2044,6 +2048,35 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, +#endif +#ifdef CONFIG_XCALL_SMT_QOS + { + .procname = "xcall_vdso_delay_cycles", + .data = &sysctl_delay_cycles, + .maxlen = sizeof(sysctl_delay_cycles), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "sample_interval_inst", + .data = &sysctl_sample_interval_inst, + .maxlen = sizeof(sysctl_sample_interval_inst), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "sample_interval_cycles", + .data = &sysctl_sample_interval_cycles, + .maxlen = sizeof(sysctl_sample_interval_cycles), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, #endif { } };
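
For reference, a usage sketch (not part of the patch): it tunes the three new sysctls and marks one cgroup's tasks as offline through the new cpu.pmu_qos_level file. The sysctl names, the cgroup file name and the -1 (QOS_LEVEL_OFFLINE) value come from this patch; the cgroup-v1 cpu mount point /sys/fs/cgroup/cpu and the non-root group name "offline" are assumptions made for illustration.

/* smt_qos_setup_sketch.c - configure the knobs added by this patch */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0) {
		perror(path);
		if (fd >= 0)
			close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* WFxT delay injected on CPUs running offline tasks, in cntvct_el0 ticks (min 1). */
	write_str("/proc/sys/kernel/xcall_vdso_delay_cycles", "10000000");
	/* Re-arm the PMU after this many retired instructions / CPU cycles (0 disables). */
	write_str("/proc/sys/kernel/sample_interval_inst", "100000000");
	write_str("/proc/sys/kernel/sample_interval_cycles", "100000000");

	/* QOS_LEVEL_OFFLINE == -1: tasks in this (assumed) group count as offline. */
	write_str("/sys/fs/cgroup/cpu/offline/cpu.pmu_qos_level", "-1");

	return 0;
}

The values written above are the compile-time defaults from the patch; they are tunables, not requirements.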