From f723ffabab4c658d76d7c109489cc22ee48016eb Mon Sep 17 00:00:00 2001 From: Cathy Zhang Date: Mon, 7 Dec 2020 19:34:41 -0800 Subject: [PATCH 01/17] KVM: x86: Expose AVX512_FP16 for supported CPUID mainline inclusion from mainline-v5.11-rc1 commit 2224fc9efb2d6593fbfb57287e39ba4958b188ba category: feature feature: SPR New Instructions Virtualization bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5O6WB CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=2224fc9efb2d6593fbfb57287e39ba4958b188ba Intel-SIG: commit 2224fc9efb2d ("KVM: x86: Expose AVX512_FP16 for supported CPUID") ------------------------------------- KVM: x86: Expose AVX512_FP16 for supported CPUID AVX512_FP16 is supported by Intel processors, like Sapphire Rapids. It can offer better performance because it is faster than FP32, provided the precision and magnitude requirements are met. Its availability is indicated by CPUID.(EAX=7,ECX=0):EDX[bit 23]. Expose it in KVM supported CPUID so that the guest can make use of it; no new registers are used, only new instructions. Signed-off-by: Cathy Zhang Signed-off-by: Kyung Min Park Acked-by: Dave Hansen Reviewed-by: Tony Luck Message-Id: <20201208033441.28207-3-kyung.min.park@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c957e591401c..3960f8bd0181 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void) F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | - F(SERIALIZE) | F(TSXLDTRK) + F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. 
*/ -- Gitee From b733e0a1f27aff42c46f2012a95efd2ee195f54b Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Tue, 5 Jan 2021 08:49:09 +0800 Subject: [PATCH 02/17] KVM: Expose AVX_VNNI instruction to guset mainline inclusion from mainline-v5.12-rc1 commit 1085a6b585d7d1c441cd10fdb4c7a4d96a22eba7 category: feature feature: SPR New Instructions Virtualization bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5O6WB CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=1085a6b585d7d1c441cd10fdb4c7a4d96a22eba7 Intel-SIG: commit 1085a6b585d7 ("KVM: Expose AVX_VNNI instruction to guset") ------------------------------------- KVM: Expose AVX_VNNI instruction to guset Expose AVX (VEX-encoded) versions of the Vector Neural Network Instructions to the guest. The bit definition: CPUID.(EAX=7,ECX=1):EAX[bit 4] AVX_VNNI The following instructions are available when this feature is present in the guest. 1. VPDPBUSD: Multiply and Add Unsigned and Signed Bytes 2. VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation 3. VPDPWSSD: Multiply and Add Signed Word Integers 4. VPDPWSSDS: Multiply and Add Signed Word Integers with Saturation These instructions are currently documented in the latest "extensions" manual (ISE). They will appear in the "main" manual (SDM) in the future. 
Signed-off-by: Yang Zhong Reviewed-by: Tony Luck Message-Id: <20210105004909.42000-3-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 3960f8bd0181..29a463658e67 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -473,7 +473,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(AVX512_BF16) + F(AVX_VNNI) | F(AVX512_BF16) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, -- Gitee From 580aa8e4376b5683289c13591c968be4c7c4ff05 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Fri, 6 Nov 2020 17:03:13 +0800 Subject: [PATCH 03/17] KVM: X86: Reset the vcpu->run->flags at the beginning of vcpu_run mainline inclusion from mainline-v5.12-rc1 commit 15aad3be9adb3fb7fba84190a2ce57d66e8b51da category: feature feature: KVM Bus Lock VM Exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RJCB CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=15aad3be9adb3fb7fba84190a2ce57d66e8b51da Intel-SIG: commit 15aad3be9adb ("KVM: X86: Reset the vcpu->run->flags at the beginning of vcpu_run") ------------------------------------- KVM: X86: Reset the vcpu->run->flags at the beginning of vcpu_run Reset the vcpu->run->flags at the beginning of kvm_arch_vcpu_ioctl_run. This avoids requiring every chunk of code that sets a flag to also clear it, a pattern that increases the odds of missing a case and ending up with a flag in an undefined state. 
Signed-off-by: Chenyi Qiang Message-Id: <20201106090315.18606-3-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/kvm/x86.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c7b4dfd2a2c4..fcc12f954678 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8403,12 +8403,15 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; - kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; + kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); kvm_run->ready_for_interrupt_injection = pic_in_kernel(vcpu->kvm) || kvm_vcpu_ready_for_interrupt_injection(vcpu); + + if (is_smm(vcpu)) + kvm_run->flags |= KVM_RUN_X86_SMM; } static void update_cr8_intercept(struct kvm_vcpu *vcpu) @@ -9537,6 +9540,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu_load(vcpu); kvm_sigset_activate(vcpu); + kvm_run->flags = 0; kvm_load_guest_fpu(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { -- Gitee From 26bba696f5421f08024a5a62976fb51b728107b6 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Fri, 6 Nov 2020 17:03:14 +0800 Subject: [PATCH 04/17] KVM: VMX: Enable bus lock VM exit mainline inclusion from mainline-v5.12-rc1 commit fe6b6bc802b40081e8a7a1abe8d32b88d10a03e1 category: feature feature: KVM Bus Lock VM Exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RJCB CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=fe6b6bc802b40081e8a7a1abe8d32b88d10a03e1 Intel-SIG: commit fe6b6bc802b4 ("KVM: VMX: Enable bus lock VM exit") ------------------------------------- KVM: VMX: Enable bus lock VM exit Virtual Machine can exploit bus locks to degrade the performance of system. 
Bus lock can be caused by split locked access to writeback(WB) memory or by using locks on uncacheable(UC) memory. The bus lock is typically >1000 cycles slower than an atomic operation within a cache line. It also disrupts performance on other cores (which must wait for the bus lock to be released before their memory operations can complete). To address the threat, bus lock VM exit is introduced to notify the VMM when a bus lock was acquired, allowing it to enforce throttling or other policy based mitigations. A VMM can enable VM exit due to bus locks by setting a new "Bus Lock Detection" VM-execution control(bit 30 of Secondary Processor-based VM execution controls). If delivery of this VM exit was preempted by a higher priority VM exit (e.g. EPT misconfiguration, EPT violation, APIC access VM exit, APIC write VM exit, exception bitmap exiting), bit 26 of exit reason in vmcs field is set to 1. In current implementation, the KVM exposes this capability through KVM_CAP_X86_BUS_LOCK_EXIT. The user can get the supported mode bitmap (i.e. off and exit) and enable it explicitly (disabled by default). If bus locks in guest are detected by KVM, exit to user space even when current exit reason is handled by KVM internally. Set a new field KVM_RUN_BUS_LOCK in vcpu->run->flags to inform the user space that there is a bus lock detected in guest. Document for Bus Lock VM exit is now available at the latest "Intel Architecture Instruction Set Extensions Programming Reference". 
Document Link: https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Chenyi Qiang Message-Id: <20201106090315.18606-4-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/include/asm/kvm_host.h | 5 ++++ arch/x86/include/asm/vmx.h | 1 + arch/x86/include/asm/vmxfeatures.h | 1 + arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/include/uapi/asm/vmx.h | 4 +++- arch/x86/kvm/vmx/capabilities.h | 6 +++++ arch/x86/kvm/vmx/vmx.c | 37 ++++++++++++++++++++++++++++-- arch/x86/kvm/vmx/vmx.h | 2 +- arch/x86/kvm/x86.c | 23 +++++++++++++++++++ include/uapi/linux/kvm.h | 5 ++++ 10 files changed, 81 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b2e3a2b812ed..66b05469c5ef 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -52,6 +52,9 @@ #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) +#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ + KVM_BUS_LOCK_DETECTION_EXIT) + /* x86-specific vcpu->requests bit members */ #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) @@ -1436,6 +1439,8 @@ extern u8 kvm_tsc_scaling_ratio_frac_bits; extern u64 kvm_max_tsc_scaling_ratio; /* 1ull << kvm_tsc_scaling_ratio_frac_bits */ extern u64 kvm_default_tsc_scaling_ratio; +/* bus lock detection supported? 
*/ +extern bool kvm_has_bus_lock_exit; extern u64 kvm_mce_cap_supported; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index ecae8a49e600..0cbb154b37b0 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -75,6 +75,7 @@ #define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) #define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) #define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) +#define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION) /* * Definitions of Tertiary Processor-Based VM-Execution Controls. diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index f26dea5769d8..c1377e48c6bf 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -85,6 +85,7 @@ #define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ #define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */ #define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ +#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */ /* Tertiary Processor-Based VM-Execution Controls, word 3 */ #define VMX_TERTIARY_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba8..9a702e05fd7d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -111,6 +111,7 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index df6707a76a3d..b3271a230331 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -89,6 +89,7 @@ #define EXIT_REASON_XRSTORS 64 #define 
EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_BUS_LOCK 74 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -149,7 +150,8 @@ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" }, \ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ - { EXIT_REASON_TPAUSE, "TPAUSE" } + { EXIT_REASON_TPAUSE, "TPAUSE" }, \ + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 1fc2d1415889..8a6ac0f6a102 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -273,6 +273,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void) SECONDARY_EXEC_TSC_SCALING; } +static inline bool cpu_has_vmx_bus_lock_detection(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_BUS_LOCK_DETECTION; +} + static inline bool cpu_has_vmx_apicv(void) { return cpu_has_vmx_apic_register_virt() && diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c115c57261c1..2feb986114b7 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2643,7 +2643,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | - SECONDARY_EXEC_ENABLE_VMFUNC; + SECONDARY_EXEC_ENABLE_VMFUNC | + SECONDARY_EXEC_BUS_LOCK_DETECTION; if (cpu_has_sgx()) opt2 |= SECONDARY_EXEC_ENCLS_EXITING; if (adjust_vmx_controls(min2, opt2, @@ -4518,6 +4519,9 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) vmx_adjust_sec_exec_control(vmx, &exec_control, waitpkg, WAITPKG, ENABLE_USR_WAIT_PAUSE, false); + if (!vcpu->kvm->arch.bus_lock_detection_enabled) + exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION; + vmx->secondary_exec_control = exec_control; } @@ -5943,6 +5947,13 @@ static int handle_encls(struct kvm_vcpu *vcpu) } #endif /* 
CONFIG_X86_SGX_KVM */ +static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu) +{ + vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; + vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK; + return 0; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -5999,6 +6010,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_VMFUNC] = handle_vmx_instruction, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, [EXIT_REASON_ENCLS] = handle_encls, + [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, }; static const int kvm_vmx_max_exit_handlers = @@ -6257,7 +6269,7 @@ void dump_vmcs(void) * The guest has exited. See if we can fix it or if we need userspace * assistance. */ -static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) +static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_vmx *vmx = to_vmx(vcpu); union vmx_exit_reason exit_reason = vmx->exit_reason; @@ -6410,6 +6422,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 0; } +static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) +{ + int ret = __vmx_handle_exit(vcpu, exit_fastpath); + + /* + * Even when current exit reason is handled by KVM internally, we + * still need to exit to user space when bus lock detected to inform + * that there is a bus lock in guest. + */ + if (to_vmx(vcpu)->exit_reason.bus_lock_detected) { + if (ret > 0) + vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; + + vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK; + return 0; + } + return ret; +} + /* * Software based L1D cache flush which is used when microcode providing * the cache control MSR is not loaded. 
@@ -8202,6 +8233,8 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } + kvm_has_bus_lock_exit = cpu_has_vmx_bus_lock_detection(); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ if (enable_ept) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index bcedbe5ff609..60be5371d297 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -84,7 +84,7 @@ union vmx_exit_reason { u32 reserved23 : 1; u32 reserved24 : 1; u32 reserved25 : 1; - u32 reserved26 : 1; + u32 bus_lock_detected : 1; u32 enclave_mode : 1; u32 smi_pending_mtf : 1; u32 smi_from_vmx_root : 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fcc12f954678..8f912a17c523 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -138,6 +138,8 @@ u64 __read_mostly kvm_max_tsc_scaling_ratio; EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); u64 __read_mostly kvm_default_tsc_scaling_ratio; EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); +bool __read_mostly kvm_has_bus_lock_exit; +EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit); /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ static u32 __read_mostly tsc_tolerance_ppm = 250; @@ -3918,6 +3920,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_STEAL_TIME: r = sched_info_on(); break; + case KVM_CAP_X86_BUS_LOCK_EXIT: + if (kvm_has_bus_lock_exit) + r = KVM_BUS_LOCK_DETECTION_OFF | + KVM_BUS_LOCK_DETECTION_EXIT; + else + r = 0; + break; default: break; } @@ -5446,6 +5455,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, } mutex_unlock(&kvm->lock); break; + case KVM_CAP_X86_BUS_LOCK_EXIT: + r = -EINVAL; + if (cap->args[0] & ~KVM_BUS_LOCK_DETECTION_VALID_MODE) + break; + + if ((cap->args[0] & KVM_BUS_LOCK_DETECTION_OFF) && + (cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)) + break; + + if (kvm_has_bus_lock_exit && + cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT) + kvm->arch.bus_lock_detection_enabled = true; + r = 0; + break; default: r = -EINVAL; 
break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4e0ebd66368f..6a6a2b1a996e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -251,6 +251,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_RISCV_SBI 31 +#define KVM_EXIT_X86_BUS_LOCK 33 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -1061,6 +1062,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_ARM_CPU_FEATURE 555 @@ -1737,4 +1739,7 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + #endif /* __LINUX_KVM_H */ -- Gitee From fbea843801934558d020d952314546730503ea13 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Fri, 6 Nov 2020 17:03:15 +0800 Subject: [PATCH 05/17] KVM: X86: Add the Document for KVM_CAP_X86_BUS_LOCK_EXIT mainline inclusion from mainline-v5.12-rc1 commit c32b1b896d2ab30ac30bc39194bac47a09f7f497 category: feature feature: KVM Bus Lock VM Exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RJCB CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=c32b1b896d2ab30ac30bc39194bac47a09f7f497 Intel-SIG: commit c32b1b896d2a ("KVM: X86: Add the Document for KVM_CAP_X86_BUS_LOCK_EXIT") ------------------------------------- KVM: X86: Add the Document for KVM_CAP_X86_BUS_LOCK_EXIT Introduce a new capability named KVM_CAP_X86_BUS_LOCK_EXIT, which is used to handle bus locks detected in guest. It allows the userspace to do custom throttling policies to mitigate the 'noisy neighbour' problem. 
Signed-off-by: Chenyi Qiang Message-Id: <20201106090315.18606-5-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- Documentation/virt/kvm/api.rst | 47 +++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 19438a5c942b..56fcea9b221d 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5033,9 +5033,11 @@ local APIC is not used. __u16 flags; More architecture-specific flags detailing state of the VCPU that may -affect the device's behavior. The only currently defined flag is -KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the -VCPU is in system management mode. +affect the device's behavior. Currently defined flags: + /* x86, set if the VCPU is in system management mode */ + #define KVM_RUN_X86_SMM (1 << 0) + /* x86, set if bus lock detected in VM */ + #define KVM_RUN_X86_BUS_LOCK (1 << 1) :: @@ -6197,7 +6199,7 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space can then handle to implement model specific MSR handling and/or user notifications to inform a user that an MSR was not handled. -7.25 KVM_CAP_SGX_ATTRIBUTE +7.22 KVM_CAP_SGX_ATTRIBUTE ---------------------- :Architectures: x86 @@ -6241,6 +6243,43 @@ if the value was set to zero or KVM_ENABLE_CAP was not invoked, KVM uses the return value of KVM_CHECK_EXTENSION(KVM_CAP_MAX_VCPU_ID) as the maximum APIC ID. +7.24 KVM_CAP_X86_BUS_LOCK_EXIT +------------------------------- + +:Architectures: x86 +:Target: VM +:Parameters: args[0] defines the policy used when bus locks are detected in the guest +:Returns: 0 on success, -EINVAL when args[0] contains invalid bits + +Valid bits in args[0] are:: + + #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) + #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + +Enabling this capability on a VM provides userspace with a way to select +a policy to handle the bus locks detected in guest. 
Userspace can obtain +the supported modes from the result of KVM_CHECK_EXTENSION and define it +through the KVM_ENABLE_CAP. + +KVM_BUS_LOCK_DETECTION_OFF and KVM_BUS_LOCK_DETECTION_EXIT are supported +currently and mutually exclusive with each other. More bits can be added in +the future. + +With KVM_BUS_LOCK_DETECTION_OFF set, bus locks in guest will not cause vm exits +so that no additional actions are needed. This is the default mode. + +With KVM_BUS_LOCK_DETECTION_EXIT set, vm exits happen when bus lock detected +in VM. KVM just exits to userspace when handling them. Userspace can enforce +its own throttling or other policy based mitigations. + +This capability is aimed to address the threat that VM can exploit bus locks to +degrade the performance of the whole system. Once the userspace enables this +capability and selects the KVM_BUS_LOCK_DETECTION_EXIT mode, KVM will set the +KVM_RUN_X86_BUS_LOCK flag in vcpu->run->flags field and exit to userspace. Because +the bus lock vm exit can be preempted by a higher priority VM exit, the exit +notifications to userspace can be KVM_EXIT_X86_BUS_LOCK or other reasons. +KVM_RUN_X86_BUS_LOCK flag is used to distinguish between them. + 8. Other capabilities. 
====================== -- Gitee From 564b4800db52aa53f77a2747d15aba8468d5c985 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:20:08 -0300 Subject: [PATCH 06/17] tools headers UAPI: Sync KVM's kvm.h and vmx.h headers with the kernel sources mainline inclusion from mainline-v5.12-rc3 commit 33dc525f93216bc83935ce98518644def04d6c54 category: feature feature: KVM Bus Lock VM Exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RJCB CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=33dc525f93216bc83935ce98518644def04d6c54 Intel-SIG: commit 33dc525f9321 ("tools headers UAPI: Sync KVM's kvm.h and vmx.h headers with the kernel sources") ------------------------------------- tools headers UAPI: Sync KVM's kvm.h and vmx.h headers with the kernel sources To pick the changes in: fe6b6bc802b40081 ("KVM: VMX: Enable bus lock VM exit") That makes 'perf kvm-stat' aware of this new BUS_LOCK exit reason, thus addressing the following perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Chenyi Qiang Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Aichun Shi --- tools/arch/x86/include/uapi/asm/kvm.h | 1 + tools/arch/x86/include/uapi/asm/vmx.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba8..9a702e05fd7d 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -111,6 +111,7 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/tools/arch/x86/include/uapi/asm/vmx.h 
b/tools/arch/x86/include/uapi/asm/vmx.h index b8ff9e8ac0d5..14c177c4afd5 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -88,6 +88,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_BUS_LOCK 74 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -148,7 +149,8 @@ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" }, \ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ - { EXIT_REASON_TPAUSE, "TPAUSE" } + { EXIT_REASON_TPAUSE, "TPAUSE" }, \ + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } -- Gitee From 78e6cda7e6983f9357495d4b3aa9301f8a0e6fae Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Tue, 14 Sep 2021 17:50:41 +0800 Subject: [PATCH 07/17] KVM: nVMX: Fix nested bus lock VM exit mainline inclusion from mainline-v5.15-rc4 commit 24a996ade34d00deef5dee2c33aacd8fda91ec31 category: feature feature: KVM Bus Lock VM Exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RJCB CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=24a996ade34d00deef5dee2c33aacd8fda91ec31 Intel-SIG: commit 24a996ade34d ("KVM: nVMX: Fix nested bus lock VM exit") ------------------------------------- KVM: nVMX: Fix nested bus lock VM exit Nested bus lock VM exits are not supported yet. If L2 triggers bus lock VM exit, it will be directed to L1 VMM, which would cause unexpected behavior. Therefore, handle L2's bus lock VM exits in L0 directly. 
Fixes: fe6b6bc802b4 ("KVM: VMX: Enable bus lock VM exit") Signed-off-by: Chenyi Qiang Reviewed-by: Sean Christopherson Reviewed-by: Xiaoyao Li Message-Id: <20210914095041.29764-1-chenyi.qiang@intel.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/kvm/vmx/nested.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 693ce29e8dce..8fb6f61c7ad3 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5857,6 +5857,12 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, case EXIT_REASON_VMFUNC: /* VM functions are emulated through L2->L0 vmexits. */ return true; + case EXIT_REASON_BUS_LOCK: + /* + * At present, bus lock VM exit is never exposed to L1. + * Handle L2's bus locks in L0 directly. + */ + return true; default: break; } -- Gitee From 265cc29f0df45f7ddb6432eaa35cfd5410a78e2c Mon Sep 17 00:00:00 2001 From: Hao Xiang Date: Fri, 15 Oct 2021 19:59:21 +0800 Subject: [PATCH 08/17] KVM: VMX: Remove redundant handling of bus lock vmexit mainline inclusion from mainline-v5.15-rc7 commit d61863c66f9b443192997613cd6aeca3f65cc313 category: feature feature: KVM Bus Lock VM Exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RJCB CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=d61863c66f9b443192997613cd6aeca3f65cc313 Intel-SIG: commit d61863c66f9b ("KVM: VMX: Remove redundant handling of bus lock vmexit") ------------------------------------- KVM: VMX: Remove redundant handling of bus lock vmexit Hardware may or may not set exit_reason.bus_lock_detected on BUS_LOCK VM-Exits. Dealing with KVM_RUN_X86_BUS_LOCK in handle_bus_lock_vmexit could be redundant when exit_reason.basic is EXIT_REASON_BUS_LOCK. We can remove redundant handling of bus lock vmexit. 
Unconditionally Set exit_reason.bus_lock_detected in handle_bus_lock_vmexit(), and deal with KVM_RUN_X86_BUS_LOCK only in vmx_handle_exit(). Signed-off-by: Hao Xiang Message-Id: <1634299161-30101-1-git-send-email-hao.xiang@linux.alibaba.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/kvm/vmx/vmx.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2feb986114b7..2c79f617e296 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5949,9 +5949,13 @@ static int handle_encls(struct kvm_vcpu *vcpu) static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu) { - vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; - vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK; - return 0; + /* + * Hardware may or may not set the BUS_LOCK_DETECTED flag on BUS_LOCK + * VM-Exits. Unconditionally set the flag here and leave the handling to + * vmx_handle_exit(). + */ + to_vmx(vcpu)->exit_reason.bus_lock_detected = true; + return 1; } /* @@ -6427,9 +6431,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) int ret = __vmx_handle_exit(vcpu, exit_fastpath); /* - * Even when current exit reason is handled by KVM internally, we - * still need to exit to user space when bus lock detected to inform - * that there is a bus lock in guest. + * Exit to user space when bus lock detected to inform that there is + * a bus lock in guest. 
*/ if (to_vmx(vcpu)->exit_reason.bus_lock_detected) { if (ret > 0) -- Gitee From af5a448871fd6b055c2be4d5359dfa5aaa2a59b2 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Tue, 24 May 2022 21:56:21 +0800 Subject: [PATCH 09/17] KVM: x86: Extend KVM_{G,S}ET_VCPU_EVENTS to support pending triple fault mainline inclusion from mainline-v6.0-rc1 commit ed2351174e38ad4febbbc0dba802803e6cff8ae0 category: feature feature: Notify VM exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5PAJ5 CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=ed2351174e38ad4febbbc0dba802803e6cff8ae0 Intel-SIG: commit ed2351174e38 ("KVM: x86: Extend KVM_{G,S}ET_VCPU_EVENTS to support pending triple fault") ------------------------------------- KVM: x86: Extend KVM_{G,S}ET_VCPU_EVENTS to support pending triple fault For the triple fault synthesized by KVM, e.g. the RSM path or nested_vmx_abort(), if KVM exits to userspace before the request is serviced, userspace could migrate the VM and lose the triple fault. Extend KVM_{G,S}ET_VCPU_EVENTS to support pending triple fault with a new event KVM_VCPUEVENT_VALID_TRIPLE_FAULT so that userspace can save and restore the triple fault event. This extension is guarded by a new KVM capability KVM_CAP_X86_TRIPLE_FAULT_EVENT. Note that in the set_vcpu_events path, userspace is able to set/clear the triple fault request through triple_fault.pending field. 
Signed-off-by: Chenyi Qiang Message-Id: <20220524135624.22988-2-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- Documentation/virt/kvm/api.rst | 8 ++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/include/uapi/asm/kvm.h | 6 +++++- arch/x86/kvm/x86.c | 21 ++++++++++++++++++++- include/uapi/linux/kvm.h | 1 + 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 56fcea9b221d..1b6d729cea98 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1084,6 +1084,10 @@ The following bits are defined in the flags field: fields contain a valid state. This bit will be set whenever KVM_CAP_EXCEPTION_PAYLOAD is enabled. +- KVM_VCPUEVENT_VALID_TRIPLE_FAULT may be set to signal that the + triple_fault_pending field contains a valid state. This bit will + be set whenever KVM_CAP_TRIPLE_FAULT_EVENT is enabled. + ARM/ARM64: ^^^^^^^^^^ @@ -1179,6 +1183,10 @@ can be set in the flags field to signal that the exception_has_payload, exception_payload, and exception.pending fields contain a valid state and shall be written into the VCPU. +If KVM_CAP_TRIPLE_FAULT_EVENT is enabled, KVM_VCPUEVENT_VALID_TRIPLE_FAULT +can be set in flags field to signal that the triple_fault field contains +a valid state and shall be written into the VCPU. + ARM/ARM64: ^^^^^^^^^^ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 66b05469c5ef..90476381551b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -997,6 +997,8 @@ struct kvm_arch { bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + bool triple_fault_event; + bool bus_lock_detection_enabled; /* Guest can access the SGX PROVISIONKEY. 
*/ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 9a702e05fd7d..0e0694a0a4f4 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -310,6 +310,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 #define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 +#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -344,7 +345,10 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u8 reserved[27]; + struct { + __u8 pending; + } triple_fault; + __u8 reserved[26]; __u8 exception_has_payload; __u64 exception_payload; }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f912a17c523..5e7cccb0be6d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3844,6 +3844,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: + case KVM_CAP_X86_TRIPLE_FAULT_EVENT: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_LAST_CPU: case KVM_CAP_X86_USER_SPACE_MSR: @@ -4415,6 +4416,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | KVM_VCPUEVENT_VALID_SMM); if (vcpu->kvm->arch.exception_payload_enabled) events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD; + if (vcpu->kvm->arch.triple_fault_event) { + events->triple_fault.pending = kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu); + events->flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; + } memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -4428,7 +4433,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW | KVM_VCPUEVENT_VALID_SMM - | KVM_VCPUEVENT_VALID_PAYLOAD)) + | KVM_VCPUEVENT_VALID_PAYLOAD + | KVM_VCPUEVENT_VALID_TRIPLE_FAULT)) return -EINVAL; if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) { @@ 
-4506,6 +4512,15 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, } } + if (events->flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) { + if (!vcpu->kvm->arch.triple_fault_event) + return -EINVAL; + if (events->triple_fault.pending) + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + else + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } + kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; @@ -5420,6 +5435,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.exception_payload_enabled = cap->args[0]; r = 0; break; + case KVM_CAP_X86_TRIPLE_FAULT_EVENT: + kvm->arch.triple_fault_event = cap->args[0]; + r = 0; + break; case KVM_CAP_X86_USER_SPACE_MSR: kvm->arch.user_space_msr_mask = cap->args[0]; r = 0; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6a6a2b1a996e..cab5552b27dc 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1064,6 +1064,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 #define KVM_CAP_ARM_CPU_FEATURE 555 -- Gitee From ebf77f63926023549b9d5dac7970180ce8b28327 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Tue, 24 May 2022 21:56:22 +0800 Subject: [PATCH 10/17] KVM: selftests: Add a test to get/set triple fault event mainline inclusion from mainline-v6.0-rc1 commit 30267b43c5b08260da7c76cacd28bf855b06ab93 category: feature feature: Notify VM exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5PAJ5 CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=30267b43c5b08260da7c76cacd28bf855b06ab93 Intel-SIG: commit 30267b43c5b0 ("KVM: selftests: Add a test to get/set triple fault event") ------------------------------------- KVM: selftests: Add a test to get/set triple fault event Add a selftest for triple fault event: - launch the L2 and exit to userspace via I/O. 
- using KVM_SET_VCPU_EVENTS to pend a triple fault event. - with the immediate_exit, check the triple fault is pending. - run for real with pending triple fault and L1 can see the triple fault. Suggested-by: Sean Christopherson Signed-off-by: Chenyi Qiang Message-Id: <20220524135624.22988-3-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../kvm/x86_64/triple_fault_event_test.c | 101 ++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d4768897eda6..17d87d8c7d6b 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -26,6 +26,7 @@ /x86_64/vmx_tsc_adjust_test /x86_64/xss_msr_test /clear_dirty_log_test +/x86_64/triple_fault_event_test /demand_paging_test /dirty_log_test /dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 25618728dc4a..2756bf2d84b9 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -60,6 +60,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test +TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c new file mode 100644 index 000000000000..6e1de0631ce9 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include 
"test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +#define VCPU_ID 0 +#define ARBITRARY_IO_PORT 0x2000 + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + asm volatile("inb %%dx, %%al" + : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); +} + +void l1_guest_code(struct vmx_pages *vmx) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + prepare_vmcs(vmx, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmlaunch()); + /* L2 should triple fault after a triple fault event injected. */ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT); + GUEST_DONE(); +} + +int main(void) +{ + struct kvm_run *run; + struct kvm_vcpu_events events; + vm_vaddr_t vmx_pages_gva; + struct ucall uc; + + struct kvm_enable_cap cap = { + .cap = KVM_CAP_TRIPLE_FAULT_EVENT, + .args = {1} + }; + + if (!nested_vmx_supported()) { + print_skip("Nested VMX not supported"); + exit(KSFT_SKIP); + } + + if (!kvm_check_cap(KVM_CAP_TRIPLE_FAULT_EVENT)) { + print_skip("KVM_CAP_TRIPLE_FAULT_EVENT not supported"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vm_enable_cap(vm, &cap); + + run = vcpu_state(vm, VCPU_ID); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Expected KVM_EXIT_IO, got: %u (%s)\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT, + "Expected IN from port %d from L2, got port %d", + ARBITRARY_IO_PORT, run->io.port); + vcpu_events_get(vm, VCPU_ID, &events); + events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; + events.triple_fault.pending = true; + vcpu_events_set(vm, 
VCPU_ID, &events); + run->immediate_exit = true; + vcpu_run_complete_io(vm, VCPU_ID); + + vcpu_events_get(vm, VCPU_ID, &events); + TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT, + "Triple fault event invalid"); + TEST_ASSERT(events.triple_fault.pending, + "No triple fault pending"); + vcpu_run(vm, VCPU_ID); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + TEST_FAIL("%s", (const char *)uc.args[0]); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + +} -- Gitee From 0cbdfd9b67756c729b8c727026ab03e5ef4e15e0 Mon Sep 17 00:00:00 2001 From: Tao Xu Date: Tue, 24 May 2022 21:56:24 +0800 Subject: [PATCH 11/17] KVM: VMX: Enable Notify VM exit mainline inclusion from mainline-v6.0-rc1 commit 2f4073e08f4cc5a41e35d777c240aaadd0257051 category: feature feature: Notify VM exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5PAJ5 CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=2f4073e08f4cc5a41e35d777c240aaadd0257051 Intel-SIG: commit 2f4073e08f4c ("KVM: VMX: Enable Notify VM exit") ------------------------------------- KVM: VMX: Enable Notify VM exit There are cases that malicious virtual machines can cause CPU stuck (due to event windows don't open up), e.g., infinite loop in microcode when nested #AC (CVE-2015-5307). No event window means no event (NMI, SMI and IRQ) can be delivered. It leads the CPU to be unavailable to host or other VMs. VMM can enable notify VM exit that a VM exit generated if no event window occurs in VM non-root mode for a specified amount of time (notify window). Feature enabling: - The new vmcs field SECONDARY_EXEC_NOTIFY_VM_EXITING is introduced to enable this feature. VMM can set NOTIFY_WINDOW vmcs field to adjust the expected notify window. - Add a new KVM capability KVM_CAP_X86_NOTIFY_VMEXIT so that user space can query and enable this feature in per-VM scope. 
The argument is a 64-bit value: bits 63:32 are used for notify window, and bits 31:0 are for flags. Currently supported flags: - KVM_X86_NOTIFY_VMEXIT_ENABLED: enable the feature with the notify window provided. - KVM_X86_NOTIFY_VMEXIT_USER: exit to userspace once the exits happen. - It's safe to even set notify window to zero since an internal hardware threshold is added to vmcs.notify_window. VM exit handling: - Introduce a vcpu state notify_window_exits to record the count of notify VM exits and expose it through the debugfs. - Notify VM exit can happen incident to delivery of a vectored event. Allow it in KVM. - Exit to userspace unconditionally for handling when VM_CONTEXT_INVALID bit is set. Nested handling: - Nested notify VM exits are not supported yet. Keep the same notify window control in vmcs02 as vmcs01, so that L1 can't escape the restriction of notify VM exits through launching L2 VM. Notify VM exit is defined in the latest Intel Architecture Instruction Set Extensions Programming Reference, chapter 9.2. 
Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Tao Xu Co-developed-by: Chenyi Qiang Signed-off-by: Chenyi Qiang Message-Id: <20220524135624.22988-5-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- Documentation/virt/kvm/api.rst | 49 ++++++++++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 9 ++++++ arch/x86/include/asm/vmx.h | 7 +++++ arch/x86/include/asm/vmxfeatures.h | 1 + arch/x86/include/uapi/asm/vmx.h | 4 ++- arch/x86/kvm/vmx/capabilities.h | 6 ++++ arch/x86/kvm/vmx/nested.c | 8 +++++ arch/x86/kvm/vmx/vmx.c | 40 ++++++++++++++++++++++-- arch/x86/kvm/x86.c | 22 ++++++++++++++ arch/x86/kvm/x86.h | 5 +++ include/uapi/linux/kvm.h | 10 ++++++ 11 files changed, 158 insertions(+), 3 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1b6d729cea98..ad0e225f6971 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5496,6 +5496,26 @@ array field represents return values. The userspace should update the return values of SBI call before resuming the VCPU. For more details on RISC-V SBI spec refer, https://github.com/riscv/riscv-sbi-doc. +:: + + /* KVM_EXIT_NOTIFY */ + struct { + #define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) + __u32 flags; + } notify; + +Used on x86 systems. When the VM capability KVM_CAP_X86_NOTIFY_VMEXIT is +enabled, a VM exit generated if no event window occurs in VM non-root mode +for a specified amount of time. Once KVM_X86_NOTIFY_VMEXIT_USER is set when +enabling the cap, it would exit to userspace with the exit reason +KVM_EXIT_NOTIFY for further handling. The "flags" field contains more +detailed info. + +The valid value for 'flags' is: + + - KVM_NOTIFY_CONTEXT_INVALID -- the VM context is corrupted and not valid + in VMCS. It would run into unknown result if resume the target VM. + :: /* Fix the size of the union. 
*/ @@ -6288,6 +6308,35 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons. KVM_RUN_BUS_LOCK flag is used to distinguish between them. +7.25 KVM_CAP_X86_NOTIFY_VMEXIT +------------------------------ + +:Architectures: x86 +:Target: VM +:Parameters: args[0] is the value of notify window as well as some flags +:Returns: 0 on success, -EINVAL if args[0] contains invalid flags or notify + VM exit is unsupported. + +Bits 63:32 of args[0] are used for notify window. +Bits 31:0 of args[0] are for some flags. Valid bits are:: + + #define KVM_X86_NOTIFY_VMEXIT_ENABLED (1 << 0) + #define KVM_X86_NOTIFY_VMEXIT_USER (1 << 1) + +This capability allows userspace to configure the notify VM exit on/off +in per-VM scope during VM creation. Notify VM exit is disabled by default. +When userspace sets KVM_X86_NOTIFY_VMEXIT_ENABLED bit in args[0], VMM will +enable this feature with the notify window provided, which will generate +a VM exit if no event window occurs in VM non-root mode for a specified of +time (notify window). + +If KVM_X86_NOTIFY_VMEXIT_USER is set in args[0], upon notify VM exits happen, +KVM would exit to userspace for handling. + +This capability is aimed to mitigate the threat that malicious VMs can +cause CPU stuck (due to event windows don't open up) and make the CPU +unavailable to host or other VMs. + 8. Other capabilities. 
====================== diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 90476381551b..701f9603e408 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -55,6 +55,9 @@ #define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ KVM_BUS_LOCK_DETECTION_EXIT) +#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \ + KVM_X86_NOTIFY_VMEXIT_USER) + /* x86-specific vcpu->requests bit members */ #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) @@ -1001,6 +1004,9 @@ struct kvm_arch { bool bus_lock_detection_enabled; + u32 notify_window; + u32 notify_vmexit_flags; + /* Guest can access the SGX PROVISIONKEY. */ bool sgx_provisioning_allowed; @@ -1093,6 +1099,7 @@ struct kvm_vcpu_stat { u64 preemption_reported; u64 preemption_other; u64 preemption_timer_exits; + u64 notify_window_exits; }; struct x86_instruction_info; @@ -1443,6 +1450,8 @@ extern u64 kvm_max_tsc_scaling_ratio; extern u64 kvm_default_tsc_scaling_ratio; /* bus lock detection supported? */ extern bool kvm_has_bus_lock_exit; +/* notify vmexit supported? */ +extern bool kvm_has_notify_vmexit; extern u64 kvm_mce_cap_supported; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 0cbb154b37b0..331fbedb7cde 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -76,6 +76,7 @@ #define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) #define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) #define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION) +#define SECONDARY_EXEC_NOTIFY_VM_EXITING VMCS_CONTROL_BIT(NOTIFY_VM_EXITING) /* * Definitions of Tertiary Processor-Based VM-Execution Controls. 
@@ -280,6 +281,7 @@ enum vmcs_field { SECONDARY_VM_EXEC_CONTROL = 0x0000401e, PLE_GAP = 0x00004020, PLE_WINDOW = 0x00004022, + NOTIFY_WINDOW = 0x00004024, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, @@ -566,6 +568,11 @@ enum vm_entry_failure_code { #define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) #define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT) +/* + * Exit Qualifications for NOTIFY VM EXIT + */ +#define NOTIFY_VM_CONTEXT_INVALID BIT(0) + /* * VM-instruction error numbers */ diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index c1377e48c6bf..9d9da79cf930 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -86,6 +86,7 @@ #define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */ #define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ #define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */ +#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */ /* Tertiary Processor-Based VM-Execution Controls, word 3 */ #define VMX_TERTIARY_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index b3271a230331..73dac304d949 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -90,6 +90,7 @@ #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 +#define EXIT_REASON_NOTIFY 75 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -151,7 +152,8 @@ { EXIT_REASON_XRSTORS, "XRSTORS" }, \ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ { EXIT_REASON_TPAUSE, "TPAUSE" }, \ - { EXIT_REASON_BUS_LOCK, "BUS_LOCK" } + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ + { EXIT_REASON_NOTIFY, "NOTIFY" } #define 
VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 8a6ac0f6a102..c1120b570cc8 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -417,4 +417,10 @@ static inline u64 vmx_supported_debugctl(void) return debugctl; } +static inline bool cpu_has_notify_vmexit(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_NOTIFY_VM_EXITING; +} + #endif /* __KVM_X86_VMX_CAPS_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8fb6f61c7ad3..c6c0ff25acdb 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2158,6 +2158,8 @@ static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) { + struct kvm *kvm = vmx->vcpu.kvm; + /* * If vmcs02 hasn't been initialized, set the constant vmcs02 state * according to L0's settings (vmcs12 is irrelevant here). Host @@ -2202,6 +2204,9 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) if (cpu_has_vmx_encls_vmexit()) vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); + if (kvm_notify_vmexit_enabled(kvm)) + vmcs_write32(NOTIFY_WINDOW, kvm->arch.notify_window); + /* * Set the MSR load/store lists to match L0's settings. 
Only the * addresses are constant (for vmcs02), the counts can change based @@ -5988,6 +5993,9 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); case EXIT_REASON_ENCLS: return nested_vmx_exit_handled_encls(vcpu, vmcs12); + case EXIT_REASON_NOTIFY: + /* Notify VM exit is not exposed to L1 */ + return false; default: return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2c79f617e296..d0c41d365f1e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2644,7 +2644,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_BUS_LOCK_DETECTION; + SECONDARY_EXEC_BUS_LOCK_DETECTION | + SECONDARY_EXEC_NOTIFY_VM_EXITING; if (cpu_has_sgx()) opt2 |= SECONDARY_EXEC_ENCLS_EXITING; if (adjust_vmx_controls(min2, opt2, @@ -4522,6 +4523,9 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (!vcpu->kvm->arch.bus_lock_detection_enabled) exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION; + if (!kvm_notify_vmexit_enabled(vcpu->kvm)) + exec_control &= ~SECONDARY_EXEC_NOTIFY_VM_EXITING; + vmx->secondary_exec_control = exec_control; } @@ -4618,6 +4622,9 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmx->ple_window_dirty = true; } + if (kvm_notify_vmexit_enabled(vmx->vcpu.kvm)) + vmcs_write32(NOTIFY_WINDOW, vmx->vcpu.kvm->arch.notify_window); + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ @@ -5958,6 +5965,32 @@ static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu) return 1; } +static int handle_notify(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qual = vmx_get_exit_qual(vcpu); + bool context_invalid = exit_qual & NOTIFY_VM_CONTEXT_INVALID; + + ++vcpu->stat.notify_window_exits; + + /* + * Notify VM exit happened while executing iret from NMI, + * "blocked by 
NMI" bit has to be set before next VM entry. + */ + if (enable_vnmi && (exit_qual & INTR_INFO_UNBLOCK_NMI)) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + + if (vcpu->kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_USER || + context_invalid) { + vcpu->run->exit_reason = KVM_EXIT_NOTIFY; + vcpu->run->notify.flags = context_invalid ? + KVM_NOTIFY_CONTEXT_INVALID : 0; + return 0; + } + + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -6015,6 +6048,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, [EXIT_REASON_ENCLS] = handle_encls, [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, + [EXIT_REASON_NOTIFY] = handle_notify, }; static const int kvm_vmx_max_exit_handlers = @@ -6350,7 +6384,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) exit_reason.basic != EXIT_REASON_EPT_VIOLATION && exit_reason.basic != EXIT_REASON_PML_FULL && exit_reason.basic != EXIT_REASON_APIC_ACCESS && - exit_reason.basic != EXIT_REASON_TASK_SWITCH)) { + exit_reason.basic != EXIT_REASON_TASK_SWITCH && + exit_reason.basic != EXIT_REASON_NOTIFY)) { int ndata = 3; vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -8237,6 +8272,7 @@ static __init int hardware_setup(void) } kvm_has_bus_lock_exit = cpu_has_vmx_bus_lock_detection(); + kvm_has_notify_vmexit = cpu_has_notify_vmexit(); set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5e7cccb0be6d..ba29dee61e9d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -140,6 +140,8 @@ u64 __read_mostly kvm_default_tsc_scaling_ratio; EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); bool __read_mostly kvm_has_bus_lock_exit; EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit); +bool __read_mostly kvm_has_notify_vmexit; 
+EXPORT_SYMBOL_GPL(kvm_has_notify_vmexit); /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ static u32 __read_mostly tsc_tolerance_ppm = 250; @@ -237,6 +239,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), VCPU_STAT("preemption_reported", preemption_reported), VCPU_STAT("preemption_other", preemption_other), + VCPU_STAT("notify_window_exits", notify_window_exits), VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped), VM_STAT("mmu_pte_write", mmu_pte_write), VM_STAT("mmu_pde_zapped", mmu_pde_zapped), @@ -3928,6 +3931,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = 0; break; + case KVM_CAP_X86_NOTIFY_VMEXIT: + r = kvm_has_notify_vmexit; + break; default: break; } @@ -5488,6 +5494,22 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.bus_lock_detection_enabled = true; r = 0; break; + case KVM_CAP_X86_NOTIFY_VMEXIT: + r = -EINVAL; + if ((u32)cap->args[0] & ~KVM_X86_NOTIFY_VMEXIT_VALID_BITS) + break; + if (!kvm_has_notify_vmexit) + break; + if (!((u32)cap->args[0] & KVM_X86_NOTIFY_VMEXIT_ENABLED)) + break; + mutex_lock(&kvm->lock); + if (!kvm->created_vcpus) { + kvm->arch.notify_window = cap->args[0] >> 32; + kvm->arch.notify_vmexit_flags = (u32)cap->args[0]; + r = 0; + } + mutex_unlock(&kvm->lock); + break; default: r = -EINVAL; break; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 81ea01462175..20a1f446acdf 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -340,6 +340,11 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm) DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); +static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm) +{ + return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED; +} + static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) { __this_cpu_write(current_vcpu, vcpu); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cab5552b27dc..e1c3ef841eb8 100644 --- 
a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -252,6 +252,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_RISCV_SBI 31 #define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_NOTIFY 37 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -435,6 +436,11 @@ struct kvm_run { unsigned long args[6]; unsigned long ret[2]; } riscv_sbi; + /* KVM_EXIT_NOTIFY */ + struct { +#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) + __u32 flags; + } notify; /* Fix the size of the union. */ char padding[256]; }; @@ -1065,6 +1071,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 +#define KVM_CAP_X86_NOTIFY_VMEXIT 219 #define KVM_CAP_ARM_CPU_FEATURE 555 @@ -1742,5 +1749,8 @@ struct kvm_hyperv_eventfd { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */ +#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1ULL << 0) +#define KVM_X86_NOTIFY_VMEXIT_USER (1ULL << 1) #endif /* __LINUX_KVM_H */ -- Gitee From 715d107a28f9786b99e85b59421a8dce6ca611c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Jun 2022 12:16:53 -0700 Subject: [PATCH 12/17] KVM: Fix references to non-existent KVM_CAP_TRIPLE_FAULT_EVENT mainline inclusion from mainline-v6.0-rc1 commit 8deb03e75f6048b33b80025b6475c92975670c5b category: feature feature: Notify VM exit bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5PAJ5 CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=8deb03e75f6048b33b80025b6475c92975670c5b Intel-SIG: commit 8deb03e75f60 ("KVM: Fix references to non-existent KVM_CAP_TRIPLE_FAULT_EVENT") ------------------------------------- KVM: Fix references to non-existent KVM_CAP_TRIPLE_FAULT_EVENT The x86-only KVM_CAP_TRIPLE_FAULT_EVENT was (appropriately) renamed to KVM_CAP_X86_TRIPLE_FAULT_EVENT when the patches were 
applied, but the docs and selftests got left behind. Fix them. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- Documentation/virt/kvm/api.rst | 4 ++-- .../testing/selftests/kvm/x86_64/triple_fault_event_test.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index ad0e225f6971..a1575e9532ec 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1086,7 +1086,7 @@ The following bits are defined in the flags field: - KVM_VCPUEVENT_VALID_TRIPLE_FAULT may be set to signal that the triple_fault_pending field contains a valid state. This bit will - be set whenever KVM_CAP_TRIPLE_FAULT_EVENT is enabled. + be set whenever KVM_CAP_X86_TRIPLE_FAULT_EVENT is enabled. ARM/ARM64: ^^^^^^^^^^ @@ -1183,7 +1183,7 @@ can be set in the flags field to signal that the exception_has_payload, exception_payload, and exception.pending fields contain a valid state and shall be written into the VCPU. -If KVM_CAP_TRIPLE_FAULT_EVENT is enabled, KVM_VCPUEVENT_VALID_TRIPLE_FAULT +If KVM_CAP_X86_TRIPLE_FAULT_EVENT is enabled, KVM_VCPUEVENT_VALID_TRIPLE_FAULT can be set in flags field to signal that the triple_fault field contains a valid state and shall be written into the VCPU. 
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c index 6e1de0631ce9..66378140764d 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c @@ -47,7 +47,7 @@ int main(void) struct ucall uc; struct kvm_enable_cap cap = { - .cap = KVM_CAP_TRIPLE_FAULT_EVENT, + .cap = KVM_CAP_X86_TRIPLE_FAULT_EVENT, .args = {1} }; @@ -56,8 +56,8 @@ int main(void) exit(KSFT_SKIP); } - if (!kvm_check_cap(KVM_CAP_TRIPLE_FAULT_EVENT)) { - print_skip("KVM_CAP_TRIPLE_FAULT_EVENT not supported"); + if (!kvm_check_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)) { + print_skip("KVM_CAP_X86_TRIPLE_FAULT_EVENT not supported"); exit(KSFT_SKIP); } -- Gitee From fe26a9fca81f9c7290d2490ac170842e723bb327 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 27 Nov 2020 12:46:36 -0500 Subject: [PATCH 13/17] KVM: nSVM: set fixed bits by hand mainline inclusion from mainline-v5.11-rc1 commit 8cce12b3c82717df72afb955ce74c769b0eb2b4f category: feature feature: KVM bus lock debug exception bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RHW7 CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=8cce12b3c82717df72afb955ce74c769b0eb2b4f Intel-SIG: commit 8cce12b3c827 ("KVM: nSVM: set fixed bits by hand") ------------------------------------- KVM: nSVM: set fixed bits by hand SVM generally ignores fixed-1 bits. Set them manually so that we do not end up by mistake without those bits set in struct kvm_vcpu; it is part of userspace API that KVM always returns value with the bits set. 
Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/kvm/svm/nested.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index e7feaa7910ab..a0dd84f4fa07 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -405,7 +405,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.ds = vmcb12->save.ds; svm->vmcb->save.gdtr = vmcb12->save.gdtr; svm->vmcb->save.idtr = vmcb12->save.idtr; - kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags); + kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); /* * Force-set EFER_SVME even though it is checked earlier on the @@ -425,8 +425,8 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.rax = vmcb12->save.rax; svm->vmcb->save.rsp = vmcb12->save.rsp; svm->vmcb->save.rip = vmcb12->save.rip; - svm->vmcb->save.dr7 = vmcb12->save.dr7; - svm->vcpu.arch.dr6 = vmcb12->save.dr6; + svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1; + svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM; svm->vmcb->save.cpl = vmcb12->save.cpl; } @@ -699,14 +699,14 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.ds = hsave->save.ds; svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; - kvm_set_rflags(&svm->vcpu, hsave->save.rflags); + kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); kvm_rax_write(&svm->vcpu, hsave->save.rax); kvm_rsp_write(&svm->vcpu, hsave->save.rsp); kvm_rip_write(&svm->vcpu, hsave->save.rip); - svm->vmcb->save.dr7 = 0; + svm->vmcb->save.dr7 = DR7_FIXED_1; svm->vmcb->save.cpl = 0; svm->vmcb->control.exit_int_info = 0; -- Gitee From 232e552266507fdd4fe6ef3a7cdd19e53ec3d61a Mon Sep 17 00:00:00 2001 From: Chenyi Qiang 
Date: Tue, 2 Feb 2021 17:04:31 +0800 Subject: [PATCH 14/17] KVM: X86: Rename DR6_INIT to DR6_ACTIVE_LOW mainline inclusion from mainline-v5.12-rc1 commit 9a3ecd5e2aa10af18d0d5a055122d6cc0b0944c7 category: feature feature: KVM Bus Lock Debug Exception bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RHW7 CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=9a3ecd5e2aa10af18d0d5a055122d6cc0b0944c7 Intel-SIG: commit 9a3ecd5e2aa1 ("KVM: X86: Rename DR6_INIT to DR6_ACTIVE_LOW") ------------------------------------- KVM: X86: Rename DR6_INIT to DR6_ACTIVE_LOW DR6_INIT contains the 1-reserved bits as well as the bit that is cleared to 0 when the condition (e.g. RTM) happens. The value can be used to initialize dr6 and also as the XOR mask between the #DB exit qualification (or payload) and DR6. Since DR6_INIT is used as an initial value only once, rename it to DR6_ACTIVE_LOW and apply it in other places, which makes the upcoming changes for the bus lock debug exception simpler. Signed-off-by: Chenyi Qiang Message-Id: <20210202090433.13441-2-chenyi.qiang@intel.com> [Define DR6_FIXED_1 from DR6_ACTIVE_LOW and DR6_VOLATILE. 
- Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/include/asm/kvm_host.h | 12 ++++++++++-- arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/svm/nested.c | 2 +- arch/x86/kvm/svm/svm.c | 6 +++--- arch/x86/kvm/vmx/nested.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 33 +++++++++++++++++++-------------- 7 files changed, 38 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 701f9603e408..7c49d7336e0f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -206,9 +206,17 @@ enum x86_intercept_stage; #define DR6_BS (1 << 14) #define DR6_BT (1 << 15) #define DR6_RTM (1 << 16) -#define DR6_FIXED_1 0xfffe0ff0 -#define DR6_INIT 0xffff0ff0 +/* + * DR6_ACTIVE_LOW combines fixed-1 and active-low bits. + * We can regard all the bits in DR6_FIXED_1 as active_low bits; + * they will never be 0 for now, but when they are defined + * in the future it will require no code change. + * + * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. 
+ */ +#define DR6_ACTIVE_LOW 0xffff0ff0 #define DR6_VOLATILE 0x0001e00f +#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 737035f16a9e..fb2878c58a00 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4276,7 +4276,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_dr(ctxt, 6, &dr6); dr6 &= ~DR_TRAP_BITS; - dr6 |= DR6_BD | DR6_RTM; + dr6 |= DR6_BD | DR6_ACTIVE_LOW; ctxt->ops->set_dr(ctxt, 6, dr6); return emulate_db(ctxt); } diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a0dd84f4fa07..0c164ca686c7 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -426,7 +426,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.rsp = vmcb12->save.rsp; svm->vmcb->save.rip = vmcb12->save.rip; svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1; - svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM; + svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW; svm->vmcb->save.cpl = vmcb12->save.cpl; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5231f40e8312..e2677e38ea42 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1788,7 +1788,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) get_debugreg(vcpu->arch.db[2], 2); get_debugreg(vcpu->arch.db[3], 3); /* - * We cannot reset svm->vmcb->save.dr6 to DR6_FIXED_1|DR6_RTM here, + * We cannot reset svm->vmcb->save.dr6 to DR6_ACTIVE_LOW here, * because db_interception might need it. We can do it before vmentry. 
*/ vcpu->arch.dr6 = svm->vmcb->save.dr6; @@ -1836,7 +1836,7 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && !svm->nmi_singlestep) { - u32 payload = (svm->vmcb->save.dr6 ^ DR6_RTM) & ~DR6_FIXED_1; + u32 payload = svm->vmcb->save.dr6 ^ DR6_ACTIVE_LOW; kvm_queue_exception_p(&svm->vcpu, DB_VECTOR, payload); return 1; } @@ -3595,7 +3595,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) svm_set_dr6(svm, vcpu->arch.dr6); else - svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM); + svm_set_dr6(svm, DR6_ACTIVE_LOW); clgi(); kvm_load_guest_xsave_state(vcpu); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index c6c0ff25acdb..6accb687d062 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -413,8 +413,8 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit if (nr == DB_VECTOR) { if (!has_payload) { payload = vcpu->arch.dr6; - payload &= ~(DR6_FIXED_1 | DR6_BT); - payload ^= DR6_RTM; + payload &= ~DR6_BT; + payload ^= DR6_ACTIVE_LOW; } *exit_qual = payload; } else diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d0c41d365f1e..c90ae14015ed 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5167,7 +5167,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; } - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW; kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); fallthrough; case BP_VECTOR: @@ -5412,7 +5412,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) * guest debugging itself. 
*/ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { - vcpu->run->debug.arch.dr6 = DR6_BD | DR6_RTM | DR6_FIXED_1; + vcpu->run->debug.arch.dr6 = DR6_BD | DR6_ACTIVE_LOW; vcpu->run->debug.arch.dr7 = dr7; vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); vcpu->run->debug.arch.exception = DB_VECTOR; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ba29dee61e9d..e2aae96d66b4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -552,19 +552,24 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) */ vcpu->arch.dr6 &= ~DR_TRAP_BITS; /* - * DR6.RTM is set by all #DB exceptions that don't clear it. + * In order to reflect the #DB exception payload in guest + * dr6, three components need to be considered: active low + * bit, FIXED_1 bits and active high bits (e.g. DR6_BD, + * DR6_BS and DR6_BT) + * DR6_ACTIVE_LOW contains the FIXED_1 and active low bits. + * In the target guest dr6: + * FIXED_1 bits should always be set. + * Active low bits should be cleared if 1-setting in payload. + * Active high bits should be set if 1-setting in payload. + * + * Note, the payload is compatible with the pending debug + * exceptions/exit qualification under VMX, that active_low bits + * are active high in payload. + * So they need to be flipped for DR6. */ - vcpu->arch.dr6 |= DR6_RTM; + vcpu->arch.dr6 |= DR6_ACTIVE_LOW; vcpu->arch.dr6 |= payload; - /* - * Bit 16 should be set in the payload whenever the #DB - * exception should clear DR6.RTM. This makes the payload - * compatible with the pending debug exceptions under VMX. - * Though not currently documented in the SDM, this also - * makes the payload compatible with the exit qualification - * for #DB exceptions under VMX. 
- */ - vcpu->arch.dr6 ^= payload & DR6_RTM; + vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW; /* * The #DB payload is defined as compatible with the 'pending @@ -7443,7 +7448,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.dr6 = DR6_BS | DR6_ACTIVE_LOW; kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu); kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; @@ -7487,7 +7492,7 @@ static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.eff_db); if (dr6 != 0) { - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW; kvm_run->debug.arch.pc = eip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; @@ -10373,7 +10378,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); - vcpu->arch.dr6 = DR6_INIT; + vcpu->arch.dr6 = DR6_ACTIVE_LOW; vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); -- Gitee From f5e2ac5e4e48c689105af5a991fdb34acbb10d5b Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Tue, 2 Feb 2021 17:04:32 +0800 Subject: [PATCH 15/17] KVM: X86: Add support for the emulation of DR6_BUS_LOCK bit mainline inclusion from mainline-v5.13-rc2 commit e8ea85fb280ec55674bca88ea7cd85f60d19567f category: feature feature: KVM Bus Lock Debug Exception bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RHW7 CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=e8ea85fb280ec55674bca88ea7cd85f60d19567f Intel-SIG: commit e8ea85fb280e ("KVM: X86: Add support for the emulation of DR6_BUS_LOCK bit") ------------------------------------- KVM: X86: Add support for the emulation of DR6_BUS_LOCK bit Bus lock debug exception introduces a new 
bit DR6_BUS_LOCK (bit 11 of DR6) to indicate that a bus lock #DB exception is generated. The set/clear behavior of DR6_BUS_LOCK is similar to that of DR6_RTM. The processor clears DR6_BUS_LOCK when the exception is generated. For all other #DB, the processor sets this bit to 1. The software #DB handler should set this bit before returning to the interrupted task. In the VMM, to avoid breaking CPUs without bus lock #DB exception support, activate DR6_BUS_LOCK conditionally in the DR6_FIXED_1 bits. When intercepting the #DB exception caused by bus locks, bit 11 of the exit qualification is set to identify it. The VMM should emulate the exception by clearing bit 11 of the guest DR6. Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Chenyi Qiang Message-Id: <20210202090433.13441-3-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/x86.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7c49d7336e0f..1852953fea38 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -202,6 +202,7 @@ enum x86_intercept_stage; #define KVM_NR_DB_REGS 4 +#define DR6_BUS_LOCK (1 << 11) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) #define DR6_BT (1 << 15) @@ -215,7 +216,7 @@ enum x86_intercept_stage; * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. 
*/ #define DR6_ACTIVE_LOW 0xffff0ff0 -#define DR6_VOLATILE 0x0001e00f +#define DR6_VOLATILE 0x0001e80f #define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) #define DR7_BP_EN_MASK 0x000000ff diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e2aae96d66b4..757891cd820c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1181,6 +1181,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) fixed |= DR6_RTM; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + fixed |= DR6_BUS_LOCK; return fixed; } -- Gitee From b9ddddea0a9e39e5729e35cea518d82c2827e3dc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 May 2021 06:30:04 -0400 Subject: [PATCH 16/17] KVM: X86: Expose bus lock debug exception to guest mainline inclusion from mainline-v5.13-rc2 commit 76ea438b4afcd9ee8da3387e9af4625eaccff58f category: feature feature: KVM Bus Lock Debug Exception bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RHW7 CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=76ea438b4afcd9ee8da3387e9af4625eaccff58f Intel-SIG: commit 76ea438b4afc ("KVM: X86: Expose bus lock debug exception to guest") ------------------------------------- KVM: X86: Expose bus lock debug exception to guest The bus lock debug exception is a capability that notifies the kernel via a #DB trap after an instruction that acquires a bus lock is executed at CPL>0. This allows the kernel to enforce user application throttling or mitigations. Existence of the bus lock debug exception is enumerated via CPUID.(EAX=7,ECX=0).ECX[24]. Software can enable these exceptions by setting bit 2 of the MSR_IA32_DEBUGCTL. Expose the CPUID bit to the guest and emulate the MSR handling when the guest enables it. Support for this feature was originally developed by Xiaoyao Li and Chenyi Qiang, but code has since changed enough that this patch has nothing in common with theirs, except for this commit message. 
Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Chenyi Qiang Message-Id: <20210202090433.13441-4-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Aichun Shi --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx/capabilities.h | 3 +++ arch/x86/kvm/vmx/vmx.c | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 29a463658e67..4638672db180 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -441,7 +441,7 @@ void kvm_set_cpu_caps(void) F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ | - F(SGX_LC) + F(SGX_LC) | F(BUS_LOCK_DETECT) ); /* Set LA57 based on hardware capability. */ if (cpuid_ecx(7) & F(LA57)) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index c1120b570cc8..49271b8e6871 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -411,6 +411,9 @@ static inline u64 vmx_supported_debugctl(void) { u64 debugctl = 0; + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; + if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) debugctl |= DEBUGCTLMSR_LBR_MASK; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c90ae14015ed..131f2a1e3e3d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2094,6 +2094,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) if (!intel_pmu_lbr_is_enabled(vcpu)) debugctl &= ~DEBUGCTLMSR_LBR_MASK; + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + return debugctl; } -- Gitee From 4f38b7f4a8eac5f6e5f29e9a162303d43a95c49c Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Tue, 2 Aug 2022 11:32:06 +0800 Subject: [PATCH 17/17] x86/bus_lock: Don't assume the init value of DEBUGCTLMSR.BUS_LOCK_DETECT to be zero mainline 
inclusion from mainline-v6.0-rc1 commit ffa6482e461ff550325356ae705b79e256702ea9 category: feature feature: KVM Bus Lock Debug Exception bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5RHW7 CVE: N/A Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=ffa6482e461ff550325356ae705b79e256702ea9 Intel-SIG: commit ffa6482e461f ("x86/bus_lock: Don't assume the init value of DEBUGCTLMSR.BUS_LOCK_DETECT to be zero") ------------------------------------- x86/bus_lock: Don't assume the init value of DEBUGCTLMSR.BUS_LOCK_DETECT to be zero It's possible that this kernel has been kexec'd from a kernel that enabled bus lock detection, or (hypothetically) BIOS/firmware has set DEBUGCTLMSR_BUS_LOCK_DETECT. Disable bus lock detection explicitly if not wanted. Fixes: ebb1064e7c2e ("x86/traps: Handle #DB for bus lock") Signed-off-by: Chenyi Qiang Signed-off-by: Ingo Molnar Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/20220802033206.21333-1-chenyi.qiang@intel.com Signed-off-by: Aichun Shi --- arch/x86/kernel/cpu/intel.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e4cc9d901c51..5c5a2c6d5d4d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1150,22 +1150,23 @@ static void bus_lock_init(void) { u64 val; - /* - * Warn and fatal are handled by #AC for split lock if #AC for - * split lock is supported. - */ - if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) || - (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && - (sld_state == sld_warn || sld_state == sld_fatal)) || - sld_state == sld_off) + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) return; - /* - * Enable #DB for bus lock. All bus locks are handled in #DB except - * split locks are handled in #AC in the fatal case. 
- */ rdmsrl(MSR_IA32_DEBUGCTLMSR, val); - val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + + if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + (sld_state == sld_warn || sld_state == sld_fatal)) || + sld_state == sld_off) { + /* + * Warn and fatal are handled by #AC for split lock if #AC for + * split lock is supported. + */ + val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + } else { + val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + } + wrmsrl(MSR_IA32_DEBUGCTLMSR, val); } -- Gitee