diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 19438a5c942b64b9d157deff213998b243e54846..a1575e9532ecbe7f65c9095f00bbb68d19a77b42 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1084,6 +1084,10 @@ The following bits are defined in the flags field: fields contain a valid state. This bit will be set whenever KVM_CAP_EXCEPTION_PAYLOAD is enabled. +- KVM_VCPUEVENT_VALID_TRIPLE_FAULT may be set to signal that the + triple_fault_pending field contains a valid state. This bit will + be set whenever KVM_CAP_X86_TRIPLE_FAULT_EVENT is enabled. + ARM/ARM64: ^^^^^^^^^^ @@ -1179,6 +1183,10 @@ can be set in the flags field to signal that the exception_has_payload, exception_payload, and exception.pending fields contain a valid state and shall be written into the VCPU. +If KVM_CAP_X86_TRIPLE_FAULT_EVENT is enabled, KVM_VCPUEVENT_VALID_TRIPLE_FAULT +can be set in flags field to signal that the triple_fault field contains +a valid state and shall be written into the VCPU. + ARM/ARM64: ^^^^^^^^^^ @@ -5033,9 +5041,11 @@ local APIC is not used. __u16 flags; More architecture-specific flags detailing state of the VCPU that may -affect the device's behavior. The only currently defined flag is -KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the -VCPU is in system management mode. +affect the device's behavior. Current defined flags: + /* x86, set if the VCPU is in system management mode */ + #define KVM_RUN_X86_SMM (1 << 0) + /* x86, set if bus lock detected in VM */ + #define KVM_RUN_BUS_LOCK (1 << 1) :: @@ -5486,6 +5496,26 @@ array field represents return values. The userspace should update the return values of SBI call before resuming the VCPU. For more details on RISC-V SBI spec refer, https://github.com/riscv/riscv-sbi-doc. +:: + + /* KVM_EXIT_NOTIFY */ + struct { + #define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) + __u32 flags; + } notify; + +Used on x86 systems. When the VM capability KVM_CAP_X86_NOTIFY_VMEXIT is +enabled, a VM exit generated if no event window occurs in VM non-root mode +for a specified amount of time. Once KVM_X86_NOTIFY_VMEXIT_USER is set when +enabling the cap, it would exit to userspace with the exit reason +KVM_EXIT_NOTIFY for further handling. The "flags" field contains more +detailed info. + +The valid value for 'flags' is: + + - KVM_NOTIFY_CONTEXT_INVALID -- the VM context is corrupted and not valid + in VMCS. It would run into unknown result if resume the target VM. + :: /* Fix the size of the union. */ @@ -6197,7 +6227,7 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space can then handle to implement model specific MSR handling and/or user notifications to inform a user that an MSR was not handled. -7.25 KVM_CAP_SGX_ATTRIBUTE +7.22 KVM_CAP_SGX_ATTRIBUTE ---------------------- :Architectures: x86 @@ -6241,6 +6271,72 @@ if the value was set to zero or KVM_ENABLE_CAP was not invoked, KVM uses the return value of KVM_CHECK_EXTENSION(KVM_CAP_MAX_VCPU_ID) as the maximum APIC ID. +7.24 KVM_CAP_X86_BUS_LOCK_EXIT +------------------------------- + +:Architectures: x86 +:Target: VM +:Parameters: args[0] defines the policy used when bus locks detected in guest +:Returns: 0 on success, -EINVAL when args[0] contains invalid bits + +Valid bits in args[0] are:: + + #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) + #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + +Enabling this capability on a VM provides userspace with a way to select +a policy to handle the bus locks detected in guest. Userspace can obtain +the supported modes from the result of KVM_CHECK_EXTENSION and define it +through the KVM_ENABLE_CAP. + +KVM_BUS_LOCK_DETECTION_OFF and KVM_BUS_LOCK_DETECTION_EXIT are supported +currently and mutually exclusive with each other. More bits can be added in +the future. + +With KVM_BUS_LOCK_DETECTION_OFF set, bus locks in guest will not cause vm exits +so that no additional actions are needed. This is the default mode. + +With KVM_BUS_LOCK_DETECTION_EXIT set, vm exits happen when bus lock detected +in VM. KVM just exits to userspace when handling them. Userspace can enforce +its own throttling or other policy based mitigations. + +This capability is aimed to address the thread that VM can exploit bus locks to +degree the performance of the whole system. Once the userspace enable this +capability and select the KVM_BUS_LOCK_DETECTION_EXIT mode, KVM will set the +KVM_RUN_BUS_LOCK flag in vcpu-run->flags field and exit to userspace. Concerning +the bus lock vm exit can be preempted by a higher priority VM exit, the exit +notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons. +KVM_RUN_BUS_LOCK flag is used to distinguish between them. + +7.25 KVM_CAP_X86_NOTIFY_VMEXIT +------------------------------ + +:Architectures: x86 +:Target: VM +:Parameters: args[0] is the value of notify window as well as some flags +:Returns: 0 on success, -EINVAL if args[0] contains invalid flags or notify + VM exit is unsupported. + +Bits 63:32 of args[0] are used for notify window. +Bits 31:0 of args[0] are for some flags. Valid bits are:: + + #define KVM_X86_NOTIFY_VMEXIT_ENABLED (1 << 0) + #define KVM_X86_NOTIFY_VMEXIT_USER (1 << 1) + +This capability allows userspace to configure the notify VM exit on/off +in per-VM scope during VM creation. Notify VM exit is disabled by default. +When userspace sets KVM_X86_NOTIFY_VMEXIT_ENABLED bit in args[0], VMM will +enable this feature with the notify window provided, which will generate +a VM exit if no event window occurs in VM non-root mode for a specified of +time (notify window). + +If KVM_X86_NOTIFY_VMEXIT_USER is set in args[0], upon notify VM exits happen, +KVM would exit to userspace for handling. + +This capability is aimed to mitigate the threat that malicious VMs can +cause CPU stuck (due to event windows don't open up) and make the CPU +unavailable to host or other VMs. + 8. Other capabilities. ====================== diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b2e3a2b812ed863dd81414d56ee58d9898092c8f..1852953fea3837ee6bebfae943ed383af9ebf9cb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -52,6 +52,12 @@ #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) +#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ + KVM_BUS_LOCK_DETECTION_EXIT) + +#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \ + KVM_X86_NOTIFY_VMEXIT_USER) + /* x86-specific vcpu->requests bit members */ #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) @@ -196,13 +202,22 @@ enum x86_intercept_stage; #define KVM_NR_DB_REGS 4 +#define DR6_BUS_LOCK (1 << 11) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) #define DR6_BT (1 << 15) #define DR6_RTM (1 << 16) -#define DR6_FIXED_1 0xfffe0ff0 -#define DR6_INIT 0xffff0ff0 -#define DR6_VOLATILE 0x0001e00f +/* + * DR6_ACTIVE_LOW combines fixed-1 and active-low bits. + * We can regard all the bits in DR6_FIXED_1 as active_low bits; + * they will never be 0 for now, but when they are defined + * in the future it will require no code change. + * + * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. + */ +#define DR6_ACTIVE_LOW 0xffff0ff0 +#define DR6_VOLATILE 0x0001e80f +#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) @@ -994,8 +1009,13 @@ struct kvm_arch { bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + bool triple_fault_event; + bool bus_lock_detection_enabled; + u32 notify_window; + u32 notify_vmexit_flags; + /* Guest can access the SGX PROVISIONKEY. */ bool sgx_provisioning_allowed; @@ -1088,6 +1108,7 @@ struct kvm_vcpu_stat { u64 preemption_reported; u64 preemption_other; u64 preemption_timer_exits; + u64 notify_window_exits; }; struct x86_instruction_info; @@ -1436,6 +1457,10 @@ extern u8 kvm_tsc_scaling_ratio_frac_bits; extern u64 kvm_max_tsc_scaling_ratio; /* 1ull << kvm_tsc_scaling_ratio_frac_bits */ extern u64 kvm_default_tsc_scaling_ratio; +/* bus lock detection supported? */ +extern bool kvm_has_bus_lock_exit; +/* notify vmexit supported? */ +extern bool kvm_has_notify_vmexit; extern u64 kvm_mce_cap_supported; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index ecae8a49e6009bf68a66f7b98c7e423df5fd86c2..331fbedb7cde0111487c16a6780523f3cf3035c5 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -75,6 +75,8 @@ #define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) #define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) #define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) +#define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION) +#define SECONDARY_EXEC_NOTIFY_VM_EXITING VMCS_CONTROL_BIT(NOTIFY_VM_EXITING) /* * Definitions of Tertiary Processor-Based VM-Execution Controls. @@ -279,6 +281,7 @@ enum vmcs_field { SECONDARY_VM_EXEC_CONTROL = 0x0000401e, PLE_GAP = 0x00004020, PLE_WINDOW = 0x00004022, + NOTIFY_WINDOW = 0x00004024, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, @@ -565,6 +568,11 @@ enum vm_entry_failure_code { #define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) #define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT) +/* + * Exit Qualifications for NOTIFY VM EXIT + */ +#define NOTIFY_VM_CONTEXT_INVALID BIT(0) + /* * VM-instruction error numbers */ diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index f26dea5769d8b50bc3b2d2e55b33e8e495767238..9d9da79cf930fcac3f1f0e03e54b56028940346c 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -85,6 +85,8 @@ #define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ #define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */ #define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ +#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */ +#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */ /* Tertiary Processor-Based VM-Execution Controls, word 3 */ #define VMX_TERTIARY_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba86b8bbb0e7a80cbed2d1373d093af..0e0694a0a4f4b06822a4ad956a51b20d913d3713 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -111,6 +111,7 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -309,6 +310,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 #define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 +#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -343,7 +345,10 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u8 reserved[27]; + struct { + __u8 pending; + } triple_fault; + __u8 reserved[26]; __u8 exception_has_payload; __u64 exception_payload; }; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index df6707a76a3d06424ac6102aa6dfbfa879136614..73dac304d949e320518528be7aaf8b09904556ef 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -89,6 +89,8 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_BUS_LOCK 74 +#define EXIT_REASON_NOTIFY 75 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -149,7 +151,9 @@ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" }, \ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ - { EXIT_REASON_TPAUSE, "TPAUSE" } + { EXIT_REASON_TPAUSE, "TPAUSE" }, \ + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ + { EXIT_REASON_NOTIFY, "NOTIFY" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e4cc9d901c51e317099daab876d71ea194997e20..5c5a2c6d5d4da4f6660d42d8b5ea41bd41727fd3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1150,22 +1150,23 @@ static void bus_lock_init(void) { u64 val; - /* - * Warn and fatal are handled by #AC for split lock if #AC for - * split lock is supported. - */ - if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) || - (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && - (sld_state == sld_warn || sld_state == sld_fatal)) || - sld_state == sld_off) + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) return; - /* - * Enable #DB for bus lock. All bus locks are handled in #DB except - * split locks are handled in #AC in the fatal case. - */ rdmsrl(MSR_IA32_DEBUGCTLMSR, val); - val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + + if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + (sld_state == sld_warn || sld_state == sld_fatal)) || + sld_state == sld_off) { + /* + * Warn and fatal are handled by #AC for split lock if #AC for + * split lock is supported. + */ + val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + } else { + val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + } + wrmsrl(MSR_IA32_DEBUGCTLMSR, val); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c957e591401c181ab44bf43c38e1950aaae6b444..4638672db180bb85c179bd1adaa8f6c9f8a4ea4e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -441,7 +441,7 @@ void kvm_set_cpu_caps(void) F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ | - F(SGX_LC) + F(SGX_LC) | F(BUS_LOCK_DETECT) ); /* Set LA57 based on hardware capability. */ if (cpuid_ecx(7) & F(LA57)) @@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void) F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | - F(SERIALIZE) | F(TSXLDTRK) + F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */ @@ -473,7 +473,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(AVX512_BF16) + F(AVX_VNNI) | F(AVX512_BF16) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 737035f16a9e65b666547ec021dd67ee51794d58..fb2878c58a00ad6356d10b9d2c1ed4eedc48ce5d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4276,7 +4276,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_dr(ctxt, 6, &dr6); dr6 &= ~DR_TRAP_BITS; - dr6 |= DR6_BD | DR6_RTM; + dr6 |= DR6_BD | DR6_ACTIVE_LOW; ctxt->ops->set_dr(ctxt, 6, dr6); return emulate_db(ctxt); } diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index e7feaa7910ab3609f3c8f36951ae4c2d641765ca..0c164ca686c78c60ac1e1147daa3ba99feebb489 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -405,7 +405,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.ds = vmcb12->save.ds; svm->vmcb->save.gdtr = vmcb12->save.gdtr; svm->vmcb->save.idtr = vmcb12->save.idtr; - kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags); + kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); /* * Force-set EFER_SVME even though it is checked earlier on the @@ -425,8 +425,8 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) svm->vmcb->save.rax = vmcb12->save.rax; svm->vmcb->save.rsp = vmcb12->save.rsp; svm->vmcb->save.rip = vmcb12->save.rip; - svm->vmcb->save.dr7 = vmcb12->save.dr7; - svm->vcpu.arch.dr6 = vmcb12->save.dr6; + svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1; + svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW; svm->vmcb->save.cpl = vmcb12->save.cpl; } @@ -699,14 +699,14 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.ds = hsave->save.ds; svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; - kvm_set_rflags(&svm->vcpu, hsave->save.rflags); + kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); kvm_rax_write(&svm->vcpu, hsave->save.rax); kvm_rsp_write(&svm->vcpu, hsave->save.rsp); kvm_rip_write(&svm->vcpu, hsave->save.rip); - svm->vmcb->save.dr7 = 0; + svm->vmcb->save.dr7 = DR7_FIXED_1; svm->vmcb->save.cpl = 0; svm->vmcb->control.exit_int_info = 0; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5231f40e8312d598aa9bf64606ea32f27888ecd7..e2677e38ea42799bbb7108175faf8231d5f6e1c1 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1788,7 +1788,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) get_debugreg(vcpu->arch.db[2], 2); get_debugreg(vcpu->arch.db[3], 3); /* - * We cannot reset svm->vmcb->save.dr6 to DR6_FIXED_1|DR6_RTM here, + * We cannot reset svm->vmcb->save.dr6 to DR6_ACTIVE_LOW here, * because db_interception might need it. We can do it before vmentry. */ vcpu->arch.dr6 = svm->vmcb->save.dr6; @@ -1836,7 +1836,7 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && !svm->nmi_singlestep) { - u32 payload = (svm->vmcb->save.dr6 ^ DR6_RTM) & ~DR6_FIXED_1; + u32 payload = svm->vmcb->save.dr6 ^ DR6_ACTIVE_LOW; kvm_queue_exception_p(&svm->vcpu, DB_VECTOR, payload); return 1; } @@ -3595,7 +3595,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) svm_set_dr6(svm, vcpu->arch.dr6); else - svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM); + svm_set_dr6(svm, DR6_ACTIVE_LOW); clgi(); kvm_load_guest_xsave_state(vcpu); diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 1fc2d141588907c2e710ecac2a9ca7d5dd3238f1..49271b8e687164c1b4496fbd867e9f097af7c079 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -273,6 +273,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void) SECONDARY_EXEC_TSC_SCALING; } +static inline bool cpu_has_vmx_bus_lock_detection(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_BUS_LOCK_DETECTION; +} + static inline bool cpu_has_vmx_apicv(void) { return cpu_has_vmx_apic_register_virt() && @@ -405,10 +411,19 @@ static inline u64 vmx_supported_debugctl(void) { u64 debugctl = 0; + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; + if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) debugctl |= DEBUGCTLMSR_LBR_MASK; return debugctl; } +static inline bool cpu_has_notify_vmexit(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_NOTIFY_VM_EXITING; +} + #endif /* __KVM_X86_VMX_CAPS_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 693ce29e8dce467b9678954de6d7704cac8350d0..6accb687d0627fb30b0b5ee61fceed7e40a24570 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -413,8 +413,8 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit if (nr == DB_VECTOR) { if (!has_payload) { payload = vcpu->arch.dr6; - payload &= ~(DR6_FIXED_1 | DR6_BT); - payload ^= DR6_RTM; + payload &= ~DR6_BT; + payload ^= DR6_ACTIVE_LOW; } *exit_qual = payload; } else @@ -2158,6 +2158,8 @@ static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) { + struct kvm *kvm = vmx->vcpu.kvm; + /* * If vmcs02 hasn't been initialized, set the constant vmcs02 state * according to L0's settings (vmcs12 is irrelevant here). Host @@ -2202,6 +2204,9 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) if (cpu_has_vmx_encls_vmexit()) vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); + if (kvm_notify_vmexit_enabled(kvm)) + vmcs_write32(NOTIFY_WINDOW, kvm->arch.notify_window); + /* * Set the MSR load/store lists to match L0's settings. Only the * addresses are constant (for vmcs02), the counts can change based @@ -5857,6 +5862,12 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, case EXIT_REASON_VMFUNC: /* VM functions are emulated through L2->L0 vmexits. */ return true; + case EXIT_REASON_BUS_LOCK: + /* + * At present, bus lock VM exit is never exposed to L1. + * Handle L2's bus locks in L0 directly. + */ + return true; default: break; } @@ -5982,6 +5993,9 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); case EXIT_REASON_ENCLS: return nested_vmx_exit_handled_encls(vcpu, vmcs12); + case EXIT_REASON_NOTIFY: + /* Notify VM exit is not exposed to L1 */ + return false; default: return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c115c57261c1efa5428e12c041623a19ad51b37a..131f2a1e3e3de63e569cb47ae336c6d6bae1b724 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2094,6 +2094,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) if (!intel_pmu_lbr_is_enabled(vcpu)) debugctl &= ~DEBUGCTLMSR_LBR_MASK; + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + return debugctl; } @@ -2643,7 +2646,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | - SECONDARY_EXEC_ENABLE_VMFUNC; + SECONDARY_EXEC_ENABLE_VMFUNC | + SECONDARY_EXEC_BUS_LOCK_DETECTION | + SECONDARY_EXEC_NOTIFY_VM_EXITING; if (cpu_has_sgx()) opt2 |= SECONDARY_EXEC_ENCLS_EXITING; if (adjust_vmx_controls(min2, opt2, @@ -4518,6 +4523,12 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) vmx_adjust_sec_exec_control(vmx, &exec_control, waitpkg, WAITPKG, ENABLE_USR_WAIT_PAUSE, false); + if (!vcpu->kvm->arch.bus_lock_detection_enabled) + exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION; + + if (!kvm_notify_vmexit_enabled(vcpu->kvm)) + exec_control &= ~SECONDARY_EXEC_NOTIFY_VM_EXITING; + vmx->secondary_exec_control = exec_control; } @@ -4614,6 +4625,9 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmx->ple_window_dirty = true; } + if (kvm_notify_vmexit_enabled(vmx->vcpu.kvm)) + vmcs_write32(NOTIFY_WINDOW, vmx->vcpu.kvm->arch.notify_window); + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ @@ -5156,7 +5170,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; } - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW; kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); fallthrough; case BP_VECTOR: @@ -5401,7 +5415,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) * guest debugging itself. */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { - vcpu->run->debug.arch.dr6 = DR6_BD | DR6_RTM | DR6_FIXED_1; + vcpu->run->debug.arch.dr6 = DR6_BD | DR6_ACTIVE_LOW; vcpu->run->debug.arch.dr7 = dr7; vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); vcpu->run->debug.arch.exception = DB_VECTOR; @@ -5943,6 +5957,43 @@ static int handle_encls(struct kvm_vcpu *vcpu) } #endif /* CONFIG_X86_SGX_KVM */ +static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu) +{ + /* + * Hardware may or may not set the BUS_LOCK_DETECTED flag on BUS_LOCK + * VM-Exits. Unconditionally set the flag here and leave the handling to + * vmx_handle_exit(). + */ + to_vmx(vcpu)->exit_reason.bus_lock_detected = true; + return 1; +} + +static int handle_notify(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qual = vmx_get_exit_qual(vcpu); + bool context_invalid = exit_qual & NOTIFY_VM_CONTEXT_INVALID; + + ++vcpu->stat.notify_window_exits; + + /* + * Notify VM exit happened while executing iret from NMI, + * "blocked by NMI" bit has to be set before next VM entry. + */ + if (enable_vnmi && (exit_qual & INTR_INFO_UNBLOCK_NMI)) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + + if (vcpu->kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_USER || + context_invalid) { + vcpu->run->exit_reason = KVM_EXIT_NOTIFY; + vcpu->run->notify.flags = context_invalid ? + KVM_NOTIFY_CONTEXT_INVALID : 0; + return 0; + } + + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -5999,6 +6050,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_VMFUNC] = handle_vmx_instruction, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, [EXIT_REASON_ENCLS] = handle_encls, + [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, + [EXIT_REASON_NOTIFY] = handle_notify, }; static const int kvm_vmx_max_exit_handlers = @@ -6257,7 +6310,7 @@ void dump_vmcs(void) * The guest has exited. See if we can fix it or if we need userspace * assistance. */ -static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) +static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_vmx *vmx = to_vmx(vcpu); union vmx_exit_reason exit_reason = vmx->exit_reason; @@ -6334,7 +6387,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) exit_reason.basic != EXIT_REASON_EPT_VIOLATION && exit_reason.basic != EXIT_REASON_PML_FULL && exit_reason.basic != EXIT_REASON_APIC_ACCESS && - exit_reason.basic != EXIT_REASON_TASK_SWITCH)) { + exit_reason.basic != EXIT_REASON_TASK_SWITCH && + exit_reason.basic != EXIT_REASON_NOTIFY)) { int ndata = 3; vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -6410,6 +6464,24 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 0; } +static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) +{ + int ret = __vmx_handle_exit(vcpu, exit_fastpath); + + /* + * Exit to user space when bus lock detected to inform that there is + * a bus lock in guest. + */ + if (to_vmx(vcpu)->exit_reason.bus_lock_detected) { + if (ret > 0) + vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; + + vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK; + return 0; + } + return ret; +} + /* * Software based L1D cache flush which is used when microcode providing * the cache control MSR is not loaded. @@ -8202,6 +8274,9 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } + kvm_has_bus_lock_exit = cpu_has_vmx_bus_lock_detection(); + kvm_has_notify_vmexit = cpu_has_notify_vmexit(); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ if (enable_ept) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index bcedbe5ff6098736f5abe848339df79e8efcf7f2..60be5371d297a5d20d108b0cb5fcbfcd88198dd9 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -84,7 +84,7 @@ union vmx_exit_reason { u32 reserved23 : 1; u32 reserved24 : 1; u32 reserved25 : 1; - u32 reserved26 : 1; + u32 bus_lock_detected : 1; u32 enclave_mode : 1; u32 smi_pending_mtf : 1; u32 smi_from_vmx_root : 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c7b4dfd2a2c43e0d4f8ee1da2ac13c4711e618ee..757891cd820c204f86f068159f5bc0cff00a97d5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -138,6 +138,10 @@ u64 __read_mostly kvm_max_tsc_scaling_ratio; EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); u64 __read_mostly kvm_default_tsc_scaling_ratio; EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); +bool __read_mostly kvm_has_bus_lock_exit; +EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit); +bool __read_mostly kvm_has_notify_vmexit; +EXPORT_SYMBOL_GPL(kvm_has_notify_vmexit); /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ static u32 __read_mostly tsc_tolerance_ppm = 250; @@ -235,6 +239,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), VCPU_STAT("preemption_reported", preemption_reported), VCPU_STAT("preemption_other", preemption_other), + VCPU_STAT("notify_window_exits", notify_window_exits), VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped), VM_STAT("mmu_pte_write", mmu_pte_write), VM_STAT("mmu_pde_zapped", mmu_pde_zapped), @@ -547,19 +552,24 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) */ vcpu->arch.dr6 &= ~DR_TRAP_BITS; /* - * DR6.RTM is set by all #DB exceptions that don't clear it. + * In order to reflect the #DB exception payload in guest + * dr6, three components need to be considered: active low + * bit, FIXED_1 bits and active high bits (e.g. DR6_BD, + * DR6_BS and DR6_BT) + * DR6_ACTIVE_LOW contains the FIXED_1 and active low bits. + * In the target guest dr6: + * FIXED_1 bits should always be set. + * Active low bits should be cleared if 1-setting in payload. + * Active high bits should be set if 1-setting in payload. + * + * Note, the payload is compatible with the pending debug + * exceptions/exit qualification under VMX, that active_low bits + * are active high in payload. + * So they need to be flipped for DR6. */ - vcpu->arch.dr6 |= DR6_RTM; + vcpu->arch.dr6 |= DR6_ACTIVE_LOW; vcpu->arch.dr6 |= payload; - /* - * Bit 16 should be set in the payload whenever the #DB - * exception should clear DR6.RTM. This makes the payload - * compatible with the pending debug exceptions under VMX. - * Though not currently documented in the SDM, this also - * makes the payload compatible with the exit qualification - * for #DB exceptions under VMX. - */ - vcpu->arch.dr6 ^= payload & DR6_RTM; + vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW; /* * The #DB payload is defined as compatible with the 'pending @@ -1171,6 +1181,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) fixed |= DR6_RTM; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + fixed |= DR6_BUS_LOCK; return fixed; } @@ -3842,6 +3855,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: + case KVM_CAP_X86_TRIPLE_FAULT_EVENT: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_LAST_CPU: case KVM_CAP_X86_USER_SPACE_MSR: @@ -3918,6 +3932,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_STEAL_TIME: r = sched_info_on(); break; + case KVM_CAP_X86_BUS_LOCK_EXIT: + if (kvm_has_bus_lock_exit) + r = KVM_BUS_LOCK_DETECTION_OFF | + KVM_BUS_LOCK_DETECTION_EXIT; + else + r = 0; + break; + case KVM_CAP_X86_NOTIFY_VMEXIT: + r = kvm_has_notify_vmexit; + break; default: break; } @@ -4406,6 +4430,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | KVM_VCPUEVENT_VALID_SMM); if (vcpu->kvm->arch.exception_payload_enabled) events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD; + if (vcpu->kvm->arch.triple_fault_event) { + events->triple_fault.pending = kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu); + events->flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; + } memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -4419,7 +4447,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW | KVM_VCPUEVENT_VALID_SMM - | KVM_VCPUEVENT_VALID_PAYLOAD)) + | KVM_VCPUEVENT_VALID_PAYLOAD + | KVM_VCPUEVENT_VALID_TRIPLE_FAULT)) return -EINVAL; if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) { @@ -4497,6 +4526,15 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, } } + if (events->flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) { + if (!vcpu->kvm->arch.triple_fault_event) + return -EINVAL; + if (events->triple_fault.pending) + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + else + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } + kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; @@ -5411,6 +5449,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.exception_payload_enabled = cap->args[0]; r = 0; break; + case KVM_CAP_X86_TRIPLE_FAULT_EVENT: + kvm->arch.triple_fault_event = cap->args[0]; + r = 0; + break; case KVM_CAP_X86_USER_SPACE_MSR: kvm->arch.user_space_msr_mask = cap->args[0]; r = 0; @@ -5446,6 +5488,36 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, } mutex_unlock(&kvm->lock); break; + case KVM_CAP_X86_BUS_LOCK_EXIT: + r = -EINVAL; + if (cap->args[0] & ~KVM_BUS_LOCK_DETECTION_VALID_MODE) + break; + + if ((cap->args[0] & KVM_BUS_LOCK_DETECTION_OFF) && + (cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)) + break; + + if (kvm_has_bus_lock_exit && + cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT) + kvm->arch.bus_lock_detection_enabled = true; + r = 0; + break; + case KVM_CAP_X86_NOTIFY_VMEXIT: + r = -EINVAL; + if ((u32)cap->args[0] & ~KVM_X86_NOTIFY_VMEXIT_VALID_BITS) + break; + if (!kvm_has_notify_vmexit) + break; + if (!((u32)cap->args[0] & KVM_X86_NOTIFY_VMEXIT_ENABLED)) + break; + mutex_lock(&kvm->lock); + if (!kvm->created_vcpus) { + kvm->arch.notify_window = cap->args[0] >> 32; + kvm->arch.notify_vmexit_flags = (u32)cap->args[0]; + r = 0; + } + mutex_unlock(&kvm->lock); + break; default: r = -EINVAL; break; @@ -7379,7 +7451,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.dr6 = DR6_BS | DR6_ACTIVE_LOW; kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu); kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; @@ -7423,7 +7495,7 @@ static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.eff_db); if (dr6 != 0) { - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW; kvm_run->debug.arch.pc = eip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; @@ -8403,12 +8475,15 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; - kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; + kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); kvm_run->ready_for_interrupt_injection = pic_in_kernel(vcpu->kvm) || kvm_vcpu_ready_for_interrupt_injection(vcpu); + + if (is_smm(vcpu)) + kvm_run->flags |= KVM_RUN_X86_SMM; } static void update_cr8_intercept(struct kvm_vcpu *vcpu) @@ -9537,6 +9612,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu_load(vcpu); kvm_sigset_activate(vcpu); + kvm_run->flags = 0; kvm_load_guest_fpu(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { @@ -10305,7 +10381,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); - vcpu->arch.dr6 = DR6_INIT; + vcpu->arch.dr6 = DR6_ACTIVE_LOW; vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 81ea01462175813daba80f3b3dbcd46c33f39852..20a1f446acdf728193eb89b80faf48a229843ced 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -340,6 +340,11 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm) DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); +static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm) +{ + return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED; +} + static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) { __this_cpu_write(current_vcpu, vcpu); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4e0ebd66368fbeb804aa74fd3d2a015b4c770f29..e1c3ef841eb8007b4f2196bd44b3bc8e9d86cf8d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -251,6 +251,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_RISCV_SBI 31 +#define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_NOTIFY 37 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -434,6 +436,11 @@ struct kvm_run { unsigned long args[6]; unsigned long ret[2]; } riscv_sbi; + /* KVM_EXIT_NOTIFY */ + struct { +#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) + __u32 flags; + } notify; /* Fix the size of the union. */ char padding[256]; }; @@ -1061,7 +1068,10 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 +#define KVM_CAP_X86_NOTIFY_VMEXIT 219 #define KVM_CAP_ARM_CPU_FEATURE 555 @@ -1737,4 +1747,10 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */ +#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1ULL << 0) +#define KVM_X86_NOTIFY_VMEXIT_USER (1ULL << 1) + #endif /* __LINUX_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba86b8bbb0e7a80cbed2d1373d093af..9a702e05fd7d6cab11f6acd8e363b11b1e2f9a16 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -111,6 +111,7 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index b8ff9e8ac0d516b183eaf2fc64bcd0ddbaf3b15c..14c177c4afd57c06ae4c1008e5b3bf08430577b3 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -88,6 +88,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_BUS_LOCK 74 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -148,7 +149,8 @@ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" }, \ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ - { EXIT_REASON_TPAUSE, "TPAUSE" } + { EXIT_REASON_TPAUSE, "TPAUSE" }, \ + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d4768897eda6e2ad13e0010ff52ef18896ec9372..17d87d8c7d6b1c18c4f350e1a18d602182dcdce0 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -26,6 +26,7 @@ /x86_64/vmx_tsc_adjust_test /x86_64/xss_msr_test /clear_dirty_log_test +/x86_64/triple_fault_event_test /demand_paging_test /dirty_log_test /dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 25618728dc4afde0536a5d678e022297f13cda3d..2756bf2d84b963067f1b97408fe3b3a25346aac8 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -60,6 +60,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test +TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c new file mode 100644 index 0000000000000000000000000000000000000000..66378140764d35924e8084b679d90ecfe1beb1f1 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +#define VCPU_ID 0 +#define ARBITRARY_IO_PORT 0x2000 + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + asm volatile("inb %%dx, %%al" + : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); +} + +void l1_guest_code(struct vmx_pages *vmx) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + prepare_vmcs(vmx, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmlaunch()); + /* L2 should triple fault after a triple fault event injected. */ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT); + GUEST_DONE(); +} + +int main(void) +{ + struct kvm_run *run; + struct kvm_vcpu_events events; + vm_vaddr_t vmx_pages_gva; + struct ucall uc; + + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X86_TRIPLE_FAULT_EVENT, + .args = {1} + }; + + if (!nested_vmx_supported()) { + print_skip("Nested VMX not supported"); + exit(KSFT_SKIP); + } + + if (!kvm_check_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)) { + print_skip("KVM_CAP_X86_TRIPLE_FAULT_EVENT not supported"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vm_enable_cap(vm, &cap); + + run = vcpu_state(vm, VCPU_ID); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Expected KVM_EXIT_IO, got: %u (%s)\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT, + "Expected IN from port %d from L2, got port %d", + ARBITRARY_IO_PORT, run->io.port); + vcpu_events_get(vm, VCPU_ID, &events); + events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; + events.triple_fault.pending = true; + vcpu_events_set(vm, VCPU_ID, &events); + run->immediate_exit = true; + vcpu_run_complete_io(vm, VCPU_ID); + + vcpu_events_get(vm, VCPU_ID, &events); + TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT, + "Triple fault event invalid"); + TEST_ASSERT(events.triple_fault.pending, + "No triple fault pending"); + vcpu_run(vm, VCPU_ID); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + TEST_FAIL("%s", (const char *)uc.args[0]); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + +}