From 1f4de308b440653be32c4f108685d423041c79c3 Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:12 +0800
Subject: [PATCH 01/12] KVM: x86: Consolidate flags for __linearize()

Upstream commit: 7b0dd9430cf0c1ae19645d2a6608a5fb57faffe4
Conflict: none

Consolidate @write and @fetch of __linearize() into a set of flags so that
additional flags can be added without needing more/new boolean parameters,
to precisely identify the access type.

No functional change intended.

Intel-SIG: commit 7b0dd9430cf0 KVM: x86: Consolidate flags for
__linearize()
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Acked-by: Kai Huang <kai.huang@intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-2-binbin.wu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/emulate.c     | 21 +++++++++++----------
 arch/x86/kvm/kvm_emulate.h |  4 ++++
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 21cb36828d3e..d5b4653bf3bd 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -687,8 +687,8 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 				       struct segmented_address addr,
 				       unsigned *max_size, unsigned size,
-				       bool write, bool fetch,
-				       enum x86emul_mode mode, ulong *linear)
+				       enum x86emul_mode mode, ulong *linear,
+				       unsigned int flags)
 {
 	struct desc_struct desc;
 	bool usable;
@@ -717,11 +717,11 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 		if (!usable)
 			goto bad;
 		/* code segment in protected mode or read-only data segment */
-		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
-					|| !(desc.type & 2)) && write)
+		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
+		    (flags & X86EMUL_F_WRITE))
 			goto bad;
 		/* unreadable code segment */
-		if (!fetch && (desc.type & 8) && !(desc.type & 2))
+		if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
 			goto bad;
 		lim = desc_limit_scaled(&desc);
 		if (!(desc.type & 8) && (desc.type & 4)) {
@@ -757,8 +757,8 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
 		     ulong *linear)
 {
 	unsigned max_size;
-	return __linearize(ctxt, addr, &max_size, size, write, false,
-			   ctxt->mode, linear);
+	return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
+			   write ? X86EMUL_F_WRITE : 0);
 }
 
 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
@@ -771,7 +771,8 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
 
 	if (ctxt->op_bytes != sizeof(unsigned long))
 		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
-	rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
+	rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
+			 X86EMUL_F_FETCH);
 	if (rc == X86EMUL_CONTINUE)
 		ctxt->_eip = addr.ea;
 	return rc;
@@ -907,8 +908,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 	 * boundary check itself.  Instead, we use max_size to check
 	 * against op_size.
 	 */
-	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
-			 &linear);
+	rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
+			 X86EMUL_F_FETCH);
 	if (unlikely(rc != X86EMUL_CONTINUE))
 		return rc;
 
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index a445c31555b6..d0c393d91abb 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -88,6 +88,10 @@ struct x86_instruction_info {
 #define X86EMUL_IO_NEEDED       5 /* IO is needed to complete emulation */
 #define X86EMUL_INTERCEPTED     6 /* Intercepted by nested VMCB/VMCS */
 
+/* x86-specific emulation flags */
+#define X86EMUL_F_WRITE			BIT(0)
+#define X86EMUL_F_FETCH			BIT(1)
+
 struct x86_emulate_ops {
 	void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
 	/*
-- 
Gitee


From aa5bd32854270ee3967844b749a4e1ea7f3e5966 Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:14 +0800
Subject: [PATCH 02/12] KVM: x86: Add an emulation flag for implicit system
 access

Upstream commit: 3963c52df42231f72277cd138994ac94f1183d2b
Conflict: none

Add an emulation flag X86EMUL_F_IMPLICIT to identify implicit system access
in instruction emulation.  Don't bother wiring up any usage at this point,
as Linear Address Space Separation (LASS) will be the first "real" consumer
of the flag and LASS support will require dedicated hooks, i.e. there
aren't any existing calls where passing X86EMUL_F_IMPLICIT is meaningful.

Add the IMPLICIT flag even though there's no imminent usage so that
Linear Address Masking (LAM) support can reference the flag to document
that addresses for implicit accesses aren't untagged.

Intel-SIG: commit 3963c52df422 KVM: x86: Add an emulation flag for
implicit system access
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-4-binbin.wu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/kvm_emulate.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index d0c393d91abb..614859224b5a 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -91,6 +91,7 @@ struct x86_instruction_info {
 /* x86-specific emulation flags */
 #define X86EMUL_F_WRITE			BIT(0)
 #define X86EMUL_F_FETCH			BIT(1)
+#define X86EMUL_F_IMPLICIT		BIT(2)
 
 struct x86_emulate_ops {
 	void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
-- 
Gitee


From b19985150dc939e097613642d67214abcdeeddf2 Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:15 +0800
Subject: [PATCH 03/12] KVM: x86: Add X86EMUL_F_INVLPG and pass it in
 em_invlpg()

Upstream commit: 538ac9a92d669c4ccfc64739a32efab2793cea1d
Conflict: none

Add an emulation flag X86EMUL_F_INVLPG, which is used to identify an
instruction that does TLB invalidation without true memory access.

Only invlpg & invlpga implemented in emulator belong to this kind.
invlpga doesn't need additional information for emulation. Just pass
the flag to em_invlpg().

Linear Address Masking (LAM) and Linear Address Space Separation (LASS)
don't apply to addresses that are inputs to TLB invalidation. The flag
will be consumed to support LAM/LASS virtualization.

Intel-SIG: commit 538ac9a92d66 KVM: x86: Add X86EMUL_F_INVLPG and pass
it in em_invlpg()
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-5-binbin.wu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/emulate.c     | 4 +++-
 arch/x86/kvm/kvm_emulate.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d5b4653bf3bd..85bdb2f35017 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3440,8 +3440,10 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 	ulong linear;
+	unsigned int max_size;
 
-	rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
+	rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
+			 &linear, X86EMUL_F_INVLPG);
 	if (rc == X86EMUL_CONTINUE)
 		ctxt->ops->invlpg(ctxt, linear);
 	/* Disable writeback. */
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 614859224b5a..7c02a09b1b17 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -92,6 +92,7 @@ struct x86_instruction_info {
 #define X86EMUL_F_WRITE			BIT(0)
 #define X86EMUL_F_FETCH			BIT(1)
 #define X86EMUL_F_IMPLICIT		BIT(2)
+#define X86EMUL_F_INVLPG		BIT(3)
 
 struct x86_emulate_ops {
 	void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
-- 
Gitee


From bf50c92b5ccea7cd507b9ae134cd99dedd635f38 Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:16 +0800
Subject: [PATCH 04/12] KVM: x86/mmu: Drop non-PA bits when getting GFN for
 guest's PGD

Upstream commit: a130066f74008858ac425b7497d231742474a0ea
Conflict: none

Drop non-PA bits when getting GFN for guest's PGD with the maximum theoretical
mask for guest MAXPHYADDR.

Do it unconditionally because it's harmless for 32-bit guests, querying 64-bit
mode would be more expensive, and for EPT the mask isn't tied to guest mode.
Using PT_BASE_ADDR_MASK would be technically wrong (PAE paging has 64-bit
elements _except_ for CR3, which has only 32 valid bits), it wouldn't matter
in practice though.

Opportunistically use GENMASK_ULL() to define __PT_BASE_ADDR_MASK.

Intel-SIG: commit a130066f7400 KVM: x86/mmu: Drop non-PA bits when
getting GFN for guest's PGD
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-6-binbin.wu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/mmu/mmu.c          | 2 +-
 arch/x86/kvm/mmu/mmu_internal.h | 1 +
 arch/x86/kvm/mmu/paging_tmpl.h  | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 294775b7383b..aea40748b0d7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3774,7 +3774,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	hpa_t root;
 
 	root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
-	root_gfn = root_pgd >> PAGE_SHIFT;
+	root_gfn = (root_pgd & __PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
 	if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
 		mmu->root.hpa = kvm_mmu_get_dummy_root();
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index decc1f153669..68f8564d85a9 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -13,6 +13,7 @@
 #endif
 
 /* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
+#define __PT_BASE_ADDR_MASK GENMASK_ULL(51, 12)
 #define __PT_LEVEL_SHIFT(level, bits_per_level)	\
 	(PAGE_SHIFT + ((level) - 1) * (bits_per_level))
 #define __PT_INDEX(address, level, bits_per_level) \
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index c85255073f67..4d4e98fe4f35 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -62,7 +62,7 @@
 #endif
 
 /* Common logic, but per-type values.  These also need to be undefined. */
-#define PT_BASE_ADDR_MASK	((pt_element_t)(((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)))
+#define PT_BASE_ADDR_MASK	((pt_element_t)__PT_BASE_ADDR_MASK)
 #define PT_LVL_ADDR_MASK(lvl)	__PT_LVL_ADDR_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS)
 #define PT_LVL_OFFSET_MASK(lvl)	__PT_LVL_OFFSET_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS)
 #define PT_INDEX(addr, lvl)	__PT_INDEX(addr, lvl, PT_LEVEL_BITS)
-- 
Gitee


From 42616ebfad3e9e4cbe8f9ef487968c9de0b71836 Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:17 +0800
Subject: [PATCH 05/12] KVM: x86: Add & use kvm_vcpu_is_legal_cr3() to check
 CR3's legality

Upstream commit: 2c49db455ee27c72a680c9e4fad1c12433902ee3
Conflict: none

Add and use kvm_vcpu_is_legal_cr3() to check CR3's legality to provide
a clear distinction between CR3 and GPA checks.  This will allow exempting
bits from kvm_vcpu_is_legal_cr3() without affecting general GPA checks,
e.g. for upcoming features that will use high bits in CR3 for feature
enabling.

No functional change intended.

Intel-SIG: commit 2c49db455ee2 KVM: x86: Add & use
kvm_vcpu_is_legal_cr3() to check CR3's legality
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-7-binbin.wu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/cpuid.h      | 5 +++++
 arch/x86/kvm/svm/nested.c | 4 ++--
 arch/x86/kvm/vmx/nested.c | 4 ++--
 arch/x86/kvm/x86.c        | 4 ++--
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 57ee789ada14..7d7939af8f4f 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -288,4 +288,9 @@ static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
 			vcpu->arch.governed_features.enabled);
 }
 
+static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+	return kvm_vcpu_is_legal_gpa(vcpu, cr3);
+}
+
 #endif
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 60891b9ce25f..b780bd468d51 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -296,7 +296,7 @@ static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
 	if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
 		if (CC(!(save->cr4 & X86_CR4_PAE)) ||
 		    CC(!(save->cr0 & X86_CR0_PE)) ||
-		    CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
+		    CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3)))
 			return false;
 	}
 
@@ -505,7 +505,7 @@ static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
 static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 			       bool nested_npt, bool reload_pdptrs)
 {
-	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
+	if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3)))
 		return -EINVAL;
 
 	if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 478634c8e783..b0a0216bee03 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1086,7 +1086,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 			       bool nested_ept, bool reload_pdptrs,
 			       enum vm_entry_failure_code *entry_failure_code)
 {
-	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
+	if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3))) {
 		*entry_failure_code = ENTRY_FAIL_DEFAULT;
 		return -EINVAL;
 	}
@@ -2915,7 +2915,7 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 
 	if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
 	    CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
-	    CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
+	    CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3)))
 		return -EINVAL;
 
 	if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 71cd470c2ee9..1247c849544b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1288,7 +1288,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	 * stuff CR3, e.g. for RSM emulation, and there is no guarantee that
 	 * the current vCPU mode is accurate.
 	 */
-	if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
+	if (!kvm_vcpu_is_legal_cr3(vcpu, cr3))
 		return 1;
 
 	if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, cr3))
@@ -11665,7 +11665,7 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 		 */
 		if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
 			return false;
-		if (kvm_vcpu_is_illegal_gpa(vcpu, sregs->cr3))
+		if (!kvm_vcpu_is_legal_cr3(vcpu, sregs->cr3))
 			return false;
 	} else {
 		/*
-- 
Gitee


From f0e5e7a0fd480dcb7e42cdbd42679c037777ee8d Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:18 +0800
Subject: [PATCH 06/12] KVM: x86: Remove kvm_vcpu_is_illegal_gpa()

Upstream commit: 9c8021d4ae85f1531230fc33653e06e9f1fdb7f1
Conflict: none

Remove kvm_vcpu_is_illegal_gpa() and use !kvm_vcpu_is_legal_gpa() instead.
The "illegal" helper actually predates the "legal" helper, the only reason
the "illegal" variant wasn't removed by commit 4bda0e97868a ("KVM: x86:
Add a helper to check for a legal GPA") was to avoid code churn.  Now that
CR3 has a dedicated helper, there are fewer callers, and so the code churn
isn't that much of a deterrent.

No functional change intended.

Intel-SIG: commit 9c8021d4ae85 KVM: x86: Remove
kvm_vcpu_is_illegal_gpa()
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-8-binbin.wu@linux.intel.com
[sean: provide a bit of history in the changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/cpuid.h      | 5 -----
 arch/x86/kvm/vmx/nested.c | 2 +-
 arch/x86/kvm/vmx/vmx.c    | 2 +-
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 7d7939af8f4f..28372fe1f4e6 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -47,11 +47,6 @@ static inline bool kvm_vcpu_is_legal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
 	return !(gpa & vcpu->arch.reserved_gpa_bits);
 }
 
-static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
-	return !kvm_vcpu_is_legal_gpa(vcpu, gpa);
-}
-
 static inline bool kvm_vcpu_is_legal_aligned_gpa(struct kvm_vcpu *vcpu,
 						 gpa_t gpa, gpa_t alignment)
 {
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b0a0216bee03..59e259ff7454 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2720,7 +2720,7 @@ static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
 	}
 
 	/* Reserved bits should not be set */
-	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
+	if (CC(!kvm_vcpu_is_legal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
 		return false;
 
 	/* AD, if set, should be supported */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d369194939c7..b3ab0f29501f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5851,7 +5851,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	 * would also use advanced VM-exit information for EPT violations to
 	 * reconstruct the page fault error code.
 	 */
-	if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
+	if (unlikely(allow_smaller_maxphyaddr && !kvm_vcpu_is_legal_gpa(vcpu, gpa)))
 		return kvm_emulate_instruction(vcpu, 0);
 
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
-- 
Gitee


From 890ed1c18f273521fa02551b1322f6ae7a41a5c0 Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:19 +0800
Subject: [PATCH 07/12] KVM: x86: Introduce get_untagged_addr() in kvm_x86_ops
 and call it in emulator

Upstream commit: 37a41847b770c722e98ace72f3851fb49b360c08
Conflict: none

Introduce a new interface get_untagged_addr() to kvm_x86_ops to untag
the metadata from linear address.  Call the interface in linearization
of instruction emulator for 64-bit mode.

When enabled feature like Intel Linear Address Masking (LAM) or AMD Upper
Address Ignore (UAI), linear addresses may be tagged with metadata that
needs to be dropped prior to canonicality checks, i.e. the metadata is
ignored.

Introduce get_untagged_addr() to kvm_x86_ops to hide the vendor specific
code, as sadly LAM and UAI have different semantics.  Pass the emulator
flags to allow vendor specific implementation to precisely identify the
access type (LAM doesn't untag certain accesses).

Intel-SIG: commit 37a41847b770 KVM: x86: Introduce get_untagged_addr()
in kvm_x86_ops and call it in emulator
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-9-binbin.wu@linux.intel.com
[sean: massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/kvm/emulate.c             |  2 +-
 arch/x86/kvm/kvm_emulate.h         |  3 +++
 arch/x86/kvm/x86.c                 | 10 ++++++++++
 5 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 2650ce2346ab..c122d155a98c 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -134,6 +134,7 @@ KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP(complete_emulated_msr)
 KVM_X86_OP(vcpu_deliver_sipi_vector)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
+KVM_X86_OP_OPTIONAL(get_untagged_addr)
 KVM_X86_OP_OPTIONAL(vm_attestation)
 KVM_X86_OP_OPTIONAL(control_pre_system_reset)
 KVM_X86_OP_OPTIONAL(control_post_system_reset)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 422878401f51..8e660ca11b71 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1768,6 +1768,8 @@ struct kvm_x86_ops {
 	 */
 	unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
 
+	gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
+
 	/*
 	 * Interfaces for HYGON CSV guest
 	 */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 85bdb2f35017..695ab5b6055c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -701,7 +701,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 	*max_size = 0;
 	switch (mode) {
 	case X86EMUL_MODE_PROT64:
-		*linear = la;
+		*linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
 		va_bits = ctxt_virt_addr_bits(ctxt);
 		if (!__is_canonical_address(la, va_bits))
 			goto bad;
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 7c02a09b1b17..4351149484fb 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -230,6 +230,9 @@ struct x86_emulate_ops {
 	int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
 	void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
 	int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
+
+	gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
+				   unsigned int flags);
 };
 
 /* Type, address-of, and value of an instruction's operand. */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1247c849544b..afb8ff4bdb78 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8385,6 +8385,15 @@ static void emulator_vm_bugged(struct x86_emulate_ctxt *ctxt)
 		kvm_vm_bugged(kvm);
 }
 
+static gva_t emulator_get_untagged_addr(struct x86_emulate_ctxt *ctxt,
+					gva_t addr, unsigned int flags)
+{
+	if (!kvm_x86_ops.get_untagged_addr)
+		return addr;
+
+	return static_call(kvm_x86_get_untagged_addr)(emul_to_vcpu(ctxt), addr, flags);
+}
+
 static const struct x86_emulate_ops emulate_ops = {
 	.vm_bugged           = emulator_vm_bugged,
 	.read_gpr            = emulator_read_gpr,
@@ -8429,6 +8438,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.leave_smm           = emulator_leave_smm,
 	.triple_fault        = emulator_triple_fault,
 	.set_xcr             = emulator_set_xcr,
+	.get_untagged_addr   = emulator_get_untagged_addr,
 };
 
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
-- 
Gitee


From e1de46735fad7a4292088c8c2dd40925518ba8b8 Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:20 +0800
Subject: [PATCH 08/12] KVM: x86: Untag addresses for LAM emulation where
 applicable

Upstream commit: b39bd520a60c667a339e315ce7a3de2f7178f6e3
Conflict: none

Stub in vmx_get_untagged_addr() and wire up calls from the emulator (via
get_untagged_addr()) and "direct" calls from various VM-Exit handlers in
VMX where LAM untagging is supposed to be applied.  Defer implementing
the guts of vmx_get_untagged_addr() to future patches purely to make the
changes easier to consume.

LAM is active only for 64-bit linear addresses and several types of
accesses are exempted.

- Cases need to untag address (handled in get_vmx_mem_address())
  Operand(s) of VMX instructions and INVPCID.
  Operand(s) of SGX ENCLS.

- Cases LAM doesn't apply to (no change needed)
  Operand of INVLPG.
  Linear address in INVPCID descriptor.
  Linear address in INVVPID descriptor.
  BASEADDR specified in SECS of ECREATE.

Note:
  - LAM doesn't apply to write to control registers or MSRs
  - LAM masking is applied before walking page tables, i.e. the faulting
    linear address in CR2 doesn't contain the metadata.
  - The guest linear address saved in VMCS doesn't contain metadata.

Intel-SIG: commit b39bd520a60c KVM: x86: Untag addresses for LAM
emulation where applicable
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-10-binbin.wu@linux.intel.com
[sean: massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/vmx/nested.c | 5 +++++
 arch/x86/kvm/vmx/sgx.c    | 1 +
 arch/x86/kvm/vmx/vmx.c    | 7 +++++++
 arch/x86/kvm/vmx/vmx.h    | 2 ++
 arch/x86/kvm/x86.c        | 4 ++++
 5 files changed, 19 insertions(+)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 59e259ff7454..b3bc615dbaa4 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5048,6 +5048,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
 		else
 			*ret = off;
 
+		*ret = vmx_get_untagged_addr(vcpu, *ret, 0);
 		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
 		 * non-canonical form. This is the only check on the memory
 		 * destination for long mode!
@@ -5865,6 +5866,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	vpid02 = nested_get_vpid02(vcpu);
 	switch (type) {
 	case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+		/*
+		 * LAM doesn't apply to addresses that are inputs to TLB
+		 * invalidation.
+		 */
 		if (!operand.vpid ||
 		    is_noncanonical_address(operand.gla, vcpu))
 			return nested_vmx_fail(vcpu,
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 3e822e582497..6fef01e0536e 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -37,6 +37,7 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
 	if (!IS_ALIGNED(*gva, alignment)) {
 		fault = true;
 	} else if (likely(is_64_bit_mode(vcpu))) {
+		*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
 		fault = is_noncanonical_address(*gva, vcpu);
 	} else {
 		*gva &= 0xffffffff;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b3ab0f29501f..0171e7eb65a5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8354,6 +8354,11 @@ static void vmx_vm_destroy(struct kvm *kvm)
 	free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
 }
 
+gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags)
+{
+	return gva;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.name = KBUILD_MODNAME,
 
@@ -8492,6 +8497,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.complete_emulated_msr = kvm_complete_insn_gp,
 
 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
+
+	.get_untagged_addr = vmx_get_untagged_addr,
 };
 
 static unsigned int vmx_handle_intel_pt_intr(void)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 77adcf0bfddd..45e4ffaa144f 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -421,6 +421,8 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
 u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
 u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
 
+gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
+
 static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
 					     int type, bool value)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index afb8ff4bdb78..4909b33c55dc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13523,6 +13523,10 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 
 	switch (type) {
 	case INVPCID_TYPE_INDIV_ADDR:
+		/*
+		 * LAM doesn't apply to addresses that are inputs to TLB
+		 * invalidation.
+		 */
 		if ((!pcid_enabled && (operand.pcid != 0)) ||
 		    is_noncanonical_address(operand.gla, vcpu)) {
 			kvm_inject_gp(vcpu, 0);
-- 
Gitee


From f3391c214b04c2187f8ad8d135a5db00e0b619a0 Mon Sep 17 00:00:00 2001
From: Robert Hoo <robert.hu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:21 +0800
Subject: [PATCH 09/12] KVM: x86: Virtualize LAM for supervisor pointer

Upstream commit: 93d1c9f498a7505e0e0a0198f3b3d7f97fcc5fa6
Conflict: none

Add support to allow guests to set the new CR4 control bit for LAM and add
implementation to get untagged address for supervisor pointers.

LAM modifies the canonicality check applied to 64-bit linear addresses for
data accesses, allowing software to use of the untranslated address bits for
metadata and masks the metadata bits before using them as linear addresses
to access memory. LAM uses CR4.LAM_SUP (bit 28) to configure and enable LAM
for supervisor pointers. It also changes VMENTER to allow the bit to be set
in VMCS's HOST_CR4 and GUEST_CR4 to support virtualization. Note CR4.LAM_SUP
is allowed to be set even not in 64-bit mode, but it will not take effect
since LAM only applies to 64-bit linear addresses.

Move CR4.LAM_SUP out of CR4_RESERVED_BITS, its reservation depends on vcpu
supporting LAM or not. Leave it intercepted to prevent guest from setting
the bit if LAM is not exposed to guest as well as to avoid vmread every time
when KVM fetches its value, with the expectation that guest won't toggle the
bit frequently.

Set CR4.LAM_SUP bit in the emulated IA32_VMX_CR4_FIXED1 MSR for guests to
allow guests to enable LAM for supervisor pointers in nested VMX operation.

Hardware is not required to do TLB flush when CR4.LAM_SUP toggled, KVM
doesn't need to emulate TLB flush based on it.  There's no other features
or vmx_exec_controls connection, and no other code needed in
{kvm,vmx}_set_cr4().

Skip address untag for instruction fetches (which includes branch targets),
operand of INVLPG instructions, and implicit system accesses, all of which
are not subject to untagging.  Note, get_untagged_addr() isn't invoked for
implicit system accesses as there is no reason to do so, but check the
flag anyways for documentation purposes.

Intel-SIG: commit 93d1c9f498a7 KVM: x86: Virtualize LAM for supervisor
pointer
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
Co-developed-by: Binbin Wu <binbin.wu@linux.intel.com>
Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-11-binbin.wu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/vmx/vmx.c          | 39 ++++++++++++++++++++++++++++++++-
 arch/x86/kvm/x86.h              |  2 ++
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8e660ca11b71..8191115ba11a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -125,7 +125,8 @@
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
-			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+			  | X86_CR4_LAM_SUP))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0171e7eb65a5..f9d650bca508 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7791,6 +7791,9 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
 	cr4_fixed1_update(X86_CR4_UMIP,       ecx, feature_bit(UMIP));
 	cr4_fixed1_update(X86_CR4_LA57,       ecx, feature_bit(LA57));
 
+	entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1);
+	cr4_fixed1_update(X86_CR4_LAM_SUP,    eax, feature_bit(LAM));
+
 #undef cr4_fixed1_update
 }
 
@@ -8354,9 +8357,43 @@ static void vmx_vm_destroy(struct kvm *kvm)
 	free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
 }
 
+/*
+ * Note, the SDM states that the linear address is masked *after* the modified
+ * canonicality check, whereas KVM masks (untags) the address and then performs
+ * a "normal" canonicality check.  Functionally, the two methods are identical,
+ * and when the masking occurs relative to the canonicality check isn't visible
+ * to software, i.e. KVM's behavior doesn't violate the SDM.
+ */
 gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags)
 {
-	return gva;
+	int lam_bit;
+
+	if (flags & (X86EMUL_F_FETCH | X86EMUL_F_IMPLICIT | X86EMUL_F_INVLPG))
+		return gva;
+
+	if (!is_64_bit_mode(vcpu))
+		return gva;
+
+	/*
+	 * Bit 63 determines if the address should be treated as user address
+	 * or a supervisor address.
+	 */
+	if (!(gva & BIT_ULL(63))) {
+		/* KVM doesn't yet virtualize LAM_U{48,57}. */
+		return gva;
+	} else {
+		if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_LAM_SUP))
+			return gva;
+
+		lam_bit = kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 56 : 47;
+	}
+
+	/*
+	 * Untag the address by sign-extending the lam_bit, but NOT to bit 63.
+	 * Bit 63 is retained from the raw virtual address so that untagging
+	 * doesn't change a user access to a supervisor access, and vice versa.
+	 */
+	return (sign_extend64(gva, lam_bit) & ~BIT_ULL(63)) | (gva & BIT_ULL(63));
 }
 
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e3b81352b8cd..4ae84fb6c8bd 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -524,6 +524,8 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
 		__reserved_bits |= X86_CR4_VMXE;        \
 	if (!__cpu_has(__c, X86_FEATURE_PCID))          \
 		__reserved_bits |= X86_CR4_PCIDE;       \
+	if (!__cpu_has(__c, X86_FEATURE_LAM))           \
+		__reserved_bits |= X86_CR4_LAM_SUP;     \
 	__reserved_bits;                                \
 })
 
-- 
Gitee


From d3361ddcf38e465eb736b584a06dbe940171676f Mon Sep 17 00:00:00 2001
From: Robert Hoo <robert.hu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:22 +0800
Subject: [PATCH 10/12] KVM: x86: Virtualize LAM for user pointer

Upstream commit: 3098e6eca88e543ea0d190d1fa72b1c047bb3e7d
Conflict: none

Add support to allow guests to set the new CR3 control bits for Linear
Address Masking (LAM) and add implementation to get untagged address for
user pointers.

LAM modifies the canonical check for 64-bit linear addresses, allowing
software to use the masked/ignored address bits for metadata.  Hardware
masks off the metadata bits before using the linear addresses to access
memory.  LAM uses two new CR3 non-address bits, LAM_U48 (bit 62) and
LAM_U57 (bit 61), to configure LAM for user pointers. LAM also changes
VMENTER to allow both bits to be set in VMCS's HOST_CR3 and GUEST_CR3 for
virtualization.

When EPT is on, CR3 is not trapped by KVM and it's up to the guest to set
any of the two LAM control bits. However, when EPT is off, the actual CR3
used by the guest is generated from the shadow MMU root which is different
from the CR3 that is *set* by the guest, and KVM needs to manually apply
any active control bits to VMCS's GUEST_CR3 based on the cached CR3 *seen*
by the guest.

KVM manually checks guest's CR3 to make sure it points to a valid guest
physical address (i.e. to support smaller MAXPHYSADDR in the guest). Extend
this check to allow the two LAM control bits to be set. After check, LAM
bits of guest CR3 will be stripped off to extract guest physical address.

In case of nested, for a guest which supports LAM, both VMCS12's HOST_CR3
and GUEST_CR3 are allowed to have the new LAM control bits set, i.e. when
L0 enters L1 to emulate a VMEXIT from L2 to L1 or when L0 enters L2
directly. KVM also manually checks VMCS12's HOST_CR3 and GUEST_CR3 being
valid physical address. Extend such check to allow the new LAM control bits
too.

Note, LAM doesn't have a global control bit to turn on/off LAM completely,
but purely depends on hardware's CPUID to determine it can be enabled or
not. That means, when EPT is on, even when KVM doesn't expose LAM to guest,
the guest can still set LAM control bits in CR3 w/o causing problem. This
is an unfortunate virtualization hole. KVM could choose to intercept CR3 in
this case and inject fault but this would hurt performance when running a
normal VM w/o LAM support.  This is undesirable. Just choose to let the
guest do such illegal thing as the worst case is guest being killed when
KVM eventually find out such illegal behaviour and that the guest is
misbehaving.

Intel-SIG: commit 3098e6eca88e KVM: x86: Virtualize LAM for user pointer
Backport KVM Linear Address Masking (LAM) support.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
Co-developed-by: Binbin Wu <binbin.wu@linux.intel.com>
Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-12-binbin.wu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/cpuid.h   |  4 ++++
 arch/x86/kvm/mmu.h     |  9 +++++++++
 arch/x86/kvm/vmx/vmx.c | 12 +++++++++---
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 28372fe1f4e6..bf4fc4d6aed3 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -285,6 +285,10 @@ static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
 
 static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+	if (kvm_cpu_cap_has(X86_FEATURE_LAM) &&
+	    guest_cpuid_has(vcpu, X86_FEATURE_LAM))
+		cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
+
 	return kvm_vcpu_is_legal_gpa(vcpu, cr3);
 }
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 253fb2093d5d..e700f1f854ae 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -146,6 +146,15 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
 	return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
 }
 
+static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_cpu_cap_has(X86_FEATURE_LAM) ||
+	    !guest_cpuid_has(vcpu, X86_FEATURE_LAM))
+		return 0;
+
+	return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
+}
+
 static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
 {
 	u64 root_hpa = vcpu->arch.mmu->root.hpa;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f9d650bca508..7c195a25fc90 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3430,7 +3430,8 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 			update_guest_cr3 = false;
 		vmx_ept_load_pdptrs(vcpu);
 	} else {
-		guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu);
+		guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu) |
+			    kvm_get_active_cr3_lam_bits(vcpu);
 	}
 
 	if (update_guest_cr3)
@@ -8367,6 +8368,7 @@ static void vmx_vm_destroy(struct kvm *kvm)
 gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags)
 {
 	int lam_bit;
+	unsigned long cr3_bits;
 
 	if (flags & (X86EMUL_F_FETCH | X86EMUL_F_IMPLICIT | X86EMUL_F_INVLPG))
 		return gva;
@@ -8379,8 +8381,12 @@ gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags
 	 * or a supervisor address.
 	 */
 	if (!(gva & BIT_ULL(63))) {
-		/* KVM doesn't yet virtualize LAM_U{48,57}. */
-		return gva;
+		cr3_bits = kvm_get_active_cr3_lam_bits(vcpu);
+		if (!(cr3_bits & (X86_CR3_LAM_U57 | X86_CR3_LAM_U48)))
+			return gva;
+
+		/* LAM_U48 is ignored if LAM_U57 is set. */
+		lam_bit = cr3_bits & X86_CR3_LAM_U57 ? 56 : 47;
 	} else {
 		if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_LAM_SUP))
 			return gva;
-- 
Gitee


From fc5e0c552e10b776c65b5d67e0fce02569855f68 Mon Sep 17 00:00:00 2001
From: Robert Hoo <robert.hu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:23 +0800
Subject: [PATCH 11/12] KVM: x86: Advertise and enable LAM (user and
 supervisor)

Upstream commit: 703d794cb8cb28c07b22c1c845f5c4d4c419aff7
Conflict: none

LAM is enumerated by CPUID.7.1:EAX.LAM[bit 26]. Advertise the feature to
userspace and enable it as the final step after the LAM virtualization
support for supervisor and user pointers.

SGX LAM support is not advertised yet. SGX LAM support is enumerated in
SGX's own CPUID and there's no hard requirement that it must be supported
when LAM is reported in CPUID leaf 0x7.

Intel-SIG: commit 703d794cb8cb KVM: x86: Advertise and enable LAM (user
and supervisor)
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-13-binbin.wu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 10b8fa295d24..d9ed84e4dd6f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -680,7 +680,7 @@ void kvm_set_cpu_caps(void)
 	kvm_cpu_cap_mask(CPUID_7_1_EAX,
 		F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
 		F(FZRM) | F(FSRS) | F(FSRC) |
-		F(AMX_FP16) | F(AVX_IFMA)
+		F(AMX_FP16) | F(AVX_IFMA) | F(LAM)
 	);
 
 	kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
-- 
Gitee


From d87a5434276b28efbe2e919943dbda39f2e6c3f1 Mon Sep 17 00:00:00 2001
From: Binbin Wu <binbin.wu@linux.intel.com>
Date: Wed, 13 Sep 2023 20:42:24 +0800
Subject: [PATCH 12/12] KVM: x86: Use KVM-governed feature framework to track
 "LAM enabled"

Upstream commit: 183bdd161c2b773a62f01d1c030f5a3a5b7c33b5
Conflict: none

Use the governed feature framework to track if Linear Address Masking (LAM)
is "enabled", i.e. if LAM can be used by the guest.

Using the framework to avoid the relative expensive call guest_cpuid_has()
during cr3 and vmexit handling paths for LAM.

No functional change intended.

Intel-SIG: commit 183bdd161c2b KVM: x86: Use KVM-governed feature
framework to track "LAM enabled"
Backport KVM Linear Address Masking (LAM) support.

Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Link: https://lore.kernel.org/r/20230913124227.12574-14-binbin.wu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
[ Zhiquan Li: amend commit log ]
Signed-off-by: Zhiquan Li <zhiquan1.li@intel.com>
---
 arch/x86/kvm/cpuid.h             | 3 +--
 arch/x86/kvm/governed_features.h | 1 +
 arch/x86/kvm/mmu.h               | 3 +--
 arch/x86/kvm/vmx/vmx.c           | 1 +
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index bf4fc4d6aed3..831884e72210 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -285,8 +285,7 @@ static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
 
 static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
-	if (kvm_cpu_cap_has(X86_FEATURE_LAM) &&
-	    guest_cpuid_has(vcpu, X86_FEATURE_LAM))
+	if (guest_can_use(vcpu, X86_FEATURE_LAM))
 		cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
 
 	return kvm_vcpu_is_legal_gpa(vcpu, cr3);
diff --git a/arch/x86/kvm/governed_features.h b/arch/x86/kvm/governed_features.h
index 423a73395c10..ad463b1ed4e4 100644
--- a/arch/x86/kvm/governed_features.h
+++ b/arch/x86/kvm/governed_features.h
@@ -16,6 +16,7 @@ KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
 KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
 KVM_GOVERNED_X86_FEATURE(VGIF)
 KVM_GOVERNED_X86_FEATURE(VNMI)
+KVM_GOVERNED_X86_FEATURE(LAM)
 
 #undef KVM_GOVERNED_X86_FEATURE
 #undef KVM_GOVERNED_FEATURE
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e700f1f854ae..f04cc5ade1cd 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -148,8 +148,7 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu)
 {
-	if (!kvm_cpu_cap_has(X86_FEATURE_LAM) ||
-	    !guest_cpuid_has(vcpu, X86_FEATURE_LAM))
+	if (!guest_can_use(vcpu, X86_FEATURE_LAM))
 		return 0;
 
 	return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7c195a25fc90..e6bdfcc0c66a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7881,6 +7881,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
 
 	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
+	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LAM);
 
 	vmx_setup_uret_msrs(vmx);
 
-- 
Gitee